{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 25.0, "eval_steps": 500, "global_step": 26000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009615384615384616, "grad_norm": 29.99985694885254, "learning_rate": 0.0, "loss": 1.0741, "step": 1 }, { "epoch": 0.0019230769230769232, "grad_norm": 30.104595184326172, "learning_rate": 2.5641025641025643e-08, "loss": 1.2278, "step": 2 }, { "epoch": 0.0028846153846153848, "grad_norm": 28.94898796081543, "learning_rate": 5.1282051282051286e-08, "loss": 1.356, "step": 3 }, { "epoch": 0.0038461538461538464, "grad_norm": 34.369197845458984, "learning_rate": 7.692307692307694e-08, "loss": 1.3588, "step": 4 }, { "epoch": 0.004807692307692308, "grad_norm": 30.254777908325195, "learning_rate": 1.0256410256410257e-07, "loss": 1.7658, "step": 5 }, { "epoch": 0.0057692307692307696, "grad_norm": 37.19833755493164, "learning_rate": 1.282051282051282e-07, "loss": 1.2134, "step": 6 }, { "epoch": 0.006730769230769231, "grad_norm": 27.250391006469727, "learning_rate": 1.5384615384615387e-07, "loss": 1.3594, "step": 7 }, { "epoch": 0.007692307692307693, "grad_norm": 28.81546401977539, "learning_rate": 1.7948717948717948e-07, "loss": 1.6763, "step": 8 }, { "epoch": 0.008653846153846154, "grad_norm": 36.55729675292969, "learning_rate": 2.0512820512820514e-07, "loss": 1.6186, "step": 9 }, { "epoch": 0.009615384615384616, "grad_norm": 30.699676513671875, "learning_rate": 2.307692307692308e-07, "loss": 1.4926, "step": 10 }, { "epoch": 0.010576923076923078, "grad_norm": 30.503713607788086, "learning_rate": 2.564102564102564e-07, "loss": 1.4816, "step": 11 }, { "epoch": 0.011538461538461539, "grad_norm": 40.72938537597656, "learning_rate": 2.820512820512821e-07, "loss": 1.4262, "step": 12 }, { "epoch": 0.0125, "grad_norm": 25.398672103881836, "learning_rate": 3.0769230769230774e-07, "loss": 0.9628, "step": 13 }, { "epoch": 0.013461538461538462, "grad_norm": 19.380809783935547, "learning_rate": 3.3333333333333335e-07, "loss": 1.0969, "step": 14 }, { "epoch": 0.014423076923076924, "grad_norm": 25.976146697998047, "learning_rate": 3.5897435897435896e-07, "loss": 1.1948, "step": 15 }, { "epoch": 0.015384615384615385, "grad_norm": 20.815950393676758, "learning_rate": 3.846153846153847e-07, "loss": 1.0305, "step": 16 }, { "epoch": 0.016346153846153847, "grad_norm": 24.83697509765625, "learning_rate": 4.102564102564103e-07, "loss": 1.3541, "step": 17 }, { "epoch": 0.01730769230769231, "grad_norm": 23.892704010009766, "learning_rate": 4.358974358974359e-07, "loss": 1.0205, "step": 18 }, { "epoch": 0.01826923076923077, "grad_norm": 20.30869483947754, "learning_rate": 4.615384615384616e-07, "loss": 1.0856, "step": 19 }, { "epoch": 0.019230769230769232, "grad_norm": 17.67920684814453, "learning_rate": 4.871794871794872e-07, "loss": 1.0903, "step": 20 }, { "epoch": 0.020192307692307693, "grad_norm": 19.372783660888672, "learning_rate": 5.128205128205128e-07, "loss": 1.2673, "step": 21 }, { "epoch": 0.021153846153846155, "grad_norm": 16.221025466918945, "learning_rate": 5.384615384615386e-07, "loss": 1.1237, "step": 22 }, { "epoch": 0.022115384615384617, "grad_norm": 17.134536743164062, "learning_rate": 5.641025641025642e-07, "loss": 1.258, "step": 23 }, { "epoch": 0.023076923076923078, "grad_norm": 17.938322067260742, "learning_rate": 5.897435897435898e-07, "loss": 0.7556, "step": 24 }, { "epoch": 0.02403846153846154, "grad_norm": 17.31209373474121, "learning_rate": 6.153846153846155e-07, "loss": 0.7388, "step": 25 }, { "epoch": 0.025, "grad_norm": 16.547494888305664, "learning_rate": 6.41025641025641e-07, "loss": 1.2903, "step": 26 }, { "epoch": 0.025961538461538463, "grad_norm": 16.819061279296875, "learning_rate": 6.666666666666667e-07, "loss": 0.9005, "step": 27 }, { "epoch": 0.026923076923076925, "grad_norm": 15.726744651794434, "learning_rate": 6.923076923076924e-07, "loss": 1.2351, "step": 28 }, { "epoch": 0.027884615384615386, "grad_norm": 15.726744651794434, "learning_rate": 7.179487179487179e-07, "loss": 0.9265, "step": 29 }, { "epoch": 0.028846153846153848, "grad_norm": 18.192899703979492, "learning_rate": 7.179487179487179e-07, "loss": 1.0299, "step": 30 }, { "epoch": 0.02980769230769231, "grad_norm": 16.628816604614258, "learning_rate": 7.435897435897436e-07, "loss": 0.7892, "step": 31 }, { "epoch": 0.03076923076923077, "grad_norm": 18.22395896911621, "learning_rate": 7.692307692307694e-07, "loss": 1.3196, "step": 32 }, { "epoch": 0.03173076923076923, "grad_norm": 17.079439163208008, "learning_rate": 7.948717948717949e-07, "loss": 1.2121, "step": 33 }, { "epoch": 0.032692307692307694, "grad_norm": 15.577134132385254, "learning_rate": 8.205128205128206e-07, "loss": 0.7669, "step": 34 }, { "epoch": 0.03365384615384615, "grad_norm": 14.226288795471191, "learning_rate": 8.461538461538463e-07, "loss": 1.0913, "step": 35 }, { "epoch": 0.03461538461538462, "grad_norm": 12.827913284301758, "learning_rate": 8.717948717948718e-07, "loss": 0.8396, "step": 36 }, { "epoch": 0.035576923076923075, "grad_norm": 14.760544776916504, "learning_rate": 8.974358974358975e-07, "loss": 0.9204, "step": 37 }, { "epoch": 0.03653846153846154, "grad_norm": 14.751412391662598, "learning_rate": 9.230769230769232e-07, "loss": 0.8711, "step": 38 }, { "epoch": 0.0375, "grad_norm": 13.778837203979492, "learning_rate": 9.487179487179487e-07, "loss": 1.0763, "step": 39 }, { "epoch": 0.038461538461538464, "grad_norm": 13.630474090576172, "learning_rate": 9.743589743589745e-07, "loss": 0.7025, "step": 40 }, { "epoch": 0.03942307692307692, "grad_norm": 11.812657356262207, "learning_rate": 1.0000000000000002e-06, "loss": 1.2617, "step": 41 }, { "epoch": 0.04038461538461539, "grad_norm": 15.650359153747559, "learning_rate": 1.0256410256410257e-06, "loss": 1.5392, "step": 42 }, { "epoch": 0.041346153846153845, "grad_norm": 16.111225128173828, "learning_rate": 1.0512820512820514e-06, "loss": 0.9673, "step": 43 }, { "epoch": 0.04230769230769231, "grad_norm": 14.739701271057129, "learning_rate": 1.076923076923077e-06, "loss": 0.9712, "step": 44 }, { "epoch": 0.04326923076923077, "grad_norm": 13.80407428741455, "learning_rate": 1.1025641025641026e-06, "loss": 0.8222, "step": 45 }, { "epoch": 0.04423076923076923, "grad_norm": 12.27470874786377, "learning_rate": 1.1282051282051283e-06, "loss": 0.8134, "step": 46 }, { "epoch": 0.04519230769230769, "grad_norm": 11.777482986450195, "learning_rate": 1.153846153846154e-06, "loss": 0.4915, "step": 47 }, { "epoch": 0.046153846153846156, "grad_norm": 11.960409164428711, "learning_rate": 1.1794871794871795e-06, "loss": 0.8085, "step": 48 }, { "epoch": 0.047115384615384615, "grad_norm": 11.960409164428711, "learning_rate": 1.2051282051282053e-06, "loss": 1.0093, "step": 49 }, { "epoch": 0.04807692307692308, "grad_norm": 13.22179126739502, "learning_rate": 1.2051282051282053e-06, "loss": 1.0734, "step": 50 }, { "epoch": 0.04903846153846154, "grad_norm": 11.868070602416992, "learning_rate": 1.230769230769231e-06, "loss": 0.7865, "step": 51 }, { "epoch": 0.05, "grad_norm": 12.814583778381348, "learning_rate": 1.2564102564102565e-06, "loss": 0.8576, "step": 52 }, { "epoch": 0.05096153846153846, "grad_norm": 12.538263320922852, "learning_rate": 1.282051282051282e-06, "loss": 0.831, "step": 53 }, { "epoch": 0.051923076923076926, "grad_norm": 10.708611488342285, "learning_rate": 1.307692307692308e-06, "loss": 0.5643, "step": 54 }, { "epoch": 0.052884615384615384, "grad_norm": 14.20469856262207, "learning_rate": 1.3333333333333334e-06, "loss": 1.2509, "step": 55 }, { "epoch": 0.05384615384615385, "grad_norm": 10.933833122253418, "learning_rate": 1.358974358974359e-06, "loss": 0.6411, "step": 56 }, { "epoch": 0.05480769230769231, "grad_norm": 12.724555015563965, "learning_rate": 1.3846153846153848e-06, "loss": 0.9072, "step": 57 }, { "epoch": 0.05576923076923077, "grad_norm": 11.048248291015625, "learning_rate": 1.4102564102564104e-06, "loss": 1.0854, "step": 58 }, { "epoch": 0.05673076923076923, "grad_norm": 12.49885082244873, "learning_rate": 1.4358974358974359e-06, "loss": 1.1386, "step": 59 }, { "epoch": 0.057692307692307696, "grad_norm": 13.372956275939941, "learning_rate": 1.4615384615384618e-06, "loss": 1.1104, "step": 60 }, { "epoch": 0.058653846153846154, "grad_norm": 11.407519340515137, "learning_rate": 1.4871794871794873e-06, "loss": 0.6441, "step": 61 }, { "epoch": 0.05961538461538462, "grad_norm": 10.310783386230469, "learning_rate": 1.5128205128205128e-06, "loss": 0.6751, "step": 62 }, { "epoch": 0.06057692307692308, "grad_norm": 11.731295585632324, "learning_rate": 1.5384615384615387e-06, "loss": 0.9149, "step": 63 }, { "epoch": 0.06153846153846154, "grad_norm": 9.933897018432617, "learning_rate": 1.5641025641025642e-06, "loss": 0.5103, "step": 64 }, { "epoch": 0.0625, "grad_norm": 8.502243041992188, "learning_rate": 1.5897435897435897e-06, "loss": 0.4706, "step": 65 }, { "epoch": 0.06346153846153846, "grad_norm": 9.33025074005127, "learning_rate": 1.6153846153846157e-06, "loss": 0.5653, "step": 66 }, { "epoch": 0.06442307692307692, "grad_norm": 12.97715950012207, "learning_rate": 1.6410256410256412e-06, "loss": 1.0828, "step": 67 }, { "epoch": 0.06538461538461539, "grad_norm": 10.350533485412598, "learning_rate": 1.6666666666666667e-06, "loss": 0.8039, "step": 68 }, { "epoch": 0.06634615384615385, "grad_norm": 10.17438793182373, "learning_rate": 1.6923076923076926e-06, "loss": 1.0558, "step": 69 }, { "epoch": 0.0673076923076923, "grad_norm": 11.997801780700684, "learning_rate": 1.717948717948718e-06, "loss": 1.06, "step": 70 }, { "epoch": 0.06826923076923076, "grad_norm": 11.197973251342773, "learning_rate": 1.7435897435897436e-06, "loss": 0.4404, "step": 71 }, { "epoch": 0.06923076923076923, "grad_norm": 13.309876441955566, "learning_rate": 1.7692307692307695e-06, "loss": 1.1769, "step": 72 }, { "epoch": 0.07019230769230769, "grad_norm": 12.886967658996582, "learning_rate": 1.794871794871795e-06, "loss": 1.3754, "step": 73 }, { "epoch": 0.07115384615384615, "grad_norm": 10.555200576782227, "learning_rate": 1.8205128205128205e-06, "loss": 0.7708, "step": 74 }, { "epoch": 0.07211538461538461, "grad_norm": 11.964073181152344, "learning_rate": 1.8461538461538465e-06, "loss": 1.4502, "step": 75 }, { "epoch": 0.07307692307692308, "grad_norm": 12.591039657592773, "learning_rate": 1.871794871794872e-06, "loss": 1.1176, "step": 76 }, { "epoch": 0.07403846153846154, "grad_norm": 11.395910263061523, "learning_rate": 1.8974358974358975e-06, "loss": 1.0175, "step": 77 }, { "epoch": 0.075, "grad_norm": 10.393385887145996, "learning_rate": 1.9230769230769234e-06, "loss": 0.6348, "step": 78 }, { "epoch": 0.07596153846153846, "grad_norm": 12.742334365844727, "learning_rate": 1.948717948717949e-06, "loss": 0.9647, "step": 79 }, { "epoch": 0.07692307692307693, "grad_norm": 10.676291465759277, "learning_rate": 1.9743589743589744e-06, "loss": 0.7499, "step": 80 }, { "epoch": 0.07788461538461539, "grad_norm": 10.923957824707031, "learning_rate": 2.0000000000000003e-06, "loss": 0.919, "step": 81 }, { "epoch": 0.07884615384615384, "grad_norm": 10.672959327697754, "learning_rate": 2.025641025641026e-06, "loss": 1.3124, "step": 82 }, { "epoch": 0.0798076923076923, "grad_norm": 11.3402099609375, "learning_rate": 2.0512820512820513e-06, "loss": 0.7317, "step": 83 }, { "epoch": 0.08076923076923077, "grad_norm": 11.424199104309082, "learning_rate": 2.0769230769230773e-06, "loss": 1.012, "step": 84 }, { "epoch": 0.08173076923076923, "grad_norm": 10.33788776397705, "learning_rate": 2.1025641025641028e-06, "loss": 0.9729, "step": 85 }, { "epoch": 0.08269230769230769, "grad_norm": 10.74868106842041, "learning_rate": 2.1282051282051283e-06, "loss": 0.7432, "step": 86 }, { "epoch": 0.08365384615384615, "grad_norm": 11.421142578125, "learning_rate": 2.153846153846154e-06, "loss": 0.8304, "step": 87 }, { "epoch": 0.08461538461538462, "grad_norm": 9.584710121154785, "learning_rate": 2.1794871794871797e-06, "loss": 0.8879, "step": 88 }, { "epoch": 0.08557692307692308, "grad_norm": 10.109071731567383, "learning_rate": 2.2051282051282052e-06, "loss": 0.7171, "step": 89 }, { "epoch": 0.08653846153846154, "grad_norm": 11.038488388061523, "learning_rate": 2.230769230769231e-06, "loss": 0.9559, "step": 90 }, { "epoch": 0.0875, "grad_norm": 10.230183601379395, "learning_rate": 2.2564102564102566e-06, "loss": 0.6175, "step": 91 }, { "epoch": 0.08846153846153847, "grad_norm": 8.5918607711792, "learning_rate": 2.282051282051282e-06, "loss": 0.3454, "step": 92 }, { "epoch": 0.08942307692307692, "grad_norm": 9.218935012817383, "learning_rate": 2.307692307692308e-06, "loss": 0.5915, "step": 93 }, { "epoch": 0.09038461538461538, "grad_norm": 11.3999605178833, "learning_rate": 2.3333333333333336e-06, "loss": 0.5355, "step": 94 }, { "epoch": 0.09134615384615384, "grad_norm": 9.89294147491455, "learning_rate": 2.358974358974359e-06, "loss": 0.932, "step": 95 }, { "epoch": 0.09230769230769231, "grad_norm": 10.172468185424805, "learning_rate": 2.384615384615385e-06, "loss": 0.7323, "step": 96 }, { "epoch": 0.09326923076923077, "grad_norm": 11.253607749938965, "learning_rate": 2.4102564102564105e-06, "loss": 0.8262, "step": 97 }, { "epoch": 0.09423076923076923, "grad_norm": 10.393219947814941, "learning_rate": 2.435897435897436e-06, "loss": 0.7232, "step": 98 }, { "epoch": 0.09519230769230769, "grad_norm": 8.691957473754883, "learning_rate": 2.461538461538462e-06, "loss": 0.6073, "step": 99 }, { "epoch": 0.09615384615384616, "grad_norm": 11.659120559692383, "learning_rate": 2.4871794871794875e-06, "loss": 0.7126, "step": 100 }, { "epoch": 0.09711538461538462, "grad_norm": 12.339609146118164, "learning_rate": 2.512820512820513e-06, "loss": 0.9261, "step": 101 }, { "epoch": 0.09807692307692308, "grad_norm": 8.803972244262695, "learning_rate": 2.5384615384615385e-06, "loss": 0.4927, "step": 102 }, { "epoch": 0.09903846153846153, "grad_norm": 11.167093276977539, "learning_rate": 2.564102564102564e-06, "loss": 0.7379, "step": 103 }, { "epoch": 0.1, "grad_norm": 9.172099113464355, "learning_rate": 2.5897435897435903e-06, "loss": 0.649, "step": 104 }, { "epoch": 0.10096153846153846, "grad_norm": 11.628144264221191, "learning_rate": 2.615384615384616e-06, "loss": 1.7749, "step": 105 }, { "epoch": 0.10192307692307692, "grad_norm": 13.213857650756836, "learning_rate": 2.6410256410256413e-06, "loss": 1.0598, "step": 106 }, { "epoch": 0.10288461538461538, "grad_norm": 9.982540130615234, "learning_rate": 2.666666666666667e-06, "loss": 0.7662, "step": 107 }, { "epoch": 0.10384615384615385, "grad_norm": 10.48576831817627, "learning_rate": 2.6923076923076923e-06, "loss": 0.6528, "step": 108 }, { "epoch": 0.10480769230769231, "grad_norm": 12.111198425292969, "learning_rate": 2.717948717948718e-06, "loss": 0.8987, "step": 109 }, { "epoch": 0.10576923076923077, "grad_norm": 11.314216613769531, "learning_rate": 2.743589743589744e-06, "loss": 1.0976, "step": 110 }, { "epoch": 0.10673076923076923, "grad_norm": 12.612529754638672, "learning_rate": 2.7692307692307697e-06, "loss": 1.184, "step": 111 }, { "epoch": 0.1076923076923077, "grad_norm": 11.470283508300781, "learning_rate": 2.794871794871795e-06, "loss": 0.7783, "step": 112 }, { "epoch": 0.10865384615384616, "grad_norm": 9.09618854522705, "learning_rate": 2.8205128205128207e-06, "loss": 0.6693, "step": 113 }, { "epoch": 0.10961538461538461, "grad_norm": 11.090051651000977, "learning_rate": 2.846153846153846e-06, "loss": 1.0197, "step": 114 }, { "epoch": 0.11057692307692307, "grad_norm": 13.490358352661133, "learning_rate": 2.8717948717948717e-06, "loss": 1.0033, "step": 115 }, { "epoch": 0.11153846153846154, "grad_norm": 11.595834732055664, "learning_rate": 2.897435897435898e-06, "loss": 1.0878, "step": 116 }, { "epoch": 0.1125, "grad_norm": 10.286130905151367, "learning_rate": 2.9230769230769236e-06, "loss": 0.8394, "step": 117 }, { "epoch": 0.11346153846153846, "grad_norm": 10.041020393371582, "learning_rate": 2.948717948717949e-06, "loss": 0.7988, "step": 118 }, { "epoch": 0.11442307692307692, "grad_norm": 10.483716011047363, "learning_rate": 2.9743589743589746e-06, "loss": 0.7223, "step": 119 }, { "epoch": 0.11538461538461539, "grad_norm": 10.82847785949707, "learning_rate": 3e-06, "loss": 0.9167, "step": 120 }, { "epoch": 0.11634615384615385, "grad_norm": 12.247166633605957, "learning_rate": 3.0256410256410256e-06, "loss": 0.7655, "step": 121 }, { "epoch": 0.11730769230769231, "grad_norm": 9.969038009643555, "learning_rate": 3.051282051282052e-06, "loss": 0.8922, "step": 122 }, { "epoch": 0.11826923076923077, "grad_norm": 11.01846694946289, "learning_rate": 3.0769230769230774e-06, "loss": 0.956, "step": 123 }, { "epoch": 0.11923076923076924, "grad_norm": 11.933003425598145, "learning_rate": 3.102564102564103e-06, "loss": 0.9991, "step": 124 }, { "epoch": 0.1201923076923077, "grad_norm": 8.92406940460205, "learning_rate": 3.1282051282051284e-06, "loss": 0.3863, "step": 125 }, { "epoch": 0.12115384615384615, "grad_norm": 10.634058952331543, "learning_rate": 3.153846153846154e-06, "loss": 0.591, "step": 126 }, { "epoch": 0.12211538461538461, "grad_norm": 9.755243301391602, "learning_rate": 3.1794871794871795e-06, "loss": 0.6139, "step": 127 }, { "epoch": 0.12307692307692308, "grad_norm": 7.816664695739746, "learning_rate": 3.205128205128206e-06, "loss": 0.404, "step": 128 }, { "epoch": 0.12403846153846154, "grad_norm": 9.23216438293457, "learning_rate": 3.2307692307692313e-06, "loss": 0.4824, "step": 129 }, { "epoch": 0.125, "grad_norm": 10.285029411315918, "learning_rate": 3.256410256410257e-06, "loss": 0.7609, "step": 130 }, { "epoch": 0.12596153846153846, "grad_norm": 9.602670669555664, "learning_rate": 3.2820512820512823e-06, "loss": 0.828, "step": 131 }, { "epoch": 0.12692307692307692, "grad_norm": 9.553510665893555, "learning_rate": 3.307692307692308e-06, "loss": 0.5426, "step": 132 }, { "epoch": 0.12788461538461537, "grad_norm": 10.087485313415527, "learning_rate": 3.3333333333333333e-06, "loss": 0.6199, "step": 133 }, { "epoch": 0.12884615384615383, "grad_norm": 8.162088394165039, "learning_rate": 3.358974358974359e-06, "loss": 0.9046, "step": 134 }, { "epoch": 0.12980769230769232, "grad_norm": 10.555160522460938, "learning_rate": 3.384615384615385e-06, "loss": 1.4847, "step": 135 }, { "epoch": 0.13076923076923078, "grad_norm": 9.462635040283203, "learning_rate": 3.4102564102564107e-06, "loss": 0.9427, "step": 136 }, { "epoch": 0.13173076923076923, "grad_norm": 8.5938720703125, "learning_rate": 3.435897435897436e-06, "loss": 0.4624, "step": 137 }, { "epoch": 0.1326923076923077, "grad_norm": 9.184378623962402, "learning_rate": 3.4615384615384617e-06, "loss": 0.5186, "step": 138 }, { "epoch": 0.13365384615384615, "grad_norm": 10.225860595703125, "learning_rate": 3.487179487179487e-06, "loss": 0.6507, "step": 139 }, { "epoch": 0.1346153846153846, "grad_norm": 9.815143585205078, "learning_rate": 3.5128205128205127e-06, "loss": 0.9369, "step": 140 }, { "epoch": 0.13557692307692307, "grad_norm": 10.965766906738281, "learning_rate": 3.538461538461539e-06, "loss": 0.8677, "step": 141 }, { "epoch": 0.13653846153846153, "grad_norm": 10.377737998962402, "learning_rate": 3.5641025641025646e-06, "loss": 1.2102, "step": 142 }, { "epoch": 0.1375, "grad_norm": 9.624357223510742, "learning_rate": 3.58974358974359e-06, "loss": 1.007, "step": 143 }, { "epoch": 0.13846153846153847, "grad_norm": 9.91395092010498, "learning_rate": 3.6153846153846156e-06, "loss": 0.6418, "step": 144 }, { "epoch": 0.13942307692307693, "grad_norm": 11.899982452392578, "learning_rate": 3.641025641025641e-06, "loss": 0.8352, "step": 145 }, { "epoch": 0.14038461538461539, "grad_norm": 9.297306060791016, "learning_rate": 3.6666666666666666e-06, "loss": 0.6222, "step": 146 }, { "epoch": 0.14134615384615384, "grad_norm": 10.34272575378418, "learning_rate": 3.692307692307693e-06, "loss": 0.8862, "step": 147 }, { "epoch": 0.1423076923076923, "grad_norm": 9.79200553894043, "learning_rate": 3.7179487179487184e-06, "loss": 0.8505, "step": 148 }, { "epoch": 0.14326923076923076, "grad_norm": 10.248441696166992, "learning_rate": 3.743589743589744e-06, "loss": 0.8382, "step": 149 }, { "epoch": 0.14423076923076922, "grad_norm": 10.01071834564209, "learning_rate": 3.7692307692307694e-06, "loss": 0.736, "step": 150 }, { "epoch": 0.1451923076923077, "grad_norm": 9.262495040893555, "learning_rate": 3.794871794871795e-06, "loss": 0.592, "step": 151 }, { "epoch": 0.14615384615384616, "grad_norm": 10.389930725097656, "learning_rate": 3.8205128205128204e-06, "loss": 0.8667, "step": 152 }, { "epoch": 0.14711538461538462, "grad_norm": 10.571329116821289, "learning_rate": 3.846153846153847e-06, "loss": 0.8059, "step": 153 }, { "epoch": 0.14807692307692308, "grad_norm": 8.784279823303223, "learning_rate": 3.871794871794872e-06, "loss": 0.6467, "step": 154 }, { "epoch": 0.14903846153846154, "grad_norm": 10.203702926635742, "learning_rate": 3.897435897435898e-06, "loss": 1.0168, "step": 155 }, { "epoch": 0.15, "grad_norm": 9.413726806640625, "learning_rate": 3.923076923076923e-06, "loss": 0.7756, "step": 156 }, { "epoch": 0.15096153846153845, "grad_norm": 10.32897663116455, "learning_rate": 3.948717948717949e-06, "loss": 1.0589, "step": 157 }, { "epoch": 0.1519230769230769, "grad_norm": 10.876960754394531, "learning_rate": 3.974358974358974e-06, "loss": 0.9142, "step": 158 }, { "epoch": 0.1528846153846154, "grad_norm": 10.112897872924805, "learning_rate": 4.000000000000001e-06, "loss": 1.0254, "step": 159 }, { "epoch": 0.15384615384615385, "grad_norm": 9.875511169433594, "learning_rate": 4.025641025641026e-06, "loss": 0.5514, "step": 160 }, { "epoch": 0.1548076923076923, "grad_norm": 10.521202087402344, "learning_rate": 4.051282051282052e-06, "loss": 0.8716, "step": 161 }, { "epoch": 0.15576923076923077, "grad_norm": 9.661408424377441, "learning_rate": 4.076923076923077e-06, "loss": 1.322, "step": 162 }, { "epoch": 0.15673076923076923, "grad_norm": 10.429271697998047, "learning_rate": 4.102564102564103e-06, "loss": 0.5511, "step": 163 }, { "epoch": 0.1576923076923077, "grad_norm": 9.75400447845459, "learning_rate": 4.128205128205128e-06, "loss": 1.0702, "step": 164 }, { "epoch": 0.15865384615384615, "grad_norm": 9.985607147216797, "learning_rate": 4.1538461538461545e-06, "loss": 0.8453, "step": 165 }, { "epoch": 0.1596153846153846, "grad_norm": 8.770146369934082, "learning_rate": 4.17948717948718e-06, "loss": 0.8319, "step": 166 }, { "epoch": 0.1605769230769231, "grad_norm": 11.527228355407715, "learning_rate": 4.2051282051282055e-06, "loss": 1.0886, "step": 167 }, { "epoch": 0.16153846153846155, "grad_norm": 10.02247142791748, "learning_rate": 4.230769230769231e-06, "loss": 0.9429, "step": 168 }, { "epoch": 0.1625, "grad_norm": 10.208101272583008, "learning_rate": 4.2564102564102566e-06, "loss": 0.5656, "step": 169 }, { "epoch": 0.16346153846153846, "grad_norm": 10.516668319702148, "learning_rate": 4.282051282051282e-06, "loss": 0.7339, "step": 170 }, { "epoch": 0.16442307692307692, "grad_norm": 8.880390167236328, "learning_rate": 4.307692307692308e-06, "loss": 0.4651, "step": 171 }, { "epoch": 0.16538461538461538, "grad_norm": 7.895445823669434, "learning_rate": 4.333333333333334e-06, "loss": 0.5794, "step": 172 }, { "epoch": 0.16634615384615384, "grad_norm": 9.336357116699219, "learning_rate": 4.358974358974359e-06, "loss": 0.7899, "step": 173 }, { "epoch": 0.1673076923076923, "grad_norm": 10.01447582244873, "learning_rate": 4.384615384615385e-06, "loss": 0.8947, "step": 174 }, { "epoch": 0.16826923076923078, "grad_norm": 8.891045570373535, "learning_rate": 4.4102564102564104e-06, "loss": 1.0523, "step": 175 }, { "epoch": 0.16923076923076924, "grad_norm": 11.183197021484375, "learning_rate": 4.435897435897436e-06, "loss": 0.8004, "step": 176 }, { "epoch": 0.1701923076923077, "grad_norm": 11.238788604736328, "learning_rate": 4.461538461538462e-06, "loss": 1.0118, "step": 177 }, { "epoch": 0.17115384615384616, "grad_norm": 9.837037086486816, "learning_rate": 4.487179487179488e-06, "loss": 0.8667, "step": 178 }, { "epoch": 0.17211538461538461, "grad_norm": 11.680953979492188, "learning_rate": 4.512820512820513e-06, "loss": 1.7174, "step": 179 }, { "epoch": 0.17307692307692307, "grad_norm": 9.134516716003418, "learning_rate": 4.538461538461539e-06, "loss": 0.8102, "step": 180 }, { "epoch": 0.17403846153846153, "grad_norm": 8.321429252624512, "learning_rate": 4.564102564102564e-06, "loss": 0.47, "step": 181 }, { "epoch": 0.175, "grad_norm": 10.133270263671875, "learning_rate": 4.58974358974359e-06, "loss": 0.5252, "step": 182 }, { "epoch": 0.17596153846153847, "grad_norm": 9.039522171020508, "learning_rate": 4.615384615384616e-06, "loss": 0.701, "step": 183 }, { "epoch": 0.17692307692307693, "grad_norm": 9.526537895202637, "learning_rate": 4.641025641025642e-06, "loss": 0.5456, "step": 184 }, { "epoch": 0.1778846153846154, "grad_norm": 10.085619926452637, "learning_rate": 4.666666666666667e-06, "loss": 0.5291, "step": 185 }, { "epoch": 0.17884615384615385, "grad_norm": 9.824097633361816, "learning_rate": 4.692307692307693e-06, "loss": 0.6028, "step": 186 }, { "epoch": 0.1798076923076923, "grad_norm": 10.214094161987305, "learning_rate": 4.717948717948718e-06, "loss": 1.132, "step": 187 }, { "epoch": 0.18076923076923077, "grad_norm": 8.114581108093262, "learning_rate": 4.743589743589744e-06, "loss": 0.4612, "step": 188 }, { "epoch": 0.18173076923076922, "grad_norm": 8.327912330627441, "learning_rate": 4.76923076923077e-06, "loss": 0.438, "step": 189 }, { "epoch": 0.18269230769230768, "grad_norm": 9.334941864013672, "learning_rate": 4.7948717948717955e-06, "loss": 0.8109, "step": 190 }, { "epoch": 0.18365384615384617, "grad_norm": 7.453091621398926, "learning_rate": 4.820512820512821e-06, "loss": 0.6344, "step": 191 }, { "epoch": 0.18461538461538463, "grad_norm": 8.692304611206055, "learning_rate": 4.8461538461538465e-06, "loss": 0.5584, "step": 192 }, { "epoch": 0.18557692307692308, "grad_norm": 10.67364501953125, "learning_rate": 4.871794871794872e-06, "loss": 1.1937, "step": 193 }, { "epoch": 0.18653846153846154, "grad_norm": 24.86941909790039, "learning_rate": 4.8974358974358975e-06, "loss": 0.5617, "step": 194 }, { "epoch": 0.1875, "grad_norm": 9.180996894836426, "learning_rate": 4.923076923076924e-06, "loss": 0.6623, "step": 195 }, { "epoch": 0.18846153846153846, "grad_norm": 8.419547080993652, "learning_rate": 4.948717948717949e-06, "loss": 0.4987, "step": 196 }, { "epoch": 0.18942307692307692, "grad_norm": 10.07659912109375, "learning_rate": 4.974358974358975e-06, "loss": 0.9913, "step": 197 }, { "epoch": 0.19038461538461537, "grad_norm": 8.988810539245605, "learning_rate": 5e-06, "loss": 0.481, "step": 198 }, { "epoch": 0.19134615384615383, "grad_norm": 11.48502254486084, "learning_rate": 5.025641025641026e-06, "loss": 1.0946, "step": 199 }, { "epoch": 0.19230769230769232, "grad_norm": 9.221864700317383, "learning_rate": 5.051282051282051e-06, "loss": 1.0798, "step": 200 }, { "epoch": 0.19326923076923078, "grad_norm": 9.590997695922852, "learning_rate": 5.076923076923077e-06, "loss": 0.661, "step": 201 }, { "epoch": 0.19423076923076923, "grad_norm": 8.582722663879395, "learning_rate": 5.1025641025641024e-06, "loss": 0.5683, "step": 202 }, { "epoch": 0.1951923076923077, "grad_norm": 10.143598556518555, "learning_rate": 5.128205128205128e-06, "loss": 0.728, "step": 203 }, { "epoch": 0.19615384615384615, "grad_norm": 8.933074951171875, "learning_rate": 5.1538461538461534e-06, "loss": 0.5056, "step": 204 }, { "epoch": 0.1971153846153846, "grad_norm": 8.639031410217285, "learning_rate": 5.179487179487181e-06, "loss": 0.653, "step": 205 }, { "epoch": 0.19807692307692307, "grad_norm": 9.455964088439941, "learning_rate": 5.205128205128206e-06, "loss": 0.8817, "step": 206 }, { "epoch": 0.19903846153846153, "grad_norm": 9.303647994995117, "learning_rate": 5.230769230769232e-06, "loss": 0.7968, "step": 207 }, { "epoch": 0.2, "grad_norm": 9.916182518005371, "learning_rate": 5.256410256410257e-06, "loss": 0.8629, "step": 208 }, { "epoch": 0.20096153846153847, "grad_norm": 9.221867561340332, "learning_rate": 5.282051282051283e-06, "loss": 0.6148, "step": 209 }, { "epoch": 0.20192307692307693, "grad_norm": 9.902376174926758, "learning_rate": 5.307692307692308e-06, "loss": 1.0584, "step": 210 }, { "epoch": 0.20288461538461539, "grad_norm": 9.834601402282715, "learning_rate": 5.333333333333334e-06, "loss": 0.9289, "step": 211 }, { "epoch": 0.20384615384615384, "grad_norm": 8.60289478302002, "learning_rate": 5.358974358974359e-06, "loss": 0.5898, "step": 212 }, { "epoch": 0.2048076923076923, "grad_norm": 10.477520942687988, "learning_rate": 5.384615384615385e-06, "loss": 0.871, "step": 213 }, { "epoch": 0.20576923076923076, "grad_norm": 9.637504577636719, "learning_rate": 5.41025641025641e-06, "loss": 0.7656, "step": 214 }, { "epoch": 0.20673076923076922, "grad_norm": 11.007869720458984, "learning_rate": 5.435897435897436e-06, "loss": 0.8884, "step": 215 }, { "epoch": 0.2076923076923077, "grad_norm": 11.534982681274414, "learning_rate": 5.461538461538461e-06, "loss": 1.0378, "step": 216 }, { "epoch": 0.20865384615384616, "grad_norm": 10.444128036499023, "learning_rate": 5.487179487179488e-06, "loss": 0.9315, "step": 217 }, { "epoch": 0.20961538461538462, "grad_norm": 9.72168254852295, "learning_rate": 5.512820512820514e-06, "loss": 0.7518, "step": 218 }, { "epoch": 0.21057692307692308, "grad_norm": 10.537256240844727, "learning_rate": 5.538461538461539e-06, "loss": 0.8489, "step": 219 }, { "epoch": 0.21153846153846154, "grad_norm": 11.352910995483398, "learning_rate": 5.564102564102565e-06, "loss": 1.4251, "step": 220 }, { "epoch": 0.2125, "grad_norm": 11.909002304077148, "learning_rate": 5.58974358974359e-06, "loss": 1.0292, "step": 221 }, { "epoch": 0.21346153846153845, "grad_norm": 8.798715591430664, "learning_rate": 5.615384615384616e-06, "loss": 0.5375, "step": 222 }, { "epoch": 0.2144230769230769, "grad_norm": 9.143534660339355, "learning_rate": 5.641025641025641e-06, "loss": 0.7752, "step": 223 }, { "epoch": 0.2153846153846154, "grad_norm": 11.00376033782959, "learning_rate": 5.666666666666667e-06, "loss": 1.0137, "step": 224 }, { "epoch": 0.21634615384615385, "grad_norm": 9.159578323364258, "learning_rate": 5.692307692307692e-06, "loss": 1.0219, "step": 225 }, { "epoch": 0.2173076923076923, "grad_norm": 9.993102073669434, "learning_rate": 5.717948717948718e-06, "loss": 0.9018, "step": 226 }, { "epoch": 0.21826923076923077, "grad_norm": 9.829999923706055, "learning_rate": 5.743589743589743e-06, "loss": 0.8988, "step": 227 }, { "epoch": 0.21923076923076923, "grad_norm": 9.23063850402832, "learning_rate": 5.769230769230769e-06, "loss": 0.7943, "step": 228 }, { "epoch": 0.2201923076923077, "grad_norm": 8.117528915405273, "learning_rate": 5.794871794871796e-06, "loss": 0.5178, "step": 229 }, { "epoch": 0.22115384615384615, "grad_norm": 10.083499908447266, "learning_rate": 5.820512820512822e-06, "loss": 1.0957, "step": 230 }, { "epoch": 0.2221153846153846, "grad_norm": 9.320472717285156, "learning_rate": 5.846153846153847e-06, "loss": 0.9013, "step": 231 }, { "epoch": 0.2230769230769231, "grad_norm": 9.355833053588867, "learning_rate": 5.871794871794873e-06, "loss": 0.9777, "step": 232 }, { "epoch": 0.22403846153846155, "grad_norm": 8.316394805908203, "learning_rate": 5.897435897435898e-06, "loss": 0.6193, "step": 233 }, { "epoch": 0.225, "grad_norm": 9.539997100830078, "learning_rate": 5.923076923076924e-06, "loss": 0.9321, "step": 234 }, { "epoch": 0.22596153846153846, "grad_norm": 9.685453414916992, "learning_rate": 5.948717948717949e-06, "loss": 0.7453, "step": 235 }, { "epoch": 0.22692307692307692, "grad_norm": 10.575407981872559, "learning_rate": 5.974358974358975e-06, "loss": 1.4467, "step": 236 }, { "epoch": 0.22788461538461538, "grad_norm": 10.063942909240723, "learning_rate": 6e-06, "loss": 0.8902, "step": 237 }, { "epoch": 0.22884615384615384, "grad_norm": 9.834362983703613, "learning_rate": 6.025641025641026e-06, "loss": 1.0974, "step": 238 }, { "epoch": 0.2298076923076923, "grad_norm": 8.9702787399292, "learning_rate": 6.051282051282051e-06, "loss": 0.5389, "step": 239 }, { "epoch": 0.23076923076923078, "grad_norm": 9.008927345275879, "learning_rate": 6.076923076923077e-06, "loss": 1.1304, "step": 240 }, { "epoch": 0.23173076923076924, "grad_norm": 7.504274368286133, "learning_rate": 6.102564102564104e-06, "loss": 0.5579, "step": 241 }, { "epoch": 0.2326923076923077, "grad_norm": 7.552238941192627, "learning_rate": 6.128205128205129e-06, "loss": 0.5934, "step": 242 }, { "epoch": 0.23365384615384616, "grad_norm": 9.713563919067383, "learning_rate": 6.153846153846155e-06, "loss": 1.0211, "step": 243 }, { "epoch": 0.23461538461538461, "grad_norm": 7.878355979919434, "learning_rate": 6.17948717948718e-06, "loss": 0.8042, "step": 244 }, { "epoch": 0.23557692307692307, "grad_norm": 9.088546752929688, "learning_rate": 6.205128205128206e-06, "loss": 0.4784, "step": 245 }, { "epoch": 0.23653846153846153, "grad_norm": 9.100921630859375, "learning_rate": 6.230769230769231e-06, "loss": 0.4676, "step": 246 }, { "epoch": 0.2375, "grad_norm": 9.136134147644043, "learning_rate": 6.256410256410257e-06, "loss": 0.6642, "step": 247 }, { "epoch": 0.23846153846153847, "grad_norm": 7.753800868988037, "learning_rate": 6.282051282051282e-06, "loss": 0.6917, "step": 248 }, { "epoch": 0.23942307692307693, "grad_norm": 8.77303695678711, "learning_rate": 6.307692307692308e-06, "loss": 0.7638, "step": 249 }, { "epoch": 0.2403846153846154, "grad_norm": 10.04815673828125, "learning_rate": 6.333333333333333e-06, "loss": 0.9061, "step": 250 }, { "epoch": 0.24134615384615385, "grad_norm": 9.215681076049805, "learning_rate": 6.358974358974359e-06, "loss": 0.6946, "step": 251 }, { "epoch": 0.2423076923076923, "grad_norm": 10.475281715393066, "learning_rate": 6.384615384615384e-06, "loss": 0.6836, "step": 252 }, { "epoch": 0.24326923076923077, "grad_norm": 8.994176864624023, "learning_rate": 6.410256410256412e-06, "loss": 0.5271, "step": 253 }, { "epoch": 0.24423076923076922, "grad_norm": 10.816271781921387, "learning_rate": 6.435897435897437e-06, "loss": 1.1519, "step": 254 }, { "epoch": 0.24519230769230768, "grad_norm": 9.812688827514648, "learning_rate": 6.461538461538463e-06, "loss": 0.7732, "step": 255 }, { "epoch": 0.24615384615384617, "grad_norm": 10.864325523376465, "learning_rate": 6.487179487179488e-06, "loss": 0.9837, "step": 256 }, { "epoch": 0.24711538461538463, "grad_norm": 8.360908508300781, "learning_rate": 6.512820512820514e-06, "loss": 0.9228, "step": 257 }, { "epoch": 0.24807692307692308, "grad_norm": 8.40225887298584, "learning_rate": 6.538461538461539e-06, "loss": 0.9813, "step": 258 }, { "epoch": 0.24903846153846154, "grad_norm": 9.412943840026855, "learning_rate": 6.564102564102565e-06, "loss": 0.9867, "step": 259 }, { "epoch": 0.25, "grad_norm": 9.899882316589355, "learning_rate": 6.58974358974359e-06, "loss": 0.6357, "step": 260 }, { "epoch": 0.25096153846153846, "grad_norm": 7.880748748779297, "learning_rate": 6.615384615384616e-06, "loss": 0.756, "step": 261 }, { "epoch": 0.2519230769230769, "grad_norm": 9.454109191894531, "learning_rate": 6.641025641025641e-06, "loss": 1.2695, "step": 262 }, { "epoch": 0.2528846153846154, "grad_norm": 8.986023902893066, "learning_rate": 6.666666666666667e-06, "loss": 0.7602, "step": 263 }, { "epoch": 0.25384615384615383, "grad_norm": 10.429808616638184, "learning_rate": 6.692307692307692e-06, "loss": 1.5441, "step": 264 }, { "epoch": 0.2548076923076923, "grad_norm": 9.188091278076172, "learning_rate": 6.717948717948718e-06, "loss": 0.7489, "step": 265 }, { "epoch": 0.25576923076923075, "grad_norm": 9.165108680725098, "learning_rate": 6.743589743589745e-06, "loss": 0.5397, "step": 266 }, { "epoch": 0.2567307692307692, "grad_norm": 9.099048614501953, "learning_rate": 6.76923076923077e-06, "loss": 0.7736, "step": 267 }, { "epoch": 0.25769230769230766, "grad_norm": 7.913120746612549, "learning_rate": 6.794871794871796e-06, "loss": 0.4634, "step": 268 }, { "epoch": 0.2586538461538462, "grad_norm": 8.05789566040039, "learning_rate": 6.820512820512821e-06, "loss": 0.9735, "step": 269 }, { "epoch": 0.25961538461538464, "grad_norm": 9.683671951293945, "learning_rate": 6.846153846153847e-06, "loss": 0.8637, "step": 270 }, { "epoch": 0.2605769230769231, "grad_norm": 11.880501747131348, "learning_rate": 6.871794871794872e-06, "loss": 1.0754, "step": 271 }, { "epoch": 0.26153846153846155, "grad_norm": 8.729684829711914, "learning_rate": 6.897435897435898e-06, "loss": 1.2982, "step": 272 }, { "epoch": 0.2625, "grad_norm": 8.881952285766602, "learning_rate": 6.923076923076923e-06, "loss": 0.8567, "step": 273 }, { "epoch": 0.26346153846153847, "grad_norm": 8.35590934753418, "learning_rate": 6.948717948717949e-06, "loss": 0.687, "step": 274 }, { "epoch": 0.2644230769230769, "grad_norm": 7.92506742477417, "learning_rate": 6.974358974358974e-06, "loss": 0.6649, "step": 275 }, { "epoch": 0.2653846153846154, "grad_norm": 9.699445724487305, "learning_rate": 7e-06, "loss": 0.7157, "step": 276 }, { "epoch": 0.26634615384615384, "grad_norm": 9.186095237731934, "learning_rate": 7.025641025641025e-06, "loss": 0.8958, "step": 277 }, { "epoch": 0.2673076923076923, "grad_norm": 8.765289306640625, "learning_rate": 7.051282051282053e-06, "loss": 0.7021, "step": 278 }, { "epoch": 0.26826923076923076, "grad_norm": 8.03092098236084, "learning_rate": 7.076923076923078e-06, "loss": 0.6529, "step": 279 }, { "epoch": 0.2692307692307692, "grad_norm": 9.99845027923584, "learning_rate": 7.102564102564104e-06, "loss": 0.9096, "step": 280 }, { "epoch": 0.2701923076923077, "grad_norm": 8.220102310180664, "learning_rate": 7.128205128205129e-06, "loss": 0.6511, "step": 281 }, { "epoch": 0.27115384615384613, "grad_norm": 10.34608268737793, "learning_rate": 7.153846153846155e-06, "loss": 1.1527, "step": 282 }, { "epoch": 0.2721153846153846, "grad_norm": 8.913369178771973, "learning_rate": 7.17948717948718e-06, "loss": 0.8163, "step": 283 }, { "epoch": 0.27307692307692305, "grad_norm": 10.340911865234375, "learning_rate": 7.205128205128206e-06, "loss": 1.2631, "step": 284 }, { "epoch": 0.27403846153846156, "grad_norm": 9.604874610900879, "learning_rate": 7.230769230769231e-06, "loss": 0.6732, "step": 285 }, { "epoch": 0.275, "grad_norm": 8.11025619506836, "learning_rate": 7.256410256410257e-06, "loss": 0.955, "step": 286 }, { "epoch": 0.2759615384615385, "grad_norm": 9.290190696716309, "learning_rate": 7.282051282051282e-06, "loss": 1.1217, "step": 287 }, { "epoch": 0.27692307692307694, "grad_norm": 7.837542533874512, "learning_rate": 7.307692307692308e-06, "loss": 1.0311, "step": 288 }, { "epoch": 0.2778846153846154, "grad_norm": 7.728721618652344, "learning_rate": 7.333333333333333e-06, "loss": 0.6261, "step": 289 }, { "epoch": 0.27884615384615385, "grad_norm": 9.610032081604004, "learning_rate": 7.35897435897436e-06, "loss": 0.983, "step": 290 }, { "epoch": 0.2798076923076923, "grad_norm": 8.987699508666992, "learning_rate": 7.384615384615386e-06, "loss": 0.8145, "step": 291 }, { "epoch": 0.28076923076923077, "grad_norm": 8.403523445129395, "learning_rate": 7.410256410256411e-06, "loss": 0.9203, "step": 292 }, { "epoch": 0.28173076923076923, "grad_norm": 9.15943431854248, "learning_rate": 7.435897435897437e-06, "loss": 1.0394, "step": 293 }, { "epoch": 0.2826923076923077, "grad_norm": 8.474885940551758, "learning_rate": 7.461538461538462e-06, "loss": 0.6136, "step": 294 }, { "epoch": 0.28365384615384615, "grad_norm": 8.235206604003906, "learning_rate": 7.487179487179488e-06, "loss": 0.4403, "step": 295 }, { "epoch": 0.2846153846153846, "grad_norm": 8.27389144897461, "learning_rate": 7.512820512820513e-06, "loss": 0.8351, "step": 296 }, { "epoch": 0.28557692307692306, "grad_norm": 8.888829231262207, "learning_rate": 7.538461538461539e-06, "loss": 0.7622, "step": 297 }, { "epoch": 0.2865384615384615, "grad_norm": 7.989358425140381, "learning_rate": 7.564102564102564e-06, "loss": 0.4929, "step": 298 }, { "epoch": 0.2875, "grad_norm": 8.429362297058105, "learning_rate": 7.58974358974359e-06, "loss": 1.0906, "step": 299 }, { "epoch": 0.28846153846153844, "grad_norm": 9.07469367980957, "learning_rate": 7.615384615384615e-06, "loss": 0.876, "step": 300 }, { "epoch": 0.28942307692307695, "grad_norm": 9.018302917480469, "learning_rate": 7.641025641025641e-06, "loss": 1.4847, "step": 301 }, { "epoch": 0.2903846153846154, "grad_norm": 9.283594131469727, "learning_rate": 7.666666666666667e-06, "loss": 0.562, "step": 302 }, { "epoch": 0.29134615384615387, "grad_norm": 9.667679786682129, "learning_rate": 7.692307692307694e-06, "loss": 0.9642, "step": 303 }, { "epoch": 0.2923076923076923, "grad_norm": 8.058027267456055, "learning_rate": 7.717948717948718e-06, "loss": 0.5347, "step": 304 }, { "epoch": 0.2932692307692308, "grad_norm": 8.258744239807129, "learning_rate": 7.743589743589745e-06, "loss": 0.6693, "step": 305 }, { "epoch": 0.29423076923076924, "grad_norm": 9.228354454040527, "learning_rate": 7.76923076923077e-06, "loss": 0.5781, "step": 306 }, { "epoch": 0.2951923076923077, "grad_norm": 9.174277305603027, "learning_rate": 7.794871794871796e-06, "loss": 1.0798, "step": 307 }, { "epoch": 0.29615384615384616, "grad_norm": 9.500144004821777, "learning_rate": 7.820512820512822e-06, "loss": 0.7205, "step": 308 }, { "epoch": 0.2971153846153846, "grad_norm": 8.871771812438965, "learning_rate": 7.846153846153847e-06, "loss": 0.9951, "step": 309 }, { "epoch": 0.2980769230769231, "grad_norm": 8.179502487182617, "learning_rate": 7.871794871794873e-06, "loss": 0.4387, "step": 310 }, { "epoch": 0.29903846153846153, "grad_norm": 10.346935272216797, "learning_rate": 7.897435897435898e-06, "loss": 1.1846, "step": 311 }, { "epoch": 0.3, "grad_norm": 8.351250648498535, "learning_rate": 7.923076923076924e-06, "loss": 0.5082, "step": 312 }, { "epoch": 0.30096153846153845, "grad_norm": 8.69798469543457, "learning_rate": 7.948717948717949e-06, "loss": 0.5236, "step": 313 }, { "epoch": 0.3019230769230769, "grad_norm": 10.513525009155273, "learning_rate": 7.974358974358975e-06, "loss": 0.9347, "step": 314 }, { "epoch": 0.30288461538461536, "grad_norm": 8.491903305053711, "learning_rate": 8.000000000000001e-06, "loss": 0.5724, "step": 315 }, { "epoch": 0.3038461538461538, "grad_norm": 7.005741119384766, "learning_rate": 8.025641025641026e-06, "loss": 0.3086, "step": 316 }, { "epoch": 0.30480769230769234, "grad_norm": 9.602910995483398, "learning_rate": 8.051282051282052e-06, "loss": 0.8077, "step": 317 }, { "epoch": 0.3057692307692308, "grad_norm": 9.804241180419922, "learning_rate": 8.076923076923077e-06, "loss": 1.0182, "step": 318 }, { "epoch": 0.30673076923076925, "grad_norm": 8.25068187713623, "learning_rate": 8.102564102564103e-06, "loss": 0.6813, "step": 319 }, { "epoch": 0.3076923076923077, "grad_norm": 9.197726249694824, "learning_rate": 8.12820512820513e-06, "loss": 0.717, "step": 320 }, { "epoch": 0.30865384615384617, "grad_norm": 10.56545352935791, "learning_rate": 8.153846153846154e-06, "loss": 1.611, "step": 321 }, { "epoch": 0.3096153846153846, "grad_norm": 8.247235298156738, "learning_rate": 8.17948717948718e-06, "loss": 0.7127, "step": 322 }, { "epoch": 0.3105769230769231, "grad_norm": 9.01939582824707, "learning_rate": 8.205128205128205e-06, "loss": 0.6649, "step": 323 }, { "epoch": 0.31153846153846154, "grad_norm": 7.817423343658447, "learning_rate": 8.230769230769232e-06, "loss": 1.0717, "step": 324 }, { "epoch": 0.3125, "grad_norm": 8.925554275512695, "learning_rate": 8.256410256410256e-06, "loss": 1.1198, "step": 325 }, { "epoch": 0.31346153846153846, "grad_norm": 7.264765739440918, "learning_rate": 8.282051282051283e-06, "loss": 0.6299, "step": 326 }, { "epoch": 0.3144230769230769, "grad_norm": 9.214244842529297, "learning_rate": 8.307692307692309e-06, "loss": 0.9225, "step": 327 }, { "epoch": 0.3153846153846154, "grad_norm": 9.621174812316895, "learning_rate": 8.333333333333334e-06, "loss": 0.8955, "step": 328 }, { "epoch": 0.31634615384615383, "grad_norm": 7.8126220703125, "learning_rate": 8.35897435897436e-06, "loss": 0.5741, "step": 329 }, { "epoch": 0.3173076923076923, "grad_norm": 8.572965621948242, "learning_rate": 8.384615384615385e-06, "loss": 1.0306, "step": 330 }, { "epoch": 0.31826923076923075, "grad_norm": 8.105307579040527, "learning_rate": 8.410256410256411e-06, "loss": 0.6302, "step": 331 }, { "epoch": 0.3192307692307692, "grad_norm": 10.710783958435059, "learning_rate": 8.435897435897436e-06, "loss": 1.0481, "step": 332 }, { "epoch": 0.32019230769230766, "grad_norm": 7.944536209106445, "learning_rate": 8.461538461538462e-06, "loss": 0.5534, "step": 333 }, { "epoch": 0.3211538461538462, "grad_norm": 9.179069519042969, "learning_rate": 8.487179487179488e-06, "loss": 0.7912, "step": 334 }, { "epoch": 0.32211538461538464, "grad_norm": 7.969615936279297, "learning_rate": 8.512820512820513e-06, "loss": 0.6604, "step": 335 }, { "epoch": 0.3230769230769231, "grad_norm": 7.965432167053223, "learning_rate": 8.53846153846154e-06, "loss": 0.5771, "step": 336 }, { "epoch": 0.32403846153846155, "grad_norm": 9.68032455444336, "learning_rate": 8.564102564102564e-06, "loss": 1.1913, "step": 337 }, { "epoch": 0.325, "grad_norm": 9.199604034423828, "learning_rate": 8.58974358974359e-06, "loss": 0.9211, "step": 338 }, { "epoch": 0.32596153846153847, "grad_norm": 7.4299445152282715, "learning_rate": 8.615384615384617e-06, "loss": 0.4437, "step": 339 }, { "epoch": 0.3269230769230769, "grad_norm": 9.290430068969727, "learning_rate": 8.641025641025641e-06, "loss": 0.8427, "step": 340 }, { "epoch": 0.3278846153846154, "grad_norm": 7.3889970779418945, "learning_rate": 8.666666666666668e-06, "loss": 0.4306, "step": 341 }, { "epoch": 0.32884615384615384, "grad_norm": 8.50532341003418, "learning_rate": 8.692307692307692e-06, "loss": 1.1415, "step": 342 }, { "epoch": 0.3298076923076923, "grad_norm": 9.662978172302246, "learning_rate": 8.717948717948719e-06, "loss": 1.2592, "step": 343 }, { "epoch": 0.33076923076923076, "grad_norm": 8.075116157531738, "learning_rate": 8.743589743589743e-06, "loss": 0.8235, "step": 344 }, { "epoch": 0.3317307692307692, "grad_norm": 8.526647567749023, "learning_rate": 8.76923076923077e-06, "loss": 1.0827, "step": 345 }, { "epoch": 0.3326923076923077, "grad_norm": 9.137289047241211, "learning_rate": 8.794871794871796e-06, "loss": 1.2826, "step": 346 }, { "epoch": 0.33365384615384613, "grad_norm": 7.309188365936279, "learning_rate": 8.820512820512821e-06, "loss": 0.4225, "step": 347 }, { "epoch": 0.3346153846153846, "grad_norm": 8.860150337219238, "learning_rate": 8.846153846153847e-06, "loss": 0.8637, "step": 348 }, { "epoch": 0.33557692307692305, "grad_norm": 7.926260471343994, "learning_rate": 8.871794871794872e-06, "loss": 0.6019, "step": 349 }, { "epoch": 0.33653846153846156, "grad_norm": 9.069371223449707, "learning_rate": 8.897435897435898e-06, "loss": 0.8765, "step": 350 }, { "epoch": 0.3375, "grad_norm": 7.745975971221924, "learning_rate": 8.923076923076925e-06, "loss": 0.6005, "step": 351 }, { "epoch": 0.3384615384615385, "grad_norm": 8.979557991027832, "learning_rate": 8.94871794871795e-06, "loss": 0.5521, "step": 352 }, { "epoch": 0.33942307692307694, "grad_norm": 10.006732940673828, "learning_rate": 8.974358974358976e-06, "loss": 1.4257, "step": 353 }, { "epoch": 0.3403846153846154, "grad_norm": 9.484447479248047, "learning_rate": 9e-06, "loss": 1.1685, "step": 354 }, { "epoch": 0.34134615384615385, "grad_norm": 9.471383094787598, "learning_rate": 9.025641025641027e-06, "loss": 1.2541, "step": 355 }, { "epoch": 0.3423076923076923, "grad_norm": 7.0869059562683105, "learning_rate": 9.051282051282051e-06, "loss": 0.5158, "step": 356 }, { "epoch": 0.34326923076923077, "grad_norm": 7.7202324867248535, "learning_rate": 9.076923076923078e-06, "loss": 0.5673, "step": 357 }, { "epoch": 0.34423076923076923, "grad_norm": 7.422638416290283, "learning_rate": 9.102564102564104e-06, "loss": 0.5753, "step": 358 }, { "epoch": 0.3451923076923077, "grad_norm": 8.475296020507812, "learning_rate": 9.128205128205129e-06, "loss": 0.9506, "step": 359 }, { "epoch": 0.34615384615384615, "grad_norm": 9.563044548034668, "learning_rate": 9.153846153846155e-06, "loss": 0.8389, "step": 360 }, { "epoch": 0.3471153846153846, "grad_norm": 7.336026191711426, "learning_rate": 9.17948717948718e-06, "loss": 0.4644, "step": 361 }, { "epoch": 0.34807692307692306, "grad_norm": 9.002288818359375, "learning_rate": 9.205128205128206e-06, "loss": 1.1292, "step": 362 }, { "epoch": 0.3490384615384615, "grad_norm": 8.545870780944824, "learning_rate": 9.230769230769232e-06, "loss": 1.2283, "step": 363 }, { "epoch": 0.35, "grad_norm": 10.292200088500977, "learning_rate": 9.256410256410257e-06, "loss": 1.2614, "step": 364 }, { "epoch": 0.35096153846153844, "grad_norm": 7.217926025390625, "learning_rate": 9.282051282051283e-06, "loss": 0.5701, "step": 365 }, { "epoch": 0.35192307692307695, "grad_norm": 8.58437442779541, "learning_rate": 9.307692307692308e-06, "loss": 0.7826, "step": 366 }, { "epoch": 0.3528846153846154, "grad_norm": 8.34713077545166, "learning_rate": 9.333333333333334e-06, "loss": 0.8318, "step": 367 }, { "epoch": 0.35384615384615387, "grad_norm": 9.026484489440918, "learning_rate": 9.358974358974359e-06, "loss": 0.9734, "step": 368 }, { "epoch": 0.3548076923076923, "grad_norm": 7.8774871826171875, "learning_rate": 9.384615384615385e-06, "loss": 0.8451, "step": 369 }, { "epoch": 0.3557692307692308, "grad_norm": 6.700931549072266, "learning_rate": 9.410256410256412e-06, "loss": 0.955, "step": 370 }, { "epoch": 0.35673076923076924, "grad_norm": 8.568182945251465, "learning_rate": 9.435897435897436e-06, "loss": 0.9794, "step": 371 }, { "epoch": 0.3576923076923077, "grad_norm": 10.102484703063965, "learning_rate": 9.461538461538463e-06, "loss": 0.9595, "step": 372 }, { "epoch": 0.35865384615384616, "grad_norm": 7.9015116691589355, "learning_rate": 9.487179487179487e-06, "loss": 0.6395, "step": 373 }, { "epoch": 0.3596153846153846, "grad_norm": 7.1612868309021, "learning_rate": 9.512820512820514e-06, "loss": 0.8679, "step": 374 }, { "epoch": 0.3605769230769231, "grad_norm": 9.129437446594238, "learning_rate": 9.53846153846154e-06, "loss": 1.1047, "step": 375 }, { "epoch": 0.36153846153846153, "grad_norm": 8.51754379272461, "learning_rate": 9.564102564102565e-06, "loss": 0.6576, "step": 376 }, { "epoch": 0.3625, "grad_norm": 8.742071151733398, "learning_rate": 9.589743589743591e-06, "loss": 1.1377, "step": 377 }, { "epoch": 0.36346153846153845, "grad_norm": 7.867262840270996, "learning_rate": 9.615384615384616e-06, "loss": 0.5862, "step": 378 }, { "epoch": 0.3644230769230769, "grad_norm": 8.97382926940918, "learning_rate": 9.641025641025642e-06, "loss": 0.805, "step": 379 }, { "epoch": 0.36538461538461536, "grad_norm": 8.339468002319336, "learning_rate": 9.666666666666667e-06, "loss": 0.8164, "step": 380 }, { "epoch": 0.3663461538461538, "grad_norm": 8.736485481262207, "learning_rate": 9.692307692307693e-06, "loss": 0.9169, "step": 381 }, { "epoch": 0.36730769230769234, "grad_norm": 8.281497955322266, "learning_rate": 9.71794871794872e-06, "loss": 0.7029, "step": 382 }, { "epoch": 0.3682692307692308, "grad_norm": 7.729371070861816, "learning_rate": 9.743589743589744e-06, "loss": 0.9259, "step": 383 }, { "epoch": 0.36923076923076925, "grad_norm": 6.943058490753174, "learning_rate": 9.76923076923077e-06, "loss": 0.8542, "step": 384 }, { "epoch": 0.3701923076923077, "grad_norm": 6.543966293334961, "learning_rate": 9.794871794871795e-06, "loss": 0.401, "step": 385 }, { "epoch": 0.37115384615384617, "grad_norm": 10.42209243774414, "learning_rate": 9.820512820512821e-06, "loss": 0.9635, "step": 386 }, { "epoch": 0.3721153846153846, "grad_norm": 9.184078216552734, "learning_rate": 9.846153846153848e-06, "loss": 0.9368, "step": 387 }, { "epoch": 0.3730769230769231, "grad_norm": 10.318554878234863, "learning_rate": 9.871794871794872e-06, "loss": 1.3675, "step": 388 }, { "epoch": 0.37403846153846154, "grad_norm": 7.358495712280273, "learning_rate": 9.897435897435899e-06, "loss": 0.7271, "step": 389 }, { "epoch": 0.375, "grad_norm": 8.230097770690918, "learning_rate": 9.923076923076923e-06, "loss": 0.6856, "step": 390 }, { "epoch": 0.37596153846153846, "grad_norm": 8.46807861328125, "learning_rate": 9.94871794871795e-06, "loss": 0.6868, "step": 391 }, { "epoch": 0.3769230769230769, "grad_norm": 6.979742050170898, "learning_rate": 9.974358974358974e-06, "loss": 0.4099, "step": 392 }, { "epoch": 0.3778846153846154, "grad_norm": 8.487154006958008, "learning_rate": 1e-05, "loss": 0.8506, "step": 393 }, { "epoch": 0.37884615384615383, "grad_norm": 9.380415916442871, "learning_rate": 1.0025641025641027e-05, "loss": 0.8587, "step": 394 }, { "epoch": 0.3798076923076923, "grad_norm": 6.502002239227295, "learning_rate": 1.0051282051282052e-05, "loss": 0.5748, "step": 395 }, { "epoch": 0.38076923076923075, "grad_norm": 8.722222328186035, "learning_rate": 1.0076923076923078e-05, "loss": 1.3082, "step": 396 }, { "epoch": 0.3817307692307692, "grad_norm": 7.609983921051025, "learning_rate": 1.0102564102564103e-05, "loss": 0.6665, "step": 397 }, { "epoch": 0.38269230769230766, "grad_norm": 8.313253402709961, "learning_rate": 1.012820512820513e-05, "loss": 0.9635, "step": 398 }, { "epoch": 0.3836538461538462, "grad_norm": 9.58018684387207, "learning_rate": 1.0153846153846154e-05, "loss": 1.3358, "step": 399 }, { "epoch": 0.38461538461538464, "grad_norm": 7.8410325050354, "learning_rate": 1.017948717948718e-05, "loss": 0.629, "step": 400 }, { "epoch": 0.3855769230769231, "grad_norm": 8.535893440246582, "learning_rate": 1.0205128205128205e-05, "loss": 0.6902, "step": 401 }, { "epoch": 0.38653846153846155, "grad_norm": 7.434895038604736, "learning_rate": 1.0230769230769231e-05, "loss": 0.6108, "step": 402 }, { "epoch": 0.3875, "grad_norm": 7.844598770141602, "learning_rate": 1.0256410256410256e-05, "loss": 0.8299, "step": 403 }, { "epoch": 0.38846153846153847, "grad_norm": 8.61905574798584, "learning_rate": 1.0282051282051282e-05, "loss": 0.7598, "step": 404 }, { "epoch": 0.3894230769230769, "grad_norm": 7.763619899749756, "learning_rate": 1.0307692307692307e-05, "loss": 0.5589, "step": 405 }, { "epoch": 0.3903846153846154, "grad_norm": 8.295074462890625, "learning_rate": 1.0333333333333335e-05, "loss": 0.9741, "step": 406 }, { "epoch": 0.39134615384615384, "grad_norm": 6.708773612976074, "learning_rate": 1.0358974358974361e-05, "loss": 0.3901, "step": 407 }, { "epoch": 0.3923076923076923, "grad_norm": 9.575977325439453, "learning_rate": 1.0384615384615386e-05, "loss": 1.2412, "step": 408 }, { "epoch": 0.39326923076923076, "grad_norm": 8.327474594116211, "learning_rate": 1.0410256410256412e-05, "loss": 1.0042, "step": 409 }, { "epoch": 0.3942307692307692, "grad_norm": 9.113064765930176, "learning_rate": 1.0435897435897437e-05, "loss": 1.2704, "step": 410 }, { "epoch": 0.3951923076923077, "grad_norm": 7.856817722320557, "learning_rate": 1.0461538461538463e-05, "loss": 0.7874, "step": 411 }, { "epoch": 0.39615384615384613, "grad_norm": 8.529354095458984, "learning_rate": 1.0487179487179488e-05, "loss": 0.7631, "step": 412 }, { "epoch": 0.3971153846153846, "grad_norm": 8.926729202270508, "learning_rate": 1.0512820512820514e-05, "loss": 0.9469, "step": 413 }, { "epoch": 0.39807692307692305, "grad_norm": 8.931944847106934, "learning_rate": 1.0538461538461539e-05, "loss": 0.9986, "step": 414 }, { "epoch": 0.39903846153846156, "grad_norm": 7.457026958465576, "learning_rate": 1.0564102564102565e-05, "loss": 0.7398, "step": 415 }, { "epoch": 0.4, "grad_norm": 7.940420150756836, "learning_rate": 1.058974358974359e-05, "loss": 0.6948, "step": 416 }, { "epoch": 0.4009615384615385, "grad_norm": 6.70521879196167, "learning_rate": 1.0615384615384616e-05, "loss": 0.5887, "step": 417 }, { "epoch": 0.40192307692307694, "grad_norm": 8.30781078338623, "learning_rate": 1.0641025641025643e-05, "loss": 0.5755, "step": 418 }, { "epoch": 0.4028846153846154, "grad_norm": 8.923882484436035, "learning_rate": 1.0666666666666667e-05, "loss": 0.8345, "step": 419 }, { "epoch": 0.40384615384615385, "grad_norm": 7.476070404052734, "learning_rate": 1.0692307692307694e-05, "loss": 0.4981, "step": 420 }, { "epoch": 0.4048076923076923, "grad_norm": 8.64346981048584, "learning_rate": 1.0717948717948718e-05, "loss": 1.1387, "step": 421 }, { "epoch": 0.40576923076923077, "grad_norm": 7.042242527008057, "learning_rate": 1.0743589743589745e-05, "loss": 0.5901, "step": 422 }, { "epoch": 0.40673076923076923, "grad_norm": 8.018682479858398, "learning_rate": 1.076923076923077e-05, "loss": 0.6741, "step": 423 }, { "epoch": 0.4076923076923077, "grad_norm": 8.20299243927002, "learning_rate": 1.0794871794871796e-05, "loss": 0.5441, "step": 424 }, { "epoch": 0.40865384615384615, "grad_norm": 8.418375968933105, "learning_rate": 1.082051282051282e-05, "loss": 1.017, "step": 425 }, { "epoch": 0.4096153846153846, "grad_norm": 7.524357318878174, "learning_rate": 1.0846153846153847e-05, "loss": 0.5358, "step": 426 }, { "epoch": 0.41057692307692306, "grad_norm": 8.001384735107422, "learning_rate": 1.0871794871794871e-05, "loss": 1.3184, "step": 427 }, { "epoch": 0.4115384615384615, "grad_norm": 8.656661987304688, "learning_rate": 1.0897435897435898e-05, "loss": 0.8281, "step": 428 }, { "epoch": 0.4125, "grad_norm": 7.513379096984863, "learning_rate": 1.0923076923076922e-05, "loss": 0.5184, "step": 429 }, { "epoch": 0.41346153846153844, "grad_norm": 10.461894035339355, "learning_rate": 1.094871794871795e-05, "loss": 0.8765, "step": 430 }, { "epoch": 0.41442307692307695, "grad_norm": 8.344414710998535, "learning_rate": 1.0974358974358977e-05, "loss": 0.8239, "step": 431 }, { "epoch": 0.4153846153846154, "grad_norm": 8.235920906066895, "learning_rate": 1.1000000000000001e-05, "loss": 0.7631, "step": 432 }, { "epoch": 0.41634615384615387, "grad_norm": 6.6696062088012695, "learning_rate": 1.1025641025641028e-05, "loss": 0.6474, "step": 433 }, { "epoch": 0.4173076923076923, "grad_norm": 9.239328384399414, "learning_rate": 1.1051282051282052e-05, "loss": 0.9831, "step": 434 }, { "epoch": 0.4182692307692308, "grad_norm": 7.416872978210449, "learning_rate": 1.1076923076923079e-05, "loss": 0.7394, "step": 435 }, { "epoch": 0.41923076923076924, "grad_norm": 7.915192604064941, "learning_rate": 1.1102564102564103e-05, "loss": 0.6898, "step": 436 }, { "epoch": 0.4201923076923077, "grad_norm": 8.96947956085205, "learning_rate": 1.112820512820513e-05, "loss": 0.8277, "step": 437 }, { "epoch": 0.42115384615384616, "grad_norm": 8.207382202148438, "learning_rate": 1.1153846153846154e-05, "loss": 0.7907, "step": 438 }, { "epoch": 0.4221153846153846, "grad_norm": 8.701887130737305, "learning_rate": 1.117948717948718e-05, "loss": 0.7046, "step": 439 }, { "epoch": 0.4230769230769231, "grad_norm": 9.812532424926758, "learning_rate": 1.1205128205128205e-05, "loss": 1.3103, "step": 440 }, { "epoch": 0.42403846153846153, "grad_norm": 9.953906059265137, "learning_rate": 1.1230769230769232e-05, "loss": 1.0536, "step": 441 }, { "epoch": 0.425, "grad_norm": 10.1231050491333, "learning_rate": 1.1256410256410258e-05, "loss": 1.3544, "step": 442 }, { "epoch": 0.42596153846153845, "grad_norm": 9.050368309020996, "learning_rate": 1.1282051282051283e-05, "loss": 1.4575, "step": 443 }, { "epoch": 0.4269230769230769, "grad_norm": 9.093111991882324, "learning_rate": 1.1307692307692309e-05, "loss": 0.7703, "step": 444 }, { "epoch": 0.42788461538461536, "grad_norm": 8.184704780578613, "learning_rate": 1.1333333333333334e-05, "loss": 0.6527, "step": 445 }, { "epoch": 0.4288461538461538, "grad_norm": 8.017678260803223, "learning_rate": 1.135897435897436e-05, "loss": 0.7337, "step": 446 }, { "epoch": 0.42980769230769234, "grad_norm": 7.031343936920166, "learning_rate": 1.1384615384615385e-05, "loss": 0.8203, "step": 447 }, { "epoch": 0.4307692307692308, "grad_norm": 6.514930725097656, "learning_rate": 1.1410256410256411e-05, "loss": 0.4544, "step": 448 }, { "epoch": 0.43173076923076925, "grad_norm": 8.156758308410645, "learning_rate": 1.1435897435897436e-05, "loss": 1.0836, "step": 449 }, { "epoch": 0.4326923076923077, "grad_norm": 8.961572647094727, "learning_rate": 1.1461538461538462e-05, "loss": 1.6157, "step": 450 }, { "epoch": 0.43365384615384617, "grad_norm": 7.261416912078857, "learning_rate": 1.1487179487179487e-05, "loss": 0.7036, "step": 451 }, { "epoch": 0.4346153846153846, "grad_norm": 7.604365348815918, "learning_rate": 1.1512820512820513e-05, "loss": 0.7255, "step": 452 }, { "epoch": 0.4355769230769231, "grad_norm": 8.588265419006348, "learning_rate": 1.1538461538461538e-05, "loss": 1.3198, "step": 453 }, { "epoch": 0.43653846153846154, "grad_norm": 7.790297031402588, "learning_rate": 1.1564102564102566e-05, "loss": 0.7266, "step": 454 }, { "epoch": 0.4375, "grad_norm": 9.284027099609375, "learning_rate": 1.1589743589743592e-05, "loss": 0.8574, "step": 455 }, { "epoch": 0.43846153846153846, "grad_norm": 7.703795433044434, "learning_rate": 1.1615384615384617e-05, "loss": 0.7547, "step": 456 }, { "epoch": 0.4394230769230769, "grad_norm": 9.829050064086914, "learning_rate": 1.1641025641025643e-05, "loss": 1.1195, "step": 457 }, { "epoch": 0.4403846153846154, "grad_norm": 9.434103965759277, "learning_rate": 1.1666666666666668e-05, "loss": 1.4554, "step": 458 }, { "epoch": 0.44134615384615383, "grad_norm": 7.93222188949585, "learning_rate": 1.1692307692307694e-05, "loss": 1.1234, "step": 459 }, { "epoch": 0.4423076923076923, "grad_norm": 7.750814914703369, "learning_rate": 1.1717948717948719e-05, "loss": 0.799, "step": 460 }, { "epoch": 0.44326923076923075, "grad_norm": 8.874356269836426, "learning_rate": 1.1743589743589745e-05, "loss": 0.7024, "step": 461 }, { "epoch": 0.4442307692307692, "grad_norm": 6.455907344818115, "learning_rate": 1.176923076923077e-05, "loss": 0.4273, "step": 462 }, { "epoch": 0.44519230769230766, "grad_norm": 7.953812122344971, "learning_rate": 1.1794871794871796e-05, "loss": 1.0307, "step": 463 }, { "epoch": 0.4461538461538462, "grad_norm": 8.901692390441895, "learning_rate": 1.1820512820512821e-05, "loss": 0.7478, "step": 464 }, { "epoch": 0.44711538461538464, "grad_norm": 7.995438098907471, "learning_rate": 1.1846153846153847e-05, "loss": 0.734, "step": 465 }, { "epoch": 0.4480769230769231, "grad_norm": 7.660943984985352, "learning_rate": 1.1871794871794872e-05, "loss": 0.6478, "step": 466 }, { "epoch": 0.44903846153846155, "grad_norm": 8.341322898864746, "learning_rate": 1.1897435897435898e-05, "loss": 1.0197, "step": 467 }, { "epoch": 0.45, "grad_norm": 6.672694206237793, "learning_rate": 1.1923076923076925e-05, "loss": 0.3765, "step": 468 }, { "epoch": 0.45096153846153847, "grad_norm": 7.818190097808838, "learning_rate": 1.194871794871795e-05, "loss": 0.648, "step": 469 }, { "epoch": 0.4519230769230769, "grad_norm": 7.730825424194336, "learning_rate": 1.1974358974358976e-05, "loss": 1.1672, "step": 470 }, { "epoch": 0.4528846153846154, "grad_norm": 6.2274322509765625, "learning_rate": 1.2e-05, "loss": 0.4636, "step": 471 }, { "epoch": 0.45384615384615384, "grad_norm": 8.187274932861328, "learning_rate": 1.2025641025641027e-05, "loss": 0.9874, "step": 472 }, { "epoch": 0.4548076923076923, "grad_norm": 7.19549560546875, "learning_rate": 1.2051282051282051e-05, "loss": 0.6971, "step": 473 }, { "epoch": 0.45576923076923076, "grad_norm": 6.905342102050781, "learning_rate": 1.2076923076923078e-05, "loss": 0.638, "step": 474 }, { "epoch": 0.4567307692307692, "grad_norm": 7.720895767211914, "learning_rate": 1.2102564102564102e-05, "loss": 0.6913, "step": 475 }, { "epoch": 0.4576923076923077, "grad_norm": 7.188044548034668, "learning_rate": 1.2128205128205129e-05, "loss": 0.6554, "step": 476 }, { "epoch": 0.45865384615384613, "grad_norm": 6.8078742027282715, "learning_rate": 1.2153846153846153e-05, "loss": 0.6465, "step": 477 }, { "epoch": 0.4596153846153846, "grad_norm": 9.170162200927734, "learning_rate": 1.217948717948718e-05, "loss": 0.9519, "step": 478 }, { "epoch": 0.46057692307692305, "grad_norm": 9.736847877502441, "learning_rate": 1.2205128205128208e-05, "loss": 1.566, "step": 479 }, { "epoch": 0.46153846153846156, "grad_norm": 8.107067108154297, "learning_rate": 1.2230769230769232e-05, "loss": 1.0082, "step": 480 }, { "epoch": 0.4625, "grad_norm": 7.765689373016357, "learning_rate": 1.2256410256410259e-05, "loss": 0.8989, "step": 481 }, { "epoch": 0.4634615384615385, "grad_norm": 9.959980964660645, "learning_rate": 1.2282051282051283e-05, "loss": 0.9586, "step": 482 }, { "epoch": 0.46442307692307694, "grad_norm": 9.025643348693848, "learning_rate": 1.230769230769231e-05, "loss": 1.1365, "step": 483 }, { "epoch": 0.4653846153846154, "grad_norm": 8.855652809143066, "learning_rate": 1.2333333333333334e-05, "loss": 0.8469, "step": 484 }, { "epoch": 0.46634615384615385, "grad_norm": 7.422694683074951, "learning_rate": 1.235897435897436e-05, "loss": 0.5353, "step": 485 }, { "epoch": 0.4673076923076923, "grad_norm": 7.596504211425781, "learning_rate": 1.2384615384615385e-05, "loss": 0.7027, "step": 486 }, { "epoch": 0.46826923076923077, "grad_norm": 7.856118679046631, "learning_rate": 1.2410256410256412e-05, "loss": 0.967, "step": 487 }, { "epoch": 0.46923076923076923, "grad_norm": 7.51730489730835, "learning_rate": 1.2435897435897436e-05, "loss": 0.8639, "step": 488 }, { "epoch": 0.4701923076923077, "grad_norm": 7.394543170928955, "learning_rate": 1.2461538461538463e-05, "loss": 0.4666, "step": 489 }, { "epoch": 0.47115384615384615, "grad_norm": 9.68587875366211, "learning_rate": 1.2487179487179487e-05, "loss": 1.3349, "step": 490 }, { "epoch": 0.4721153846153846, "grad_norm": 10.412981033325195, "learning_rate": 1.2512820512820514e-05, "loss": 1.883, "step": 491 }, { "epoch": 0.47307692307692306, "grad_norm": 7.710290431976318, "learning_rate": 1.253846153846154e-05, "loss": 0.8326, "step": 492 }, { "epoch": 0.4740384615384615, "grad_norm": 8.308316230773926, "learning_rate": 1.2564102564102565e-05, "loss": 1.1381, "step": 493 }, { "epoch": 0.475, "grad_norm": 7.887458801269531, "learning_rate": 1.2589743589743591e-05, "loss": 0.8211, "step": 494 }, { "epoch": 0.47596153846153844, "grad_norm": 8.91278076171875, "learning_rate": 1.2615384615384616e-05, "loss": 1.2087, "step": 495 }, { "epoch": 0.47692307692307695, "grad_norm": 7.351099967956543, "learning_rate": 1.2641025641025642e-05, "loss": 0.5465, "step": 496 }, { "epoch": 0.4778846153846154, "grad_norm": 8.045005798339844, "learning_rate": 1.2666666666666667e-05, "loss": 0.786, "step": 497 }, { "epoch": 0.47884615384615387, "grad_norm": 8.04033088684082, "learning_rate": 1.2692307692307693e-05, "loss": 0.8907, "step": 498 }, { "epoch": 0.4798076923076923, "grad_norm": 6.949219226837158, "learning_rate": 1.2717948717948718e-05, "loss": 0.5668, "step": 499 }, { "epoch": 0.4807692307692308, "grad_norm": 7.7279229164123535, "learning_rate": 1.2743589743589744e-05, "loss": 0.749, "step": 500 }, { "epoch": 0.48173076923076924, "grad_norm": 7.597370624542236, "learning_rate": 1.2769230769230769e-05, "loss": 0.7688, "step": 501 }, { "epoch": 0.4826923076923077, "grad_norm": 8.653313636779785, "learning_rate": 1.2794871794871795e-05, "loss": 0.985, "step": 502 }, { "epoch": 0.48365384615384616, "grad_norm": 6.855794906616211, "learning_rate": 1.2820512820512823e-05, "loss": 0.5028, "step": 503 }, { "epoch": 0.4846153846153846, "grad_norm": 7.0230021476745605, "learning_rate": 1.2846153846153848e-05, "loss": 0.6477, "step": 504 }, { "epoch": 0.4855769230769231, "grad_norm": 7.528388023376465, "learning_rate": 1.2871794871794874e-05, "loss": 0.6604, "step": 505 }, { "epoch": 0.48653846153846153, "grad_norm": 8.55418586730957, "learning_rate": 1.2897435897435899e-05, "loss": 1.1533, "step": 506 }, { "epoch": 0.4875, "grad_norm": 8.692137718200684, "learning_rate": 1.2923076923076925e-05, "loss": 1.1561, "step": 507 }, { "epoch": 0.48846153846153845, "grad_norm": 7.554074287414551, "learning_rate": 1.294871794871795e-05, "loss": 0.6916, "step": 508 }, { "epoch": 0.4894230769230769, "grad_norm": 7.34942626953125, "learning_rate": 1.2974358974358976e-05, "loss": 0.8136, "step": 509 }, { "epoch": 0.49038461538461536, "grad_norm": 8.675580978393555, "learning_rate": 1.3000000000000001e-05, "loss": 0.7611, "step": 510 }, { "epoch": 0.4913461538461538, "grad_norm": 7.299642562866211, "learning_rate": 1.3025641025641027e-05, "loss": 0.7824, "step": 511 }, { "epoch": 0.49230769230769234, "grad_norm": 8.491894721984863, "learning_rate": 1.3051282051282052e-05, "loss": 0.7294, "step": 512 }, { "epoch": 0.4932692307692308, "grad_norm": 6.390843391418457, "learning_rate": 1.3076923076923078e-05, "loss": 0.6742, "step": 513 }, { "epoch": 0.49423076923076925, "grad_norm": 7.220247268676758, "learning_rate": 1.3102564102564103e-05, "loss": 0.426, "step": 514 }, { "epoch": 0.4951923076923077, "grad_norm": 7.98738431930542, "learning_rate": 1.312820512820513e-05, "loss": 0.9467, "step": 515 }, { "epoch": 0.49615384615384617, "grad_norm": 6.882865905761719, "learning_rate": 1.3153846153846156e-05, "loss": 0.78, "step": 516 }, { "epoch": 0.4971153846153846, "grad_norm": 8.366512298583984, "learning_rate": 1.317948717948718e-05, "loss": 0.8516, "step": 517 }, { "epoch": 0.4980769230769231, "grad_norm": 8.064122200012207, "learning_rate": 1.3205128205128207e-05, "loss": 0.9697, "step": 518 }, { "epoch": 0.49903846153846154, "grad_norm": 7.268953800201416, "learning_rate": 1.3230769230769231e-05, "loss": 0.7024, "step": 519 }, { "epoch": 0.5, "grad_norm": 8.361438751220703, "learning_rate": 1.3256410256410258e-05, "loss": 1.3434, "step": 520 }, { "epoch": 0.5009615384615385, "grad_norm": 8.332623481750488, "learning_rate": 1.3282051282051282e-05, "loss": 1.2325, "step": 521 }, { "epoch": 0.5019230769230769, "grad_norm": 7.26149320602417, "learning_rate": 1.3307692307692309e-05, "loss": 0.623, "step": 522 }, { "epoch": 0.5028846153846154, "grad_norm": 7.557464122772217, "learning_rate": 1.3333333333333333e-05, "loss": 1.2287, "step": 523 }, { "epoch": 0.5038461538461538, "grad_norm": 8.32526683807373, "learning_rate": 1.335897435897436e-05, "loss": 0.8783, "step": 524 }, { "epoch": 0.5048076923076923, "grad_norm": 7.680133819580078, "learning_rate": 1.3384615384615384e-05, "loss": 1.0229, "step": 525 }, { "epoch": 0.5057692307692307, "grad_norm": 8.334049224853516, "learning_rate": 1.341025641025641e-05, "loss": 1.154, "step": 526 }, { "epoch": 0.5067307692307692, "grad_norm": 7.339221477508545, "learning_rate": 1.3435897435897435e-05, "loss": 1.035, "step": 527 }, { "epoch": 0.5076923076923077, "grad_norm": 8.143875122070312, "learning_rate": 1.3461538461538463e-05, "loss": 0.9459, "step": 528 }, { "epoch": 0.5086538461538461, "grad_norm": 7.870586395263672, "learning_rate": 1.348717948717949e-05, "loss": 0.7356, "step": 529 }, { "epoch": 0.5096153846153846, "grad_norm": 8.320731163024902, "learning_rate": 1.3512820512820514e-05, "loss": 1.139, "step": 530 }, { "epoch": 0.510576923076923, "grad_norm": 9.08300495147705, "learning_rate": 1.353846153846154e-05, "loss": 0.8781, "step": 531 }, { "epoch": 0.5115384615384615, "grad_norm": 7.674694061279297, "learning_rate": 1.3564102564102565e-05, "loss": 0.7314, "step": 532 }, { "epoch": 0.5125, "grad_norm": 6.561878681182861, "learning_rate": 1.3589743589743592e-05, "loss": 0.4393, "step": 533 }, { "epoch": 0.5134615384615384, "grad_norm": 8.372283935546875, "learning_rate": 1.3615384615384616e-05, "loss": 0.9977, "step": 534 }, { "epoch": 0.5144230769230769, "grad_norm": 6.4248270988464355, "learning_rate": 1.3641025641025643e-05, "loss": 0.7914, "step": 535 }, { "epoch": 0.5153846153846153, "grad_norm": 8.095027923583984, "learning_rate": 1.3666666666666667e-05, "loss": 0.8121, "step": 536 }, { "epoch": 0.5163461538461539, "grad_norm": 8.571704864501953, "learning_rate": 1.3692307692307694e-05, "loss": 1.0945, "step": 537 }, { "epoch": 0.5173076923076924, "grad_norm": 7.320196151733398, "learning_rate": 1.3717948717948718e-05, "loss": 0.7917, "step": 538 }, { "epoch": 0.5182692307692308, "grad_norm": 8.053816795349121, "learning_rate": 1.3743589743589745e-05, "loss": 0.9224, "step": 539 }, { "epoch": 0.5192307692307693, "grad_norm": 8.663751602172852, "learning_rate": 1.3769230769230771e-05, "loss": 1.0903, "step": 540 }, { "epoch": 0.5201923076923077, "grad_norm": 7.716609477996826, "learning_rate": 1.3794871794871796e-05, "loss": 0.6318, "step": 541 }, { "epoch": 0.5211538461538462, "grad_norm": 7.837393760681152, "learning_rate": 1.3820512820512822e-05, "loss": 0.9504, "step": 542 }, { "epoch": 0.5221153846153846, "grad_norm": 7.708962440490723, "learning_rate": 1.3846153846153847e-05, "loss": 0.8091, "step": 543 }, { "epoch": 0.5230769230769231, "grad_norm": 7.15969705581665, "learning_rate": 1.3871794871794873e-05, "loss": 0.7648, "step": 544 }, { "epoch": 0.5240384615384616, "grad_norm": 7.928896427154541, "learning_rate": 1.3897435897435898e-05, "loss": 0.9527, "step": 545 }, { "epoch": 0.525, "grad_norm": 6.767192840576172, "learning_rate": 1.3923076923076924e-05, "loss": 0.5749, "step": 546 }, { "epoch": 0.5259615384615385, "grad_norm": 8.133550643920898, "learning_rate": 1.3948717948717949e-05, "loss": 0.9161, "step": 547 }, { "epoch": 0.5269230769230769, "grad_norm": 7.955926418304443, "learning_rate": 1.3974358974358975e-05, "loss": 0.7588, "step": 548 }, { "epoch": 0.5278846153846154, "grad_norm": 6.889575958251953, "learning_rate": 1.4e-05, "loss": 0.9309, "step": 549 }, { "epoch": 0.5288461538461539, "grad_norm": 9.42802619934082, "learning_rate": 1.4025641025641026e-05, "loss": 0.6813, "step": 550 }, { "epoch": 0.5298076923076923, "grad_norm": 8.187005996704102, "learning_rate": 1.405128205128205e-05, "loss": 0.8593, "step": 551 }, { "epoch": 0.5307692307692308, "grad_norm": 7.50822639465332, "learning_rate": 1.4076923076923079e-05, "loss": 0.8694, "step": 552 }, { "epoch": 0.5317307692307692, "grad_norm": 8.53701114654541, "learning_rate": 1.4102564102564105e-05, "loss": 1.0857, "step": 553 }, { "epoch": 0.5326923076923077, "grad_norm": 6.526849746704102, "learning_rate": 1.412820512820513e-05, "loss": 0.6337, "step": 554 }, { "epoch": 0.5336538461538461, "grad_norm": 8.950912475585938, "learning_rate": 1.4153846153846156e-05, "loss": 0.9655, "step": 555 }, { "epoch": 0.5346153846153846, "grad_norm": 7.762608051300049, "learning_rate": 1.4179487179487181e-05, "loss": 0.8779, "step": 556 }, { "epoch": 0.5355769230769231, "grad_norm": 6.748490810394287, "learning_rate": 1.4205128205128207e-05, "loss": 0.5247, "step": 557 }, { "epoch": 0.5365384615384615, "grad_norm": 8.672950744628906, "learning_rate": 1.4230769230769232e-05, "loss": 1.3523, "step": 558 }, { "epoch": 0.5375, "grad_norm": 7.780045986175537, "learning_rate": 1.4256410256410258e-05, "loss": 0.6882, "step": 559 }, { "epoch": 0.5384615384615384, "grad_norm": 5.938601493835449, "learning_rate": 1.4282051282051283e-05, "loss": 0.4423, "step": 560 }, { "epoch": 0.5394230769230769, "grad_norm": 7.651951313018799, "learning_rate": 1.430769230769231e-05, "loss": 1.3337, "step": 561 }, { "epoch": 0.5403846153846154, "grad_norm": 7.8874897956848145, "learning_rate": 1.4333333333333334e-05, "loss": 0.7694, "step": 562 }, { "epoch": 0.5413461538461538, "grad_norm": 8.926796913146973, "learning_rate": 1.435897435897436e-05, "loss": 0.9213, "step": 563 }, { "epoch": 0.5423076923076923, "grad_norm": 6.814283847808838, "learning_rate": 1.4384615384615387e-05, "loss": 0.5315, "step": 564 }, { "epoch": 0.5432692307692307, "grad_norm": 6.763952732086182, "learning_rate": 1.4410256410256411e-05, "loss": 0.5016, "step": 565 }, { "epoch": 0.5442307692307692, "grad_norm": 7.38077449798584, "learning_rate": 1.4435897435897438e-05, "loss": 0.9955, "step": 566 }, { "epoch": 0.5451923076923076, "grad_norm": 6.658726692199707, "learning_rate": 1.4461538461538462e-05, "loss": 0.6814, "step": 567 }, { "epoch": 0.5461538461538461, "grad_norm": 7.994447231292725, "learning_rate": 1.4487179487179489e-05, "loss": 1.1964, "step": 568 }, { "epoch": 0.5471153846153847, "grad_norm": 7.463157653808594, "learning_rate": 1.4512820512820513e-05, "loss": 0.9771, "step": 569 }, { "epoch": 0.5480769230769231, "grad_norm": 10.048480987548828, "learning_rate": 1.453846153846154e-05, "loss": 0.9857, "step": 570 }, { "epoch": 0.5490384615384616, "grad_norm": 6.908421516418457, "learning_rate": 1.4564102564102564e-05, "loss": 0.9458, "step": 571 }, { "epoch": 0.55, "grad_norm": 8.508092880249023, "learning_rate": 1.458974358974359e-05, "loss": 1.0269, "step": 572 }, { "epoch": 0.5509615384615385, "grad_norm": 7.042726516723633, "learning_rate": 1.4615384615384615e-05, "loss": 0.7754, "step": 573 }, { "epoch": 0.551923076923077, "grad_norm": 7.3538818359375, "learning_rate": 1.4641025641025642e-05, "loss": 1.0168, "step": 574 }, { "epoch": 0.5528846153846154, "grad_norm": 6.740646839141846, "learning_rate": 1.4666666666666666e-05, "loss": 1.2298, "step": 575 }, { "epoch": 0.5538461538461539, "grad_norm": 6.27580451965332, "learning_rate": 1.4692307692307694e-05, "loss": 0.5467, "step": 576 }, { "epoch": 0.5548076923076923, "grad_norm": 8.040300369262695, "learning_rate": 1.471794871794872e-05, "loss": 0.7528, "step": 577 }, { "epoch": 0.5557692307692308, "grad_norm": 7.562304496765137, "learning_rate": 1.4743589743589745e-05, "loss": 1.0391, "step": 578 }, { "epoch": 0.5567307692307693, "grad_norm": 7.520763874053955, "learning_rate": 1.4769230769230772e-05, "loss": 0.8364, "step": 579 }, { "epoch": 0.5576923076923077, "grad_norm": 6.440932273864746, "learning_rate": 1.4794871794871796e-05, "loss": 0.7264, "step": 580 }, { "epoch": 0.5586538461538462, "grad_norm": 8.287086486816406, "learning_rate": 1.4820512820512823e-05, "loss": 1.2197, "step": 581 }, { "epoch": 0.5596153846153846, "grad_norm": 6.127686977386475, "learning_rate": 1.4846153846153847e-05, "loss": 0.5677, "step": 582 }, { "epoch": 0.5605769230769231, "grad_norm": 6.795361042022705, "learning_rate": 1.4871794871794874e-05, "loss": 0.476, "step": 583 }, { "epoch": 0.5615384615384615, "grad_norm": 8.468616485595703, "learning_rate": 1.4897435897435898e-05, "loss": 1.0809, "step": 584 }, { "epoch": 0.5625, "grad_norm": 7.933391571044922, "learning_rate": 1.4923076923076925e-05, "loss": 1.0852, "step": 585 }, { "epoch": 0.5634615384615385, "grad_norm": 6.761209964752197, "learning_rate": 1.494871794871795e-05, "loss": 0.6666, "step": 586 }, { "epoch": 0.5644230769230769, "grad_norm": 5.992575168609619, "learning_rate": 1.4974358974358976e-05, "loss": 0.5244, "step": 587 }, { "epoch": 0.5653846153846154, "grad_norm": 6.192136764526367, "learning_rate": 1.5000000000000002e-05, "loss": 0.6167, "step": 588 }, { "epoch": 0.5663461538461538, "grad_norm": 7.205977439880371, "learning_rate": 1.5025641025641027e-05, "loss": 0.5866, "step": 589 }, { "epoch": 0.5673076923076923, "grad_norm": 6.892002582550049, "learning_rate": 1.5051282051282053e-05, "loss": 0.3771, "step": 590 }, { "epoch": 0.5682692307692307, "grad_norm": 7.57423734664917, "learning_rate": 1.5076923076923078e-05, "loss": 0.8663, "step": 591 }, { "epoch": 0.5692307692307692, "grad_norm": 7.439033508300781, "learning_rate": 1.5102564102564104e-05, "loss": 0.5934, "step": 592 }, { "epoch": 0.5701923076923077, "grad_norm": 7.323331356048584, "learning_rate": 1.5128205128205129e-05, "loss": 0.8682, "step": 593 }, { "epoch": 0.5711538461538461, "grad_norm": 7.064901351928711, "learning_rate": 1.5153846153846155e-05, "loss": 0.8149, "step": 594 }, { "epoch": 0.5721153846153846, "grad_norm": 9.88998794555664, "learning_rate": 1.517948717948718e-05, "loss": 1.1928, "step": 595 }, { "epoch": 0.573076923076923, "grad_norm": 8.368447303771973, "learning_rate": 1.5205128205128206e-05, "loss": 1.0416, "step": 596 }, { "epoch": 0.5740384615384615, "grad_norm": 6.7092437744140625, "learning_rate": 1.523076923076923e-05, "loss": 0.8739, "step": 597 }, { "epoch": 0.575, "grad_norm": 7.243898868560791, "learning_rate": 1.5256410256410257e-05, "loss": 0.6762, "step": 598 }, { "epoch": 0.5759615384615384, "grad_norm": 9.727128028869629, "learning_rate": 1.5282051282051282e-05, "loss": 1.1412, "step": 599 }, { "epoch": 0.5769230769230769, "grad_norm": 6.646552562713623, "learning_rate": 1.5307692307692308e-05, "loss": 0.639, "step": 600 }, { "epoch": 0.5778846153846153, "grad_norm": 7.229769229888916, "learning_rate": 1.5333333333333334e-05, "loss": 1.0942, "step": 601 }, { "epoch": 0.5788461538461539, "grad_norm": 7.871640682220459, "learning_rate": 1.535897435897436e-05, "loss": 0.8833, "step": 602 }, { "epoch": 0.5798076923076924, "grad_norm": 8.029166221618652, "learning_rate": 1.5384615384615387e-05, "loss": 1.1114, "step": 603 }, { "epoch": 0.5807692307692308, "grad_norm": 6.441917419433594, "learning_rate": 1.5410256410256414e-05, "loss": 0.5669, "step": 604 }, { "epoch": 0.5817307692307693, "grad_norm": 6.692221641540527, "learning_rate": 1.5435897435897436e-05, "loss": 0.6622, "step": 605 }, { "epoch": 0.5826923076923077, "grad_norm": 7.867910861968994, "learning_rate": 1.5461538461538463e-05, "loss": 0.8931, "step": 606 }, { "epoch": 0.5836538461538462, "grad_norm": 9.759093284606934, "learning_rate": 1.548717948717949e-05, "loss": 1.046, "step": 607 }, { "epoch": 0.5846153846153846, "grad_norm": 7.832681179046631, "learning_rate": 1.5512820512820516e-05, "loss": 1.0544, "step": 608 }, { "epoch": 0.5855769230769231, "grad_norm": 6.277987957000732, "learning_rate": 1.553846153846154e-05, "loss": 0.6548, "step": 609 }, { "epoch": 0.5865384615384616, "grad_norm": 7.255360126495361, "learning_rate": 1.5564102564102565e-05, "loss": 0.6308, "step": 610 }, { "epoch": 0.5875, "grad_norm": 7.080732822418213, "learning_rate": 1.558974358974359e-05, "loss": 0.5899, "step": 611 }, { "epoch": 0.5884615384615385, "grad_norm": 6.825521469116211, "learning_rate": 1.5615384615384618e-05, "loss": 0.7674, "step": 612 }, { "epoch": 0.5894230769230769, "grad_norm": 7.95991849899292, "learning_rate": 1.5641025641025644e-05, "loss": 0.9074, "step": 613 }, { "epoch": 0.5903846153846154, "grad_norm": 8.688343048095703, "learning_rate": 1.5666666666666667e-05, "loss": 0.8006, "step": 614 }, { "epoch": 0.5913461538461539, "grad_norm": 10.479850769042969, "learning_rate": 1.5692307692307693e-05, "loss": 1.1637, "step": 615 }, { "epoch": 0.5923076923076923, "grad_norm": 7.0457048416137695, "learning_rate": 1.571794871794872e-05, "loss": 0.8478, "step": 616 }, { "epoch": 0.5932692307692308, "grad_norm": 7.000866889953613, "learning_rate": 1.5743589743589746e-05, "loss": 0.7055, "step": 617 }, { "epoch": 0.5942307692307692, "grad_norm": 7.556613922119141, "learning_rate": 1.576923076923077e-05, "loss": 0.8488, "step": 618 }, { "epoch": 0.5951923076923077, "grad_norm": 7.970667362213135, "learning_rate": 1.5794871794871795e-05, "loss": 1.1181, "step": 619 }, { "epoch": 0.5961538461538461, "grad_norm": 6.649800777435303, "learning_rate": 1.582051282051282e-05, "loss": 0.5417, "step": 620 }, { "epoch": 0.5971153846153846, "grad_norm": 7.47381067276001, "learning_rate": 1.5846153846153848e-05, "loss": 0.7572, "step": 621 }, { "epoch": 0.5980769230769231, "grad_norm": 7.644420623779297, "learning_rate": 1.587179487179487e-05, "loss": 0.8669, "step": 622 }, { "epoch": 0.5990384615384615, "grad_norm": 7.88212776184082, "learning_rate": 1.5897435897435897e-05, "loss": 0.8681, "step": 623 }, { "epoch": 0.6, "grad_norm": 6.817503929138184, "learning_rate": 1.5923076923076924e-05, "loss": 0.7934, "step": 624 }, { "epoch": 0.6009615384615384, "grad_norm": 6.500703811645508, "learning_rate": 1.594871794871795e-05, "loss": 0.623, "step": 625 }, { "epoch": 0.6019230769230769, "grad_norm": 6.5479631423950195, "learning_rate": 1.5974358974358976e-05, "loss": 0.5315, "step": 626 }, { "epoch": 0.6028846153846154, "grad_norm": 6.841207027435303, "learning_rate": 1.6000000000000003e-05, "loss": 0.8886, "step": 627 }, { "epoch": 0.6038461538461538, "grad_norm": 6.4276018142700195, "learning_rate": 1.602564102564103e-05, "loss": 0.5555, "step": 628 }, { "epoch": 0.6048076923076923, "grad_norm": 6.400624752044678, "learning_rate": 1.6051282051282052e-05, "loss": 0.616, "step": 629 }, { "epoch": 0.6057692307692307, "grad_norm": 7.66914701461792, "learning_rate": 1.607692307692308e-05, "loss": 0.8706, "step": 630 }, { "epoch": 0.6067307692307692, "grad_norm": 7.767228126525879, "learning_rate": 1.6102564102564105e-05, "loss": 0.9519, "step": 631 }, { "epoch": 0.6076923076923076, "grad_norm": 7.662623405456543, "learning_rate": 1.612820512820513e-05, "loss": 1.0542, "step": 632 }, { "epoch": 0.6086538461538461, "grad_norm": 9.438959121704102, "learning_rate": 1.6153846153846154e-05, "loss": 1.074, "step": 633 }, { "epoch": 0.6096153846153847, "grad_norm": 7.501358509063721, "learning_rate": 1.617948717948718e-05, "loss": 1.1981, "step": 634 }, { "epoch": 0.6105769230769231, "grad_norm": 7.101503849029541, "learning_rate": 1.6205128205128207e-05, "loss": 0.814, "step": 635 }, { "epoch": 0.6115384615384616, "grad_norm": 6.438178062438965, "learning_rate": 1.6230769230769233e-05, "loss": 0.5707, "step": 636 }, { "epoch": 0.6125, "grad_norm": 8.53757095336914, "learning_rate": 1.625641025641026e-05, "loss": 0.824, "step": 637 }, { "epoch": 0.6134615384615385, "grad_norm": 6.807065010070801, "learning_rate": 1.6282051282051282e-05, "loss": 0.7328, "step": 638 }, { "epoch": 0.614423076923077, "grad_norm": 8.458263397216797, "learning_rate": 1.630769230769231e-05, "loss": 0.7014, "step": 639 }, { "epoch": 0.6153846153846154, "grad_norm": 7.958715915679932, "learning_rate": 1.6333333333333335e-05, "loss": 1.5698, "step": 640 }, { "epoch": 0.6163461538461539, "grad_norm": 7.9010138511657715, "learning_rate": 1.635897435897436e-05, "loss": 0.9614, "step": 641 }, { "epoch": 0.6173076923076923, "grad_norm": 7.13578987121582, "learning_rate": 1.6384615384615384e-05, "loss": 0.8666, "step": 642 }, { "epoch": 0.6182692307692308, "grad_norm": 7.103766441345215, "learning_rate": 1.641025641025641e-05, "loss": 0.9716, "step": 643 }, { "epoch": 0.6192307692307693, "grad_norm": 6.43604040145874, "learning_rate": 1.6435897435897437e-05, "loss": 1.0456, "step": 644 }, { "epoch": 0.6201923076923077, "grad_norm": 8.331916809082031, "learning_rate": 1.6461538461538463e-05, "loss": 0.7803, "step": 645 }, { "epoch": 0.6211538461538462, "grad_norm": 7.316093444824219, "learning_rate": 1.6487179487179486e-05, "loss": 0.8004, "step": 646 }, { "epoch": 0.6221153846153846, "grad_norm": 9.387686729431152, "learning_rate": 1.6512820512820513e-05, "loss": 1.761, "step": 647 }, { "epoch": 0.6230769230769231, "grad_norm": 6.3310546875, "learning_rate": 1.653846153846154e-05, "loss": 0.6805, "step": 648 }, { "epoch": 0.6240384615384615, "grad_norm": 8.517004013061523, "learning_rate": 1.6564102564102565e-05, "loss": 0.7715, "step": 649 }, { "epoch": 0.625, "grad_norm": 6.651610374450684, "learning_rate": 1.6589743589743592e-05, "loss": 0.6586, "step": 650 }, { "epoch": 0.6259615384615385, "grad_norm": 6.691784381866455, "learning_rate": 1.6615384615384618e-05, "loss": 0.7544, "step": 651 }, { "epoch": 0.6269230769230769, "grad_norm": 6.103257656097412, "learning_rate": 1.6641025641025645e-05, "loss": 0.5134, "step": 652 }, { "epoch": 0.6278846153846154, "grad_norm": 7.018590450286865, "learning_rate": 1.6666666666666667e-05, "loss": 0.6765, "step": 653 }, { "epoch": 0.6288461538461538, "grad_norm": 6.892549514770508, "learning_rate": 1.6692307692307694e-05, "loss": 1.1609, "step": 654 }, { "epoch": 0.6298076923076923, "grad_norm": 5.762831211090088, "learning_rate": 1.671794871794872e-05, "loss": 0.5307, "step": 655 }, { "epoch": 0.6307692307692307, "grad_norm": 6.764204502105713, "learning_rate": 1.6743589743589747e-05, "loss": 0.815, "step": 656 }, { "epoch": 0.6317307692307692, "grad_norm": 6.281445026397705, "learning_rate": 1.676923076923077e-05, "loss": 0.9962, "step": 657 }, { "epoch": 0.6326923076923077, "grad_norm": 7.37977409362793, "learning_rate": 1.6794871794871796e-05, "loss": 1.1042, "step": 658 }, { "epoch": 0.6336538461538461, "grad_norm": 7.112977504730225, "learning_rate": 1.6820512820512822e-05, "loss": 1.2664, "step": 659 }, { "epoch": 0.6346153846153846, "grad_norm": 6.2857346534729, "learning_rate": 1.684615384615385e-05, "loss": 1.04, "step": 660 }, { "epoch": 0.635576923076923, "grad_norm": 8.684266090393066, "learning_rate": 1.687179487179487e-05, "loss": 1.73, "step": 661 }, { "epoch": 0.6365384615384615, "grad_norm": 6.462116241455078, "learning_rate": 1.6897435897435898e-05, "loss": 0.6606, "step": 662 }, { "epoch": 0.6375, "grad_norm": 7.701472282409668, "learning_rate": 1.6923076923076924e-05, "loss": 1.312, "step": 663 }, { "epoch": 0.6384615384615384, "grad_norm": 6.619521617889404, "learning_rate": 1.694871794871795e-05, "loss": 0.7932, "step": 664 }, { "epoch": 0.6394230769230769, "grad_norm": 6.537534236907959, "learning_rate": 1.6974358974358977e-05, "loss": 1.0811, "step": 665 }, { "epoch": 0.6403846153846153, "grad_norm": 6.040229797363281, "learning_rate": 1.7e-05, "loss": 0.7077, "step": 666 }, { "epoch": 0.6413461538461539, "grad_norm": 6.802707672119141, "learning_rate": 1.7025641025641026e-05, "loss": 0.5137, "step": 667 }, { "epoch": 0.6423076923076924, "grad_norm": 7.75311803817749, "learning_rate": 1.7051282051282053e-05, "loss": 1.2827, "step": 668 }, { "epoch": 0.6432692307692308, "grad_norm": 7.4662394523620605, "learning_rate": 1.707692307692308e-05, "loss": 0.7814, "step": 669 }, { "epoch": 0.6442307692307693, "grad_norm": 7.046069622039795, "learning_rate": 1.7102564102564102e-05, "loss": 1.0527, "step": 670 }, { "epoch": 0.6451923076923077, "grad_norm": 6.7947163581848145, "learning_rate": 1.7128205128205128e-05, "loss": 0.8446, "step": 671 }, { "epoch": 0.6461538461538462, "grad_norm": 7.35598087310791, "learning_rate": 1.7153846153846155e-05, "loss": 0.9462, "step": 672 }, { "epoch": 0.6471153846153846, "grad_norm": 7.950256824493408, "learning_rate": 1.717948717948718e-05, "loss": 1.0677, "step": 673 }, { "epoch": 0.6480769230769231, "grad_norm": 7.560853004455566, "learning_rate": 1.7205128205128207e-05, "loss": 0.8951, "step": 674 }, { "epoch": 0.6490384615384616, "grad_norm": 6.269211292266846, "learning_rate": 1.7230769230769234e-05, "loss": 0.6666, "step": 675 }, { "epoch": 0.65, "grad_norm": 6.08966064453125, "learning_rate": 1.725641025641026e-05, "loss": 0.9635, "step": 676 }, { "epoch": 0.6509615384615385, "grad_norm": 7.001126766204834, "learning_rate": 1.7282051282051283e-05, "loss": 0.8507, "step": 677 }, { "epoch": 0.6519230769230769, "grad_norm": 6.575096607208252, "learning_rate": 1.730769230769231e-05, "loss": 0.6417, "step": 678 }, { "epoch": 0.6528846153846154, "grad_norm": 6.513644218444824, "learning_rate": 1.7333333333333336e-05, "loss": 0.7964, "step": 679 }, { "epoch": 0.6538461538461539, "grad_norm": 6.478466033935547, "learning_rate": 1.7358974358974362e-05, "loss": 0.7523, "step": 680 }, { "epoch": 0.6548076923076923, "grad_norm": 7.111272811889648, "learning_rate": 1.7384615384615385e-05, "loss": 0.6459, "step": 681 }, { "epoch": 0.6557692307692308, "grad_norm": 6.3295416831970215, "learning_rate": 1.741025641025641e-05, "loss": 0.5889, "step": 682 }, { "epoch": 0.6567307692307692, "grad_norm": 6.151467800140381, "learning_rate": 1.7435897435897438e-05, "loss": 0.7328, "step": 683 }, { "epoch": 0.6576923076923077, "grad_norm": 6.104226112365723, "learning_rate": 1.7461538461538464e-05, "loss": 1.1542, "step": 684 }, { "epoch": 0.6586538461538461, "grad_norm": 7.0829033851623535, "learning_rate": 1.7487179487179487e-05, "loss": 0.9674, "step": 685 }, { "epoch": 0.6596153846153846, "grad_norm": 7.0829033851623535, "learning_rate": 1.7512820512820513e-05, "loss": 1.0137, "step": 686 }, { "epoch": 0.6605769230769231, "grad_norm": 8.954656600952148, "learning_rate": 1.7512820512820513e-05, "loss": 1.6053, "step": 687 }, { "epoch": 0.6615384615384615, "grad_norm": 6.847164154052734, "learning_rate": 1.753846153846154e-05, "loss": 0.8087, "step": 688 }, { "epoch": 0.6625, "grad_norm": 7.014500617980957, "learning_rate": 1.7564102564102566e-05, "loss": 0.9878, "step": 689 }, { "epoch": 0.6634615384615384, "grad_norm": 6.692909240722656, "learning_rate": 1.7589743589743592e-05, "loss": 0.8244, "step": 690 }, { "epoch": 0.6644230769230769, "grad_norm": 8.964744567871094, "learning_rate": 1.7615384615384615e-05, "loss": 1.2077, "step": 691 }, { "epoch": 0.6653846153846154, "grad_norm": 5.601660251617432, "learning_rate": 1.7641025641025642e-05, "loss": 0.4541, "step": 692 }, { "epoch": 0.6663461538461538, "grad_norm": 6.258889198303223, "learning_rate": 1.7666666666666668e-05, "loss": 0.5582, "step": 693 }, { "epoch": 0.6673076923076923, "grad_norm": 7.522148132324219, "learning_rate": 1.7692307692307694e-05, "loss": 0.9968, "step": 694 }, { "epoch": 0.6682692307692307, "grad_norm": 7.12522029876709, "learning_rate": 1.7717948717948717e-05, "loss": 0.9608, "step": 695 }, { "epoch": 0.6692307692307692, "grad_norm": 6.186212062835693, "learning_rate": 1.7743589743589744e-05, "loss": 0.677, "step": 696 }, { "epoch": 0.6701923076923076, "grad_norm": 8.62382698059082, "learning_rate": 1.776923076923077e-05, "loss": 0.9048, "step": 697 }, { "epoch": 0.6711538461538461, "grad_norm": 8.388274192810059, "learning_rate": 1.7794871794871796e-05, "loss": 0.9442, "step": 698 }, { "epoch": 0.6721153846153847, "grad_norm": 7.823852062225342, "learning_rate": 1.7820512820512823e-05, "loss": 0.8451, "step": 699 }, { "epoch": 0.6730769230769231, "grad_norm": 6.460159778594971, "learning_rate": 1.784615384615385e-05, "loss": 0.5313, "step": 700 }, { "epoch": 0.6740384615384616, "grad_norm": 7.3616557121276855, "learning_rate": 1.7871794871794875e-05, "loss": 0.8819, "step": 701 }, { "epoch": 0.675, "grad_norm": 6.2284932136535645, "learning_rate": 1.78974358974359e-05, "loss": 0.7384, "step": 702 }, { "epoch": 0.6759615384615385, "grad_norm": 6.510267734527588, "learning_rate": 1.7923076923076925e-05, "loss": 0.628, "step": 703 }, { "epoch": 0.676923076923077, "grad_norm": 6.635895252227783, "learning_rate": 1.794871794871795e-05, "loss": 0.5781, "step": 704 }, { "epoch": 0.6778846153846154, "grad_norm": 10.005219459533691, "learning_rate": 1.7974358974358977e-05, "loss": 1.169, "step": 705 }, { "epoch": 0.6788461538461539, "grad_norm": 6.945747375488281, "learning_rate": 1.8e-05, "loss": 0.7853, "step": 706 }, { "epoch": 0.6798076923076923, "grad_norm": 7.665156364440918, "learning_rate": 1.8025641025641027e-05, "loss": 1.1067, "step": 707 }, { "epoch": 0.6807692307692308, "grad_norm": 6.552615165710449, "learning_rate": 1.8051282051282053e-05, "loss": 0.6284, "step": 708 }, { "epoch": 0.6817307692307693, "grad_norm": 6.274016857147217, "learning_rate": 1.807692307692308e-05, "loss": 0.5518, "step": 709 }, { "epoch": 0.6826923076923077, "grad_norm": 7.12836217880249, "learning_rate": 1.8102564102564102e-05, "loss": 0.728, "step": 710 }, { "epoch": 0.6836538461538462, "grad_norm": 6.3859357833862305, "learning_rate": 1.812820512820513e-05, "loss": 0.5308, "step": 711 }, { "epoch": 0.6846153846153846, "grad_norm": 6.736617565155029, "learning_rate": 1.8153846153846155e-05, "loss": 0.5783, "step": 712 }, { "epoch": 0.6855769230769231, "grad_norm": 7.077742099761963, "learning_rate": 1.817948717948718e-05, "loss": 0.8128, "step": 713 }, { "epoch": 0.6865384615384615, "grad_norm": 6.650359153747559, "learning_rate": 1.8205128205128208e-05, "loss": 0.8069, "step": 714 }, { "epoch": 0.6875, "grad_norm": 7.31875467300415, "learning_rate": 1.823076923076923e-05, "loss": 0.8794, "step": 715 }, { "epoch": 0.6884615384615385, "grad_norm": 6.243699073791504, "learning_rate": 1.8256410256410257e-05, "loss": 0.4513, "step": 716 }, { "epoch": 0.6894230769230769, "grad_norm": 8.712233543395996, "learning_rate": 1.8282051282051284e-05, "loss": 1.2876, "step": 717 }, { "epoch": 0.6903846153846154, "grad_norm": 6.99497652053833, "learning_rate": 1.830769230769231e-05, "loss": 0.7971, "step": 718 }, { "epoch": 0.6913461538461538, "grad_norm": 7.098451137542725, "learning_rate": 1.8333333333333333e-05, "loss": 0.7838, "step": 719 }, { "epoch": 0.6923076923076923, "grad_norm": 8.633121490478516, "learning_rate": 1.835897435897436e-05, "loss": 0.5416, "step": 720 }, { "epoch": 0.6932692307692307, "grad_norm": 8.331951141357422, "learning_rate": 1.8384615384615386e-05, "loss": 1.0658, "step": 721 }, { "epoch": 0.6942307692307692, "grad_norm": 6.2609968185424805, "learning_rate": 1.8410256410256412e-05, "loss": 0.5843, "step": 722 }, { "epoch": 0.6951923076923077, "grad_norm": 7.270373344421387, "learning_rate": 1.8435897435897435e-05, "loss": 0.8446, "step": 723 }, { "epoch": 0.6961538461538461, "grad_norm": 8.090873718261719, "learning_rate": 1.8461538461538465e-05, "loss": 0.7892, "step": 724 }, { "epoch": 0.6971153846153846, "grad_norm": 5.122181415557861, "learning_rate": 1.848717948717949e-05, "loss": 0.4004, "step": 725 }, { "epoch": 0.698076923076923, "grad_norm": 7.310664176940918, "learning_rate": 1.8512820512820514e-05, "loss": 0.8854, "step": 726 }, { "epoch": 0.6990384615384615, "grad_norm": 5.249734401702881, "learning_rate": 1.853846153846154e-05, "loss": 0.3642, "step": 727 }, { "epoch": 0.7, "grad_norm": 6.3562116622924805, "learning_rate": 1.8564102564102567e-05, "loss": 0.6444, "step": 728 }, { "epoch": 0.7009615384615384, "grad_norm": 6.517514228820801, "learning_rate": 1.8589743589743593e-05, "loss": 0.8148, "step": 729 }, { "epoch": 0.7019230769230769, "grad_norm": 5.852412700653076, "learning_rate": 1.8615384615384616e-05, "loss": 0.4157, "step": 730 }, { "epoch": 0.7028846153846153, "grad_norm": 6.829611778259277, "learning_rate": 1.8641025641025642e-05, "loss": 1.1035, "step": 731 }, { "epoch": 0.7038461538461539, "grad_norm": 7.446195602416992, "learning_rate": 1.866666666666667e-05, "loss": 0.9673, "step": 732 }, { "epoch": 0.7048076923076924, "grad_norm": 7.229966640472412, "learning_rate": 1.8692307692307695e-05, "loss": 0.8371, "step": 733 }, { "epoch": 0.7057692307692308, "grad_norm": 6.253291130065918, "learning_rate": 1.8717948717948718e-05, "loss": 0.6127, "step": 734 }, { "epoch": 0.7067307692307693, "grad_norm": 6.56795072555542, "learning_rate": 1.8743589743589744e-05, "loss": 0.6514, "step": 735 }, { "epoch": 0.7076923076923077, "grad_norm": 5.530077934265137, "learning_rate": 1.876923076923077e-05, "loss": 0.4207, "step": 736 }, { "epoch": 0.7086538461538462, "grad_norm": 7.806577682495117, "learning_rate": 1.8794871794871797e-05, "loss": 0.9337, "step": 737 }, { "epoch": 0.7096153846153846, "grad_norm": 7.2873945236206055, "learning_rate": 1.8820512820512823e-05, "loss": 1.1587, "step": 738 }, { "epoch": 0.7105769230769231, "grad_norm": 6.0485615730285645, "learning_rate": 1.8846153846153846e-05, "loss": 0.4952, "step": 739 }, { "epoch": 0.7115384615384616, "grad_norm": 7.830602169036865, "learning_rate": 1.8871794871794873e-05, "loss": 0.8833, "step": 740 }, { "epoch": 0.7125, "grad_norm": 7.807419300079346, "learning_rate": 1.88974358974359e-05, "loss": 1.0317, "step": 741 }, { "epoch": 0.7134615384615385, "grad_norm": 7.739832878112793, "learning_rate": 1.8923076923076925e-05, "loss": 0.8864, "step": 742 }, { "epoch": 0.7144230769230769, "grad_norm": 6.3233208656311035, "learning_rate": 1.894871794871795e-05, "loss": 0.6446, "step": 743 }, { "epoch": 0.7153846153846154, "grad_norm": 6.584401607513428, "learning_rate": 1.8974358974358975e-05, "loss": 0.8109, "step": 744 }, { "epoch": 0.7163461538461539, "grad_norm": 5.977737903594971, "learning_rate": 1.9e-05, "loss": 0.5698, "step": 745 }, { "epoch": 0.7173076923076923, "grad_norm": 7.065290927886963, "learning_rate": 1.9025641025641027e-05, "loss": 0.905, "step": 746 }, { "epoch": 0.7182692307692308, "grad_norm": 7.503466606140137, "learning_rate": 1.905128205128205e-05, "loss": 1.4905, "step": 747 }, { "epoch": 0.7192307692307692, "grad_norm": 8.705888748168945, "learning_rate": 1.907692307692308e-05, "loss": 0.8914, "step": 748 }, { "epoch": 0.7201923076923077, "grad_norm": 8.177573204040527, "learning_rate": 1.9102564102564106e-05, "loss": 1.0557, "step": 749 }, { "epoch": 0.7211538461538461, "grad_norm": 7.217212677001953, "learning_rate": 1.912820512820513e-05, "loss": 1.0065, "step": 750 }, { "epoch": 0.7221153846153846, "grad_norm": 6.623650550842285, "learning_rate": 1.9153846153846156e-05, "loss": 0.5874, "step": 751 }, { "epoch": 0.7230769230769231, "grad_norm": 8.010212898254395, "learning_rate": 1.9179487179487182e-05, "loss": 0.9484, "step": 752 }, { "epoch": 0.7240384615384615, "grad_norm": 7.632513046264648, "learning_rate": 1.920512820512821e-05, "loss": 1.0166, "step": 753 }, { "epoch": 0.725, "grad_norm": 6.424896717071533, "learning_rate": 1.923076923076923e-05, "loss": 0.5942, "step": 754 }, { "epoch": 0.7259615384615384, "grad_norm": 8.016042709350586, "learning_rate": 1.9256410256410258e-05, "loss": 0.6772, "step": 755 }, { "epoch": 0.7269230769230769, "grad_norm": 6.695675373077393, "learning_rate": 1.9282051282051284e-05, "loss": 0.7744, "step": 756 }, { "epoch": 0.7278846153846154, "grad_norm": 5.839437484741211, "learning_rate": 1.930769230769231e-05, "loss": 0.7609, "step": 757 }, { "epoch": 0.7288461538461538, "grad_norm": 6.39902925491333, "learning_rate": 1.9333333333333333e-05, "loss": 0.7223, "step": 758 }, { "epoch": 0.7298076923076923, "grad_norm": 6.639665603637695, "learning_rate": 1.935897435897436e-05, "loss": 0.8897, "step": 759 }, { "epoch": 0.7307692307692307, "grad_norm": 6.388294696807861, "learning_rate": 1.9384615384615386e-05, "loss": 0.4663, "step": 760 }, { "epoch": 0.7317307692307692, "grad_norm": 7.119065761566162, "learning_rate": 1.9410256410256413e-05, "loss": 1.0806, "step": 761 }, { "epoch": 0.7326923076923076, "grad_norm": 7.664358615875244, "learning_rate": 1.943589743589744e-05, "loss": 1.1378, "step": 762 }, { "epoch": 0.7336538461538461, "grad_norm": 6.3949785232543945, "learning_rate": 1.9461538461538462e-05, "loss": 1.0545, "step": 763 }, { "epoch": 0.7346153846153847, "grad_norm": 5.9556074142456055, "learning_rate": 1.9487179487179488e-05, "loss": 0.7742, "step": 764 }, { "epoch": 0.7355769230769231, "grad_norm": 7.985556602478027, "learning_rate": 1.9512820512820515e-05, "loss": 1.4216, "step": 765 }, { "epoch": 0.7365384615384616, "grad_norm": 6.549108505249023, "learning_rate": 1.953846153846154e-05, "loss": 0.7452, "step": 766 }, { "epoch": 0.7375, "grad_norm": 6.428933143615723, "learning_rate": 1.9564102564102564e-05, "loss": 0.6025, "step": 767 }, { "epoch": 0.7384615384615385, "grad_norm": 6.587007999420166, "learning_rate": 1.958974358974359e-05, "loss": 0.9126, "step": 768 }, { "epoch": 0.739423076923077, "grad_norm": 6.4898905754089355, "learning_rate": 1.9615384615384617e-05, "loss": 1.1048, "step": 769 }, { "epoch": 0.7403846153846154, "grad_norm": 5.574278354644775, "learning_rate": 1.9641025641025643e-05, "loss": 0.643, "step": 770 }, { "epoch": 0.7413461538461539, "grad_norm": 5.429538249969482, "learning_rate": 1.9666666666666666e-05, "loss": 0.5477, "step": 771 }, { "epoch": 0.7423076923076923, "grad_norm": 6.562385559082031, "learning_rate": 1.9692307692307696e-05, "loss": 1.0124, "step": 772 }, { "epoch": 0.7432692307692308, "grad_norm": 6.5750412940979, "learning_rate": 1.9717948717948722e-05, "loss": 0.4606, "step": 773 }, { "epoch": 0.7442307692307693, "grad_norm": 5.206079959869385, "learning_rate": 1.9743589743589745e-05, "loss": 0.3261, "step": 774 }, { "epoch": 0.7451923076923077, "grad_norm": 6.874477863311768, "learning_rate": 1.976923076923077e-05, "loss": 1.2151, "step": 775 }, { "epoch": 0.7461538461538462, "grad_norm": 5.597405910491943, "learning_rate": 1.9794871794871798e-05, "loss": 0.5744, "step": 776 }, { "epoch": 0.7471153846153846, "grad_norm": 7.470310688018799, "learning_rate": 1.9820512820512824e-05, "loss": 1.0754, "step": 777 }, { "epoch": 0.7480769230769231, "grad_norm": 6.39849853515625, "learning_rate": 1.9846153846153847e-05, "loss": 0.9385, "step": 778 }, { "epoch": 0.7490384615384615, "grad_norm": 7.9699811935424805, "learning_rate": 1.9871794871794873e-05, "loss": 1.084, "step": 779 }, { "epoch": 0.75, "grad_norm": 7.682103157043457, "learning_rate": 1.98974358974359e-05, "loss": 1.2559, "step": 780 }, { "epoch": 0.7509615384615385, "grad_norm": 6.697262287139893, "learning_rate": 1.9923076923076926e-05, "loss": 0.8907, "step": 781 }, { "epoch": 0.7519230769230769, "grad_norm": 7.027126789093018, "learning_rate": 1.994871794871795e-05, "loss": 1.0619, "step": 782 }, { "epoch": 0.7528846153846154, "grad_norm": 6.714702129364014, "learning_rate": 1.9974358974358975e-05, "loss": 0.7713, "step": 783 }, { "epoch": 0.7538461538461538, "grad_norm": 6.360355377197266, "learning_rate": 2e-05, "loss": 1.013, "step": 784 }, { "epoch": 0.7548076923076923, "grad_norm": 5.15418815612793, "learning_rate": 1.9999999922414677e-05, "loss": 0.5864, "step": 785 }, { "epoch": 0.7557692307692307, "grad_norm": 5.677567481994629, "learning_rate": 1.9999999689658704e-05, "loss": 0.6157, "step": 786 }, { "epoch": 0.7567307692307692, "grad_norm": 6.680411338806152, "learning_rate": 1.999999930173208e-05, "loss": 0.8141, "step": 787 }, { "epoch": 0.7576923076923077, "grad_norm": 6.852912425994873, "learning_rate": 1.9999998758634823e-05, "loss": 0.8029, "step": 788 }, { "epoch": 0.7586538461538461, "grad_norm": 6.326913356781006, "learning_rate": 1.9999998060366933e-05, "loss": 1.1651, "step": 789 }, { "epoch": 0.7596153846153846, "grad_norm": 5.72829532623291, "learning_rate": 1.9999997206928425e-05, "loss": 0.663, "step": 790 }, { "epoch": 0.760576923076923, "grad_norm": 6.168932914733887, "learning_rate": 1.99999961983193e-05, "loss": 0.6196, "step": 791 }, { "epoch": 0.7615384615384615, "grad_norm": 7.7129998207092285, "learning_rate": 1.9999995034539596e-05, "loss": 1.211, "step": 792 }, { "epoch": 0.7625, "grad_norm": 6.162902355194092, "learning_rate": 1.9999993715589313e-05, "loss": 0.8704, "step": 793 }, { "epoch": 0.7634615384615384, "grad_norm": 6.94567346572876, "learning_rate": 1.9999992241468476e-05, "loss": 0.7574, "step": 794 }, { "epoch": 0.7644230769230769, "grad_norm": 5.656796455383301, "learning_rate": 1.9999990612177112e-05, "loss": 0.6685, "step": 795 }, { "epoch": 0.7653846153846153, "grad_norm": 6.81712007522583, "learning_rate": 1.999998882771524e-05, "loss": 0.8839, "step": 796 }, { "epoch": 0.7663461538461539, "grad_norm": 7.0042338371276855, "learning_rate": 1.9999986888082895e-05, "loss": 1.1592, "step": 797 }, { "epoch": 0.7673076923076924, "grad_norm": 5.584681510925293, "learning_rate": 1.9999984793280102e-05, "loss": 0.7326, "step": 798 }, { "epoch": 0.7682692307692308, "grad_norm": 6.3739705085754395, "learning_rate": 1.9999982543306894e-05, "loss": 0.582, "step": 799 }, { "epoch": 0.7692307692307693, "grad_norm": 6.291205406188965, "learning_rate": 1.9999980138163305e-05, "loss": 0.6027, "step": 800 }, { "epoch": 0.7701923076923077, "grad_norm": 7.100127220153809, "learning_rate": 1.9999977577849375e-05, "loss": 1.1459, "step": 801 }, { "epoch": 0.7711538461538462, "grad_norm": 7.133500099182129, "learning_rate": 1.9999974862365142e-05, "loss": 0.8107, "step": 802 }, { "epoch": 0.7721153846153846, "grad_norm": 6.657430171966553, "learning_rate": 1.9999971991710653e-05, "loss": 0.8696, "step": 803 }, { "epoch": 0.7730769230769231, "grad_norm": 6.984677791595459, "learning_rate": 1.9999968965885945e-05, "loss": 0.9338, "step": 804 }, { "epoch": 0.7740384615384616, "grad_norm": 6.120626926422119, "learning_rate": 1.9999965784891065e-05, "loss": 0.7116, "step": 805 }, { "epoch": 0.775, "grad_norm": 5.790870666503906, "learning_rate": 1.999996244872607e-05, "loss": 0.4994, "step": 806 }, { "epoch": 0.7759615384615385, "grad_norm": 5.812870025634766, "learning_rate": 1.9999958957391007e-05, "loss": 0.5749, "step": 807 }, { "epoch": 0.7769230769230769, "grad_norm": 5.931691646575928, "learning_rate": 1.999995531088593e-05, "loss": 0.659, "step": 808 }, { "epoch": 0.7778846153846154, "grad_norm": 7.06583309173584, "learning_rate": 1.9999951509210893e-05, "loss": 1.1038, "step": 809 }, { "epoch": 0.7788461538461539, "grad_norm": 6.832769393920898, "learning_rate": 1.999994755236596e-05, "loss": 0.6966, "step": 810 }, { "epoch": 0.7798076923076923, "grad_norm": 5.791548252105713, "learning_rate": 1.999994344035119e-05, "loss": 0.5801, "step": 811 }, { "epoch": 0.7807692307692308, "grad_norm": 7.300523281097412, "learning_rate": 1.9999939173166648e-05, "loss": 1.1482, "step": 812 }, { "epoch": 0.7817307692307692, "grad_norm": 6.466034412384033, "learning_rate": 1.99999347508124e-05, "loss": 0.825, "step": 813 }, { "epoch": 0.7826923076923077, "grad_norm": 7.562474250793457, "learning_rate": 1.9999930173288518e-05, "loss": 1.3436, "step": 814 }, { "epoch": 0.7836538461538461, "grad_norm": 6.201574802398682, "learning_rate": 1.9999925440595063e-05, "loss": 0.5672, "step": 815 }, { "epoch": 0.7846153846153846, "grad_norm": 6.358977794647217, "learning_rate": 1.9999920552732118e-05, "loss": 0.6756, "step": 816 }, { "epoch": 0.7855769230769231, "grad_norm": 6.654088973999023, "learning_rate": 1.999991550969975e-05, "loss": 0.7503, "step": 817 }, { "epoch": 0.7865384615384615, "grad_norm": 6.851992607116699, "learning_rate": 1.999991031149805e-05, "loss": 0.9098, "step": 818 }, { "epoch": 0.7875, "grad_norm": 6.14333963394165, "learning_rate": 1.9999904958127087e-05, "loss": 0.6114, "step": 819 }, { "epoch": 0.7884615384615384, "grad_norm": 5.401691436767578, "learning_rate": 1.999989944958695e-05, "loss": 0.4349, "step": 820 }, { "epoch": 0.7894230769230769, "grad_norm": 6.054240703582764, "learning_rate": 1.9999893785877723e-05, "loss": 0.8767, "step": 821 }, { "epoch": 0.7903846153846154, "grad_norm": 5.981778621673584, "learning_rate": 1.9999887966999493e-05, "loss": 0.5698, "step": 822 }, { "epoch": 0.7913461538461538, "grad_norm": 6.316657066345215, "learning_rate": 1.9999881992952353e-05, "loss": 0.7196, "step": 823 }, { "epoch": 0.7923076923076923, "grad_norm": 8.206835746765137, "learning_rate": 1.9999875863736393e-05, "loss": 1.2859, "step": 824 }, { "epoch": 0.7932692307692307, "grad_norm": 6.5433244705200195, "learning_rate": 1.9999869579351708e-05, "loss": 0.6697, "step": 825 }, { "epoch": 0.7942307692307692, "grad_norm": 7.464853286743164, "learning_rate": 1.9999863139798394e-05, "loss": 0.8892, "step": 826 }, { "epoch": 0.7951923076923076, "grad_norm": 6.936417102813721, "learning_rate": 1.9999856545076555e-05, "loss": 0.9275, "step": 827 }, { "epoch": 0.7961538461538461, "grad_norm": 5.633030414581299, "learning_rate": 1.9999849795186296e-05, "loss": 0.7014, "step": 828 }, { "epoch": 0.7971153846153847, "grad_norm": 5.082230567932129, "learning_rate": 1.9999842890127713e-05, "loss": 0.384, "step": 829 }, { "epoch": 0.7980769230769231, "grad_norm": 6.881920337677002, "learning_rate": 1.9999835829900917e-05, "loss": 0.7601, "step": 830 }, { "epoch": 0.7990384615384616, "grad_norm": 7.7981719970703125, "learning_rate": 1.999982861450602e-05, "loss": 0.8264, "step": 831 }, { "epoch": 0.8, "grad_norm": 6.1263427734375, "learning_rate": 1.999982124394313e-05, "loss": 0.669, "step": 832 }, { "epoch": 0.8009615384615385, "grad_norm": 6.3207831382751465, "learning_rate": 1.9999813718212365e-05, "loss": 0.6906, "step": 833 }, { "epoch": 0.801923076923077, "grad_norm": 8.731743812561035, "learning_rate": 1.9999806037313842e-05, "loss": 0.8001, "step": 834 }, { "epoch": 0.8028846153846154, "grad_norm": 5.880492687225342, "learning_rate": 1.9999798201247675e-05, "loss": 0.4813, "step": 835 }, { "epoch": 0.8038461538461539, "grad_norm": 7.087016582489014, "learning_rate": 1.999979021001399e-05, "loss": 1.0991, "step": 836 }, { "epoch": 0.8048076923076923, "grad_norm": 6.4498796463012695, "learning_rate": 1.9999782063612908e-05, "loss": 0.8519, "step": 837 }, { "epoch": 0.8057692307692308, "grad_norm": 6.421937465667725, "learning_rate": 1.9999773762044562e-05, "loss": 0.6857, "step": 838 }, { "epoch": 0.8067307692307693, "grad_norm": 7.17828369140625, "learning_rate": 1.9999765305309074e-05, "loss": 0.6944, "step": 839 }, { "epoch": 0.8076923076923077, "grad_norm": 7.6537065505981445, "learning_rate": 1.999975669340658e-05, "loss": 1.2263, "step": 840 }, { "epoch": 0.8086538461538462, "grad_norm": 7.388794898986816, "learning_rate": 1.9999747926337206e-05, "loss": 0.785, "step": 841 }, { "epoch": 0.8096153846153846, "grad_norm": 8.343512535095215, "learning_rate": 1.9999739004101094e-05, "loss": 1.1963, "step": 842 }, { "epoch": 0.8105769230769231, "grad_norm": 7.130254745483398, "learning_rate": 1.9999729926698385e-05, "loss": 0.8702, "step": 843 }, { "epoch": 0.8115384615384615, "grad_norm": 5.8507819175720215, "learning_rate": 1.9999720694129215e-05, "loss": 0.9087, "step": 844 }, { "epoch": 0.8125, "grad_norm": 7.260091304779053, "learning_rate": 1.999971130639373e-05, "loss": 1.3561, "step": 845 }, { "epoch": 0.8134615384615385, "grad_norm": 6.310152530670166, "learning_rate": 1.9999701763492067e-05, "loss": 0.6952, "step": 846 }, { "epoch": 0.8144230769230769, "grad_norm": 6.2014546394348145, "learning_rate": 1.999969206542439e-05, "loss": 1.0157, "step": 847 }, { "epoch": 0.8153846153846154, "grad_norm": 7.2193603515625, "learning_rate": 1.9999682212190835e-05, "loss": 1.1539, "step": 848 }, { "epoch": 0.8163461538461538, "grad_norm": 7.298185348510742, "learning_rate": 1.9999672203791564e-05, "loss": 1.0193, "step": 849 }, { "epoch": 0.8173076923076923, "grad_norm": 5.908012866973877, "learning_rate": 1.9999662040226727e-05, "loss": 0.6811, "step": 850 }, { "epoch": 0.8182692307692307, "grad_norm": 6.415647506713867, "learning_rate": 1.9999651721496488e-05, "loss": 0.7765, "step": 851 }, { "epoch": 0.8192307692307692, "grad_norm": 5.49400520324707, "learning_rate": 1.9999641247601e-05, "loss": 0.526, "step": 852 }, { "epoch": 0.8201923076923077, "grad_norm": 4.78850793838501, "learning_rate": 1.9999630618540425e-05, "loss": 0.4152, "step": 853 }, { "epoch": 0.8211538461538461, "grad_norm": 6.344641208648682, "learning_rate": 1.999961983431494e-05, "loss": 0.6673, "step": 854 }, { "epoch": 0.8221153846153846, "grad_norm": 6.574865341186523, "learning_rate": 1.9999608894924698e-05, "loss": 0.8399, "step": 855 }, { "epoch": 0.823076923076923, "grad_norm": 5.884858131408691, "learning_rate": 1.9999597800369874e-05, "loss": 0.4279, "step": 856 }, { "epoch": 0.8240384615384615, "grad_norm": 6.791820049285889, "learning_rate": 1.999958655065064e-05, "loss": 0.8263, "step": 857 }, { "epoch": 0.825, "grad_norm": 7.290653228759766, "learning_rate": 1.9999575145767176e-05, "loss": 1.0, "step": 858 }, { "epoch": 0.8259615384615384, "grad_norm": 5.403120517730713, "learning_rate": 1.999956358571965e-05, "loss": 0.5843, "step": 859 }, { "epoch": 0.8269230769230769, "grad_norm": 7.102540493011475, "learning_rate": 1.999955187050825e-05, "loss": 0.8168, "step": 860 }, { "epoch": 0.8278846153846153, "grad_norm": 6.172536849975586, "learning_rate": 1.9999540000133147e-05, "loss": 0.6961, "step": 861 }, { "epoch": 0.8288461538461539, "grad_norm": 5.685908317565918, "learning_rate": 1.999952797459453e-05, "loss": 0.6569, "step": 862 }, { "epoch": 0.8298076923076924, "grad_norm": 6.5500712394714355, "learning_rate": 1.9999515793892595e-05, "loss": 0.8204, "step": 863 }, { "epoch": 0.8307692307692308, "grad_norm": 7.923806667327881, "learning_rate": 1.9999503458027522e-05, "loss": 0.7815, "step": 864 }, { "epoch": 0.8317307692307693, "grad_norm": 5.4576520919799805, "learning_rate": 1.99994909669995e-05, "loss": 0.6619, "step": 865 }, { "epoch": 0.8326923076923077, "grad_norm": 7.51089334487915, "learning_rate": 1.9999478320808728e-05, "loss": 0.7958, "step": 866 }, { "epoch": 0.8336538461538462, "grad_norm": 7.624989032745361, "learning_rate": 1.9999465519455398e-05, "loss": 1.1984, "step": 867 }, { "epoch": 0.8346153846153846, "grad_norm": 7.24540901184082, "learning_rate": 1.9999452562939715e-05, "loss": 1.3214, "step": 868 }, { "epoch": 0.8355769230769231, "grad_norm": 6.98395299911499, "learning_rate": 1.999943945126187e-05, "loss": 0.8487, "step": 869 }, { "epoch": 0.8365384615384616, "grad_norm": 9.129624366760254, "learning_rate": 1.9999426184422083e-05, "loss": 0.8527, "step": 870 }, { "epoch": 0.8375, "grad_norm": 6.384477138519287, "learning_rate": 1.999941276242054e-05, "loss": 0.7583, "step": 871 }, { "epoch": 0.8384615384615385, "grad_norm": 6.366124153137207, "learning_rate": 1.9999399185257464e-05, "loss": 1.3878, "step": 872 }, { "epoch": 0.8394230769230769, "grad_norm": 7.738968849182129, "learning_rate": 1.999938545293306e-05, "loss": 1.2291, "step": 873 }, { "epoch": 0.8403846153846154, "grad_norm": 6.58411979675293, "learning_rate": 1.999937156544754e-05, "loss": 1.039, "step": 874 }, { "epoch": 0.8413461538461539, "grad_norm": 6.646825790405273, "learning_rate": 1.9999357522801125e-05, "loss": 1.329, "step": 875 }, { "epoch": 0.8423076923076923, "grad_norm": 6.600164890289307, "learning_rate": 1.9999343324994024e-05, "loss": 1.0614, "step": 876 }, { "epoch": 0.8432692307692308, "grad_norm": 7.758087635040283, "learning_rate": 1.9999328972026465e-05, "loss": 1.2619, "step": 877 }, { "epoch": 0.8442307692307692, "grad_norm": 6.871426582336426, "learning_rate": 1.9999314463898666e-05, "loss": 1.2908, "step": 878 }, { "epoch": 0.8451923076923077, "grad_norm": 6.79558801651001, "learning_rate": 1.9999299800610857e-05, "loss": 0.9811, "step": 879 }, { "epoch": 0.8461538461538461, "grad_norm": 6.726817607879639, "learning_rate": 1.9999284982163257e-05, "loss": 1.0018, "step": 880 }, { "epoch": 0.8471153846153846, "grad_norm": 6.495977878570557, "learning_rate": 1.9999270008556108e-05, "loss": 0.9574, "step": 881 }, { "epoch": 0.8480769230769231, "grad_norm": 6.522866249084473, "learning_rate": 1.9999254879789633e-05, "loss": 0.7314, "step": 882 }, { "epoch": 0.8490384615384615, "grad_norm": 5.972560405731201, "learning_rate": 1.9999239595864073e-05, "loss": 0.6494, "step": 883 }, { "epoch": 0.85, "grad_norm": 5.085941791534424, "learning_rate": 1.9999224156779658e-05, "loss": 0.3916, "step": 884 }, { "epoch": 0.8509615384615384, "grad_norm": 5.783792018890381, "learning_rate": 1.9999208562536632e-05, "loss": 0.614, "step": 885 }, { "epoch": 0.8519230769230769, "grad_norm": 5.866331577301025, "learning_rate": 1.999919281313524e-05, "loss": 0.6462, "step": 886 }, { "epoch": 0.8528846153846154, "grad_norm": 7.3942365646362305, "learning_rate": 1.9999176908575716e-05, "loss": 0.8421, "step": 887 }, { "epoch": 0.8538461538461538, "grad_norm": 7.346724033355713, "learning_rate": 1.999916084885832e-05, "loss": 1.0568, "step": 888 }, { "epoch": 0.8548076923076923, "grad_norm": 6.514260768890381, "learning_rate": 1.999914463398329e-05, "loss": 0.6023, "step": 889 }, { "epoch": 0.8557692307692307, "grad_norm": 6.110990524291992, "learning_rate": 1.9999128263950885e-05, "loss": 0.6353, "step": 890 }, { "epoch": 0.8567307692307692, "grad_norm": 6.351659774780273, "learning_rate": 1.999911173876136e-05, "loss": 0.6814, "step": 891 }, { "epoch": 0.8576923076923076, "grad_norm": 6.335273742675781, "learning_rate": 1.9999095058414962e-05, "loss": 0.6545, "step": 892 }, { "epoch": 0.8586538461538461, "grad_norm": 6.89870023727417, "learning_rate": 1.999907822291196e-05, "loss": 0.7588, "step": 893 }, { "epoch": 0.8596153846153847, "grad_norm": 7.227647304534912, "learning_rate": 1.999906123225261e-05, "loss": 1.0766, "step": 894 }, { "epoch": 0.8605769230769231, "grad_norm": 5.751060485839844, "learning_rate": 1.9999044086437174e-05, "loss": 0.7543, "step": 895 }, { "epoch": 0.8615384615384616, "grad_norm": 5.867012977600098, "learning_rate": 1.999902678546592e-05, "loss": 0.8399, "step": 896 }, { "epoch": 0.8625, "grad_norm": 5.541935443878174, "learning_rate": 1.999900932933912e-05, "loss": 0.5272, "step": 897 }, { "epoch": 0.8634615384615385, "grad_norm": 6.746384143829346, "learning_rate": 1.999899171805704e-05, "loss": 1.0981, "step": 898 }, { "epoch": 0.864423076923077, "grad_norm": 7.099907875061035, "learning_rate": 1.9998973951619956e-05, "loss": 0.9076, "step": 899 }, { "epoch": 0.8653846153846154, "grad_norm": 6.086820602416992, "learning_rate": 1.9998956030028144e-05, "loss": 0.5672, "step": 900 }, { "epoch": 0.8663461538461539, "grad_norm": 7.171673774719238, "learning_rate": 1.999893795328188e-05, "loss": 0.9669, "step": 901 }, { "epoch": 0.8673076923076923, "grad_norm": 6.493378639221191, "learning_rate": 1.9998919721381445e-05, "loss": 0.9212, "step": 902 }, { "epoch": 0.8682692307692308, "grad_norm": 6.235860347747803, "learning_rate": 1.9998901334327125e-05, "loss": 0.8274, "step": 903 }, { "epoch": 0.8692307692307693, "grad_norm": 5.833316802978516, "learning_rate": 1.99988827921192e-05, "loss": 0.6973, "step": 904 }, { "epoch": 0.8701923076923077, "grad_norm": 5.696380615234375, "learning_rate": 1.9998864094757963e-05, "loss": 0.4836, "step": 905 }, { "epoch": 0.8711538461538462, "grad_norm": 6.219531536102295, "learning_rate": 1.99988452422437e-05, "loss": 0.8699, "step": 906 }, { "epoch": 0.8721153846153846, "grad_norm": 5.927292346954346, "learning_rate": 1.9998826234576705e-05, "loss": 0.5856, "step": 907 }, { "epoch": 0.8730769230769231, "grad_norm": 5.761472225189209, "learning_rate": 1.9998807071757272e-05, "loss": 0.8112, "step": 908 }, { "epoch": 0.8740384615384615, "grad_norm": 6.69612979888916, "learning_rate": 1.99987877537857e-05, "loss": 0.9528, "step": 909 }, { "epoch": 0.875, "grad_norm": 5.898858547210693, "learning_rate": 1.9998768280662288e-05, "loss": 0.8141, "step": 910 }, { "epoch": 0.8759615384615385, "grad_norm": 6.807584762573242, "learning_rate": 1.9998748652387338e-05, "loss": 0.8836, "step": 911 }, { "epoch": 0.8769230769230769, "grad_norm": 5.395894527435303, "learning_rate": 1.999872886896116e-05, "loss": 0.5525, "step": 912 }, { "epoch": 0.8778846153846154, "grad_norm": 7.083970069885254, "learning_rate": 1.9998708930384046e-05, "loss": 0.7017, "step": 913 }, { "epoch": 0.8788461538461538, "grad_norm": 7.698013782501221, "learning_rate": 1.9998688836656322e-05, "loss": 1.0399, "step": 914 }, { "epoch": 0.8798076923076923, "grad_norm": 5.347697734832764, "learning_rate": 1.9998668587778295e-05, "loss": 0.4858, "step": 915 }, { "epoch": 0.8807692307692307, "grad_norm": 6.263169288635254, "learning_rate": 1.9998648183750272e-05, "loss": 0.5429, "step": 916 }, { "epoch": 0.8817307692307692, "grad_norm": 7.332420349121094, "learning_rate": 1.9998627624572577e-05, "loss": 0.8573, "step": 917 }, { "epoch": 0.8826923076923077, "grad_norm": 5.052174091339111, "learning_rate": 1.9998606910245527e-05, "loss": 0.6722, "step": 918 }, { "epoch": 0.8836538461538461, "grad_norm": 7.270461082458496, "learning_rate": 1.9998586040769444e-05, "loss": 0.8256, "step": 919 }, { "epoch": 0.8846153846153846, "grad_norm": 7.690074920654297, "learning_rate": 1.999856501614465e-05, "loss": 1.1429, "step": 920 }, { "epoch": 0.885576923076923, "grad_norm": 8.487188339233398, "learning_rate": 1.999854383637147e-05, "loss": 0.852, "step": 921 }, { "epoch": 0.8865384615384615, "grad_norm": 5.687994480133057, "learning_rate": 1.9998522501450242e-05, "loss": 1.1164, "step": 922 }, { "epoch": 0.8875, "grad_norm": 6.106229305267334, "learning_rate": 1.9998501011381285e-05, "loss": 0.6797, "step": 923 }, { "epoch": 0.8884615384615384, "grad_norm": 4.704649925231934, "learning_rate": 1.9998479366164938e-05, "loss": 0.5148, "step": 924 }, { "epoch": 0.8894230769230769, "grad_norm": 7.135677814483643, "learning_rate": 1.9998457565801536e-05, "loss": 1.3273, "step": 925 }, { "epoch": 0.8903846153846153, "grad_norm": 6.0757598876953125, "learning_rate": 1.9998435610291417e-05, "loss": 0.8336, "step": 926 }, { "epoch": 0.8913461538461539, "grad_norm": 6.18604040145874, "learning_rate": 1.9998413499634927e-05, "loss": 0.7209, "step": 927 }, { "epoch": 0.8923076923076924, "grad_norm": 5.885686874389648, "learning_rate": 1.99983912338324e-05, "loss": 0.6951, "step": 928 }, { "epoch": 0.8932692307692308, "grad_norm": 5.187516689300537, "learning_rate": 1.9998368812884184e-05, "loss": 0.6747, "step": 929 }, { "epoch": 0.8942307692307693, "grad_norm": 5.848902225494385, "learning_rate": 1.9998346236790636e-05, "loss": 1.0667, "step": 930 }, { "epoch": 0.8951923076923077, "grad_norm": 6.040404796600342, "learning_rate": 1.9998323505552097e-05, "loss": 0.5263, "step": 931 }, { "epoch": 0.8961538461538462, "grad_norm": 6.588641166687012, "learning_rate": 1.999830061916892e-05, "loss": 0.8368, "step": 932 }, { "epoch": 0.8971153846153846, "grad_norm": 10.793196678161621, "learning_rate": 1.9998277577641462e-05, "loss": 1.2131, "step": 933 }, { "epoch": 0.8980769230769231, "grad_norm": 6.3086419105529785, "learning_rate": 1.9998254380970082e-05, "loss": 1.0137, "step": 934 }, { "epoch": 0.8990384615384616, "grad_norm": 6.189152240753174, "learning_rate": 1.9998231029155134e-05, "loss": 0.4767, "step": 935 }, { "epoch": 0.9, "grad_norm": 6.759917736053467, "learning_rate": 1.9998207522196988e-05, "loss": 0.7333, "step": 936 }, { "epoch": 0.9009615384615385, "grad_norm": 6.706419944763184, "learning_rate": 1.999818386009601e-05, "loss": 0.7916, "step": 937 }, { "epoch": 0.9019230769230769, "grad_norm": 7.419964790344238, "learning_rate": 1.9998160042852557e-05, "loss": 1.1929, "step": 938 }, { "epoch": 0.9028846153846154, "grad_norm": 6.5323662757873535, "learning_rate": 1.9998136070467008e-05, "loss": 0.8068, "step": 939 }, { "epoch": 0.9038461538461539, "grad_norm": 6.527603626251221, "learning_rate": 1.9998111942939727e-05, "loss": 1.018, "step": 940 }, { "epoch": 0.9048076923076923, "grad_norm": 6.560862064361572, "learning_rate": 1.99980876602711e-05, "loss": 0.7428, "step": 941 }, { "epoch": 0.9057692307692308, "grad_norm": 5.247538089752197, "learning_rate": 1.9998063222461487e-05, "loss": 0.4097, "step": 942 }, { "epoch": 0.9067307692307692, "grad_norm": 6.600739002227783, "learning_rate": 1.9998038629511283e-05, "loss": 0.3599, "step": 943 }, { "epoch": 0.9076923076923077, "grad_norm": 6.46824312210083, "learning_rate": 1.999801388142086e-05, "loss": 1.1253, "step": 944 }, { "epoch": 0.9086538461538461, "grad_norm": 5.955251216888428, "learning_rate": 1.9997988978190606e-05, "loss": 0.657, "step": 945 }, { "epoch": 0.9096153846153846, "grad_norm": 6.622871398925781, "learning_rate": 1.9997963919820914e-05, "loss": 0.9248, "step": 946 }, { "epoch": 0.9105769230769231, "grad_norm": 5.815270900726318, "learning_rate": 1.9997938706312156e-05, "loss": 0.5138, "step": 947 }, { "epoch": 0.9115384615384615, "grad_norm": 6.039912223815918, "learning_rate": 1.9997913337664737e-05, "loss": 0.8094, "step": 948 }, { "epoch": 0.9125, "grad_norm": 6.954077243804932, "learning_rate": 1.999788781387905e-05, "loss": 1.079, "step": 949 }, { "epoch": 0.9134615384615384, "grad_norm": 8.715128898620605, "learning_rate": 1.9997862134955482e-05, "loss": 1.1791, "step": 950 }, { "epoch": 0.9144230769230769, "grad_norm": 7.413211822509766, "learning_rate": 1.9997836300894436e-05, "loss": 0.9528, "step": 951 }, { "epoch": 0.9153846153846154, "grad_norm": 8.2767972946167, "learning_rate": 1.999781031169632e-05, "loss": 0.9011, "step": 952 }, { "epoch": 0.9163461538461538, "grad_norm": 6.818759918212891, "learning_rate": 1.9997784167361526e-05, "loss": 0.569, "step": 953 }, { "epoch": 0.9173076923076923, "grad_norm": 7.01605749130249, "learning_rate": 1.9997757867890465e-05, "loss": 1.0057, "step": 954 }, { "epoch": 0.9182692307692307, "grad_norm": 6.332067489624023, "learning_rate": 1.999773141328355e-05, "loss": 0.6128, "step": 955 }, { "epoch": 0.9192307692307692, "grad_norm": 5.373310089111328, "learning_rate": 1.999770480354118e-05, "loss": 0.533, "step": 956 }, { "epoch": 0.9201923076923076, "grad_norm": 6.527740955352783, "learning_rate": 1.999767803866378e-05, "loss": 0.9015, "step": 957 }, { "epoch": 0.9211538461538461, "grad_norm": 6.475638389587402, "learning_rate": 1.9997651118651758e-05, "loss": 0.8425, "step": 958 }, { "epoch": 0.9221153846153847, "grad_norm": 5.932764530181885, "learning_rate": 1.9997624043505532e-05, "loss": 0.8366, "step": 959 }, { "epoch": 0.9230769230769231, "grad_norm": 5.560042381286621, "learning_rate": 1.9997596813225523e-05, "loss": 0.8311, "step": 960 }, { "epoch": 0.9240384615384616, "grad_norm": 6.285022258758545, "learning_rate": 1.9997569427812155e-05, "loss": 0.8557, "step": 961 }, { "epoch": 0.925, "grad_norm": 5.48519229888916, "learning_rate": 1.9997541887265855e-05, "loss": 0.7773, "step": 962 }, { "epoch": 0.9259615384615385, "grad_norm": 7.271998405456543, "learning_rate": 1.9997514191587043e-05, "loss": 0.8083, "step": 963 }, { "epoch": 0.926923076923077, "grad_norm": 6.874059677124023, "learning_rate": 1.9997486340776155e-05, "loss": 0.8059, "step": 964 }, { "epoch": 0.9278846153846154, "grad_norm": 6.732748031616211, "learning_rate": 1.9997458334833623e-05, "loss": 0.7693, "step": 965 }, { "epoch": 0.9288461538461539, "grad_norm": 6.503231048583984, "learning_rate": 1.9997430173759876e-05, "loss": 0.6997, "step": 966 }, { "epoch": 0.9298076923076923, "grad_norm": 7.512804985046387, "learning_rate": 1.9997401857555355e-05, "loss": 0.6387, "step": 967 }, { "epoch": 0.9307692307692308, "grad_norm": 6.385407447814941, "learning_rate": 1.9997373386220504e-05, "loss": 0.8439, "step": 968 }, { "epoch": 0.9317307692307693, "grad_norm": 6.374464511871338, "learning_rate": 1.9997344759755756e-05, "loss": 0.6609, "step": 969 }, { "epoch": 0.9326923076923077, "grad_norm": 6.53533935546875, "learning_rate": 1.999731597816156e-05, "loss": 0.9842, "step": 970 }, { "epoch": 0.9336538461538462, "grad_norm": 6.414409637451172, "learning_rate": 1.999728704143836e-05, "loss": 0.6812, "step": 971 }, { "epoch": 0.9346153846153846, "grad_norm": 6.762994766235352, "learning_rate": 1.999725794958661e-05, "loss": 1.021, "step": 972 }, { "epoch": 0.9355769230769231, "grad_norm": 5.60960054397583, "learning_rate": 1.9997228702606754e-05, "loss": 0.6339, "step": 973 }, { "epoch": 0.9365384615384615, "grad_norm": 7.388233661651611, "learning_rate": 1.9997199300499256e-05, "loss": 0.9059, "step": 974 }, { "epoch": 0.9375, "grad_norm": 4.932934284210205, "learning_rate": 1.999716974326456e-05, "loss": 0.3993, "step": 975 }, { "epoch": 0.9384615384615385, "grad_norm": 5.426871299743652, "learning_rate": 1.9997140030903135e-05, "loss": 0.7481, "step": 976 }, { "epoch": 0.9394230769230769, "grad_norm": 8.421114921569824, "learning_rate": 1.999711016341544e-05, "loss": 0.5883, "step": 977 }, { "epoch": 0.9403846153846154, "grad_norm": 6.753684043884277, "learning_rate": 1.9997080140801932e-05, "loss": 0.9688, "step": 978 }, { "epoch": 0.9413461538461538, "grad_norm": 7.062712669372559, "learning_rate": 1.999704996306308e-05, "loss": 0.9666, "step": 979 }, { "epoch": 0.9423076923076923, "grad_norm": 6.5961833000183105, "learning_rate": 1.999701963019936e-05, "loss": 1.1338, "step": 980 }, { "epoch": 0.9432692307692307, "grad_norm": 5.818157196044922, "learning_rate": 1.999698914221123e-05, "loss": 0.8254, "step": 981 }, { "epoch": 0.9442307692307692, "grad_norm": 5.75553560256958, "learning_rate": 1.9996958499099172e-05, "loss": 0.5744, "step": 982 }, { "epoch": 0.9451923076923077, "grad_norm": 6.633662700653076, "learning_rate": 1.999692770086366e-05, "loss": 0.9097, "step": 983 }, { "epoch": 0.9461538461538461, "grad_norm": 6.493154048919678, "learning_rate": 1.999689674750517e-05, "loss": 1.0245, "step": 984 }, { "epoch": 0.9471153846153846, "grad_norm": 6.3464789390563965, "learning_rate": 1.9996865639024182e-05, "loss": 1.0557, "step": 985 }, { "epoch": 0.948076923076923, "grad_norm": 6.058281898498535, "learning_rate": 1.9996834375421178e-05, "loss": 0.8281, "step": 986 }, { "epoch": 0.9490384615384615, "grad_norm": 6.934525489807129, "learning_rate": 1.999680295669665e-05, "loss": 0.8771, "step": 987 }, { "epoch": 0.95, "grad_norm": 7.29697847366333, "learning_rate": 1.9996771382851078e-05, "loss": 1.4616, "step": 988 }, { "epoch": 0.9509615384615384, "grad_norm": 5.95722770690918, "learning_rate": 1.999673965388495e-05, "loss": 0.5654, "step": 989 }, { "epoch": 0.9519230769230769, "grad_norm": 6.476313591003418, "learning_rate": 1.9996707769798766e-05, "loss": 0.953, "step": 990 }, { "epoch": 0.9528846153846153, "grad_norm": 5.331081390380859, "learning_rate": 1.999667573059302e-05, "loss": 0.6363, "step": 991 }, { "epoch": 0.9538461538461539, "grad_norm": 5.062324047088623, "learning_rate": 1.9996643536268202e-05, "loss": 0.5531, "step": 992 }, { "epoch": 0.9548076923076924, "grad_norm": 7.54680061340332, "learning_rate": 1.999661118682482e-05, "loss": 1.3181, "step": 993 }, { "epoch": 0.9557692307692308, "grad_norm": 6.755451202392578, "learning_rate": 1.999657868226337e-05, "loss": 1.0847, "step": 994 }, { "epoch": 0.9567307692307693, "grad_norm": 7.961743354797363, "learning_rate": 1.9996546022584358e-05, "loss": 0.6748, "step": 995 }, { "epoch": 0.9576923076923077, "grad_norm": 6.134245872497559, "learning_rate": 1.9996513207788295e-05, "loss": 0.8583, "step": 996 }, { "epoch": 0.9586538461538462, "grad_norm": 6.445262908935547, "learning_rate": 1.999648023787568e-05, "loss": 0.5606, "step": 997 }, { "epoch": 0.9596153846153846, "grad_norm": 5.707930088043213, "learning_rate": 1.9996447112847037e-05, "loss": 0.625, "step": 998 }, { "epoch": 0.9605769230769231, "grad_norm": 5.835326671600342, "learning_rate": 1.9996413832702874e-05, "loss": 0.5725, "step": 999 }, { "epoch": 0.9615384615384616, "grad_norm": 6.659704685211182, "learning_rate": 1.99963803974437e-05, "loss": 0.8935, "step": 1000 }, { "epoch": 0.9625, "grad_norm": 7.269262790679932, "learning_rate": 1.9996346807070047e-05, "loss": 0.633, "step": 1001 }, { "epoch": 0.9634615384615385, "grad_norm": 6.517698764801025, "learning_rate": 1.9996313061582432e-05, "loss": 0.8079, "step": 1002 }, { "epoch": 0.9644230769230769, "grad_norm": 7.109411239624023, "learning_rate": 1.9996279160981372e-05, "loss": 0.6815, "step": 1003 }, { "epoch": 0.9653846153846154, "grad_norm": 6.744853973388672, "learning_rate": 1.9996245105267405e-05, "loss": 1.1549, "step": 1004 }, { "epoch": 0.9663461538461539, "grad_norm": 6.296388149261475, "learning_rate": 1.9996210894441047e-05, "loss": 0.7063, "step": 1005 }, { "epoch": 0.9673076923076923, "grad_norm": 6.103750705718994, "learning_rate": 1.9996176528502834e-05, "loss": 0.5711, "step": 1006 }, { "epoch": 0.9682692307692308, "grad_norm": 5.867870330810547, "learning_rate": 1.9996142007453305e-05, "loss": 0.5474, "step": 1007 }, { "epoch": 0.9692307692307692, "grad_norm": 5.833316802978516, "learning_rate": 1.9996107331292986e-05, "loss": 0.4506, "step": 1008 }, { "epoch": 0.9701923076923077, "grad_norm": 6.399518966674805, "learning_rate": 1.999607250002242e-05, "loss": 0.7678, "step": 1009 }, { "epoch": 0.9711538461538461, "grad_norm": 5.513955116271973, "learning_rate": 1.999603751364215e-05, "loss": 0.5676, "step": 1010 }, { "epoch": 0.9721153846153846, "grad_norm": 6.718295097351074, "learning_rate": 1.9996002372152713e-05, "loss": 0.5964, "step": 1011 }, { "epoch": 0.9730769230769231, "grad_norm": 6.270368576049805, "learning_rate": 1.9995967075554657e-05, "loss": 0.8793, "step": 1012 }, { "epoch": 0.9740384615384615, "grad_norm": 7.981717586517334, "learning_rate": 1.999593162384853e-05, "loss": 0.8417, "step": 1013 }, { "epoch": 0.975, "grad_norm": 6.330605506896973, "learning_rate": 1.999589601703488e-05, "loss": 0.9463, "step": 1014 }, { "epoch": 0.9759615384615384, "grad_norm": 4.5753865242004395, "learning_rate": 1.9995860255114265e-05, "loss": 0.4026, "step": 1015 }, { "epoch": 0.9769230769230769, "grad_norm": 6.811415672302246, "learning_rate": 1.9995824338087236e-05, "loss": 0.7445, "step": 1016 }, { "epoch": 0.9778846153846154, "grad_norm": 7.552369594573975, "learning_rate": 1.999578826595435e-05, "loss": 1.4157, "step": 1017 }, { "epoch": 0.9788461538461538, "grad_norm": 5.122111797332764, "learning_rate": 1.9995752038716166e-05, "loss": 0.4376, "step": 1018 }, { "epoch": 0.9798076923076923, "grad_norm": 6.579594612121582, "learning_rate": 1.9995715656373247e-05, "loss": 0.9817, "step": 1019 }, { "epoch": 0.9807692307692307, "grad_norm": 8.090726852416992, "learning_rate": 1.999567911892616e-05, "loss": 1.0297, "step": 1020 }, { "epoch": 0.9817307692307692, "grad_norm": 7.3989715576171875, "learning_rate": 1.999564242637547e-05, "loss": 1.1979, "step": 1021 }, { "epoch": 0.9826923076923076, "grad_norm": 7.223077297210693, "learning_rate": 1.9995605578721745e-05, "loss": 0.7996, "step": 1022 }, { "epoch": 0.9836538461538461, "grad_norm": 6.847875595092773, "learning_rate": 1.9995568575965563e-05, "loss": 1.1368, "step": 1023 }, { "epoch": 0.9846153846153847, "grad_norm": 6.369284152984619, "learning_rate": 1.9995531418107487e-05, "loss": 1.2831, "step": 1024 }, { "epoch": 0.9855769230769231, "grad_norm": 5.5165581703186035, "learning_rate": 1.9995494105148104e-05, "loss": 0.7198, "step": 1025 }, { "epoch": 0.9865384615384616, "grad_norm": 6.249137878417969, "learning_rate": 1.9995456637087986e-05, "loss": 0.9262, "step": 1026 }, { "epoch": 0.9875, "grad_norm": 5.514549732208252, "learning_rate": 1.999541901392772e-05, "loss": 0.7897, "step": 1027 }, { "epoch": 0.9884615384615385, "grad_norm": 6.3671979904174805, "learning_rate": 1.9995381235667886e-05, "loss": 0.736, "step": 1028 }, { "epoch": 0.989423076923077, "grad_norm": 5.773316860198975, "learning_rate": 1.999534330230907e-05, "loss": 0.7762, "step": 1029 }, { "epoch": 0.9903846153846154, "grad_norm": 6.401609420776367, "learning_rate": 1.9995305213851864e-05, "loss": 0.7815, "step": 1030 }, { "epoch": 0.9913461538461539, "grad_norm": 4.719688892364502, "learning_rate": 1.9995266970296856e-05, "loss": 0.5344, "step": 1031 }, { "epoch": 0.9923076923076923, "grad_norm": 7.324151992797852, "learning_rate": 1.9995228571644638e-05, "loss": 0.7, "step": 1032 }, { "epoch": 0.9932692307692308, "grad_norm": 6.512178421020508, "learning_rate": 1.9995190017895815e-05, "loss": 0.7334, "step": 1033 }, { "epoch": 0.9942307692307693, "grad_norm": 6.044682502746582, "learning_rate": 1.9995151309050973e-05, "loss": 0.7207, "step": 1034 }, { "epoch": 0.9951923076923077, "grad_norm": 5.5250444412231445, "learning_rate": 1.9995112445110715e-05, "loss": 0.7355, "step": 1035 }, { "epoch": 0.9961538461538462, "grad_norm": 5.584804058074951, "learning_rate": 1.9995073426075655e-05, "loss": 0.7519, "step": 1036 }, { "epoch": 0.9971153846153846, "grad_norm": 4.9181671142578125, "learning_rate": 1.9995034251946386e-05, "loss": 0.4573, "step": 1037 }, { "epoch": 0.9980769230769231, "grad_norm": 6.8415327072143555, "learning_rate": 1.999499492272352e-05, "loss": 1.2133, "step": 1038 }, { "epoch": 0.9990384615384615, "grad_norm": 6.739780426025391, "learning_rate": 1.999495543840767e-05, "loss": 0.8462, "step": 1039 }, { "epoch": 1.0, "grad_norm": 4.792842864990234, "learning_rate": 1.9994915798999445e-05, "loss": 0.4894, "step": 1040 }, { "epoch": 1.0009615384615385, "grad_norm": 5.606225490570068, "learning_rate": 1.999487600449946e-05, "loss": 0.7348, "step": 1041 }, { "epoch": 1.001923076923077, "grad_norm": 4.640485763549805, "learning_rate": 1.999483605490834e-05, "loss": 0.4064, "step": 1042 }, { "epoch": 1.0028846153846154, "grad_norm": 3.6671459674835205, "learning_rate": 1.9994795950226697e-05, "loss": 0.2712, "step": 1043 }, { "epoch": 1.0038461538461538, "grad_norm": 5.446413040161133, "learning_rate": 1.9994755690455154e-05, "loss": 0.5604, "step": 1044 }, { "epoch": 1.0048076923076923, "grad_norm": 5.668412208557129, "learning_rate": 1.9994715275594334e-05, "loss": 0.4437, "step": 1045 }, { "epoch": 1.0057692307692307, "grad_norm": 3.8693366050720215, "learning_rate": 1.999467470564487e-05, "loss": 0.2686, "step": 1046 }, { "epoch": 1.0067307692307692, "grad_norm": 4.903328895568848, "learning_rate": 1.9994633980607387e-05, "loss": 0.4416, "step": 1047 }, { "epoch": 1.0076923076923077, "grad_norm": 4.506348609924316, "learning_rate": 1.999459310048252e-05, "loss": 0.3725, "step": 1048 }, { "epoch": 1.0086538461538461, "grad_norm": 4.645721435546875, "learning_rate": 1.99945520652709e-05, "loss": 0.3044, "step": 1049 }, { "epoch": 1.0096153846153846, "grad_norm": 6.300097942352295, "learning_rate": 1.9994510874973166e-05, "loss": 0.5596, "step": 1050 }, { "epoch": 1.010576923076923, "grad_norm": 6.920491695404053, "learning_rate": 1.9994469529589957e-05, "loss": 0.5733, "step": 1051 }, { "epoch": 1.0115384615384615, "grad_norm": 5.92885160446167, "learning_rate": 1.9994428029121917e-05, "loss": 0.4798, "step": 1052 }, { "epoch": 1.0125, "grad_norm": 8.496100425720215, "learning_rate": 1.9994386373569683e-05, "loss": 0.2659, "step": 1053 }, { "epoch": 1.0134615384615384, "grad_norm": 5.227762222290039, "learning_rate": 1.999434456293391e-05, "loss": 0.3953, "step": 1054 }, { "epoch": 1.0144230769230769, "grad_norm": 7.128794193267822, "learning_rate": 1.9994302597215238e-05, "loss": 0.6493, "step": 1055 }, { "epoch": 1.0153846153846153, "grad_norm": 5.377303600311279, "learning_rate": 1.9994260476414328e-05, "loss": 0.3359, "step": 1056 }, { "epoch": 1.0163461538461538, "grad_norm": 7.317755222320557, "learning_rate": 1.9994218200531823e-05, "loss": 0.3992, "step": 1057 }, { "epoch": 1.0173076923076922, "grad_norm": 5.743330001831055, "learning_rate": 1.9994175769568387e-05, "loss": 0.2748, "step": 1058 }, { "epoch": 1.0182692307692307, "grad_norm": 5.675488471984863, "learning_rate": 1.9994133183524676e-05, "loss": 0.326, "step": 1059 }, { "epoch": 1.0192307692307692, "grad_norm": 7.1307878494262695, "learning_rate": 1.999409044240135e-05, "loss": 0.4037, "step": 1060 }, { "epoch": 1.0201923076923076, "grad_norm": 5.057419776916504, "learning_rate": 1.999404754619907e-05, "loss": 0.197, "step": 1061 }, { "epoch": 1.021153846153846, "grad_norm": 8.096945762634277, "learning_rate": 1.9994004494918508e-05, "loss": 0.4439, "step": 1062 }, { "epoch": 1.0221153846153845, "grad_norm": 7.300214767456055, "learning_rate": 1.9993961288560326e-05, "loss": 0.5782, "step": 1063 }, { "epoch": 1.023076923076923, "grad_norm": 7.104685306549072, "learning_rate": 1.9993917927125196e-05, "loss": 0.403, "step": 1064 }, { "epoch": 1.0240384615384615, "grad_norm": 7.317742824554443, "learning_rate": 1.9993874410613794e-05, "loss": 0.4562, "step": 1065 }, { "epoch": 1.025, "grad_norm": 10.00743293762207, "learning_rate": 1.999383073902679e-05, "loss": 0.7278, "step": 1066 }, { "epoch": 1.0259615384615384, "grad_norm": 9.68393611907959, "learning_rate": 1.9993786912364866e-05, "loss": 0.6461, "step": 1067 }, { "epoch": 1.0269230769230768, "grad_norm": 7.881343841552734, "learning_rate": 1.9993742930628702e-05, "loss": 0.2457, "step": 1068 }, { "epoch": 1.0278846153846153, "grad_norm": 7.396327972412109, "learning_rate": 1.999369879381898e-05, "loss": 0.4925, "step": 1069 }, { "epoch": 1.0288461538461537, "grad_norm": 5.656320095062256, "learning_rate": 1.999365450193638e-05, "loss": 0.3072, "step": 1070 }, { "epoch": 1.0298076923076922, "grad_norm": 6.577585220336914, "learning_rate": 1.9993610054981595e-05, "loss": 0.5112, "step": 1071 }, { "epoch": 1.0307692307692307, "grad_norm": 5.554373264312744, "learning_rate": 1.9993565452955312e-05, "loss": 0.4595, "step": 1072 }, { "epoch": 1.0317307692307693, "grad_norm": 5.267021179199219, "learning_rate": 1.9993520695858223e-05, "loss": 0.31, "step": 1073 }, { "epoch": 1.0326923076923078, "grad_norm": 8.879277229309082, "learning_rate": 1.9993475783691026e-05, "loss": 0.5479, "step": 1074 }, { "epoch": 1.0336538461538463, "grad_norm": 6.65545654296875, "learning_rate": 1.9993430716454415e-05, "loss": 0.5412, "step": 1075 }, { "epoch": 1.0346153846153847, "grad_norm": 7.002396106719971, "learning_rate": 1.9993385494149087e-05, "loss": 0.6979, "step": 1076 }, { "epoch": 1.0355769230769232, "grad_norm": 6.439008712768555, "learning_rate": 1.9993340116775747e-05, "loss": 0.6675, "step": 1077 }, { "epoch": 1.0365384615384616, "grad_norm": 6.155060291290283, "learning_rate": 1.9993294584335102e-05, "loss": 0.2372, "step": 1078 }, { "epoch": 1.0375, "grad_norm": 4.499278545379639, "learning_rate": 1.9993248896827854e-05, "loss": 0.1979, "step": 1079 }, { "epoch": 1.0384615384615385, "grad_norm": 7.5623393058776855, "learning_rate": 1.9993203054254712e-05, "loss": 0.6456, "step": 1080 }, { "epoch": 1.039423076923077, "grad_norm": 5.692450523376465, "learning_rate": 1.999315705661639e-05, "loss": 0.2456, "step": 1081 }, { "epoch": 1.0403846153846155, "grad_norm": 6.995692253112793, "learning_rate": 1.9993110903913596e-05, "loss": 0.3944, "step": 1082 }, { "epoch": 1.041346153846154, "grad_norm": 6.709227085113525, "learning_rate": 1.999306459614705e-05, "loss": 0.5322, "step": 1083 }, { "epoch": 1.0423076923076924, "grad_norm": 6.7803449630737305, "learning_rate": 1.9993018133317476e-05, "loss": 0.4102, "step": 1084 }, { "epoch": 1.0432692307692308, "grad_norm": 6.877873420715332, "learning_rate": 1.9992971515425587e-05, "loss": 0.4696, "step": 1085 }, { "epoch": 1.0442307692307693, "grad_norm": 7.144748210906982, "learning_rate": 1.9992924742472105e-05, "loss": 0.4465, "step": 1086 }, { "epoch": 1.0451923076923078, "grad_norm": 6.464044570922852, "learning_rate": 1.9992877814457764e-05, "loss": 0.4073, "step": 1087 }, { "epoch": 1.0461538461538462, "grad_norm": 5.912546157836914, "learning_rate": 1.9992830731383292e-05, "loss": 0.4485, "step": 1088 }, { "epoch": 1.0471153846153847, "grad_norm": 6.211718559265137, "learning_rate": 1.999278349324941e-05, "loss": 0.4896, "step": 1089 }, { "epoch": 1.0480769230769231, "grad_norm": 7.128233432769775, "learning_rate": 1.999273610005686e-05, "loss": 0.4714, "step": 1090 }, { "epoch": 1.0490384615384616, "grad_norm": 7.3326802253723145, "learning_rate": 1.999268855180637e-05, "loss": 0.4349, "step": 1091 }, { "epoch": 1.05, "grad_norm": 6.750403881072998, "learning_rate": 1.9992640848498682e-05, "loss": 0.3541, "step": 1092 }, { "epoch": 1.0509615384615385, "grad_norm": 8.142142295837402, "learning_rate": 1.999259299013454e-05, "loss": 0.9746, "step": 1093 }, { "epoch": 1.051923076923077, "grad_norm": 4.710968017578125, "learning_rate": 1.999254497671468e-05, "loss": 0.2047, "step": 1094 }, { "epoch": 1.0528846153846154, "grad_norm": 6.5691142082214355, "learning_rate": 1.9992496808239852e-05, "loss": 0.3882, "step": 1095 }, { "epoch": 1.0538461538461539, "grad_norm": 5.817934989929199, "learning_rate": 1.99924484847108e-05, "loss": 0.4374, "step": 1096 }, { "epoch": 1.0548076923076923, "grad_norm": 9.228900909423828, "learning_rate": 1.9992400006128272e-05, "loss": 0.7448, "step": 1097 }, { "epoch": 1.0557692307692308, "grad_norm": 6.37177848815918, "learning_rate": 1.999235137249303e-05, "loss": 0.4514, "step": 1098 }, { "epoch": 1.0567307692307693, "grad_norm": 5.227433204650879, "learning_rate": 1.9992302583805814e-05, "loss": 0.2552, "step": 1099 }, { "epoch": 1.0576923076923077, "grad_norm": 4.961402416229248, "learning_rate": 1.9992253640067395e-05, "loss": 0.2439, "step": 1100 }, { "epoch": 1.0586538461538462, "grad_norm": 7.431980133056641, "learning_rate": 1.999220454127852e-05, "loss": 0.565, "step": 1101 }, { "epoch": 1.0596153846153846, "grad_norm": 6.033560752868652, "learning_rate": 1.9992155287439965e-05, "loss": 0.533, "step": 1102 }, { "epoch": 1.060576923076923, "grad_norm": 6.409025192260742, "learning_rate": 1.9992105878552485e-05, "loss": 0.3311, "step": 1103 }, { "epoch": 1.0615384615384615, "grad_norm": 4.791571140289307, "learning_rate": 1.9992056314616843e-05, "loss": 0.2601, "step": 1104 }, { "epoch": 1.0625, "grad_norm": 5.773436069488525, "learning_rate": 1.999200659563382e-05, "loss": 0.3208, "step": 1105 }, { "epoch": 1.0634615384615385, "grad_norm": 6.903429985046387, "learning_rate": 1.9991956721604177e-05, "loss": 0.5372, "step": 1106 }, { "epoch": 1.064423076923077, "grad_norm": 7.729345321655273, "learning_rate": 1.999190669252869e-05, "loss": 0.8663, "step": 1107 }, { "epoch": 1.0653846153846154, "grad_norm": 8.349871635437012, "learning_rate": 1.9991856508408142e-05, "loss": 0.796, "step": 1108 }, { "epoch": 1.0663461538461538, "grad_norm": 7.821302890777588, "learning_rate": 1.9991806169243302e-05, "loss": 0.5286, "step": 1109 }, { "epoch": 1.0673076923076923, "grad_norm": 6.640925884246826, "learning_rate": 1.9991755675034956e-05, "loss": 0.4734, "step": 1110 }, { "epoch": 1.0682692307692307, "grad_norm": 8.404497146606445, "learning_rate": 1.999170502578389e-05, "loss": 0.8259, "step": 1111 }, { "epoch": 1.0692307692307692, "grad_norm": 5.640302658081055, "learning_rate": 1.9991654221490887e-05, "loss": 0.3709, "step": 1112 }, { "epoch": 1.0701923076923077, "grad_norm": 3.9741883277893066, "learning_rate": 1.9991603262156736e-05, "loss": 0.2644, "step": 1113 }, { "epoch": 1.0711538461538461, "grad_norm": 6.809677600860596, "learning_rate": 1.9991552147782223e-05, "loss": 0.9083, "step": 1114 }, { "epoch": 1.0721153846153846, "grad_norm": 7.317231178283691, "learning_rate": 1.9991500878368152e-05, "loss": 0.8388, "step": 1115 }, { "epoch": 1.073076923076923, "grad_norm": 7.264547824859619, "learning_rate": 1.9991449453915307e-05, "loss": 0.781, "step": 1116 }, { "epoch": 1.0740384615384615, "grad_norm": 9.55041790008545, "learning_rate": 1.9991397874424497e-05, "loss": 0.533, "step": 1117 }, { "epoch": 1.075, "grad_norm": 8.827985763549805, "learning_rate": 1.999134613989651e-05, "loss": 0.39, "step": 1118 }, { "epoch": 1.0759615384615384, "grad_norm": 4.838763236999512, "learning_rate": 1.9991294250332158e-05, "loss": 0.1826, "step": 1119 }, { "epoch": 1.0769230769230769, "grad_norm": 6.335948467254639, "learning_rate": 1.9991242205732245e-05, "loss": 0.5069, "step": 1120 }, { "epoch": 1.0778846153846153, "grad_norm": 7.490146636962891, "learning_rate": 1.9991190006097572e-05, "loss": 0.5627, "step": 1121 }, { "epoch": 1.0788461538461538, "grad_norm": 6.4945526123046875, "learning_rate": 1.9991137651428957e-05, "loss": 0.6042, "step": 1122 }, { "epoch": 1.0798076923076922, "grad_norm": 6.522860527038574, "learning_rate": 1.999108514172721e-05, "loss": 0.7127, "step": 1123 }, { "epoch": 1.0807692307692307, "grad_norm": 7.944681644439697, "learning_rate": 1.9991032476993143e-05, "loss": 0.5075, "step": 1124 }, { "epoch": 1.0817307692307692, "grad_norm": 5.116379737854004, "learning_rate": 1.999097965722758e-05, "loss": 0.2056, "step": 1125 }, { "epoch": 1.0826923076923076, "grad_norm": 6.949309825897217, "learning_rate": 1.999092668243133e-05, "loss": 0.5006, "step": 1126 }, { "epoch": 1.083653846153846, "grad_norm": 7.227869510650635, "learning_rate": 1.9990873552605225e-05, "loss": 0.3434, "step": 1127 }, { "epoch": 1.0846153846153845, "grad_norm": 8.059642791748047, "learning_rate": 1.9990820267750082e-05, "loss": 0.304, "step": 1128 }, { "epoch": 1.085576923076923, "grad_norm": 4.098360061645508, "learning_rate": 1.999076682786673e-05, "loss": 0.1838, "step": 1129 }, { "epoch": 1.0865384615384615, "grad_norm": 6.768153667449951, "learning_rate": 1.9990713232956006e-05, "loss": 0.4079, "step": 1130 }, { "epoch": 1.0875, "grad_norm": 7.478009223937988, "learning_rate": 1.999065948301873e-05, "loss": 0.4571, "step": 1131 }, { "epoch": 1.0884615384615384, "grad_norm": 6.325808525085449, "learning_rate": 1.9990605578055743e-05, "loss": 0.3151, "step": 1132 }, { "epoch": 1.0894230769230768, "grad_norm": 6.54127836227417, "learning_rate": 1.999055151806788e-05, "loss": 0.3852, "step": 1133 }, { "epoch": 1.0903846153846153, "grad_norm": 5.063063144683838, "learning_rate": 1.9990497303055977e-05, "loss": 0.1535, "step": 1134 }, { "epoch": 1.0913461538461537, "grad_norm": 6.758224964141846, "learning_rate": 1.999044293302088e-05, "loss": 0.5029, "step": 1135 }, { "epoch": 1.0923076923076924, "grad_norm": 8.682121276855469, "learning_rate": 1.9990388407963425e-05, "loss": 0.8849, "step": 1136 }, { "epoch": 1.0932692307692307, "grad_norm": 6.893138885498047, "learning_rate": 1.999033372788447e-05, "loss": 0.415, "step": 1137 }, { "epoch": 1.0942307692307693, "grad_norm": 4.439310550689697, "learning_rate": 1.9990278892784853e-05, "loss": 0.2235, "step": 1138 }, { "epoch": 1.0951923076923076, "grad_norm": 5.8054423332214355, "learning_rate": 1.9990223902665428e-05, "loss": 0.5063, "step": 1139 }, { "epoch": 1.0961538461538463, "grad_norm": 5.854102611541748, "learning_rate": 1.9990168757527053e-05, "loss": 0.4611, "step": 1140 }, { "epoch": 1.0971153846153847, "grad_norm": 8.061192512512207, "learning_rate": 1.9990113457370577e-05, "loss": 0.7891, "step": 1141 }, { "epoch": 1.0980769230769232, "grad_norm": 8.330294609069824, "learning_rate": 1.999005800219686e-05, "loss": 0.6367, "step": 1142 }, { "epoch": 1.0990384615384616, "grad_norm": 6.979385852813721, "learning_rate": 1.9990002392006765e-05, "loss": 0.7274, "step": 1143 }, { "epoch": 1.1, "grad_norm": 7.483802795410156, "learning_rate": 1.9989946626801154e-05, "loss": 0.6118, "step": 1144 }, { "epoch": 1.1009615384615385, "grad_norm": 5.912850379943848, "learning_rate": 1.998989070658089e-05, "loss": 0.4954, "step": 1145 }, { "epoch": 1.101923076923077, "grad_norm": 3.5415449142456055, "learning_rate": 1.9989834631346842e-05, "loss": 0.1396, "step": 1146 }, { "epoch": 1.1028846153846155, "grad_norm": 4.463089942932129, "learning_rate": 1.998977840109988e-05, "loss": 0.3155, "step": 1147 }, { "epoch": 1.103846153846154, "grad_norm": 7.81223201751709, "learning_rate": 1.998972201584088e-05, "loss": 0.4812, "step": 1148 }, { "epoch": 1.1048076923076924, "grad_norm": 8.067948341369629, "learning_rate": 1.998966547557071e-05, "loss": 0.7213, "step": 1149 }, { "epoch": 1.1057692307692308, "grad_norm": 6.2695770263671875, "learning_rate": 1.9989608780290252e-05, "loss": 0.4899, "step": 1150 }, { "epoch": 1.1067307692307693, "grad_norm": 7.527367115020752, "learning_rate": 1.998955193000039e-05, "loss": 1.0318, "step": 1151 }, { "epoch": 1.1076923076923078, "grad_norm": 5.768095016479492, "learning_rate": 1.998949492470199e-05, "loss": 0.4037, "step": 1152 }, { "epoch": 1.1086538461538462, "grad_norm": 5.839761734008789, "learning_rate": 1.9989437764395955e-05, "loss": 0.6538, "step": 1153 }, { "epoch": 1.1096153846153847, "grad_norm": 6.836255073547363, "learning_rate": 1.9989380449083168e-05, "loss": 0.4691, "step": 1154 }, { "epoch": 1.1105769230769231, "grad_norm": 5.412479877471924, "learning_rate": 1.9989322978764506e-05, "loss": 0.2942, "step": 1155 }, { "epoch": 1.1115384615384616, "grad_norm": 6.525689601898193, "learning_rate": 1.9989265353440875e-05, "loss": 0.6333, "step": 1156 }, { "epoch": 1.1125, "grad_norm": 3.9147634506225586, "learning_rate": 1.9989207573113166e-05, "loss": 0.1904, "step": 1157 }, { "epoch": 1.1134615384615385, "grad_norm": 6.569454193115234, "learning_rate": 1.998914963778227e-05, "loss": 0.4409, "step": 1158 }, { "epoch": 1.114423076923077, "grad_norm": 7.598392963409424, "learning_rate": 1.998909154744909e-05, "loss": 0.5524, "step": 1159 }, { "epoch": 1.1153846153846154, "grad_norm": 4.836514949798584, "learning_rate": 1.9989033302114526e-05, "loss": 0.2816, "step": 1160 }, { "epoch": 1.1163461538461539, "grad_norm": 7.778683662414551, "learning_rate": 1.9988974901779482e-05, "loss": 0.5072, "step": 1161 }, { "epoch": 1.1173076923076923, "grad_norm": 6.011068820953369, "learning_rate": 1.9988916346444866e-05, "loss": 0.3655, "step": 1162 }, { "epoch": 1.1182692307692308, "grad_norm": 7.060034275054932, "learning_rate": 1.9988857636111586e-05, "loss": 0.7687, "step": 1163 }, { "epoch": 1.1192307692307693, "grad_norm": 6.347203254699707, "learning_rate": 1.9988798770780553e-05, "loss": 0.3681, "step": 1164 }, { "epoch": 1.1201923076923077, "grad_norm": 5.749230861663818, "learning_rate": 1.998873975045268e-05, "loss": 0.365, "step": 1165 }, { "epoch": 1.1211538461538462, "grad_norm": 7.203025817871094, "learning_rate": 1.998868057512888e-05, "loss": 0.5144, "step": 1166 }, { "epoch": 1.1221153846153846, "grad_norm": 5.385928153991699, "learning_rate": 1.9988621244810077e-05, "loss": 0.2779, "step": 1167 }, { "epoch": 1.123076923076923, "grad_norm": 7.412073135375977, "learning_rate": 1.9988561759497185e-05, "loss": 0.4585, "step": 1168 }, { "epoch": 1.1240384615384615, "grad_norm": 5.739033222198486, "learning_rate": 1.9988502119191135e-05, "loss": 0.3712, "step": 1169 }, { "epoch": 1.125, "grad_norm": 6.610757350921631, "learning_rate": 1.9988442323892844e-05, "loss": 0.5429, "step": 1170 }, { "epoch": 1.1259615384615385, "grad_norm": 8.578123092651367, "learning_rate": 1.9988382373603245e-05, "loss": 0.8472, "step": 1171 }, { "epoch": 1.126923076923077, "grad_norm": 6.726661682128906, "learning_rate": 1.998832226832327e-05, "loss": 0.5926, "step": 1172 }, { "epoch": 1.1278846153846154, "grad_norm": 5.9226393699646, "learning_rate": 1.9988262008053842e-05, "loss": 0.4646, "step": 1173 }, { "epoch": 1.1288461538461538, "grad_norm": 7.643904685974121, "learning_rate": 1.998820159279591e-05, "loss": 1.1767, "step": 1174 }, { "epoch": 1.1298076923076923, "grad_norm": 7.618354797363281, "learning_rate": 1.99881410225504e-05, "loss": 0.6389, "step": 1175 }, { "epoch": 1.1307692307692307, "grad_norm": 6.644861221313477, "learning_rate": 1.998808029731826e-05, "loss": 0.5421, "step": 1176 }, { "epoch": 1.1317307692307692, "grad_norm": 5.557538986206055, "learning_rate": 1.9988019417100424e-05, "loss": 0.3331, "step": 1177 }, { "epoch": 1.1326923076923077, "grad_norm": 4.513952732086182, "learning_rate": 1.9987958381897843e-05, "loss": 0.3908, "step": 1178 }, { "epoch": 1.1336538461538461, "grad_norm": 7.828165054321289, "learning_rate": 1.998789719171146e-05, "loss": 0.6402, "step": 1179 }, { "epoch": 1.1346153846153846, "grad_norm": 6.6526384353637695, "learning_rate": 1.998783584654223e-05, "loss": 0.6619, "step": 1180 }, { "epoch": 1.135576923076923, "grad_norm": 5.176059722900391, "learning_rate": 1.9987774346391102e-05, "loss": 0.3914, "step": 1181 }, { "epoch": 1.1365384615384615, "grad_norm": 5.614287853240967, "learning_rate": 1.998771269125903e-05, "loss": 0.4084, "step": 1182 }, { "epoch": 1.1375, "grad_norm": 5.448917865753174, "learning_rate": 1.9987650881146967e-05, "loss": 0.3082, "step": 1183 }, { "epoch": 1.1384615384615384, "grad_norm": 5.001605033874512, "learning_rate": 1.998758891605588e-05, "loss": 0.4604, "step": 1184 }, { "epoch": 1.1394230769230769, "grad_norm": 6.145385265350342, "learning_rate": 1.9987526795986723e-05, "loss": 0.4864, "step": 1185 }, { "epoch": 1.1403846153846153, "grad_norm": 7.660943508148193, "learning_rate": 1.9987464520940465e-05, "loss": 0.9034, "step": 1186 }, { "epoch": 1.1413461538461538, "grad_norm": 6.137295722961426, "learning_rate": 1.998740209091807e-05, "loss": 0.4527, "step": 1187 }, { "epoch": 1.1423076923076922, "grad_norm": 6.973448753356934, "learning_rate": 1.9987339505920505e-05, "loss": 0.5244, "step": 1188 }, { "epoch": 1.1432692307692307, "grad_norm": 7.290536880493164, "learning_rate": 1.9987276765948745e-05, "loss": 0.3895, "step": 1189 }, { "epoch": 1.1442307692307692, "grad_norm": 8.309503555297852, "learning_rate": 1.9987213871003766e-05, "loss": 0.846, "step": 1190 }, { "epoch": 1.1451923076923076, "grad_norm": 4.679155349731445, "learning_rate": 1.9987150821086536e-05, "loss": 0.1485, "step": 1191 }, { "epoch": 1.146153846153846, "grad_norm": 8.800424575805664, "learning_rate": 1.9987087616198036e-05, "loss": 0.6421, "step": 1192 }, { "epoch": 1.1471153846153845, "grad_norm": 6.471018314361572, "learning_rate": 1.998702425633925e-05, "loss": 0.4454, "step": 1193 }, { "epoch": 1.148076923076923, "grad_norm": 36.155799865722656, "learning_rate": 1.998696074151116e-05, "loss": 0.5618, "step": 1194 }, { "epoch": 1.1490384615384615, "grad_norm": 7.135603904724121, "learning_rate": 1.9986897071714744e-05, "loss": 0.5903, "step": 1195 }, { "epoch": 1.15, "grad_norm": 6.570459365844727, "learning_rate": 1.9986833246951006e-05, "loss": 0.2791, "step": 1196 }, { "epoch": 1.1509615384615384, "grad_norm": 9.909896850585938, "learning_rate": 1.9986769267220923e-05, "loss": 0.3363, "step": 1197 }, { "epoch": 1.1519230769230768, "grad_norm": 7.74652099609375, "learning_rate": 1.998670513252549e-05, "loss": 0.7806, "step": 1198 }, { "epoch": 1.1528846153846155, "grad_norm": 6.776516437530518, "learning_rate": 1.9986640842865707e-05, "loss": 0.5576, "step": 1199 }, { "epoch": 1.1538461538461537, "grad_norm": 8.52000904083252, "learning_rate": 1.9986576398242566e-05, "loss": 0.4096, "step": 1200 }, { "epoch": 1.1548076923076924, "grad_norm": 10.305787086486816, "learning_rate": 1.9986511798657072e-05, "loss": 0.7513, "step": 1201 }, { "epoch": 1.1557692307692307, "grad_norm": 9.90491771697998, "learning_rate": 1.998644704411022e-05, "loss": 0.396, "step": 1202 }, { "epoch": 1.1567307692307693, "grad_norm": 35.56431579589844, "learning_rate": 1.9986382134603025e-05, "loss": 0.3927, "step": 1203 }, { "epoch": 1.1576923076923076, "grad_norm": 27.886655807495117, "learning_rate": 1.9986317070136488e-05, "loss": 1.3611, "step": 1204 }, { "epoch": 1.1586538461538463, "grad_norm": 8.957846641540527, "learning_rate": 1.9986251850711616e-05, "loss": 0.4159, "step": 1205 }, { "epoch": 1.1596153846153845, "grad_norm": 6.141682147979736, "learning_rate": 1.998618647632943e-05, "loss": 0.4006, "step": 1206 }, { "epoch": 1.1605769230769232, "grad_norm": 6.141682147979736, "learning_rate": 1.9986120946990936e-05, "loss": 0.6172, "step": 1207 }, { "epoch": 1.1615384615384616, "grad_norm": 66.5321273803711, "learning_rate": 1.9986120946990936e-05, "loss": 0.5385, "step": 1208 }, { "epoch": 1.1625, "grad_norm": 7.530426025390625, "learning_rate": 1.9986055262697154e-05, "loss": 0.4909, "step": 1209 }, { "epoch": 1.1634615384615385, "grad_norm": 6.650116443634033, "learning_rate": 1.9985989423449102e-05, "loss": 0.828, "step": 1210 }, { "epoch": 1.164423076923077, "grad_norm": 5.09705924987793, "learning_rate": 1.9985923429247808e-05, "loss": 0.2949, "step": 1211 }, { "epoch": 1.1653846153846155, "grad_norm": 7.839517593383789, "learning_rate": 1.9985857280094284e-05, "loss": 0.7122, "step": 1212 }, { "epoch": 1.166346153846154, "grad_norm": 6.437521457672119, "learning_rate": 1.9985790975989567e-05, "loss": 0.4798, "step": 1213 }, { "epoch": 1.1673076923076924, "grad_norm": 4.640228748321533, "learning_rate": 1.998572451693468e-05, "loss": 0.4198, "step": 1214 }, { "epoch": 1.1682692307692308, "grad_norm": 8.058080673217773, "learning_rate": 1.9985657902930657e-05, "loss": 0.6816, "step": 1215 }, { "epoch": 1.1692307692307693, "grad_norm": 4.722343921661377, "learning_rate": 1.998559113397853e-05, "loss": 0.2192, "step": 1216 }, { "epoch": 1.1701923076923078, "grad_norm": 7.244505882263184, "learning_rate": 1.9985524210079336e-05, "loss": 0.6949, "step": 1217 }, { "epoch": 1.1711538461538462, "grad_norm": 7.899235725402832, "learning_rate": 1.9985457131234117e-05, "loss": 0.5979, "step": 1218 }, { "epoch": 1.1721153846153847, "grad_norm": 7.6007771492004395, "learning_rate": 1.9985389897443906e-05, "loss": 0.6676, "step": 1219 }, { "epoch": 1.1730769230769231, "grad_norm": 8.156576156616211, "learning_rate": 1.9985322508709753e-05, "loss": 0.7126, "step": 1220 }, { "epoch": 1.1740384615384616, "grad_norm": 7.5867085456848145, "learning_rate": 1.99852549650327e-05, "loss": 0.5859, "step": 1221 }, { "epoch": 1.175, "grad_norm": 8.21717357635498, "learning_rate": 1.9985187266413797e-05, "loss": 0.9449, "step": 1222 }, { "epoch": 1.1759615384615385, "grad_norm": 6.02343225479126, "learning_rate": 1.9985119412854096e-05, "loss": 0.3803, "step": 1223 }, { "epoch": 1.176923076923077, "grad_norm": 5.336263179779053, "learning_rate": 1.9985051404354642e-05, "loss": 0.4188, "step": 1224 }, { "epoch": 1.1778846153846154, "grad_norm": 6.519838809967041, "learning_rate": 1.9984983240916502e-05, "loss": 0.3669, "step": 1225 }, { "epoch": 1.1788461538461539, "grad_norm": 7.293766975402832, "learning_rate": 1.9984914922540724e-05, "loss": 1.2704, "step": 1226 }, { "epoch": 1.1798076923076923, "grad_norm": 6.836136341094971, "learning_rate": 1.998484644922837e-05, "loss": 0.4999, "step": 1227 }, { "epoch": 1.1807692307692308, "grad_norm": 4.317194938659668, "learning_rate": 1.9984777820980507e-05, "loss": 0.2245, "step": 1228 }, { "epoch": 1.1817307692307693, "grad_norm": 5.306216716766357, "learning_rate": 1.9984709037798198e-05, "loss": 0.4052, "step": 1229 }, { "epoch": 1.1826923076923077, "grad_norm": 5.235520839691162, "learning_rate": 1.998464009968251e-05, "loss": 0.3545, "step": 1230 }, { "epoch": 1.1836538461538462, "grad_norm": 4.327383995056152, "learning_rate": 1.998457100663451e-05, "loss": 0.2454, "step": 1231 }, { "epoch": 1.1846153846153846, "grad_norm": 7.491011142730713, "learning_rate": 1.9984501758655272e-05, "loss": 0.4393, "step": 1232 }, { "epoch": 1.185576923076923, "grad_norm": 7.204848766326904, "learning_rate": 1.9984432355745875e-05, "loss": 0.7579, "step": 1233 }, { "epoch": 1.1865384615384615, "grad_norm": 6.853080749511719, "learning_rate": 1.9984362797907385e-05, "loss": 0.6787, "step": 1234 }, { "epoch": 1.1875, "grad_norm": 5.607349395751953, "learning_rate": 1.9984293085140894e-05, "loss": 0.3777, "step": 1235 }, { "epoch": 1.1884615384615385, "grad_norm": 5.27255392074585, "learning_rate": 1.9984223217447476e-05, "loss": 0.3907, "step": 1236 }, { "epoch": 1.189423076923077, "grad_norm": 6.212124824523926, "learning_rate": 1.998415319482822e-05, "loss": 0.5314, "step": 1237 }, { "epoch": 1.1903846153846154, "grad_norm": 5.528546333312988, "learning_rate": 1.9984083017284203e-05, "loss": 0.3086, "step": 1238 }, { "epoch": 1.1913461538461538, "grad_norm": 4.7822675704956055, "learning_rate": 1.9984012684816526e-05, "loss": 0.3068, "step": 1239 }, { "epoch": 1.1923076923076923, "grad_norm": 4.542891502380371, "learning_rate": 1.9983942197426272e-05, "loss": 0.2893, "step": 1240 }, { "epoch": 1.1932692307692307, "grad_norm": 6.534930229187012, "learning_rate": 1.998387155511454e-05, "loss": 0.6635, "step": 1241 }, { "epoch": 1.1942307692307692, "grad_norm": 6.250768661499023, "learning_rate": 1.998380075788242e-05, "loss": 0.5676, "step": 1242 }, { "epoch": 1.1951923076923077, "grad_norm": 5.419041633605957, "learning_rate": 1.9983729805731017e-05, "loss": 0.2841, "step": 1243 }, { "epoch": 1.1961538461538461, "grad_norm": 4.4016642570495605, "learning_rate": 1.9983658698661427e-05, "loss": 0.2444, "step": 1244 }, { "epoch": 1.1971153846153846, "grad_norm": 7.5425591468811035, "learning_rate": 1.998358743667476e-05, "loss": 0.4649, "step": 1245 }, { "epoch": 1.198076923076923, "grad_norm": 6.7816243171691895, "learning_rate": 1.9983516019772114e-05, "loss": 0.4464, "step": 1246 }, { "epoch": 1.1990384615384615, "grad_norm": 6.175074100494385, "learning_rate": 1.99834444479546e-05, "loss": 0.4028, "step": 1247 }, { "epoch": 1.2, "grad_norm": 12.195141792297363, "learning_rate": 1.998337272122333e-05, "loss": 0.398, "step": 1248 }, { "epoch": 1.2009615384615384, "grad_norm": 4.5329999923706055, "learning_rate": 1.998330083957942e-05, "loss": 0.1819, "step": 1249 }, { "epoch": 1.2019230769230769, "grad_norm": 8.617181777954102, "learning_rate": 1.9983228803023977e-05, "loss": 0.5354, "step": 1250 }, { "epoch": 1.2028846153846153, "grad_norm": 9.013891220092773, "learning_rate": 1.9983156611558125e-05, "loss": 0.9456, "step": 1251 }, { "epoch": 1.2038461538461538, "grad_norm": 6.26938533782959, "learning_rate": 1.9983084265182985e-05, "loss": 0.5912, "step": 1252 }, { "epoch": 1.2048076923076922, "grad_norm": 4.6465935707092285, "learning_rate": 1.9983011763899674e-05, "loss": 0.236, "step": 1253 }, { "epoch": 1.2057692307692307, "grad_norm": 21.159944534301758, "learning_rate": 1.998293910770932e-05, "loss": 0.6333, "step": 1254 }, { "epoch": 1.2067307692307692, "grad_norm": 6.1338982582092285, "learning_rate": 1.9982866296613054e-05, "loss": 0.54, "step": 1255 }, { "epoch": 1.2076923076923076, "grad_norm": 5.29093074798584, "learning_rate": 1.9982793330611998e-05, "loss": 0.382, "step": 1256 }, { "epoch": 1.208653846153846, "grad_norm": 6.082522392272949, "learning_rate": 1.9982720209707288e-05, "loss": 0.387, "step": 1257 }, { "epoch": 1.2096153846153845, "grad_norm": 6.737794876098633, "learning_rate": 1.9982646933900065e-05, "loss": 0.4632, "step": 1258 }, { "epoch": 1.210576923076923, "grad_norm": 4.9210638999938965, "learning_rate": 1.9982573503191456e-05, "loss": 0.2847, "step": 1259 }, { "epoch": 1.2115384615384615, "grad_norm": 6.6689066886901855, "learning_rate": 1.9982499917582607e-05, "loss": 0.5952, "step": 1260 }, { "epoch": 1.2125, "grad_norm": 5.266211986541748, "learning_rate": 1.9982426177074655e-05, "loss": 0.3799, "step": 1261 }, { "epoch": 1.2134615384615384, "grad_norm": 5.526029586791992, "learning_rate": 1.998235228166875e-05, "loss": 0.3323, "step": 1262 }, { "epoch": 1.2144230769230768, "grad_norm": 5.82203483581543, "learning_rate": 1.9982278231366033e-05, "loss": 0.6354, "step": 1263 }, { "epoch": 1.2153846153846155, "grad_norm": 6.938738822937012, "learning_rate": 1.9982204026167657e-05, "loss": 0.6034, "step": 1264 }, { "epoch": 1.2163461538461537, "grad_norm": 7.2950358390808105, "learning_rate": 1.9982129666074772e-05, "loss": 0.6261, "step": 1265 }, { "epoch": 1.2173076923076924, "grad_norm": 5.512200832366943, "learning_rate": 1.998205515108853e-05, "loss": 0.315, "step": 1266 }, { "epoch": 1.2182692307692307, "grad_norm": 6.186715602874756, "learning_rate": 1.998198048121009e-05, "loss": 0.4321, "step": 1267 }, { "epoch": 1.2192307692307693, "grad_norm": 6.917178153991699, "learning_rate": 1.9981905656440613e-05, "loss": 0.7559, "step": 1268 }, { "epoch": 1.2201923076923076, "grad_norm": 5.3942060470581055, "learning_rate": 1.998183067678125e-05, "loss": 0.2979, "step": 1269 }, { "epoch": 1.2211538461538463, "grad_norm": 6.615832328796387, "learning_rate": 1.9981755542233175e-05, "loss": 0.4529, "step": 1270 }, { "epoch": 1.2221153846153845, "grad_norm": 6.9916300773620605, "learning_rate": 1.998168025279755e-05, "loss": 0.6052, "step": 1271 }, { "epoch": 1.2230769230769232, "grad_norm": 5.276325702667236, "learning_rate": 1.9981604808475543e-05, "loss": 0.2865, "step": 1272 }, { "epoch": 1.2240384615384616, "grad_norm": 7.808296203613281, "learning_rate": 1.9981529209268327e-05, "loss": 0.6062, "step": 1273 }, { "epoch": 1.225, "grad_norm": 7.851701259613037, "learning_rate": 1.998145345517707e-05, "loss": 0.7621, "step": 1274 }, { "epoch": 1.2259615384615385, "grad_norm": 3.9298391342163086, "learning_rate": 1.9981377546202954e-05, "loss": 0.2434, "step": 1275 }, { "epoch": 1.226923076923077, "grad_norm": 7.4662251472473145, "learning_rate": 1.998130148234715e-05, "loss": 0.5873, "step": 1276 }, { "epoch": 1.2278846153846155, "grad_norm": 5.601559638977051, "learning_rate": 1.9981225263610846e-05, "loss": 0.3056, "step": 1277 }, { "epoch": 1.228846153846154, "grad_norm": 4.914362907409668, "learning_rate": 1.9981148889995216e-05, "loss": 0.296, "step": 1278 }, { "epoch": 1.2298076923076924, "grad_norm": 4.007826805114746, "learning_rate": 1.998107236150145e-05, "loss": 0.2689, "step": 1279 }, { "epoch": 1.2307692307692308, "grad_norm": 6.876864433288574, "learning_rate": 1.9980995678130736e-05, "loss": 0.6747, "step": 1280 }, { "epoch": 1.2317307692307693, "grad_norm": 5.753932476043701, "learning_rate": 1.9980918839884263e-05, "loss": 0.3651, "step": 1281 }, { "epoch": 1.2326923076923078, "grad_norm": 6.358975410461426, "learning_rate": 1.9980841846763224e-05, "loss": 0.7306, "step": 1282 }, { "epoch": 1.2336538461538462, "grad_norm": 6.648561000823975, "learning_rate": 1.9980764698768814e-05, "loss": 0.716, "step": 1283 }, { "epoch": 1.2346153846153847, "grad_norm": 5.66729211807251, "learning_rate": 1.9980687395902225e-05, "loss": 0.2871, "step": 1284 }, { "epoch": 1.2355769230769231, "grad_norm": 5.583057403564453, "learning_rate": 1.9980609938164664e-05, "loss": 0.3245, "step": 1285 }, { "epoch": 1.2365384615384616, "grad_norm": 6.202103137969971, "learning_rate": 1.998053232555733e-05, "loss": 0.4509, "step": 1286 }, { "epoch": 1.2375, "grad_norm": 5.1133809089660645, "learning_rate": 1.9980454558081424e-05, "loss": 0.2734, "step": 1287 }, { "epoch": 1.2384615384615385, "grad_norm": 5.636417388916016, "learning_rate": 1.9980376635738156e-05, "loss": 0.3052, "step": 1288 }, { "epoch": 1.239423076923077, "grad_norm": 5.768444538116455, "learning_rate": 1.9980298558528738e-05, "loss": 0.3695, "step": 1289 }, { "epoch": 1.2403846153846154, "grad_norm": 6.765138149261475, "learning_rate": 1.9980220326454372e-05, "loss": 0.3222, "step": 1290 }, { "epoch": 1.2413461538461539, "grad_norm": 8.199707984924316, "learning_rate": 1.9980141939516284e-05, "loss": 0.543, "step": 1291 }, { "epoch": 1.2423076923076923, "grad_norm": 5.885818004608154, "learning_rate": 1.9980063397715685e-05, "loss": 0.39, "step": 1292 }, { "epoch": 1.2432692307692308, "grad_norm": 6.673518657684326, "learning_rate": 1.9979984701053786e-05, "loss": 0.462, "step": 1293 }, { "epoch": 1.2442307692307693, "grad_norm": 6.313223838806152, "learning_rate": 1.9979905849531822e-05, "loss": 0.5202, "step": 1294 }, { "epoch": 1.2451923076923077, "grad_norm": 6.386568546295166, "learning_rate": 1.9979826843151008e-05, "loss": 0.4707, "step": 1295 }, { "epoch": 1.2461538461538462, "grad_norm": 6.4892168045043945, "learning_rate": 1.997974768191257e-05, "loss": 0.3614, "step": 1296 }, { "epoch": 1.2471153846153846, "grad_norm": 5.210866451263428, "learning_rate": 1.997966836581774e-05, "loss": 0.3711, "step": 1297 }, { "epoch": 1.248076923076923, "grad_norm": 5.515346527099609, "learning_rate": 1.997958889486775e-05, "loss": 0.2356, "step": 1298 }, { "epoch": 1.2490384615384615, "grad_norm": 6.588945388793945, "learning_rate": 1.9979509269063826e-05, "loss": 0.5131, "step": 1299 }, { "epoch": 1.25, "grad_norm": 7.155364036560059, "learning_rate": 1.997942948840721e-05, "loss": 0.5731, "step": 1300 }, { "epoch": 1.2509615384615385, "grad_norm": 5.994906902313232, "learning_rate": 1.9979349552899134e-05, "loss": 0.6467, "step": 1301 }, { "epoch": 1.251923076923077, "grad_norm": 6.543784141540527, "learning_rate": 1.9979269462540846e-05, "loss": 0.4143, "step": 1302 }, { "epoch": 1.2528846153846154, "grad_norm": 7.182621479034424, "learning_rate": 1.997918921733358e-05, "loss": 0.8702, "step": 1303 }, { "epoch": 1.2538461538461538, "grad_norm": 6.459256649017334, "learning_rate": 1.997910881727859e-05, "loss": 0.5137, "step": 1304 }, { "epoch": 1.2548076923076923, "grad_norm": 5.6038737297058105, "learning_rate": 1.997902826237712e-05, "loss": 0.3152, "step": 1305 }, { "epoch": 1.2557692307692307, "grad_norm": 7.9097490310668945, "learning_rate": 1.9978947552630415e-05, "loss": 0.647, "step": 1306 }, { "epoch": 1.2567307692307692, "grad_norm": 6.641596794128418, "learning_rate": 1.9978866688039737e-05, "loss": 0.5703, "step": 1307 }, { "epoch": 1.2576923076923077, "grad_norm": 7.7122344970703125, "learning_rate": 1.9978785668606333e-05, "loss": 0.5231, "step": 1308 }, { "epoch": 1.2586538461538461, "grad_norm": 5.501630783081055, "learning_rate": 1.9978704494331464e-05, "loss": 0.3658, "step": 1309 }, { "epoch": 1.2596153846153846, "grad_norm": 6.627554416656494, "learning_rate": 1.9978623165216384e-05, "loss": 0.6243, "step": 1310 }, { "epoch": 1.260576923076923, "grad_norm": 5.139519214630127, "learning_rate": 1.9978541681262366e-05, "loss": 0.2454, "step": 1311 }, { "epoch": 1.2615384615384615, "grad_norm": 4.967010021209717, "learning_rate": 1.997846004247066e-05, "loss": 0.3809, "step": 1312 }, { "epoch": 1.2625, "grad_norm": 4.630777359008789, "learning_rate": 1.997837824884255e-05, "loss": 0.3081, "step": 1313 }, { "epoch": 1.2634615384615384, "grad_norm": 6.870950698852539, "learning_rate": 1.997829630037929e-05, "loss": 0.5239, "step": 1314 }, { "epoch": 1.2644230769230769, "grad_norm": 6.2018938064575195, "learning_rate": 1.9978214197082154e-05, "loss": 0.3932, "step": 1315 }, { "epoch": 1.2653846153846153, "grad_norm": 4.752409934997559, "learning_rate": 1.9978131938952427e-05, "loss": 0.3135, "step": 1316 }, { "epoch": 1.2663461538461538, "grad_norm": 6.557553291320801, "learning_rate": 1.9978049525991375e-05, "loss": 0.586, "step": 1317 }, { "epoch": 1.2673076923076922, "grad_norm": 11.93765640258789, "learning_rate": 1.9977966958200276e-05, "loss": 0.6028, "step": 1318 }, { "epoch": 1.2682692307692307, "grad_norm": 6.928224086761475, "learning_rate": 1.9977884235580422e-05, "loss": 0.6757, "step": 1319 }, { "epoch": 1.2692307692307692, "grad_norm": 9.350299835205078, "learning_rate": 1.9977801358133085e-05, "loss": 0.5032, "step": 1320 }, { "epoch": 1.2701923076923076, "grad_norm": 4.672661781311035, "learning_rate": 1.997771832585955e-05, "loss": 0.2096, "step": 1321 }, { "epoch": 1.271153846153846, "grad_norm": 7.076663970947266, "learning_rate": 1.997763513876112e-05, "loss": 0.6257, "step": 1322 }, { "epoch": 1.2721153846153845, "grad_norm": 7.833367347717285, "learning_rate": 1.997755179683907e-05, "loss": 0.5683, "step": 1323 }, { "epoch": 1.273076923076923, "grad_norm": 6.789368152618408, "learning_rate": 1.9977468300094705e-05, "loss": 0.7109, "step": 1324 }, { "epoch": 1.2740384615384617, "grad_norm": 6.94172477722168, "learning_rate": 1.9977384648529315e-05, "loss": 0.4787, "step": 1325 }, { "epoch": 1.275, "grad_norm": 6.855458736419678, "learning_rate": 1.9977300842144198e-05, "loss": 0.595, "step": 1326 }, { "epoch": 1.2759615384615386, "grad_norm": 5.543516635894775, "learning_rate": 1.997721688094065e-05, "loss": 0.4221, "step": 1327 }, { "epoch": 1.2769230769230768, "grad_norm": 4.829400539398193, "learning_rate": 1.9977132764919984e-05, "loss": 0.2825, "step": 1328 }, { "epoch": 1.2778846153846155, "grad_norm": 6.3325676918029785, "learning_rate": 1.99770484940835e-05, "loss": 0.4545, "step": 1329 }, { "epoch": 1.2788461538461537, "grad_norm": 9.141379356384277, "learning_rate": 1.9976964068432504e-05, "loss": 0.7949, "step": 1330 }, { "epoch": 1.2798076923076924, "grad_norm": 6.532257556915283, "learning_rate": 1.997687948796831e-05, "loss": 0.5285, "step": 1331 }, { "epoch": 1.2807692307692307, "grad_norm": 6.410290241241455, "learning_rate": 1.9976794752692225e-05, "loss": 0.549, "step": 1332 }, { "epoch": 1.2817307692307693, "grad_norm": 5.5022125244140625, "learning_rate": 1.997670986260557e-05, "loss": 0.4631, "step": 1333 }, { "epoch": 1.2826923076923076, "grad_norm": 6.019759178161621, "learning_rate": 1.9976624817709656e-05, "loss": 0.5154, "step": 1334 }, { "epoch": 1.2836538461538463, "grad_norm": 5.257124900817871, "learning_rate": 1.9976539618005807e-05, "loss": 0.4316, "step": 1335 }, { "epoch": 1.2846153846153845, "grad_norm": 6.98942756652832, "learning_rate": 1.9976454263495343e-05, "loss": 1.0405, "step": 1336 }, { "epoch": 1.2855769230769232, "grad_norm": 6.381976127624512, "learning_rate": 1.9976368754179593e-05, "loss": 0.5618, "step": 1337 }, { "epoch": 1.2865384615384614, "grad_norm": 7.544373989105225, "learning_rate": 1.9976283090059878e-05, "loss": 0.6553, "step": 1338 }, { "epoch": 1.2875, "grad_norm": 5.0851216316223145, "learning_rate": 1.997619727113753e-05, "loss": 0.4216, "step": 1339 }, { "epoch": 1.2884615384615383, "grad_norm": 6.945852279663086, "learning_rate": 1.9976111297413875e-05, "loss": 0.6444, "step": 1340 }, { "epoch": 1.289423076923077, "grad_norm": 6.16742467880249, "learning_rate": 1.997602516889026e-05, "loss": 0.4611, "step": 1341 }, { "epoch": 1.2903846153846155, "grad_norm": 5.086982727050781, "learning_rate": 1.997593888556801e-05, "loss": 0.4237, "step": 1342 }, { "epoch": 1.291346153846154, "grad_norm": 6.641066074371338, "learning_rate": 1.9975852447448466e-05, "loss": 0.4717, "step": 1343 }, { "epoch": 1.2923076923076924, "grad_norm": 6.5228447914123535, "learning_rate": 1.9975765854532974e-05, "loss": 0.373, "step": 1344 }, { "epoch": 1.2932692307692308, "grad_norm": 5.262583255767822, "learning_rate": 1.997567910682287e-05, "loss": 0.3108, "step": 1345 }, { "epoch": 1.2942307692307693, "grad_norm": 5.802464008331299, "learning_rate": 1.997559220431951e-05, "loss": 0.5097, "step": 1346 }, { "epoch": 1.2951923076923078, "grad_norm": 4.609960556030273, "learning_rate": 1.9975505147024234e-05, "loss": 0.2439, "step": 1347 }, { "epoch": 1.2961538461538462, "grad_norm": 7.411247253417969, "learning_rate": 1.9975417934938398e-05, "loss": 0.5339, "step": 1348 }, { "epoch": 1.2971153846153847, "grad_norm": 5.873257160186768, "learning_rate": 1.997533056806335e-05, "loss": 0.4511, "step": 1349 }, { "epoch": 1.2980769230769231, "grad_norm": 7.047757148742676, "learning_rate": 1.9975243046400454e-05, "loss": 0.5363, "step": 1350 }, { "epoch": 1.2990384615384616, "grad_norm": 7.537533283233643, "learning_rate": 1.9975155369951057e-05, "loss": 0.7123, "step": 1351 }, { "epoch": 1.3, "grad_norm": 5.992332935333252, "learning_rate": 1.997506753871653e-05, "loss": 0.7093, "step": 1352 }, { "epoch": 1.3009615384615385, "grad_norm": 5.38059139251709, "learning_rate": 1.9974979552698228e-05, "loss": 0.3256, "step": 1353 }, { "epoch": 1.301923076923077, "grad_norm": 8.194987297058105, "learning_rate": 1.997489141189752e-05, "loss": 0.7819, "step": 1354 }, { "epoch": 1.3028846153846154, "grad_norm": 5.785538196563721, "learning_rate": 1.9974803116315773e-05, "loss": 0.5212, "step": 1355 }, { "epoch": 1.3038461538461539, "grad_norm": 7.812416076660156, "learning_rate": 1.9974714665954358e-05, "loss": 0.3943, "step": 1356 }, { "epoch": 1.3048076923076923, "grad_norm": 6.61036491394043, "learning_rate": 1.997462606081465e-05, "loss": 0.5663, "step": 1357 }, { "epoch": 1.3057692307692308, "grad_norm": 6.916042804718018, "learning_rate": 1.9974537300898013e-05, "loss": 0.5777, "step": 1358 }, { "epoch": 1.3067307692307693, "grad_norm": 5.1184868812561035, "learning_rate": 1.997444838620584e-05, "loss": 0.3267, "step": 1359 }, { "epoch": 1.3076923076923077, "grad_norm": 6.049947261810303, "learning_rate": 1.99743593167395e-05, "loss": 0.5397, "step": 1360 }, { "epoch": 1.3086538461538462, "grad_norm": 6.139918804168701, "learning_rate": 1.9974270092500373e-05, "loss": 0.4546, "step": 1361 }, { "epoch": 1.3096153846153846, "grad_norm": 7.842068195343018, "learning_rate": 1.9974180713489856e-05, "loss": 0.8723, "step": 1362 }, { "epoch": 1.310576923076923, "grad_norm": 4.781641960144043, "learning_rate": 1.9974091179709327e-05, "loss": 0.361, "step": 1363 }, { "epoch": 1.3115384615384615, "grad_norm": 8.131209373474121, "learning_rate": 1.997400149116017e-05, "loss": 0.9523, "step": 1364 }, { "epoch": 1.3125, "grad_norm": 8.091280937194824, "learning_rate": 1.997391164784379e-05, "loss": 1.0363, "step": 1365 }, { "epoch": 1.3134615384615385, "grad_norm": 5.192381858825684, "learning_rate": 1.997382164976157e-05, "loss": 0.3174, "step": 1366 }, { "epoch": 1.314423076923077, "grad_norm": 7.180342197418213, "learning_rate": 1.9973731496914914e-05, "loss": 0.4902, "step": 1367 }, { "epoch": 1.3153846153846154, "grad_norm": 6.051293849945068, "learning_rate": 1.9973641189305216e-05, "loss": 0.4879, "step": 1368 }, { "epoch": 1.3163461538461538, "grad_norm": 5.1890974044799805, "learning_rate": 1.9973550726933878e-05, "loss": 0.2866, "step": 1369 }, { "epoch": 1.3173076923076923, "grad_norm": 8.043196678161621, "learning_rate": 1.9973460109802306e-05, "loss": 0.5759, "step": 1370 }, { "epoch": 1.3182692307692307, "grad_norm": 4.62629508972168, "learning_rate": 1.9973369337911905e-05, "loss": 0.2525, "step": 1371 }, { "epoch": 1.3192307692307692, "grad_norm": 7.030784606933594, "learning_rate": 1.997327841126408e-05, "loss": 0.576, "step": 1372 }, { "epoch": 1.3201923076923077, "grad_norm": 5.99280309677124, "learning_rate": 1.9973187329860252e-05, "loss": 0.4966, "step": 1373 }, { "epoch": 1.3211538461538461, "grad_norm": 6.04911994934082, "learning_rate": 1.9973096093701826e-05, "loss": 0.7299, "step": 1374 }, { "epoch": 1.3221153846153846, "grad_norm": 7.000094413757324, "learning_rate": 1.9973004702790217e-05, "loss": 0.639, "step": 1375 }, { "epoch": 1.323076923076923, "grad_norm": 6.194445610046387, "learning_rate": 1.9972913157126843e-05, "loss": 0.4946, "step": 1376 }, { "epoch": 1.3240384615384615, "grad_norm": 7.651191711425781, "learning_rate": 1.9972821456713132e-05, "loss": 0.6019, "step": 1377 }, { "epoch": 1.325, "grad_norm": 6.335244655609131, "learning_rate": 1.99727296015505e-05, "loss": 0.5132, "step": 1378 }, { "epoch": 1.3259615384615384, "grad_norm": 6.4436235427856445, "learning_rate": 1.9972637591640374e-05, "loss": 0.5155, "step": 1379 }, { "epoch": 1.3269230769230769, "grad_norm": 5.803867816925049, "learning_rate": 1.997254542698418e-05, "loss": 0.3584, "step": 1380 }, { "epoch": 1.3278846153846153, "grad_norm": 6.882993221282959, "learning_rate": 1.9972453107583352e-05, "loss": 0.8682, "step": 1381 }, { "epoch": 1.3288461538461538, "grad_norm": 6.2491536140441895, "learning_rate": 1.997236063343932e-05, "loss": 0.4524, "step": 1382 }, { "epoch": 1.3298076923076922, "grad_norm": 8.002823829650879, "learning_rate": 1.997226800455352e-05, "loss": 0.5968, "step": 1383 }, { "epoch": 1.3307692307692307, "grad_norm": 5.784348964691162, "learning_rate": 1.997217522092739e-05, "loss": 0.385, "step": 1384 }, { "epoch": 1.3317307692307692, "grad_norm": 7.583304405212402, "learning_rate": 1.9972082282562367e-05, "loss": 0.7076, "step": 1385 }, { "epoch": 1.3326923076923076, "grad_norm": 6.713935852050781, "learning_rate": 1.9971989189459896e-05, "loss": 0.501, "step": 1386 }, { "epoch": 1.333653846153846, "grad_norm": 6.924970626831055, "learning_rate": 1.997189594162142e-05, "loss": 0.5742, "step": 1387 }, { "epoch": 1.3346153846153845, "grad_norm": 6.641185760498047, "learning_rate": 1.9971802539048382e-05, "loss": 0.4927, "step": 1388 }, { "epoch": 1.335576923076923, "grad_norm": 5.600758075714111, "learning_rate": 1.997170898174224e-05, "loss": 0.3841, "step": 1389 }, { "epoch": 1.3365384615384617, "grad_norm": 6.497208118438721, "learning_rate": 1.997161526970444e-05, "loss": 0.4297, "step": 1390 }, { "epoch": 1.3375, "grad_norm": 7.084512710571289, "learning_rate": 1.9971521402936437e-05, "loss": 0.4428, "step": 1391 }, { "epoch": 1.3384615384615386, "grad_norm": 7.422995567321777, "learning_rate": 1.997142738143969e-05, "loss": 0.4001, "step": 1392 }, { "epoch": 1.3394230769230768, "grad_norm": 6.011852264404297, "learning_rate": 1.9971333205215653e-05, "loss": 0.4441, "step": 1393 }, { "epoch": 1.3403846153846155, "grad_norm": 7.877862930297852, "learning_rate": 1.9971238874265792e-05, "loss": 0.661, "step": 1394 }, { "epoch": 1.3413461538461537, "grad_norm": 5.739737510681152, "learning_rate": 1.9971144388591567e-05, "loss": 0.3323, "step": 1395 }, { "epoch": 1.3423076923076924, "grad_norm": 7.186307430267334, "learning_rate": 1.9971049748194448e-05, "loss": 0.5963, "step": 1396 }, { "epoch": 1.3432692307692307, "grad_norm": 7.308338165283203, "learning_rate": 1.9970954953075902e-05, "loss": 0.7352, "step": 1397 }, { "epoch": 1.3442307692307693, "grad_norm": 7.95021915435791, "learning_rate": 1.9970860003237397e-05, "loss": 0.5195, "step": 1398 }, { "epoch": 1.3451923076923076, "grad_norm": 7.293589115142822, "learning_rate": 1.997076489868041e-05, "loss": 0.6545, "step": 1399 }, { "epoch": 1.3461538461538463, "grad_norm": 6.601718425750732, "learning_rate": 1.9970669639406415e-05, "loss": 0.4984, "step": 1400 }, { "epoch": 1.3471153846153845, "grad_norm": 4.856103420257568, "learning_rate": 1.997057422541689e-05, "loss": 0.28, "step": 1401 }, { "epoch": 1.3480769230769232, "grad_norm": 8.047141075134277, "learning_rate": 1.997047865671332e-05, "loss": 0.5037, "step": 1402 }, { "epoch": 1.3490384615384614, "grad_norm": 5.449888706207275, "learning_rate": 1.9970382933297183e-05, "loss": 0.2708, "step": 1403 }, { "epoch": 1.35, "grad_norm": 6.193385124206543, "learning_rate": 1.9970287055169965e-05, "loss": 0.2244, "step": 1404 }, { "epoch": 1.3509615384615383, "grad_norm": 6.296280860900879, "learning_rate": 1.997019102233315e-05, "loss": 0.3617, "step": 1405 }, { "epoch": 1.351923076923077, "grad_norm": 8.247883796691895, "learning_rate": 1.997009483478824e-05, "loss": 0.7119, "step": 1406 }, { "epoch": 1.3528846153846155, "grad_norm": 5.954275608062744, "learning_rate": 1.9969998492536718e-05, "loss": 0.3365, "step": 1407 }, { "epoch": 1.353846153846154, "grad_norm": 6.785582542419434, "learning_rate": 1.996990199558008e-05, "loss": 0.4036, "step": 1408 }, { "epoch": 1.3548076923076924, "grad_norm": 5.006392002105713, "learning_rate": 1.9969805343919822e-05, "loss": 0.3784, "step": 1409 }, { "epoch": 1.3557692307692308, "grad_norm": 6.239997386932373, "learning_rate": 1.9969708537557447e-05, "loss": 0.4439, "step": 1410 }, { "epoch": 1.3567307692307693, "grad_norm": 5.081762313842773, "learning_rate": 1.996961157649446e-05, "loss": 0.3074, "step": 1411 }, { "epoch": 1.3576923076923078, "grad_norm": 5.282291412353516, "learning_rate": 1.9969514460732358e-05, "loss": 0.3033, "step": 1412 }, { "epoch": 1.3586538461538462, "grad_norm": 4.657238006591797, "learning_rate": 1.9969417190272652e-05, "loss": 0.2314, "step": 1413 }, { "epoch": 1.3596153846153847, "grad_norm": 7.307354927062988, "learning_rate": 1.996931976511685e-05, "loss": 0.4028, "step": 1414 }, { "epoch": 1.3605769230769231, "grad_norm": 4.939265251159668, "learning_rate": 1.9969222185266467e-05, "loss": 0.2572, "step": 1415 }, { "epoch": 1.3615384615384616, "grad_norm": 6.215229034423828, "learning_rate": 1.9969124450723017e-05, "loss": 0.3265, "step": 1416 }, { "epoch": 1.3625, "grad_norm": 6.517536163330078, "learning_rate": 1.9969026561488013e-05, "loss": 0.3711, "step": 1417 }, { "epoch": 1.3634615384615385, "grad_norm": 6.581320762634277, "learning_rate": 1.9968928517562973e-05, "loss": 0.4904, "step": 1418 }, { "epoch": 1.364423076923077, "grad_norm": 8.282614707946777, "learning_rate": 1.9968830318949423e-05, "loss": 0.5632, "step": 1419 }, { "epoch": 1.3653846153846154, "grad_norm": 6.511048316955566, "learning_rate": 1.9968731965648885e-05, "loss": 0.4892, "step": 1420 }, { "epoch": 1.3663461538461539, "grad_norm": 8.4370698928833, "learning_rate": 1.9968633457662887e-05, "loss": 0.7662, "step": 1421 }, { "epoch": 1.3673076923076923, "grad_norm": 10.123574256896973, "learning_rate": 1.9968534794992947e-05, "loss": 1.0035, "step": 1422 }, { "epoch": 1.3682692307692308, "grad_norm": 6.74412727355957, "learning_rate": 1.9968435977640612e-05, "loss": 0.4776, "step": 1423 }, { "epoch": 1.3692307692307693, "grad_norm": 6.844595909118652, "learning_rate": 1.9968337005607407e-05, "loss": 0.4746, "step": 1424 }, { "epoch": 1.3701923076923077, "grad_norm": 5.725183010101318, "learning_rate": 1.9968237878894865e-05, "loss": 0.3999, "step": 1425 }, { "epoch": 1.3711538461538462, "grad_norm": 6.154168128967285, "learning_rate": 1.996813859750453e-05, "loss": 0.3663, "step": 1426 }, { "epoch": 1.3721153846153846, "grad_norm": 5.766985893249512, "learning_rate": 1.9968039161437937e-05, "loss": 0.3459, "step": 1427 }, { "epoch": 1.373076923076923, "grad_norm": 3.9847662448883057, "learning_rate": 1.9967939570696636e-05, "loss": 0.2562, "step": 1428 }, { "epoch": 1.3740384615384615, "grad_norm": 6.058551788330078, "learning_rate": 1.9967839825282167e-05, "loss": 0.3952, "step": 1429 }, { "epoch": 1.375, "grad_norm": 6.287014961242676, "learning_rate": 1.996773992519608e-05, "loss": 0.3452, "step": 1430 }, { "epoch": 1.3759615384615385, "grad_norm": 5.515687465667725, "learning_rate": 1.9967639870439922e-05, "loss": 0.3194, "step": 1431 }, { "epoch": 1.376923076923077, "grad_norm": 4.190722465515137, "learning_rate": 1.996753966101525e-05, "loss": 0.2605, "step": 1432 }, { "epoch": 1.3778846153846154, "grad_norm": 5.029847621917725, "learning_rate": 1.9967439296923613e-05, "loss": 0.2536, "step": 1433 }, { "epoch": 1.3788461538461538, "grad_norm": 5.670961380004883, "learning_rate": 1.9967338778166575e-05, "loss": 0.4607, "step": 1434 }, { "epoch": 1.3798076923076923, "grad_norm": 5.692522048950195, "learning_rate": 1.9967238104745695e-05, "loss": 0.4793, "step": 1435 }, { "epoch": 1.3807692307692307, "grad_norm": 6.50143575668335, "learning_rate": 1.996713727666253e-05, "loss": 0.5491, "step": 1436 }, { "epoch": 1.3817307692307692, "grad_norm": 7.477932453155518, "learning_rate": 1.9967036293918653e-05, "loss": 0.7572, "step": 1437 }, { "epoch": 1.3826923076923077, "grad_norm": 5.314136028289795, "learning_rate": 1.996693515651562e-05, "loss": 0.3188, "step": 1438 }, { "epoch": 1.3836538461538461, "grad_norm": 5.975307941436768, "learning_rate": 1.9966833864455006e-05, "loss": 0.589, "step": 1439 }, { "epoch": 1.3846153846153846, "grad_norm": 5.616934776306152, "learning_rate": 1.9966732417738385e-05, "loss": 0.3928, "step": 1440 }, { "epoch": 1.385576923076923, "grad_norm": 5.524476528167725, "learning_rate": 1.996663081636733e-05, "loss": 0.4491, "step": 1441 }, { "epoch": 1.3865384615384615, "grad_norm": 5.7556352615356445, "learning_rate": 1.9966529060343415e-05, "loss": 0.354, "step": 1442 }, { "epoch": 1.3875, "grad_norm": 7.997722148895264, "learning_rate": 1.996642714966822e-05, "loss": 0.637, "step": 1443 }, { "epoch": 1.3884615384615384, "grad_norm": 5.832324028015137, "learning_rate": 1.9966325084343327e-05, "loss": 0.4528, "step": 1444 }, { "epoch": 1.3894230769230769, "grad_norm": 6.932498931884766, "learning_rate": 1.9966222864370318e-05, "loss": 0.6146, "step": 1445 }, { "epoch": 1.3903846153846153, "grad_norm": 6.120934009552002, "learning_rate": 1.996612048975078e-05, "loss": 0.3912, "step": 1446 }, { "epoch": 1.3913461538461538, "grad_norm": 5.574409008026123, "learning_rate": 1.9966017960486307e-05, "loss": 0.4023, "step": 1447 }, { "epoch": 1.3923076923076922, "grad_norm": 8.236074447631836, "learning_rate": 1.996591527657848e-05, "loss": 0.6733, "step": 1448 }, { "epoch": 1.3932692307692307, "grad_norm": 6.839186191558838, "learning_rate": 1.99658124380289e-05, "loss": 0.6629, "step": 1449 }, { "epoch": 1.3942307692307692, "grad_norm": 6.731932640075684, "learning_rate": 1.996570944483916e-05, "loss": 0.524, "step": 1450 }, { "epoch": 1.3951923076923076, "grad_norm": 7.676901340484619, "learning_rate": 1.9965606297010857e-05, "loss": 0.6756, "step": 1451 }, { "epoch": 1.396153846153846, "grad_norm": 7.284306049346924, "learning_rate": 1.9965502994545594e-05, "loss": 0.5991, "step": 1452 }, { "epoch": 1.3971153846153845, "grad_norm": 5.543864727020264, "learning_rate": 1.9965399537444978e-05, "loss": 0.3594, "step": 1453 }, { "epoch": 1.398076923076923, "grad_norm": 8.23983097076416, "learning_rate": 1.9965295925710602e-05, "loss": 0.7497, "step": 1454 }, { "epoch": 1.3990384615384617, "grad_norm": 7.726754188537598, "learning_rate": 1.9965192159344083e-05, "loss": 0.5214, "step": 1455 }, { "epoch": 1.4, "grad_norm": 5.880899906158447, "learning_rate": 1.996508823834703e-05, "loss": 0.4628, "step": 1456 }, { "epoch": 1.4009615384615386, "grad_norm": 6.881078720092773, "learning_rate": 1.9964984162721056e-05, "loss": 0.528, "step": 1457 }, { "epoch": 1.4019230769230768, "grad_norm": 7.459599494934082, "learning_rate": 1.9964879932467772e-05, "loss": 0.7554, "step": 1458 }, { "epoch": 1.4028846153846155, "grad_norm": 6.721276760101318, "learning_rate": 1.99647755475888e-05, "loss": 0.5302, "step": 1459 }, { "epoch": 1.4038461538461537, "grad_norm": 4.806575775146484, "learning_rate": 1.996467100808576e-05, "loss": 0.3189, "step": 1460 }, { "epoch": 1.4048076923076924, "grad_norm": 5.566277503967285, "learning_rate": 1.9964566313960265e-05, "loss": 0.6169, "step": 1461 }, { "epoch": 1.4057692307692307, "grad_norm": 6.123843669891357, "learning_rate": 1.9964461465213952e-05, "loss": 0.386, "step": 1462 }, { "epoch": 1.4067307692307693, "grad_norm": 8.243680953979492, "learning_rate": 1.996435646184844e-05, "loss": 0.8989, "step": 1463 }, { "epoch": 1.4076923076923076, "grad_norm": 6.350592613220215, "learning_rate": 1.9964251303865362e-05, "loss": 0.4961, "step": 1464 }, { "epoch": 1.4086538461538463, "grad_norm": 6.584053993225098, "learning_rate": 1.996414599126635e-05, "loss": 0.7761, "step": 1465 }, { "epoch": 1.4096153846153845, "grad_norm": 5.299328804016113, "learning_rate": 1.9964040524053033e-05, "loss": 0.2756, "step": 1466 }, { "epoch": 1.4105769230769232, "grad_norm": 7.230277061462402, "learning_rate": 1.9963934902227054e-05, "loss": 0.7452, "step": 1467 }, { "epoch": 1.4115384615384614, "grad_norm": 4.252312660217285, "learning_rate": 1.9963829125790047e-05, "loss": 0.2846, "step": 1468 }, { "epoch": 1.4125, "grad_norm": 7.604729652404785, "learning_rate": 1.996372319474366e-05, "loss": 0.4064, "step": 1469 }, { "epoch": 1.4134615384615383, "grad_norm": 5.756032466888428, "learning_rate": 1.9963617109089525e-05, "loss": 0.4216, "step": 1470 }, { "epoch": 1.414423076923077, "grad_norm": 4.7519917488098145, "learning_rate": 1.9963510868829297e-05, "loss": 0.2334, "step": 1471 }, { "epoch": 1.4153846153846155, "grad_norm": 7.066713809967041, "learning_rate": 1.9963404473964628e-05, "loss": 0.5676, "step": 1472 }, { "epoch": 1.416346153846154, "grad_norm": 4.269400596618652, "learning_rate": 1.996329792449716e-05, "loss": 0.293, "step": 1473 }, { "epoch": 1.4173076923076924, "grad_norm": 5.846036434173584, "learning_rate": 1.9963191220428552e-05, "loss": 0.3973, "step": 1474 }, { "epoch": 1.4182692307692308, "grad_norm": 10.347626686096191, "learning_rate": 1.9963084361760455e-05, "loss": 0.7175, "step": 1475 }, { "epoch": 1.4192307692307693, "grad_norm": 5.699751377105713, "learning_rate": 1.996297734849453e-05, "loss": 0.3398, "step": 1476 }, { "epoch": 1.4201923076923078, "grad_norm": 6.135166168212891, "learning_rate": 1.9962870180632442e-05, "loss": 0.4087, "step": 1477 }, { "epoch": 1.4211538461538462, "grad_norm": 7.880737781524658, "learning_rate": 1.9962762858175846e-05, "loss": 0.7129, "step": 1478 }, { "epoch": 1.4221153846153847, "grad_norm": 6.145586967468262, "learning_rate": 1.996265538112641e-05, "loss": 0.3865, "step": 1479 }, { "epoch": 1.4230769230769231, "grad_norm": 5.807899475097656, "learning_rate": 1.9962547749485807e-05, "loss": 0.4877, "step": 1480 }, { "epoch": 1.4240384615384616, "grad_norm": 6.945399284362793, "learning_rate": 1.99624399632557e-05, "loss": 0.5139, "step": 1481 }, { "epoch": 1.425, "grad_norm": 5.873696804046631, "learning_rate": 1.996233202243776e-05, "loss": 0.4319, "step": 1482 }, { "epoch": 1.4259615384615385, "grad_norm": 8.66256332397461, "learning_rate": 1.9962223927033674e-05, "loss": 0.8719, "step": 1483 }, { "epoch": 1.426923076923077, "grad_norm": 6.987420082092285, "learning_rate": 1.9962115677045104e-05, "loss": 0.6971, "step": 1484 }, { "epoch": 1.4278846153846154, "grad_norm": 5.794795989990234, "learning_rate": 1.9962007272473744e-05, "loss": 0.4333, "step": 1485 }, { "epoch": 1.4288461538461539, "grad_norm": 5.194100856781006, "learning_rate": 1.9961898713321265e-05, "loss": 0.4626, "step": 1486 }, { "epoch": 1.4298076923076923, "grad_norm": 5.78800106048584, "learning_rate": 1.9961789999589357e-05, "loss": 0.3299, "step": 1487 }, { "epoch": 1.4307692307692308, "grad_norm": 5.71204137802124, "learning_rate": 1.9961681131279706e-05, "loss": 0.4999, "step": 1488 }, { "epoch": 1.4317307692307693, "grad_norm": 5.535689830780029, "learning_rate": 1.9961572108394e-05, "loss": 0.5202, "step": 1489 }, { "epoch": 1.4326923076923077, "grad_norm": 7.057514667510986, "learning_rate": 1.996146293093393e-05, "loss": 0.5914, "step": 1490 }, { "epoch": 1.4336538461538462, "grad_norm": 7.464575290679932, "learning_rate": 1.9961353598901194e-05, "loss": 0.9531, "step": 1491 }, { "epoch": 1.4346153846153846, "grad_norm": 6.162288665771484, "learning_rate": 1.9961244112297485e-05, "loss": 0.499, "step": 1492 }, { "epoch": 1.435576923076923, "grad_norm": 6.536776542663574, "learning_rate": 1.9961134471124507e-05, "loss": 0.6609, "step": 1493 }, { "epoch": 1.4365384615384615, "grad_norm": 6.024476528167725, "learning_rate": 1.9961024675383953e-05, "loss": 0.5387, "step": 1494 }, { "epoch": 1.4375, "grad_norm": 6.845000743865967, "learning_rate": 1.9960914725077532e-05, "loss": 0.5664, "step": 1495 }, { "epoch": 1.4384615384615385, "grad_norm": 5.183524131774902, "learning_rate": 1.996080462020695e-05, "loss": 0.5134, "step": 1496 }, { "epoch": 1.439423076923077, "grad_norm": 5.630662441253662, "learning_rate": 1.996069436077392e-05, "loss": 0.4768, "step": 1497 }, { "epoch": 1.4403846153846154, "grad_norm": 5.651458263397217, "learning_rate": 1.9960583946780144e-05, "loss": 0.4918, "step": 1498 }, { "epoch": 1.4413461538461538, "grad_norm": 5.986992359161377, "learning_rate": 1.996047337822734e-05, "loss": 0.3475, "step": 1499 }, { "epoch": 1.4423076923076923, "grad_norm": 6.064839839935303, "learning_rate": 1.996036265511722e-05, "loss": 0.4984, "step": 1500 }, { "epoch": 1.4432692307692307, "grad_norm": 4.51588249206543, "learning_rate": 1.9960251777451507e-05, "loss": 0.2588, "step": 1501 }, { "epoch": 1.4442307692307692, "grad_norm": 8.092745780944824, "learning_rate": 1.9960140745231918e-05, "loss": 0.8237, "step": 1502 }, { "epoch": 1.4451923076923077, "grad_norm": 6.342174530029297, "learning_rate": 1.996002955846018e-05, "loss": 0.5417, "step": 1503 }, { "epoch": 1.4461538461538461, "grad_norm": 6.585818290710449, "learning_rate": 1.9959918217138012e-05, "loss": 0.4216, "step": 1504 }, { "epoch": 1.4471153846153846, "grad_norm": 6.353128433227539, "learning_rate": 1.9959806721267147e-05, "loss": 0.4942, "step": 1505 }, { "epoch": 1.448076923076923, "grad_norm": 8.026952743530273, "learning_rate": 1.9959695070849313e-05, "loss": 0.7945, "step": 1506 }, { "epoch": 1.4490384615384615, "grad_norm": 5.409727573394775, "learning_rate": 1.9959583265886242e-05, "loss": 0.328, "step": 1507 }, { "epoch": 1.45, "grad_norm": 7.253682613372803, "learning_rate": 1.9959471306379672e-05, "loss": 0.8252, "step": 1508 }, { "epoch": 1.4509615384615384, "grad_norm": 6.695193290710449, "learning_rate": 1.9959359192331335e-05, "loss": 0.5711, "step": 1509 }, { "epoch": 1.4519230769230769, "grad_norm": 8.879491806030273, "learning_rate": 1.9959246923742978e-05, "loss": 0.9673, "step": 1510 }, { "epoch": 1.4528846153846153, "grad_norm": 5.967322826385498, "learning_rate": 1.9959134500616336e-05, "loss": 0.4158, "step": 1511 }, { "epoch": 1.4538461538461538, "grad_norm": 5.596719264984131, "learning_rate": 1.9959021922953158e-05, "loss": 0.4291, "step": 1512 }, { "epoch": 1.4548076923076922, "grad_norm": 4.943941593170166, "learning_rate": 1.995890919075519e-05, "loss": 0.3187, "step": 1513 }, { "epoch": 1.4557692307692307, "grad_norm": 7.269772529602051, "learning_rate": 1.9958796304024176e-05, "loss": 0.5222, "step": 1514 }, { "epoch": 1.4567307692307692, "grad_norm": 6.7584333419799805, "learning_rate": 1.9958683262761874e-05, "loss": 0.4991, "step": 1515 }, { "epoch": 1.4576923076923076, "grad_norm": 8.237406730651855, "learning_rate": 1.9958570066970037e-05, "loss": 0.9129, "step": 1516 }, { "epoch": 1.458653846153846, "grad_norm": 5.728678226470947, "learning_rate": 1.9958456716650418e-05, "loss": 0.4244, "step": 1517 }, { "epoch": 1.4596153846153845, "grad_norm": 4.6701154708862305, "learning_rate": 1.9958343211804782e-05, "loss": 0.3058, "step": 1518 }, { "epoch": 1.460576923076923, "grad_norm": 5.200357913970947, "learning_rate": 1.9958229552434886e-05, "loss": 0.5818, "step": 1519 }, { "epoch": 1.4615384615384617, "grad_norm": 5.4407429695129395, "learning_rate": 1.9958115738542493e-05, "loss": 0.3353, "step": 1520 }, { "epoch": 1.4625, "grad_norm": 4.377505779266357, "learning_rate": 1.995800177012937e-05, "loss": 0.3126, "step": 1521 }, { "epoch": 1.4634615384615386, "grad_norm": 6.405202388763428, "learning_rate": 1.9957887647197287e-05, "loss": 0.5701, "step": 1522 }, { "epoch": 1.4644230769230768, "grad_norm": 8.502398490905762, "learning_rate": 1.9957773369748015e-05, "loss": 1.0651, "step": 1523 }, { "epoch": 1.4653846153846155, "grad_norm": 7.417598247528076, "learning_rate": 1.9957658937783324e-05, "loss": 0.6883, "step": 1524 }, { "epoch": 1.4663461538461537, "grad_norm": 8.515656471252441, "learning_rate": 1.995754435130499e-05, "loss": 0.4807, "step": 1525 }, { "epoch": 1.4673076923076924, "grad_norm": 7.564935684204102, "learning_rate": 1.9957429610314797e-05, "loss": 0.5747, "step": 1526 }, { "epoch": 1.4682692307692307, "grad_norm": 5.058487415313721, "learning_rate": 1.995731471481452e-05, "loss": 0.4542, "step": 1527 }, { "epoch": 1.4692307692307693, "grad_norm": 3.485891580581665, "learning_rate": 1.995719966480594e-05, "loss": 0.112, "step": 1528 }, { "epoch": 1.4701923076923076, "grad_norm": 7.281388759613037, "learning_rate": 1.9957084460290848e-05, "loss": 0.5023, "step": 1529 }, { "epoch": 1.4711538461538463, "grad_norm": 6.559213638305664, "learning_rate": 1.995696910127103e-05, "loss": 0.4505, "step": 1530 }, { "epoch": 1.4721153846153845, "grad_norm": 6.90134334564209, "learning_rate": 1.9956853587748275e-05, "loss": 0.5496, "step": 1531 }, { "epoch": 1.4730769230769232, "grad_norm": 5.705577373504639, "learning_rate": 1.9956737919724372e-05, "loss": 0.5219, "step": 1532 }, { "epoch": 1.4740384615384614, "grad_norm": 6.6408281326293945, "learning_rate": 1.9956622097201125e-05, "loss": 0.4736, "step": 1533 }, { "epoch": 1.475, "grad_norm": 6.64719295501709, "learning_rate": 1.995650612018032e-05, "loss": 0.611, "step": 1534 }, { "epoch": 1.4759615384615383, "grad_norm": 5.436254024505615, "learning_rate": 1.9956389988663762e-05, "loss": 0.3188, "step": 1535 }, { "epoch": 1.476923076923077, "grad_norm": 6.355863094329834, "learning_rate": 1.9956273702653254e-05, "loss": 0.6409, "step": 1536 }, { "epoch": 1.4778846153846155, "grad_norm": 5.72179651260376, "learning_rate": 1.9956157262150598e-05, "loss": 0.5214, "step": 1537 }, { "epoch": 1.478846153846154, "grad_norm": 4.980502605438232, "learning_rate": 1.9956040667157606e-05, "loss": 0.3082, "step": 1538 }, { "epoch": 1.4798076923076924, "grad_norm": 5.674933433532715, "learning_rate": 1.995592391767608e-05, "loss": 0.8024, "step": 1539 }, { "epoch": 1.4807692307692308, "grad_norm": 6.11177921295166, "learning_rate": 1.9955807013707838e-05, "loss": 0.5189, "step": 1540 }, { "epoch": 1.4817307692307693, "grad_norm": 6.36502742767334, "learning_rate": 1.995568995525469e-05, "loss": 0.5541, "step": 1541 }, { "epoch": 1.4826923076923078, "grad_norm": 6.910606861114502, "learning_rate": 1.995557274231845e-05, "loss": 0.6153, "step": 1542 }, { "epoch": 1.4836538461538462, "grad_norm": 7.834699630737305, "learning_rate": 1.9955455374900946e-05, "loss": 0.6182, "step": 1543 }, { "epoch": 1.4846153846153847, "grad_norm": 5.837669849395752, "learning_rate": 1.995533785300399e-05, "loss": 0.3493, "step": 1544 }, { "epoch": 1.4855769230769231, "grad_norm": 7.503654479980469, "learning_rate": 1.9955220176629412e-05, "loss": 0.759, "step": 1545 }, { "epoch": 1.4865384615384616, "grad_norm": 5.563817977905273, "learning_rate": 1.9955102345779032e-05, "loss": 0.3702, "step": 1546 }, { "epoch": 1.4875, "grad_norm": 7.9178996086120605, "learning_rate": 1.9954984360454682e-05, "loss": 0.6379, "step": 1547 }, { "epoch": 1.4884615384615385, "grad_norm": 5.757800102233887, "learning_rate": 1.9954866220658193e-05, "loss": 0.3898, "step": 1548 }, { "epoch": 1.489423076923077, "grad_norm": 6.265665054321289, "learning_rate": 1.99547479263914e-05, "loss": 0.4764, "step": 1549 }, { "epoch": 1.4903846153846154, "grad_norm": 6.058814525604248, "learning_rate": 1.9954629477656132e-05, "loss": 0.4211, "step": 1550 }, { "epoch": 1.4913461538461539, "grad_norm": 5.907123565673828, "learning_rate": 1.995451087445423e-05, "loss": 0.4914, "step": 1551 }, { "epoch": 1.4923076923076923, "grad_norm": 5.41421365737915, "learning_rate": 1.995439211678754e-05, "loss": 0.2842, "step": 1552 }, { "epoch": 1.4932692307692308, "grad_norm": 8.26587963104248, "learning_rate": 1.9954273204657898e-05, "loss": 0.6342, "step": 1553 }, { "epoch": 1.4942307692307693, "grad_norm": 5.016823768615723, "learning_rate": 1.995415413806715e-05, "loss": 0.3148, "step": 1554 }, { "epoch": 1.4951923076923077, "grad_norm": 5.451375484466553, "learning_rate": 1.9954034917017146e-05, "loss": 0.4484, "step": 1555 }, { "epoch": 1.4961538461538462, "grad_norm": 5.1458868980407715, "learning_rate": 1.9953915541509735e-05, "loss": 0.3267, "step": 1556 }, { "epoch": 1.4971153846153846, "grad_norm": 6.321378707885742, "learning_rate": 1.9953796011546768e-05, "loss": 0.6158, "step": 1557 }, { "epoch": 1.498076923076923, "grad_norm": 5.096888065338135, "learning_rate": 1.9953676327130104e-05, "loss": 0.2236, "step": 1558 }, { "epoch": 1.4990384615384615, "grad_norm": 5.123610019683838, "learning_rate": 1.9953556488261593e-05, "loss": 0.3416, "step": 1559 }, { "epoch": 1.5, "grad_norm": 6.958491325378418, "learning_rate": 1.9953436494943102e-05, "loss": 0.5454, "step": 1560 }, { "epoch": 1.5009615384615385, "grad_norm": 4.318561553955078, "learning_rate": 1.995331634717649e-05, "loss": 0.2576, "step": 1561 }, { "epoch": 1.501923076923077, "grad_norm": 8.842222213745117, "learning_rate": 1.995319604496362e-05, "loss": 0.7451, "step": 1562 }, { "epoch": 1.5028846153846154, "grad_norm": 6.344943046569824, "learning_rate": 1.9953075588306356e-05, "loss": 0.4724, "step": 1563 }, { "epoch": 1.5038461538461538, "grad_norm": 6.89406156539917, "learning_rate": 1.9952954977206574e-05, "loss": 0.4448, "step": 1564 }, { "epoch": 1.5048076923076923, "grad_norm": 5.156550407409668, "learning_rate": 1.995283421166614e-05, "loss": 0.3979, "step": 1565 }, { "epoch": 1.5057692307692307, "grad_norm": 8.382596969604492, "learning_rate": 1.9952713291686934e-05, "loss": 0.7183, "step": 1566 }, { "epoch": 1.5067307692307692, "grad_norm": 6.754648208618164, "learning_rate": 1.9952592217270826e-05, "loss": 0.4258, "step": 1567 }, { "epoch": 1.5076923076923077, "grad_norm": 5.640666961669922, "learning_rate": 1.9952470988419697e-05, "loss": 0.4321, "step": 1568 }, { "epoch": 1.5086538461538461, "grad_norm": 5.06397008895874, "learning_rate": 1.995234960513543e-05, "loss": 0.3337, "step": 1569 }, { "epoch": 1.5096153846153846, "grad_norm": 5.10022497177124, "learning_rate": 1.9952228067419908e-05, "loss": 0.2326, "step": 1570 }, { "epoch": 1.510576923076923, "grad_norm": 7.24839973449707, "learning_rate": 1.9952106375275015e-05, "loss": 0.4696, "step": 1571 }, { "epoch": 1.5115384615384615, "grad_norm": 7.447750568389893, "learning_rate": 1.995198452870264e-05, "loss": 0.7145, "step": 1572 }, { "epoch": 1.5125, "grad_norm": 6.541612148284912, "learning_rate": 1.9951862527704674e-05, "loss": 0.4809, "step": 1573 }, { "epoch": 1.5134615384615384, "grad_norm": 6.535889148712158, "learning_rate": 1.9951740372283005e-05, "loss": 0.6827, "step": 1574 }, { "epoch": 1.5144230769230769, "grad_norm": 5.092040538787842, "learning_rate": 1.9951618062439538e-05, "loss": 0.4075, "step": 1575 }, { "epoch": 1.5153846153846153, "grad_norm": 6.479968547821045, "learning_rate": 1.9951495598176168e-05, "loss": 0.5884, "step": 1576 }, { "epoch": 1.516346153846154, "grad_norm": 3.9756405353546143, "learning_rate": 1.9951372979494792e-05, "loss": 0.2301, "step": 1577 }, { "epoch": 1.5173076923076922, "grad_norm": 5.056495666503906, "learning_rate": 1.995125020639731e-05, "loss": 0.25, "step": 1578 }, { "epoch": 1.518269230769231, "grad_norm": 6.931697368621826, "learning_rate": 1.995112727888564e-05, "loss": 0.5498, "step": 1579 }, { "epoch": 1.5192307692307692, "grad_norm": 7.729986190795898, "learning_rate": 1.9951004196961673e-05, "loss": 0.9819, "step": 1580 }, { "epoch": 1.5201923076923078, "grad_norm": 5.917235374450684, "learning_rate": 1.995088096062733e-05, "loss": 0.5456, "step": 1581 }, { "epoch": 1.521153846153846, "grad_norm": 6.519207000732422, "learning_rate": 1.995075756988452e-05, "loss": 0.4405, "step": 1582 }, { "epoch": 1.5221153846153848, "grad_norm": 5.749107837677002, "learning_rate": 1.9950634024735156e-05, "loss": 0.4581, "step": 1583 }, { "epoch": 1.523076923076923, "grad_norm": 6.3666768074035645, "learning_rate": 1.995051032518116e-05, "loss": 0.5885, "step": 1584 }, { "epoch": 1.5240384615384617, "grad_norm": 5.966454982757568, "learning_rate": 1.9950386471224445e-05, "loss": 1.0495, "step": 1585 }, { "epoch": 1.525, "grad_norm": 5.74562931060791, "learning_rate": 1.9950262462866938e-05, "loss": 0.4337, "step": 1586 }, { "epoch": 1.5259615384615386, "grad_norm": 6.609324932098389, "learning_rate": 1.995013830011056e-05, "loss": 0.6325, "step": 1587 }, { "epoch": 1.5269230769230768, "grad_norm": 6.365906715393066, "learning_rate": 1.995001398295724e-05, "loss": 0.426, "step": 1588 }, { "epoch": 1.5278846153846155, "grad_norm": 4.969565391540527, "learning_rate": 1.9949889511408905e-05, "loss": 0.2744, "step": 1589 }, { "epoch": 1.5288461538461537, "grad_norm": 4.702548027038574, "learning_rate": 1.994976488546749e-05, "loss": 0.3116, "step": 1590 }, { "epoch": 1.5298076923076924, "grad_norm": 5.064096450805664, "learning_rate": 1.994964010513492e-05, "loss": 0.4278, "step": 1591 }, { "epoch": 1.5307692307692307, "grad_norm": 7.7902326583862305, "learning_rate": 1.9949515170413143e-05, "loss": 0.7484, "step": 1592 }, { "epoch": 1.5317307692307693, "grad_norm": 5.1174187660217285, "learning_rate": 1.994939008130409e-05, "loss": 0.5065, "step": 1593 }, { "epoch": 1.5326923076923076, "grad_norm": 6.885812282562256, "learning_rate": 1.9949264837809704e-05, "loss": 0.4201, "step": 1594 }, { "epoch": 1.5336538461538463, "grad_norm": 5.035685062408447, "learning_rate": 1.994913943993193e-05, "loss": 0.2827, "step": 1595 }, { "epoch": 1.5346153846153845, "grad_norm": 5.859897613525391, "learning_rate": 1.994901388767271e-05, "loss": 0.3712, "step": 1596 }, { "epoch": 1.5355769230769232, "grad_norm": 6.184844493865967, "learning_rate": 1.9948888181033996e-05, "loss": 0.4063, "step": 1597 }, { "epoch": 1.5365384615384614, "grad_norm": 4.850619792938232, "learning_rate": 1.9948762320017735e-05, "loss": 0.3188, "step": 1598 }, { "epoch": 1.5375, "grad_norm": 4.819363594055176, "learning_rate": 1.9948636304625885e-05, "loss": 0.3002, "step": 1599 }, { "epoch": 1.5384615384615383, "grad_norm": 7.072310447692871, "learning_rate": 1.9948510134860396e-05, "loss": 0.6922, "step": 1600 }, { "epoch": 1.539423076923077, "grad_norm": 5.283030033111572, "learning_rate": 1.9948383810723227e-05, "loss": 0.2442, "step": 1601 }, { "epoch": 1.5403846153846152, "grad_norm": 6.660545349121094, "learning_rate": 1.9948257332216343e-05, "loss": 0.5271, "step": 1602 }, { "epoch": 1.541346153846154, "grad_norm": 5.240242004394531, "learning_rate": 1.9948130699341703e-05, "loss": 0.2805, "step": 1603 }, { "epoch": 1.5423076923076922, "grad_norm": 7.735995769500732, "learning_rate": 1.9948003912101274e-05, "loss": 0.485, "step": 1604 }, { "epoch": 1.5432692307692308, "grad_norm": 5.1619367599487305, "learning_rate": 1.9947876970497015e-05, "loss": 0.2924, "step": 1605 }, { "epoch": 1.544230769230769, "grad_norm": 5.842985153198242, "learning_rate": 1.9947749874530906e-05, "loss": 0.3964, "step": 1606 }, { "epoch": 1.5451923076923078, "grad_norm": 7.020264148712158, "learning_rate": 1.9947622624204914e-05, "loss": 0.3465, "step": 1607 }, { "epoch": 1.546153846153846, "grad_norm": 6.30239725112915, "learning_rate": 1.9947495219521014e-05, "loss": 0.4576, "step": 1608 }, { "epoch": 1.5471153846153847, "grad_norm": 4.508838653564453, "learning_rate": 1.9947367660481186e-05, "loss": 0.2798, "step": 1609 }, { "epoch": 1.5480769230769231, "grad_norm": 6.468490123748779, "learning_rate": 1.9947239947087404e-05, "loss": 0.4912, "step": 1610 }, { "epoch": 1.5490384615384616, "grad_norm": 7.670508861541748, "learning_rate": 1.9947112079341658e-05, "loss": 0.7687, "step": 1611 }, { "epoch": 1.55, "grad_norm": 6.548172950744629, "learning_rate": 1.9946984057245922e-05, "loss": 0.4116, "step": 1612 }, { "epoch": 1.5509615384615385, "grad_norm": 8.20011043548584, "learning_rate": 1.994685588080219e-05, "loss": 0.7448, "step": 1613 }, { "epoch": 1.551923076923077, "grad_norm": 6.662733554840088, "learning_rate": 1.994672755001245e-05, "loss": 0.6924, "step": 1614 }, { "epoch": 1.5528846153846154, "grad_norm": 6.068974494934082, "learning_rate": 1.9946599064878685e-05, "loss": 0.3193, "step": 1615 }, { "epoch": 1.5538461538461539, "grad_norm": 6.2597503662109375, "learning_rate": 1.99464704254029e-05, "loss": 0.6617, "step": 1616 }, { "epoch": 1.5548076923076923, "grad_norm": 6.361949443817139, "learning_rate": 1.9946341631587086e-05, "loss": 0.4523, "step": 1617 }, { "epoch": 1.5557692307692308, "grad_norm": 5.734055995941162, "learning_rate": 1.9946212683433245e-05, "loss": 0.4806, "step": 1618 }, { "epoch": 1.5567307692307693, "grad_norm": 7.605165481567383, "learning_rate": 1.994608358094337e-05, "loss": 1.1536, "step": 1619 }, { "epoch": 1.5576923076923077, "grad_norm": 8.189724922180176, "learning_rate": 1.994595432411947e-05, "loss": 0.6651, "step": 1620 }, { "epoch": 1.5586538461538462, "grad_norm": 6.005178451538086, "learning_rate": 1.9945824912963553e-05, "loss": 0.4893, "step": 1621 }, { "epoch": 1.5596153846153846, "grad_norm": 8.009387016296387, "learning_rate": 1.9945695347477622e-05, "loss": 0.843, "step": 1622 }, { "epoch": 1.560576923076923, "grad_norm": 6.224215030670166, "learning_rate": 1.994556562766369e-05, "loss": 0.6272, "step": 1623 }, { "epoch": 1.5615384615384615, "grad_norm": 6.250747203826904, "learning_rate": 1.994543575352377e-05, "loss": 0.591, "step": 1624 }, { "epoch": 1.5625, "grad_norm": 6.094480037689209, "learning_rate": 1.9945305725059875e-05, "loss": 0.5202, "step": 1625 }, { "epoch": 1.5634615384615385, "grad_norm": 5.710748672485352, "learning_rate": 1.9945175542274025e-05, "loss": 0.4818, "step": 1626 }, { "epoch": 1.564423076923077, "grad_norm": 6.599826812744141, "learning_rate": 1.9945045205168242e-05, "loss": 0.6374, "step": 1627 }, { "epoch": 1.5653846153846154, "grad_norm": 5.5935139656066895, "learning_rate": 1.994491471374454e-05, "loss": 0.4997, "step": 1628 }, { "epoch": 1.5663461538461538, "grad_norm": 5.685500144958496, "learning_rate": 1.9944784068004954e-05, "loss": 0.4626, "step": 1629 }, { "epoch": 1.5673076923076923, "grad_norm": 5.513497829437256, "learning_rate": 1.9944653267951507e-05, "loss": 0.4812, "step": 1630 }, { "epoch": 1.5682692307692307, "grad_norm": 6.1875152587890625, "learning_rate": 1.9944522313586226e-05, "loss": 0.6569, "step": 1631 }, { "epoch": 1.5692307692307692, "grad_norm": 4.290899753570557, "learning_rate": 1.9944391204911148e-05, "loss": 0.2722, "step": 1632 }, { "epoch": 1.5701923076923077, "grad_norm": 6.43146276473999, "learning_rate": 1.99442599419283e-05, "loss": 0.5923, "step": 1633 }, { "epoch": 1.5711538461538461, "grad_norm": 5.350543975830078, "learning_rate": 1.994412852463973e-05, "loss": 0.4342, "step": 1634 }, { "epoch": 1.5721153846153846, "grad_norm": 5.580043315887451, "learning_rate": 1.994399695304747e-05, "loss": 0.507, "step": 1635 }, { "epoch": 1.573076923076923, "grad_norm": 6.2495293617248535, "learning_rate": 1.994386522715356e-05, "loss": 0.7371, "step": 1636 }, { "epoch": 1.5740384615384615, "grad_norm": 6.147898197174072, "learning_rate": 1.9943733346960047e-05, "loss": 0.588, "step": 1637 }, { "epoch": 1.575, "grad_norm": 6.946560382843018, "learning_rate": 1.9943601312468976e-05, "loss": 0.6188, "step": 1638 }, { "epoch": 1.5759615384615384, "grad_norm": 5.3039703369140625, "learning_rate": 1.99434691236824e-05, "loss": 0.4696, "step": 1639 }, { "epoch": 1.5769230769230769, "grad_norm": 6.914163589477539, "learning_rate": 1.9943336780602363e-05, "loss": 0.633, "step": 1640 }, { "epoch": 1.5778846153846153, "grad_norm": 7.0524773597717285, "learning_rate": 1.9943204283230924e-05, "loss": 0.8995, "step": 1641 }, { "epoch": 1.578846153846154, "grad_norm": 5.581746578216553, "learning_rate": 1.994307163157014e-05, "loss": 0.5383, "step": 1642 }, { "epoch": 1.5798076923076922, "grad_norm": 4.8841423988342285, "learning_rate": 1.9942938825622064e-05, "loss": 0.5645, "step": 1643 }, { "epoch": 1.580769230769231, "grad_norm": 5.803023815155029, "learning_rate": 1.994280586538876e-05, "loss": 0.4765, "step": 1644 }, { "epoch": 1.5817307692307692, "grad_norm": 5.471337795257568, "learning_rate": 1.9942672750872293e-05, "loss": 0.3873, "step": 1645 }, { "epoch": 1.5826923076923078, "grad_norm": 5.458987712860107, "learning_rate": 1.9942539482074722e-05, "loss": 0.3702, "step": 1646 }, { "epoch": 1.583653846153846, "grad_norm": 5.965085506439209, "learning_rate": 1.9942406058998122e-05, "loss": 0.5943, "step": 1647 }, { "epoch": 1.5846153846153848, "grad_norm": 7.880143165588379, "learning_rate": 1.994227248164456e-05, "loss": 0.8153, "step": 1648 }, { "epoch": 1.585576923076923, "grad_norm": 7.336966037750244, "learning_rate": 1.994213875001611e-05, "loss": 0.5456, "step": 1649 }, { "epoch": 1.5865384615384617, "grad_norm": 6.497345924377441, "learning_rate": 1.9942004864114848e-05, "loss": 0.5616, "step": 1650 }, { "epoch": 1.5875, "grad_norm": 8.12093734741211, "learning_rate": 1.9941870823942847e-05, "loss": 0.6193, "step": 1651 }, { "epoch": 1.5884615384615386, "grad_norm": 7.118032932281494, "learning_rate": 1.9941736629502192e-05, "loss": 0.715, "step": 1652 }, { "epoch": 1.5894230769230768, "grad_norm": 4.688540935516357, "learning_rate": 1.9941602280794964e-05, "loss": 0.3253, "step": 1653 }, { "epoch": 1.5903846153846155, "grad_norm": 6.676557540893555, "learning_rate": 1.9941467777823242e-05, "loss": 0.6746, "step": 1654 }, { "epoch": 1.5913461538461537, "grad_norm": 6.002638339996338, "learning_rate": 1.9941333120589125e-05, "loss": 0.4654, "step": 1655 }, { "epoch": 1.5923076923076924, "grad_norm": 5.9710493087768555, "learning_rate": 1.994119830909469e-05, "loss": 0.5274, "step": 1656 }, { "epoch": 1.5932692307692307, "grad_norm": 5.601850986480713, "learning_rate": 1.9941063343342034e-05, "loss": 0.4588, "step": 1657 }, { "epoch": 1.5942307692307693, "grad_norm": 5.794230937957764, "learning_rate": 1.9940928223333254e-05, "loss": 0.6002, "step": 1658 }, { "epoch": 1.5951923076923076, "grad_norm": 5.477252960205078, "learning_rate": 1.9940792949070445e-05, "loss": 0.4571, "step": 1659 }, { "epoch": 1.5961538461538463, "grad_norm": 8.156267166137695, "learning_rate": 1.99406575205557e-05, "loss": 0.4388, "step": 1660 }, { "epoch": 1.5971153846153845, "grad_norm": 6.701836109161377, "learning_rate": 1.9940521937791132e-05, "loss": 0.6204, "step": 1661 }, { "epoch": 1.5980769230769232, "grad_norm": 7.115114688873291, "learning_rate": 1.9940386200778835e-05, "loss": 0.6114, "step": 1662 }, { "epoch": 1.5990384615384614, "grad_norm": 7.441988945007324, "learning_rate": 1.9940250309520922e-05, "loss": 0.3621, "step": 1663 }, { "epoch": 1.6, "grad_norm": 5.517586708068848, "learning_rate": 1.9940114264019496e-05, "loss": 0.3193, "step": 1664 }, { "epoch": 1.6009615384615383, "grad_norm": 4.965282917022705, "learning_rate": 1.993997806427667e-05, "loss": 0.2554, "step": 1665 }, { "epoch": 1.601923076923077, "grad_norm": 5.277624607086182, "learning_rate": 1.993984171029456e-05, "loss": 0.2841, "step": 1666 }, { "epoch": 1.6028846153846152, "grad_norm": 7.047111511230469, "learning_rate": 1.993970520207528e-05, "loss": 0.7778, "step": 1667 }, { "epoch": 1.603846153846154, "grad_norm": 6.345799446105957, "learning_rate": 1.9939568539620944e-05, "loss": 0.6562, "step": 1668 }, { "epoch": 1.6048076923076922, "grad_norm": 4.965785980224609, "learning_rate": 1.9939431722933678e-05, "loss": 0.5147, "step": 1669 }, { "epoch": 1.6057692307692308, "grad_norm": 5.732254981994629, "learning_rate": 1.9939294752015604e-05, "loss": 0.5509, "step": 1670 }, { "epoch": 1.606730769230769, "grad_norm": 4.9716057777404785, "learning_rate": 1.9939157626868846e-05, "loss": 0.3038, "step": 1671 }, { "epoch": 1.6076923076923078, "grad_norm": 5.285294055938721, "learning_rate": 1.993902034749554e-05, "loss": 0.3287, "step": 1672 }, { "epoch": 1.608653846153846, "grad_norm": 6.55269718170166, "learning_rate": 1.99388829138978e-05, "loss": 0.6948, "step": 1673 }, { "epoch": 1.6096153846153847, "grad_norm": 5.560397148132324, "learning_rate": 1.993874532607777e-05, "loss": 0.4868, "step": 1674 }, { "epoch": 1.6105769230769231, "grad_norm": 6.100982666015625, "learning_rate": 1.9938607584037584e-05, "loss": 0.5985, "step": 1675 }, { "epoch": 1.6115384615384616, "grad_norm": 4.461267948150635, "learning_rate": 1.9938469687779377e-05, "loss": 0.2478, "step": 1676 }, { "epoch": 1.6125, "grad_norm": 5.7994608879089355, "learning_rate": 1.993833163730529e-05, "loss": 0.3603, "step": 1677 }, { "epoch": 1.6134615384615385, "grad_norm": 7.71946907043457, "learning_rate": 1.9938193432617466e-05, "loss": 1.1978, "step": 1678 }, { "epoch": 1.614423076923077, "grad_norm": 7.231532573699951, "learning_rate": 1.9938055073718045e-05, "loss": 0.7562, "step": 1679 }, { "epoch": 1.6153846153846154, "grad_norm": 6.509933948516846, "learning_rate": 1.9937916560609178e-05, "loss": 0.7417, "step": 1680 }, { "epoch": 1.6163461538461539, "grad_norm": 4.762012958526611, "learning_rate": 1.9937777893293013e-05, "loss": 0.3815, "step": 1681 }, { "epoch": 1.6173076923076923, "grad_norm": 6.398928642272949, "learning_rate": 1.9937639071771704e-05, "loss": 0.8225, "step": 1682 }, { "epoch": 1.6182692307692308, "grad_norm": 6.766134262084961, "learning_rate": 1.99375000960474e-05, "loss": 0.6246, "step": 1683 }, { "epoch": 1.6192307692307693, "grad_norm": 6.355029582977295, "learning_rate": 1.9937360966122263e-05, "loss": 0.4471, "step": 1684 }, { "epoch": 1.6201923076923077, "grad_norm": 5.411880016326904, "learning_rate": 1.9937221681998447e-05, "loss": 0.3705, "step": 1685 }, { "epoch": 1.6211538461538462, "grad_norm": 5.910158634185791, "learning_rate": 1.993708224367812e-05, "loss": 0.4868, "step": 1686 }, { "epoch": 1.6221153846153846, "grad_norm": 5.374317169189453, "learning_rate": 1.9936942651163443e-05, "loss": 0.3513, "step": 1687 }, { "epoch": 1.623076923076923, "grad_norm": 7.389001846313477, "learning_rate": 1.9936802904456574e-05, "loss": 0.5539, "step": 1688 }, { "epoch": 1.6240384615384615, "grad_norm": 5.939535140991211, "learning_rate": 1.9936663003559694e-05, "loss": 0.5287, "step": 1689 }, { "epoch": 1.625, "grad_norm": 4.256484508514404, "learning_rate": 1.9936522948474963e-05, "loss": 0.2126, "step": 1690 }, { "epoch": 1.6259615384615385, "grad_norm": 4.952394962310791, "learning_rate": 1.9936382739204564e-05, "loss": 0.3326, "step": 1691 }, { "epoch": 1.626923076923077, "grad_norm": 5.469478130340576, "learning_rate": 1.9936242375750665e-05, "loss": 0.392, "step": 1692 }, { "epoch": 1.6278846153846154, "grad_norm": 7.758575439453125, "learning_rate": 1.9936101858115448e-05, "loss": 0.93, "step": 1693 }, { "epoch": 1.6288461538461538, "grad_norm": 7.072799205780029, "learning_rate": 1.9935961186301092e-05, "loss": 0.5711, "step": 1694 }, { "epoch": 1.6298076923076923, "grad_norm": 5.900166034698486, "learning_rate": 1.993582036030978e-05, "loss": 0.4547, "step": 1695 }, { "epoch": 1.6307692307692307, "grad_norm": 7.8006415367126465, "learning_rate": 1.9935679380143696e-05, "loss": 0.7638, "step": 1696 }, { "epoch": 1.6317307692307692, "grad_norm": 7.290627956390381, "learning_rate": 1.993553824580503e-05, "loss": 0.839, "step": 1697 }, { "epoch": 1.6326923076923077, "grad_norm": 6.606028079986572, "learning_rate": 1.993539695729597e-05, "loss": 0.5101, "step": 1698 }, { "epoch": 1.6336538461538461, "grad_norm": 6.1012654304504395, "learning_rate": 1.993525551461871e-05, "loss": 0.7586, "step": 1699 }, { "epoch": 1.6346153846153846, "grad_norm": 5.586043834686279, "learning_rate": 1.9935113917775445e-05, "loss": 0.4157, "step": 1700 }, { "epoch": 1.635576923076923, "grad_norm": 7.859102725982666, "learning_rate": 1.9934972166768368e-05, "loss": 1.2365, "step": 1701 }, { "epoch": 1.6365384615384615, "grad_norm": 6.809602737426758, "learning_rate": 1.9934830261599685e-05, "loss": 0.5654, "step": 1702 }, { "epoch": 1.6375, "grad_norm": 8.0498628616333, "learning_rate": 1.993468820227159e-05, "loss": 0.9435, "step": 1703 }, { "epoch": 1.6384615384615384, "grad_norm": 6.0868706703186035, "learning_rate": 1.99345459887863e-05, "loss": 0.5878, "step": 1704 }, { "epoch": 1.6394230769230769, "grad_norm": 7.17013692855835, "learning_rate": 1.993440362114601e-05, "loss": 0.9027, "step": 1705 }, { "epoch": 1.6403846153846153, "grad_norm": 6.010045528411865, "learning_rate": 1.9934261099352933e-05, "loss": 1.0293, "step": 1706 }, { "epoch": 1.641346153846154, "grad_norm": 7.198141098022461, "learning_rate": 1.993411842340928e-05, "loss": 0.9342, "step": 1707 }, { "epoch": 1.6423076923076922, "grad_norm": 5.775900363922119, "learning_rate": 1.9933975593317263e-05, "loss": 0.4757, "step": 1708 }, { "epoch": 1.643269230769231, "grad_norm": 7.258525371551514, "learning_rate": 1.9933832609079105e-05, "loss": 0.6985, "step": 1709 }, { "epoch": 1.6442307692307692, "grad_norm": 5.944091796875, "learning_rate": 1.9933689470697017e-05, "loss": 0.4406, "step": 1710 }, { "epoch": 1.6451923076923078, "grad_norm": 5.345902442932129, "learning_rate": 1.9933546178173225e-05, "loss": 0.3484, "step": 1711 }, { "epoch": 1.646153846153846, "grad_norm": 6.117547512054443, "learning_rate": 1.9933402731509952e-05, "loss": 0.5085, "step": 1712 }, { "epoch": 1.6471153846153848, "grad_norm": 7.034620761871338, "learning_rate": 1.993325913070942e-05, "loss": 0.8489, "step": 1713 }, { "epoch": 1.648076923076923, "grad_norm": 6.418431758880615, "learning_rate": 1.9933115375773863e-05, "loss": 0.5847, "step": 1714 }, { "epoch": 1.6490384615384617, "grad_norm": 6.878027439117432, "learning_rate": 1.9932971466705506e-05, "loss": 0.8715, "step": 1715 }, { "epoch": 1.65, "grad_norm": 5.747933387756348, "learning_rate": 1.9932827403506585e-05, "loss": 0.4199, "step": 1716 }, { "epoch": 1.6509615384615386, "grad_norm": 4.751288414001465, "learning_rate": 1.993268318617934e-05, "loss": 0.3763, "step": 1717 }, { "epoch": 1.6519230769230768, "grad_norm": 5.924879550933838, "learning_rate": 1.9932538814725996e-05, "loss": 0.7786, "step": 1718 }, { "epoch": 1.6528846153846155, "grad_norm": 5.998470306396484, "learning_rate": 1.9932394289148807e-05, "loss": 0.6039, "step": 1719 }, { "epoch": 1.6538461538461537, "grad_norm": 5.923929214477539, "learning_rate": 1.9932249609450007e-05, "loss": 0.5524, "step": 1720 }, { "epoch": 1.6548076923076924, "grad_norm": 5.548923969268799, "learning_rate": 1.9932104775631847e-05, "loss": 0.3538, "step": 1721 }, { "epoch": 1.6557692307692307, "grad_norm": 5.211772441864014, "learning_rate": 1.993195978769657e-05, "loss": 0.4292, "step": 1722 }, { "epoch": 1.6567307692307693, "grad_norm": 7.9240312576293945, "learning_rate": 1.9931814645646425e-05, "loss": 0.8542, "step": 1723 }, { "epoch": 1.6576923076923076, "grad_norm": 4.400460720062256, "learning_rate": 1.993166934948367e-05, "loss": 0.2798, "step": 1724 }, { "epoch": 1.6586538461538463, "grad_norm": 6.923060417175293, "learning_rate": 1.9931523899210553e-05, "loss": 0.6915, "step": 1725 }, { "epoch": 1.6596153846153845, "grad_norm": 5.689517021179199, "learning_rate": 1.9931378294829333e-05, "loss": 0.593, "step": 1726 }, { "epoch": 1.6605769230769232, "grad_norm": 6.163456439971924, "learning_rate": 1.9931232536342273e-05, "loss": 0.604, "step": 1727 }, { "epoch": 1.6615384615384614, "grad_norm": 5.756467342376709, "learning_rate": 1.9931086623751628e-05, "loss": 0.4495, "step": 1728 }, { "epoch": 1.6625, "grad_norm": 7.801590442657471, "learning_rate": 1.993094055705967e-05, "loss": 0.9844, "step": 1729 }, { "epoch": 1.6634615384615383, "grad_norm": 7.109904766082764, "learning_rate": 1.993079433626866e-05, "loss": 0.7389, "step": 1730 }, { "epoch": 1.664423076923077, "grad_norm": 5.299765586853027, "learning_rate": 1.9930647961380868e-05, "loss": 0.3089, "step": 1731 }, { "epoch": 1.6653846153846152, "grad_norm": 7.8931565284729, "learning_rate": 1.9930501432398567e-05, "loss": 0.975, "step": 1732 }, { "epoch": 1.666346153846154, "grad_norm": 5.872621059417725, "learning_rate": 1.9930354749324026e-05, "loss": 0.544, "step": 1733 }, { "epoch": 1.6673076923076922, "grad_norm": 6.682176113128662, "learning_rate": 1.993020791215953e-05, "loss": 0.6177, "step": 1734 }, { "epoch": 1.6682692307692308, "grad_norm": 6.2974395751953125, "learning_rate": 1.9930060920907347e-05, "loss": 0.4868, "step": 1735 }, { "epoch": 1.669230769230769, "grad_norm": 6.942503452301025, "learning_rate": 1.9929913775569766e-05, "loss": 0.5357, "step": 1736 }, { "epoch": 1.6701923076923078, "grad_norm": 6.4099602699279785, "learning_rate": 1.9929766476149065e-05, "loss": 0.8031, "step": 1737 }, { "epoch": 1.671153846153846, "grad_norm": 5.2666707038879395, "learning_rate": 1.992961902264753e-05, "loss": 0.3486, "step": 1738 }, { "epoch": 1.6721153846153847, "grad_norm": 5.964724540710449, "learning_rate": 1.9929471415067456e-05, "loss": 0.4478, "step": 1739 }, { "epoch": 1.6730769230769231, "grad_norm": 6.233753681182861, "learning_rate": 1.9929323653411124e-05, "loss": 0.411, "step": 1740 }, { "epoch": 1.6740384615384616, "grad_norm": 6.116096496582031, "learning_rate": 1.9929175737680835e-05, "loss": 0.4958, "step": 1741 }, { "epoch": 1.675, "grad_norm": 5.222075462341309, "learning_rate": 1.992902766787888e-05, "loss": 0.6089, "step": 1742 }, { "epoch": 1.6759615384615385, "grad_norm": 6.635097980499268, "learning_rate": 1.9928879444007553e-05, "loss": 0.6351, "step": 1743 }, { "epoch": 1.676923076923077, "grad_norm": 5.972752094268799, "learning_rate": 1.9928731066069162e-05, "loss": 0.4602, "step": 1744 }, { "epoch": 1.6778846153846154, "grad_norm": 7.029899597167969, "learning_rate": 1.9928582534066005e-05, "loss": 0.7092, "step": 1745 }, { "epoch": 1.6788461538461539, "grad_norm": 6.60123348236084, "learning_rate": 1.9928433848000385e-05, "loss": 0.547, "step": 1746 }, { "epoch": 1.6798076923076923, "grad_norm": 5.815776348114014, "learning_rate": 1.992828500787461e-05, "loss": 0.4903, "step": 1747 }, { "epoch": 1.6807692307692308, "grad_norm": 8.113380432128906, "learning_rate": 1.992813601369099e-05, "loss": 0.8347, "step": 1748 }, { "epoch": 1.6817307692307693, "grad_norm": 4.767938613891602, "learning_rate": 1.9927986865451846e-05, "loss": 0.2668, "step": 1749 }, { "epoch": 1.6826923076923077, "grad_norm": 5.651208877563477, "learning_rate": 1.9927837563159474e-05, "loss": 0.3852, "step": 1750 }, { "epoch": 1.6836538461538462, "grad_norm": 4.948121070861816, "learning_rate": 1.9927688106816207e-05, "loss": 0.3345, "step": 1751 }, { "epoch": 1.6846153846153846, "grad_norm": 5.301750183105469, "learning_rate": 1.9927538496424356e-05, "loss": 0.2993, "step": 1752 }, { "epoch": 1.685576923076923, "grad_norm": 5.525892734527588, "learning_rate": 1.9927388731986248e-05, "loss": 0.5425, "step": 1753 }, { "epoch": 1.6865384615384615, "grad_norm": 11.30070686340332, "learning_rate": 1.99272388135042e-05, "loss": 0.7296, "step": 1754 }, { "epoch": 1.6875, "grad_norm": 7.177574634552002, "learning_rate": 1.992708874098054e-05, "loss": 0.856, "step": 1755 }, { "epoch": 1.6884615384615385, "grad_norm": 6.832375526428223, "learning_rate": 1.99269385144176e-05, "loss": 0.6202, "step": 1756 }, { "epoch": 1.689423076923077, "grad_norm": 7.8303303718566895, "learning_rate": 1.992678813381771e-05, "loss": 1.0916, "step": 1757 }, { "epoch": 1.6903846153846154, "grad_norm": 5.54482364654541, "learning_rate": 1.9926637599183204e-05, "loss": 0.3001, "step": 1758 }, { "epoch": 1.6913461538461538, "grad_norm": 7.268090724945068, "learning_rate": 1.9926486910516418e-05, "loss": 0.7845, "step": 1759 }, { "epoch": 1.6923076923076923, "grad_norm": 6.332513332366943, "learning_rate": 1.9926336067819686e-05, "loss": 0.5255, "step": 1760 }, { "epoch": 1.6932692307692307, "grad_norm": 6.998030185699463, "learning_rate": 1.9926185071095353e-05, "loss": 0.6884, "step": 1761 }, { "epoch": 1.6942307692307692, "grad_norm": 5.226657867431641, "learning_rate": 1.992603392034576e-05, "loss": 0.3135, "step": 1762 }, { "epoch": 1.6951923076923077, "grad_norm": 6.828600883483887, "learning_rate": 1.9925882615573252e-05, "loss": 0.673, "step": 1763 }, { "epoch": 1.6961538461538461, "grad_norm": 5.5819926261901855, "learning_rate": 1.992573115678018e-05, "loss": 0.4907, "step": 1764 }, { "epoch": 1.6971153846153846, "grad_norm": 5.998831748962402, "learning_rate": 1.9925579543968892e-05, "loss": 0.7445, "step": 1765 }, { "epoch": 1.698076923076923, "grad_norm": 7.053996562957764, "learning_rate": 1.992542777714174e-05, "loss": 0.2341, "step": 1766 }, { "epoch": 1.6990384615384615, "grad_norm": 5.587109565734863, "learning_rate": 1.9925275856301082e-05, "loss": 0.2832, "step": 1767 }, { "epoch": 1.7, "grad_norm": 6.134127140045166, "learning_rate": 1.992512378144927e-05, "loss": 0.4564, "step": 1768 }, { "epoch": 1.7009615384615384, "grad_norm": 6.080428600311279, "learning_rate": 1.9924971552588666e-05, "loss": 0.5821, "step": 1769 }, { "epoch": 1.7019230769230769, "grad_norm": 4.973850250244141, "learning_rate": 1.9924819169721633e-05, "loss": 0.2763, "step": 1770 }, { "epoch": 1.7028846153846153, "grad_norm": 5.455986022949219, "learning_rate": 1.992466663285054e-05, "loss": 0.375, "step": 1771 }, { "epoch": 1.703846153846154, "grad_norm": 7.013481140136719, "learning_rate": 1.9924513941977745e-05, "loss": 0.3959, "step": 1772 }, { "epoch": 1.7048076923076922, "grad_norm": 7.3079118728637695, "learning_rate": 1.9924361097105624e-05, "loss": 0.7531, "step": 1773 }, { "epoch": 1.705769230769231, "grad_norm": 6.937167167663574, "learning_rate": 1.9924208098236547e-05, "loss": 0.5655, "step": 1774 }, { "epoch": 1.7067307692307692, "grad_norm": 6.379134178161621, "learning_rate": 1.9924054945372885e-05, "loss": 0.6123, "step": 1775 }, { "epoch": 1.7076923076923078, "grad_norm": 6.770493030548096, "learning_rate": 1.9923901638517017e-05, "loss": 0.5284, "step": 1776 }, { "epoch": 1.708653846153846, "grad_norm": 6.335988998413086, "learning_rate": 1.9923748177671322e-05, "loss": 0.494, "step": 1777 }, { "epoch": 1.7096153846153848, "grad_norm": 4.673907279968262, "learning_rate": 1.9923594562838185e-05, "loss": 0.2705, "step": 1778 }, { "epoch": 1.710576923076923, "grad_norm": 5.649864673614502, "learning_rate": 1.992344079401998e-05, "loss": 0.4186, "step": 1779 }, { "epoch": 1.7115384615384617, "grad_norm": 5.251285552978516, "learning_rate": 1.9923286871219104e-05, "loss": 0.3267, "step": 1780 }, { "epoch": 1.7125, "grad_norm": 6.092261791229248, "learning_rate": 1.9923132794437937e-05, "loss": 0.4423, "step": 1781 }, { "epoch": 1.7134615384615386, "grad_norm": 5.82541036605835, "learning_rate": 1.9922978563678877e-05, "loss": 0.3831, "step": 1782 }, { "epoch": 1.7144230769230768, "grad_norm": 6.178762912750244, "learning_rate": 1.992282417894431e-05, "loss": 0.463, "step": 1783 }, { "epoch": 1.7153846153846155, "grad_norm": 4.030714511871338, "learning_rate": 1.9922669640236633e-05, "loss": 0.2618, "step": 1784 }, { "epoch": 1.7163461538461537, "grad_norm": 7.1071248054504395, "learning_rate": 1.9922514947558248e-05, "loss": 0.6042, "step": 1785 }, { "epoch": 1.7173076923076924, "grad_norm": 7.113064765930176, "learning_rate": 1.9922360100911553e-05, "loss": 0.636, "step": 1786 }, { "epoch": 1.7182692307692307, "grad_norm": 7.14131498336792, "learning_rate": 1.992220510029895e-05, "loss": 0.5412, "step": 1787 }, { "epoch": 1.7192307692307693, "grad_norm": 6.120963096618652, "learning_rate": 1.9922049945722847e-05, "loss": 0.3573, "step": 1788 }, { "epoch": 1.7201923076923076, "grad_norm": 6.321584224700928, "learning_rate": 1.9921894637185654e-05, "loss": 0.4776, "step": 1789 }, { "epoch": 1.7211538461538463, "grad_norm": 8.92752456665039, "learning_rate": 1.992173917468977e-05, "loss": 0.6463, "step": 1790 }, { "epoch": 1.7221153846153845, "grad_norm": 6.420902729034424, "learning_rate": 1.9921583558237615e-05, "loss": 0.7173, "step": 1791 }, { "epoch": 1.7230769230769232, "grad_norm": 6.812191009521484, "learning_rate": 1.9921427787831605e-05, "loss": 0.6422, "step": 1792 }, { "epoch": 1.7240384615384614, "grad_norm": 5.864951133728027, "learning_rate": 1.992127186347415e-05, "loss": 0.4343, "step": 1793 }, { "epoch": 1.725, "grad_norm": 6.321709156036377, "learning_rate": 1.992111578516768e-05, "loss": 0.6609, "step": 1794 }, { "epoch": 1.7259615384615383, "grad_norm": 6.979772090911865, "learning_rate": 1.9920959552914606e-05, "loss": 0.4431, "step": 1795 }, { "epoch": 1.726923076923077, "grad_norm": 7.130726337432861, "learning_rate": 1.992080316671736e-05, "loss": 0.4527, "step": 1796 }, { "epoch": 1.7278846153846152, "grad_norm": 8.048019409179688, "learning_rate": 1.992064662657837e-05, "loss": 0.5637, "step": 1797 }, { "epoch": 1.728846153846154, "grad_norm": 7.149418354034424, "learning_rate": 1.9920489932500056e-05, "loss": 0.5532, "step": 1798 }, { "epoch": 1.7298076923076922, "grad_norm": 7.199006080627441, "learning_rate": 1.992033308448486e-05, "loss": 0.6339, "step": 1799 }, { "epoch": 1.7307692307692308, "grad_norm": 6.6319966316223145, "learning_rate": 1.9920176082535203e-05, "loss": 0.5641, "step": 1800 }, { "epoch": 1.731730769230769, "grad_norm": 7.463079452514648, "learning_rate": 1.992001892665353e-05, "loss": 0.5768, "step": 1801 }, { "epoch": 1.7326923076923078, "grad_norm": 5.862510681152344, "learning_rate": 1.9919861616842283e-05, "loss": 0.4816, "step": 1802 }, { "epoch": 1.733653846153846, "grad_norm": 5.961044788360596, "learning_rate": 1.9919704153103895e-05, "loss": 0.5024, "step": 1803 }, { "epoch": 1.7346153846153847, "grad_norm": 5.627807140350342, "learning_rate": 1.991954653544081e-05, "loss": 0.4882, "step": 1804 }, { "epoch": 1.7355769230769231, "grad_norm": 5.217381954193115, "learning_rate": 1.991938876385548e-05, "loss": 0.3403, "step": 1805 }, { "epoch": 1.7365384615384616, "grad_norm": 4.366616249084473, "learning_rate": 1.991923083835035e-05, "loss": 0.2999, "step": 1806 }, { "epoch": 1.7375, "grad_norm": 5.378798484802246, "learning_rate": 1.9919072758927867e-05, "loss": 0.4053, "step": 1807 }, { "epoch": 1.7384615384615385, "grad_norm": 7.717262268066406, "learning_rate": 1.9918914525590488e-05, "loss": 0.8027, "step": 1808 }, { "epoch": 1.739423076923077, "grad_norm": 6.706855297088623, "learning_rate": 1.9918756138340666e-05, "loss": 0.4845, "step": 1809 }, { "epoch": 1.7403846153846154, "grad_norm": 5.5761003494262695, "learning_rate": 1.991859759718086e-05, "loss": 0.3755, "step": 1810 }, { "epoch": 1.7413461538461539, "grad_norm": 6.439121723175049, "learning_rate": 1.9918438902113534e-05, "loss": 0.6697, "step": 1811 }, { "epoch": 1.7423076923076923, "grad_norm": 5.349660873413086, "learning_rate": 1.9918280053141144e-05, "loss": 0.4342, "step": 1812 }, { "epoch": 1.7432692307692308, "grad_norm": 4.605605125427246, "learning_rate": 1.9918121050266156e-05, "loss": 0.3374, "step": 1813 }, { "epoch": 1.7442307692307693, "grad_norm": 5.781068325042725, "learning_rate": 1.991796189349104e-05, "loss": 0.4822, "step": 1814 }, { "epoch": 1.7451923076923077, "grad_norm": 4.846508979797363, "learning_rate": 1.9917802582818268e-05, "loss": 0.428, "step": 1815 }, { "epoch": 1.7461538461538462, "grad_norm": 6.620258808135986, "learning_rate": 1.9917643118250304e-05, "loss": 0.6947, "step": 1816 }, { "epoch": 1.7471153846153846, "grad_norm": 5.668422222137451, "learning_rate": 1.9917483499789626e-05, "loss": 0.3901, "step": 1817 }, { "epoch": 1.748076923076923, "grad_norm": 4.96247673034668, "learning_rate": 1.9917323727438715e-05, "loss": 0.3518, "step": 1818 }, { "epoch": 1.7490384615384615, "grad_norm": 5.238911151885986, "learning_rate": 1.991716380120004e-05, "loss": 0.3664, "step": 1819 }, { "epoch": 1.75, "grad_norm": 6.302565097808838, "learning_rate": 1.9917003721076096e-05, "loss": 0.7439, "step": 1820 }, { "epoch": 1.7509615384615385, "grad_norm": 5.226541519165039, "learning_rate": 1.991684348706936e-05, "loss": 0.4079, "step": 1821 }, { "epoch": 1.751923076923077, "grad_norm": 6.79351806640625, "learning_rate": 1.9916683099182315e-05, "loss": 0.7983, "step": 1822 }, { "epoch": 1.7528846153846154, "grad_norm": 7.324814319610596, "learning_rate": 1.9916522557417456e-05, "loss": 0.6456, "step": 1823 }, { "epoch": 1.7538461538461538, "grad_norm": 7.00260066986084, "learning_rate": 1.9916361861777268e-05, "loss": 0.6866, "step": 1824 }, { "epoch": 1.7548076923076923, "grad_norm": 8.342662811279297, "learning_rate": 1.9916201012264255e-05, "loss": 0.9104, "step": 1825 }, { "epoch": 1.7557692307692307, "grad_norm": 4.949985504150391, "learning_rate": 1.99160400088809e-05, "loss": 0.3104, "step": 1826 }, { "epoch": 1.7567307692307692, "grad_norm": 5.387944221496582, "learning_rate": 1.9915878851629707e-05, "loss": 0.6468, "step": 1827 }, { "epoch": 1.7576923076923077, "grad_norm": 5.751951217651367, "learning_rate": 1.991571754051318e-05, "loss": 0.5241, "step": 1828 }, { "epoch": 1.7586538461538461, "grad_norm": 6.204518795013428, "learning_rate": 1.991555607553382e-05, "loss": 0.5849, "step": 1829 }, { "epoch": 1.7596153846153846, "grad_norm": 7.493738174438477, "learning_rate": 1.991539445669413e-05, "loss": 0.8126, "step": 1830 }, { "epoch": 1.760576923076923, "grad_norm": 7.0280609130859375, "learning_rate": 1.9915232683996618e-05, "loss": 0.446, "step": 1831 }, { "epoch": 1.7615384615384615, "grad_norm": 5.858822345733643, "learning_rate": 1.9915070757443797e-05, "loss": 0.6264, "step": 1832 }, { "epoch": 1.7625, "grad_norm": 4.892983913421631, "learning_rate": 1.9914908677038178e-05, "loss": 0.2584, "step": 1833 }, { "epoch": 1.7634615384615384, "grad_norm": 4.822608947753906, "learning_rate": 1.9914746442782275e-05, "loss": 0.2726, "step": 1834 }, { "epoch": 1.7644230769230769, "grad_norm": 5.71816873550415, "learning_rate": 1.991458405467861e-05, "loss": 0.5317, "step": 1835 }, { "epoch": 1.7653846153846153, "grad_norm": 6.048571586608887, "learning_rate": 1.9914421512729695e-05, "loss": 0.4406, "step": 1836 }, { "epoch": 1.766346153846154, "grad_norm": 6.644536972045898, "learning_rate": 1.991425881693806e-05, "loss": 0.5571, "step": 1837 }, { "epoch": 1.7673076923076922, "grad_norm": 8.554512023925781, "learning_rate": 1.9914095967306224e-05, "loss": 1.7281, "step": 1838 }, { "epoch": 1.768269230769231, "grad_norm": 6.28848934173584, "learning_rate": 1.991393296383672e-05, "loss": 0.7313, "step": 1839 }, { "epoch": 1.7692307692307692, "grad_norm": 7.338973045349121, "learning_rate": 1.991376980653207e-05, "loss": 0.6454, "step": 1840 }, { "epoch": 1.7701923076923078, "grad_norm": 6.8531599044799805, "learning_rate": 1.9913606495394807e-05, "loss": 0.6502, "step": 1841 }, { "epoch": 1.771153846153846, "grad_norm": 6.262803554534912, "learning_rate": 1.9913443030427472e-05, "loss": 0.4502, "step": 1842 }, { "epoch": 1.7721153846153848, "grad_norm": 4.695810794830322, "learning_rate": 1.9913279411632595e-05, "loss": 0.216, "step": 1843 }, { "epoch": 1.773076923076923, "grad_norm": 5.278253555297852, "learning_rate": 1.9913115639012716e-05, "loss": 0.2711, "step": 1844 }, { "epoch": 1.7740384615384617, "grad_norm": 5.419153213500977, "learning_rate": 1.9912951712570377e-05, "loss": 0.354, "step": 1845 }, { "epoch": 1.775, "grad_norm": 7.371412754058838, "learning_rate": 1.9912787632308125e-05, "loss": 0.4375, "step": 1846 }, { "epoch": 1.7759615384615386, "grad_norm": 7.275217056274414, "learning_rate": 1.9912623398228497e-05, "loss": 0.6987, "step": 1847 }, { "epoch": 1.7769230769230768, "grad_norm": 5.476048469543457, "learning_rate": 1.991245901033405e-05, "loss": 0.4913, "step": 1848 }, { "epoch": 1.7778846153846155, "grad_norm": 6.024328708648682, "learning_rate": 1.991229446862733e-05, "loss": 0.6587, "step": 1849 }, { "epoch": 1.7788461538461537, "grad_norm": 5.263840675354004, "learning_rate": 1.9912129773110896e-05, "loss": 0.5937, "step": 1850 }, { "epoch": 1.7798076923076924, "grad_norm": 6.955621719360352, "learning_rate": 1.9911964923787295e-05, "loss": 0.9387, "step": 1851 }, { "epoch": 1.7807692307692307, "grad_norm": 5.896246433258057, "learning_rate": 1.9911799920659093e-05, "loss": 0.4517, "step": 1852 }, { "epoch": 1.7817307692307693, "grad_norm": 8.152761459350586, "learning_rate": 1.9911634763728847e-05, "loss": 1.0475, "step": 1853 }, { "epoch": 1.7826923076923076, "grad_norm": 6.896229267120361, "learning_rate": 1.9911469452999118e-05, "loss": 0.5779, "step": 1854 }, { "epoch": 1.7836538461538463, "grad_norm": 5.783041000366211, "learning_rate": 1.9911303988472476e-05, "loss": 0.5706, "step": 1855 }, { "epoch": 1.7846153846153845, "grad_norm": 6.265449523925781, "learning_rate": 1.991113837015148e-05, "loss": 0.3775, "step": 1856 }, { "epoch": 1.7855769230769232, "grad_norm": 4.58561372756958, "learning_rate": 1.9910972598038708e-05, "loss": 0.3154, "step": 1857 }, { "epoch": 1.7865384615384614, "grad_norm": 7.495568752288818, "learning_rate": 1.991080667213673e-05, "loss": 0.7887, "step": 1858 }, { "epoch": 1.7875, "grad_norm": 5.2524590492248535, "learning_rate": 1.991064059244812e-05, "loss": 0.3734, "step": 1859 }, { "epoch": 1.7884615384615383, "grad_norm": 5.359567165374756, "learning_rate": 1.9910474358975457e-05, "loss": 0.3526, "step": 1860 }, { "epoch": 1.789423076923077, "grad_norm": 7.308341979980469, "learning_rate": 1.9910307971721317e-05, "loss": 0.8828, "step": 1861 }, { "epoch": 1.7903846153846152, "grad_norm": 5.078431606292725, "learning_rate": 1.9910141430688284e-05, "loss": 0.4253, "step": 1862 }, { "epoch": 1.791346153846154, "grad_norm": 6.61381196975708, "learning_rate": 1.9909974735878944e-05, "loss": 0.4766, "step": 1863 }, { "epoch": 1.7923076923076922, "grad_norm": 5.619102478027344, "learning_rate": 1.990980788729588e-05, "loss": 0.4616, "step": 1864 }, { "epoch": 1.7932692307692308, "grad_norm": 6.226862907409668, "learning_rate": 1.9909640884941683e-05, "loss": 0.5396, "step": 1865 }, { "epoch": 1.794230769230769, "grad_norm": 5.870163440704346, "learning_rate": 1.990947372881894e-05, "loss": 0.4209, "step": 1866 }, { "epoch": 1.7951923076923078, "grad_norm": 5.928430557250977, "learning_rate": 1.9909306418930253e-05, "loss": 0.8044, "step": 1867 }, { "epoch": 1.796153846153846, "grad_norm": 4.8084635734558105, "learning_rate": 1.9909138955278214e-05, "loss": 0.3053, "step": 1868 }, { "epoch": 1.7971153846153847, "grad_norm": 6.036274433135986, "learning_rate": 1.990897133786542e-05, "loss": 0.6324, "step": 1869 }, { "epoch": 1.7980769230769231, "grad_norm": 4.931922912597656, "learning_rate": 1.990880356669447e-05, "loss": 0.3039, "step": 1870 }, { "epoch": 1.7990384615384616, "grad_norm": 7.496730804443359, "learning_rate": 1.9908635641767974e-05, "loss": 1.0806, "step": 1871 }, { "epoch": 1.8, "grad_norm": 4.662125110626221, "learning_rate": 1.9908467563088537e-05, "loss": 0.1874, "step": 1872 }, { "epoch": 1.8009615384615385, "grad_norm": 5.552300930023193, "learning_rate": 1.990829933065876e-05, "loss": 0.5084, "step": 1873 }, { "epoch": 1.801923076923077, "grad_norm": 5.615701675415039, "learning_rate": 1.990813094448126e-05, "loss": 0.8784, "step": 1874 }, { "epoch": 1.8028846153846154, "grad_norm": 7.20648193359375, "learning_rate": 1.990796240455865e-05, "loss": 0.5453, "step": 1875 }, { "epoch": 1.8038461538461539, "grad_norm": 6.66506290435791, "learning_rate": 1.990779371089354e-05, "loss": 0.6842, "step": 1876 }, { "epoch": 1.8048076923076923, "grad_norm": 6.060268878936768, "learning_rate": 1.990762486348855e-05, "loss": 0.4925, "step": 1877 }, { "epoch": 1.8057692307692308, "grad_norm": 6.1951494216918945, "learning_rate": 1.9907455862346302e-05, "loss": 0.498, "step": 1878 }, { "epoch": 1.8067307692307693, "grad_norm": 5.1240234375, "learning_rate": 1.9907286707469416e-05, "loss": 0.4113, "step": 1879 }, { "epoch": 1.8076923076923077, "grad_norm": 5.727426052093506, "learning_rate": 1.9907117398860518e-05, "loss": 0.4341, "step": 1880 }, { "epoch": 1.8086538461538462, "grad_norm": 5.114956855773926, "learning_rate": 1.9906947936522235e-05, "loss": 0.3963, "step": 1881 }, { "epoch": 1.8096153846153846, "grad_norm": 7.299524784088135, "learning_rate": 1.99067783204572e-05, "loss": 0.6953, "step": 1882 }, { "epoch": 1.810576923076923, "grad_norm": 5.301462650299072, "learning_rate": 1.9906608550668037e-05, "loss": 0.4313, "step": 1883 }, { "epoch": 1.8115384615384615, "grad_norm": 7.342327117919922, "learning_rate": 1.9906438627157387e-05, "loss": 0.4521, "step": 1884 }, { "epoch": 1.8125, "grad_norm": 5.840090751647949, "learning_rate": 1.9906268549927886e-05, "loss": 0.5023, "step": 1885 }, { "epoch": 1.8134615384615385, "grad_norm": 5.846935749053955, "learning_rate": 1.990609831898217e-05, "loss": 0.2771, "step": 1886 }, { "epoch": 1.814423076923077, "grad_norm": 7.477528095245361, "learning_rate": 1.990592793432288e-05, "loss": 0.7339, "step": 1887 }, { "epoch": 1.8153846153846154, "grad_norm": 7.000347137451172, "learning_rate": 1.9905757395952664e-05, "loss": 0.6614, "step": 1888 }, { "epoch": 1.8163461538461538, "grad_norm": 5.217353343963623, "learning_rate": 1.9905586703874164e-05, "loss": 0.3125, "step": 1889 }, { "epoch": 1.8173076923076923, "grad_norm": 7.6762003898620605, "learning_rate": 1.9905415858090036e-05, "loss": 0.7867, "step": 1890 }, { "epoch": 1.8182692307692307, "grad_norm": 6.692687034606934, "learning_rate": 1.9905244858602923e-05, "loss": 0.3925, "step": 1891 }, { "epoch": 1.8192307692307692, "grad_norm": 7.354660511016846, "learning_rate": 1.9905073705415482e-05, "loss": 0.7402, "step": 1892 }, { "epoch": 1.8201923076923077, "grad_norm": 4.21101713180542, "learning_rate": 1.9904902398530366e-05, "loss": 0.3222, "step": 1893 }, { "epoch": 1.8211538461538461, "grad_norm": 4.811537265777588, "learning_rate": 1.9904730937950236e-05, "loss": 0.2793, "step": 1894 }, { "epoch": 1.8221153846153846, "grad_norm": 6.681844234466553, "learning_rate": 1.9904559323677754e-05, "loss": 0.4866, "step": 1895 }, { "epoch": 1.823076923076923, "grad_norm": 6.144819259643555, "learning_rate": 1.990438755571558e-05, "loss": 0.4771, "step": 1896 }, { "epoch": 1.8240384615384615, "grad_norm": 7.397452354431152, "learning_rate": 1.990421563406638e-05, "loss": 0.7589, "step": 1897 }, { "epoch": 1.825, "grad_norm": 7.26350736618042, "learning_rate": 1.990404355873282e-05, "loss": 0.6831, "step": 1898 }, { "epoch": 1.8259615384615384, "grad_norm": 4.760599136352539, "learning_rate": 1.9903871329717575e-05, "loss": 0.257, "step": 1899 }, { "epoch": 1.8269230769230769, "grad_norm": 7.074615478515625, "learning_rate": 1.990369894702331e-05, "loss": 0.5795, "step": 1900 }, { "epoch": 1.8278846153846153, "grad_norm": 4.775288105010986, "learning_rate": 1.990352641065271e-05, "loss": 0.3226, "step": 1901 }, { "epoch": 1.828846153846154, "grad_norm": 6.434813499450684, "learning_rate": 1.9903353720608443e-05, "loss": 0.52, "step": 1902 }, { "epoch": 1.8298076923076922, "grad_norm": 6.370591163635254, "learning_rate": 1.9903180876893195e-05, "loss": 0.5451, "step": 1903 }, { "epoch": 1.830769230769231, "grad_norm": 5.551269054412842, "learning_rate": 1.9903007879509642e-05, "loss": 0.4371, "step": 1904 }, { "epoch": 1.8317307692307692, "grad_norm": 7.38054084777832, "learning_rate": 1.9902834728460473e-05, "loss": 0.634, "step": 1905 }, { "epoch": 1.8326923076923078, "grad_norm": 5.161326885223389, "learning_rate": 1.9902661423748373e-05, "loss": 0.3633, "step": 1906 }, { "epoch": 1.833653846153846, "grad_norm": 4.66583251953125, "learning_rate": 1.9902487965376033e-05, "loss": 0.2722, "step": 1907 }, { "epoch": 1.8346153846153848, "grad_norm": 7.070260047912598, "learning_rate": 1.9902314353346142e-05, "loss": 0.4376, "step": 1908 }, { "epoch": 1.835576923076923, "grad_norm": 5.452907085418701, "learning_rate": 1.9902140587661396e-05, "loss": 0.4997, "step": 1909 }, { "epoch": 1.8365384615384617, "grad_norm": 6.507304668426514, "learning_rate": 1.990196666832449e-05, "loss": 0.7309, "step": 1910 }, { "epoch": 1.8375, "grad_norm": 6.720673561096191, "learning_rate": 1.9901792595338125e-05, "loss": 0.7157, "step": 1911 }, { "epoch": 1.8384615384615386, "grad_norm": 5.785982131958008, "learning_rate": 1.9901618368704997e-05, "loss": 0.3869, "step": 1912 }, { "epoch": 1.8394230769230768, "grad_norm": 7.401920795440674, "learning_rate": 1.9901443988427815e-05, "loss": 0.6003, "step": 1913 }, { "epoch": 1.8403846153846155, "grad_norm": 7.067726135253906, "learning_rate": 1.990126945450928e-05, "loss": 0.6255, "step": 1914 }, { "epoch": 1.8413461538461537, "grad_norm": 6.919360637664795, "learning_rate": 1.990109476695211e-05, "loss": 0.7582, "step": 1915 }, { "epoch": 1.8423076923076924, "grad_norm": 4.952747821807861, "learning_rate": 1.9900919925759e-05, "loss": 0.3724, "step": 1916 }, { "epoch": 1.8432692307692307, "grad_norm": 7.162601947784424, "learning_rate": 1.9900744930932676e-05, "loss": 0.7432, "step": 1917 }, { "epoch": 1.8442307692307693, "grad_norm": 7.452887535095215, "learning_rate": 1.990056978247585e-05, "loss": 0.7641, "step": 1918 }, { "epoch": 1.8451923076923076, "grad_norm": 5.045462131500244, "learning_rate": 1.9900394480391236e-05, "loss": 0.3871, "step": 1919 }, { "epoch": 1.8461538461538463, "grad_norm": 5.917646884918213, "learning_rate": 1.990021902468156e-05, "loss": 0.5015, "step": 1920 }, { "epoch": 1.8471153846153845, "grad_norm": 5.327845573425293, "learning_rate": 1.9900043415349538e-05, "loss": 0.3539, "step": 1921 }, { "epoch": 1.8480769230769232, "grad_norm": 6.2743916511535645, "learning_rate": 1.98998676523979e-05, "loss": 0.6036, "step": 1922 }, { "epoch": 1.8490384615384614, "grad_norm": 6.613680839538574, "learning_rate": 1.9899691735829374e-05, "loss": 0.4558, "step": 1923 }, { "epoch": 1.85, "grad_norm": 5.229310512542725, "learning_rate": 1.9899515665646685e-05, "loss": 0.2784, "step": 1924 }, { "epoch": 1.8509615384615383, "grad_norm": 5.244508743286133, "learning_rate": 1.9899339441852566e-05, "loss": 0.3877, "step": 1925 }, { "epoch": 1.851923076923077, "grad_norm": 6.071811199188232, "learning_rate": 1.9899163064449757e-05, "loss": 0.4183, "step": 1926 }, { "epoch": 1.8528846153846152, "grad_norm": 6.788274765014648, "learning_rate": 1.989898653344099e-05, "loss": 0.4516, "step": 1927 }, { "epoch": 1.853846153846154, "grad_norm": 5.900367736816406, "learning_rate": 1.9898809848829004e-05, "loss": 0.4309, "step": 1928 }, { "epoch": 1.8548076923076922, "grad_norm": 5.76145076751709, "learning_rate": 1.989863301061654e-05, "loss": 0.367, "step": 1929 }, { "epoch": 1.8557692307692308, "grad_norm": 7.143716335296631, "learning_rate": 1.9898456018806346e-05, "loss": 0.742, "step": 1930 }, { "epoch": 1.856730769230769, "grad_norm": 5.650564193725586, "learning_rate": 1.9898278873401166e-05, "loss": 0.5394, "step": 1931 }, { "epoch": 1.8576923076923078, "grad_norm": 6.487085819244385, "learning_rate": 1.989810157440375e-05, "loss": 0.6655, "step": 1932 }, { "epoch": 1.858653846153846, "grad_norm": 6.340947151184082, "learning_rate": 1.989792412181685e-05, "loss": 0.5917, "step": 1933 }, { "epoch": 1.8596153846153847, "grad_norm": 7.280224800109863, "learning_rate": 1.9897746515643216e-05, "loss": 0.6062, "step": 1934 }, { "epoch": 1.8605769230769231, "grad_norm": 7.325332164764404, "learning_rate": 1.9897568755885602e-05, "loss": 0.6907, "step": 1935 }, { "epoch": 1.8615384615384616, "grad_norm": 5.857600688934326, "learning_rate": 1.9897390842546772e-05, "loss": 0.4528, "step": 1936 }, { "epoch": 1.8625, "grad_norm": 18.38916778564453, "learning_rate": 1.9897212775629485e-05, "loss": 0.7922, "step": 1937 }, { "epoch": 1.8634615384615385, "grad_norm": 7.914636135101318, "learning_rate": 1.9897034555136506e-05, "loss": 0.6818, "step": 1938 }, { "epoch": 1.864423076923077, "grad_norm": 4.579426288604736, "learning_rate": 1.9896856181070594e-05, "loss": 0.2387, "step": 1939 }, { "epoch": 1.8653846153846154, "grad_norm": 7.844634532928467, "learning_rate": 1.9896677653434523e-05, "loss": 0.554, "step": 1940 }, { "epoch": 1.8663461538461539, "grad_norm": 6.113992691040039, "learning_rate": 1.989649897223106e-05, "loss": 0.4777, "step": 1941 }, { "epoch": 1.8673076923076923, "grad_norm": 4.164723873138428, "learning_rate": 1.9896320137462984e-05, "loss": 0.2218, "step": 1942 }, { "epoch": 1.8682692307692308, "grad_norm": 8.232747077941895, "learning_rate": 1.989614114913306e-05, "loss": 0.9264, "step": 1943 }, { "epoch": 1.8692307692307693, "grad_norm": 6.072230815887451, "learning_rate": 1.989596200724407e-05, "loss": 0.5403, "step": 1944 }, { "epoch": 1.8701923076923077, "grad_norm": 5.659796237945557, "learning_rate": 1.9895782711798795e-05, "loss": 0.4399, "step": 1945 }, { "epoch": 1.8711538461538462, "grad_norm": 7.535306453704834, "learning_rate": 1.9895603262800017e-05, "loss": 0.9698, "step": 1946 }, { "epoch": 1.8721153846153846, "grad_norm": 5.997964382171631, "learning_rate": 1.989542366025052e-05, "loss": 0.5388, "step": 1947 }, { "epoch": 1.873076923076923, "grad_norm": 4.351287364959717, "learning_rate": 1.9895243904153085e-05, "loss": 0.3256, "step": 1948 }, { "epoch": 1.8740384615384615, "grad_norm": 6.133418083190918, "learning_rate": 1.9895063994510512e-05, "loss": 0.6191, "step": 1949 }, { "epoch": 1.875, "grad_norm": 6.61941385269165, "learning_rate": 1.989488393132559e-05, "loss": 0.6826, "step": 1950 }, { "epoch": 1.8759615384615385, "grad_norm": 6.259138584136963, "learning_rate": 1.9894703714601104e-05, "loss": 0.7933, "step": 1951 }, { "epoch": 1.876923076923077, "grad_norm": 4.826964855194092, "learning_rate": 1.989452334433986e-05, "loss": 0.2788, "step": 1952 }, { "epoch": 1.8778846153846154, "grad_norm": 5.8177337646484375, "learning_rate": 1.9894342820544653e-05, "loss": 0.6573, "step": 1953 }, { "epoch": 1.8788461538461538, "grad_norm": 3.9292752742767334, "learning_rate": 1.989416214321829e-05, "loss": 0.2534, "step": 1954 }, { "epoch": 1.8798076923076923, "grad_norm": 5.428994178771973, "learning_rate": 1.9893981312363563e-05, "loss": 0.5051, "step": 1955 }, { "epoch": 1.8807692307692307, "grad_norm": 4.750188827514648, "learning_rate": 1.9893800327983286e-05, "loss": 0.3947, "step": 1956 }, { "epoch": 1.8817307692307692, "grad_norm": 5.061608791351318, "learning_rate": 1.9893619190080266e-05, "loss": 0.4107, "step": 1957 }, { "epoch": 1.8826923076923077, "grad_norm": 6.533303260803223, "learning_rate": 1.9893437898657315e-05, "loss": 0.4044, "step": 1958 }, { "epoch": 1.8836538461538461, "grad_norm": 6.4585723876953125, "learning_rate": 1.989325645371724e-05, "loss": 0.5928, "step": 1959 }, { "epoch": 1.8846153846153846, "grad_norm": 6.0618085861206055, "learning_rate": 1.989307485526287e-05, "loss": 0.5773, "step": 1960 }, { "epoch": 1.885576923076923, "grad_norm": 6.141023635864258, "learning_rate": 1.9892893103297008e-05, "loss": 0.3018, "step": 1961 }, { "epoch": 1.8865384615384615, "grad_norm": 6.240072250366211, "learning_rate": 1.989271119782248e-05, "loss": 0.6466, "step": 1962 }, { "epoch": 1.8875, "grad_norm": 5.800558090209961, "learning_rate": 1.9892529138842113e-05, "loss": 0.7483, "step": 1963 }, { "epoch": 1.8884615384615384, "grad_norm": 4.972325325012207, "learning_rate": 1.9892346926358726e-05, "loss": 0.3592, "step": 1964 }, { "epoch": 1.8894230769230769, "grad_norm": 4.852873802185059, "learning_rate": 1.9892164560375145e-05, "loss": 0.315, "step": 1965 }, { "epoch": 1.8903846153846153, "grad_norm": 3.8724703788757324, "learning_rate": 1.9891982040894208e-05, "loss": 0.2043, "step": 1966 }, { "epoch": 1.891346153846154, "grad_norm": 7.086782932281494, "learning_rate": 1.989179936791874e-05, "loss": 0.5546, "step": 1967 }, { "epoch": 1.8923076923076922, "grad_norm": 6.389884948730469, "learning_rate": 1.989161654145158e-05, "loss": 0.3983, "step": 1968 }, { "epoch": 1.893269230769231, "grad_norm": 5.911256313323975, "learning_rate": 1.989143356149556e-05, "loss": 0.9261, "step": 1969 }, { "epoch": 1.8942307692307692, "grad_norm": 6.50312614440918, "learning_rate": 1.9891250428053525e-05, "loss": 0.7393, "step": 1970 }, { "epoch": 1.8951923076923078, "grad_norm": 6.431880950927734, "learning_rate": 1.989106714112831e-05, "loss": 0.364, "step": 1971 }, { "epoch": 1.896153846153846, "grad_norm": 5.566387176513672, "learning_rate": 1.9890883700722764e-05, "loss": 0.5254, "step": 1972 }, { "epoch": 1.8971153846153848, "grad_norm": 6.9014410972595215, "learning_rate": 1.9890700106839734e-05, "loss": 0.5033, "step": 1973 }, { "epoch": 1.898076923076923, "grad_norm": 5.468316078186035, "learning_rate": 1.9890516359482066e-05, "loss": 0.4723, "step": 1974 }, { "epoch": 1.8990384615384617, "grad_norm": 7.088661193847656, "learning_rate": 1.9890332458652612e-05, "loss": 0.7845, "step": 1975 }, { "epoch": 1.9, "grad_norm": 5.911047458648682, "learning_rate": 1.989014840435423e-05, "loss": 0.4327, "step": 1976 }, { "epoch": 1.9009615384615386, "grad_norm": 6.821689128875732, "learning_rate": 1.988996419658977e-05, "loss": 0.8585, "step": 1977 }, { "epoch": 1.9019230769230768, "grad_norm": 5.599504470825195, "learning_rate": 1.988977983536209e-05, "loss": 0.4872, "step": 1978 }, { "epoch": 1.9028846153846155, "grad_norm": 4.418036460876465, "learning_rate": 1.9889595320674053e-05, "loss": 0.276, "step": 1979 }, { "epoch": 1.9038461538461537, "grad_norm": 6.912367820739746, "learning_rate": 1.988941065252852e-05, "loss": 0.5409, "step": 1980 }, { "epoch": 1.9048076923076924, "grad_norm": 5.557229042053223, "learning_rate": 1.9889225830928365e-05, "loss": 0.5015, "step": 1981 }, { "epoch": 1.9057692307692307, "grad_norm": 5.331482887268066, "learning_rate": 1.988904085587645e-05, "loss": 0.355, "step": 1982 }, { "epoch": 1.9067307692307693, "grad_norm": 5.951603412628174, "learning_rate": 1.988885572737564e-05, "loss": 0.4286, "step": 1983 }, { "epoch": 1.9076923076923076, "grad_norm": 4.88084602355957, "learning_rate": 1.9888670445428816e-05, "loss": 0.282, "step": 1984 }, { "epoch": 1.9086538461538463, "grad_norm": 4.650290489196777, "learning_rate": 1.9888485010038848e-05, "loss": 0.361, "step": 1985 }, { "epoch": 1.9096153846153845, "grad_norm": 6.570076942443848, "learning_rate": 1.9888299421208615e-05, "loss": 0.6796, "step": 1986 }, { "epoch": 1.9105769230769232, "grad_norm": 6.3210129737854, "learning_rate": 1.9888113678940996e-05, "loss": 0.4239, "step": 1987 }, { "epoch": 1.9115384615384614, "grad_norm": 6.351555347442627, "learning_rate": 1.9887927783238875e-05, "loss": 0.4975, "step": 1988 }, { "epoch": 1.9125, "grad_norm": 6.34703254699707, "learning_rate": 1.9887741734105134e-05, "loss": 0.6567, "step": 1989 }, { "epoch": 1.9134615384615383, "grad_norm": 5.599621772766113, "learning_rate": 1.9887555531542664e-05, "loss": 0.5, "step": 1990 }, { "epoch": 1.914423076923077, "grad_norm": 5.046482563018799, "learning_rate": 1.9887369175554352e-05, "loss": 0.3233, "step": 1991 }, { "epoch": 1.9153846153846152, "grad_norm": 5.449278354644775, "learning_rate": 1.9887182666143087e-05, "loss": 0.4537, "step": 1992 }, { "epoch": 1.916346153846154, "grad_norm": 6.44766902923584, "learning_rate": 1.9886996003311767e-05, "loss": 0.9274, "step": 1993 }, { "epoch": 1.9173076923076922, "grad_norm": 6.158804416656494, "learning_rate": 1.9886809187063285e-05, "loss": 0.4978, "step": 1994 }, { "epoch": 1.9182692307692308, "grad_norm": 6.010787010192871, "learning_rate": 1.9886622217400547e-05, "loss": 0.6317, "step": 1995 }, { "epoch": 1.919230769230769, "grad_norm": 7.2143025398254395, "learning_rate": 1.9886435094326445e-05, "loss": 0.6367, "step": 1996 }, { "epoch": 1.9201923076923078, "grad_norm": 7.775146007537842, "learning_rate": 1.988624781784389e-05, "loss": 0.6823, "step": 1997 }, { "epoch": 1.921153846153846, "grad_norm": 5.117441654205322, "learning_rate": 1.988606038795578e-05, "loss": 0.494, "step": 1998 }, { "epoch": 1.9221153846153847, "grad_norm": 6.810523986816406, "learning_rate": 1.988587280466503e-05, "loss": 0.4681, "step": 1999 }, { "epoch": 1.9230769230769231, "grad_norm": 5.880096912384033, "learning_rate": 1.9885685067974547e-05, "loss": 0.4392, "step": 2000 }, { "epoch": 1.9240384615384616, "grad_norm": 5.790782928466797, "learning_rate": 1.988549717788725e-05, "loss": 0.4629, "step": 2001 }, { "epoch": 1.925, "grad_norm": 6.553274631500244, "learning_rate": 1.9885309134406045e-05, "loss": 0.4787, "step": 2002 }, { "epoch": 1.9259615384615385, "grad_norm": 7.578326225280762, "learning_rate": 1.9885120937533856e-05, "loss": 0.921, "step": 2003 }, { "epoch": 1.926923076923077, "grad_norm": 6.801586627960205, "learning_rate": 1.9884932587273605e-05, "loss": 0.6483, "step": 2004 }, { "epoch": 1.9278846153846154, "grad_norm": 4.66160774230957, "learning_rate": 1.9884744083628217e-05, "loss": 0.3459, "step": 2005 }, { "epoch": 1.9288461538461539, "grad_norm": 6.9688920974731445, "learning_rate": 1.9884555426600605e-05, "loss": 0.7861, "step": 2006 }, { "epoch": 1.9298076923076923, "grad_norm": 7.288578510284424, "learning_rate": 1.9884366616193707e-05, "loss": 0.7174, "step": 2007 }, { "epoch": 1.9307692307692308, "grad_norm": 5.673295497894287, "learning_rate": 1.9884177652410447e-05, "loss": 0.4642, "step": 2008 }, { "epoch": 1.9317307692307693, "grad_norm": 4.891266822814941, "learning_rate": 1.988398853525376e-05, "loss": 0.4703, "step": 2009 }, { "epoch": 1.9326923076923077, "grad_norm": 6.593831539154053, "learning_rate": 1.988379926472658e-05, "loss": 0.5324, "step": 2010 }, { "epoch": 1.9336538461538462, "grad_norm": 6.875685691833496, "learning_rate": 1.9883609840831845e-05, "loss": 0.5398, "step": 2011 }, { "epoch": 1.9346153846153846, "grad_norm": 5.6528825759887695, "learning_rate": 1.9883420263572494e-05, "loss": 0.5083, "step": 2012 }, { "epoch": 1.935576923076923, "grad_norm": 6.983880043029785, "learning_rate": 1.988323053295147e-05, "loss": 0.8695, "step": 2013 }, { "epoch": 1.9365384615384615, "grad_norm": 4.613539218902588, "learning_rate": 1.9883040648971706e-05, "loss": 0.3116, "step": 2014 }, { "epoch": 1.9375, "grad_norm": 6.421748638153076, "learning_rate": 1.9882850611636168e-05, "loss": 0.4807, "step": 2015 }, { "epoch": 1.9384615384615385, "grad_norm": 6.28587007522583, "learning_rate": 1.9882660420947788e-05, "loss": 0.5415, "step": 2016 }, { "epoch": 1.939423076923077, "grad_norm": 7.46753454208374, "learning_rate": 1.9882470076909527e-05, "loss": 1.2135, "step": 2017 }, { "epoch": 1.9403846153846154, "grad_norm": 6.310245513916016, "learning_rate": 1.9882279579524334e-05, "loss": 0.5279, "step": 2018 }, { "epoch": 1.9413461538461538, "grad_norm": 7.096616268157959, "learning_rate": 1.9882088928795165e-05, "loss": 0.6521, "step": 2019 }, { "epoch": 1.9423076923076923, "grad_norm": 4.584342002868652, "learning_rate": 1.988189812472498e-05, "loss": 0.3301, "step": 2020 }, { "epoch": 1.9432692307692307, "grad_norm": 6.30571985244751, "learning_rate": 1.9881707167316738e-05, "loss": 0.5942, "step": 2021 }, { "epoch": 1.9442307692307692, "grad_norm": 5.221812725067139, "learning_rate": 1.9881516056573408e-05, "loss": 0.4922, "step": 2022 }, { "epoch": 1.9451923076923077, "grad_norm": 4.9149603843688965, "learning_rate": 1.9881324792497945e-05, "loss": 0.4235, "step": 2023 }, { "epoch": 1.9461538461538461, "grad_norm": 5.382432460784912, "learning_rate": 1.9881133375093323e-05, "loss": 0.3894, "step": 2024 }, { "epoch": 1.9471153846153846, "grad_norm": 4.89478874206543, "learning_rate": 1.9880941804362518e-05, "loss": 0.3364, "step": 2025 }, { "epoch": 1.948076923076923, "grad_norm": 6.530455112457275, "learning_rate": 1.988075008030849e-05, "loss": 0.4805, "step": 2026 }, { "epoch": 1.9490384615384615, "grad_norm": 5.883199691772461, "learning_rate": 1.9880558202934223e-05, "loss": 0.4018, "step": 2027 }, { "epoch": 1.95, "grad_norm": 7.060254096984863, "learning_rate": 1.988036617224269e-05, "loss": 0.466, "step": 2028 }, { "epoch": 1.9509615384615384, "grad_norm": 5.5254387855529785, "learning_rate": 1.9880173988236873e-05, "loss": 0.3766, "step": 2029 }, { "epoch": 1.9519230769230769, "grad_norm": 5.9635701179504395, "learning_rate": 1.9879981650919755e-05, "loss": 0.7883, "step": 2030 }, { "epoch": 1.9528846153846153, "grad_norm": 5.826533317565918, "learning_rate": 1.987978916029432e-05, "loss": 0.4985, "step": 2031 }, { "epoch": 1.953846153846154, "grad_norm": 4.408644199371338, "learning_rate": 1.987959651636355e-05, "loss": 0.1835, "step": 2032 }, { "epoch": 1.9548076923076922, "grad_norm": 5.372979164123535, "learning_rate": 1.987940371913044e-05, "loss": 0.3605, "step": 2033 }, { "epoch": 1.955769230769231, "grad_norm": 6.329056739807129, "learning_rate": 1.9879210768597983e-05, "loss": 0.6289, "step": 2034 }, { "epoch": 1.9567307692307692, "grad_norm": 5.453447341918945, "learning_rate": 1.9879017664769167e-05, "loss": 0.4102, "step": 2035 }, { "epoch": 1.9576923076923078, "grad_norm": 11.300821304321289, "learning_rate": 1.9878824407646992e-05, "loss": 0.9693, "step": 2036 }, { "epoch": 1.958653846153846, "grad_norm": 4.324413299560547, "learning_rate": 1.987863099723446e-05, "loss": 0.2949, "step": 2037 }, { "epoch": 1.9596153846153848, "grad_norm": 5.183276653289795, "learning_rate": 1.9878437433534563e-05, "loss": 0.2607, "step": 2038 }, { "epoch": 1.960576923076923, "grad_norm": 7.444509029388428, "learning_rate": 1.9878243716550313e-05, "loss": 0.6791, "step": 2039 }, { "epoch": 1.9615384615384617, "grad_norm": 5.282601356506348, "learning_rate": 1.987804984628471e-05, "loss": 0.5959, "step": 2040 }, { "epoch": 1.9625, "grad_norm": 4.773013591766357, "learning_rate": 1.987785582274077e-05, "loss": 0.3062, "step": 2041 }, { "epoch": 1.9634615384615386, "grad_norm": 5.883995056152344, "learning_rate": 1.9877661645921492e-05, "loss": 0.4302, "step": 2042 }, { "epoch": 1.9644230769230768, "grad_norm": 7.250327110290527, "learning_rate": 1.98774673158299e-05, "loss": 0.5261, "step": 2043 }, { "epoch": 1.9653846153846155, "grad_norm": 8.343216896057129, "learning_rate": 1.9877272832469002e-05, "loss": 0.5843, "step": 2044 }, { "epoch": 1.9663461538461537, "grad_norm": 7.015777587890625, "learning_rate": 1.9877078195841823e-05, "loss": 0.6518, "step": 2045 }, { "epoch": 1.9673076923076924, "grad_norm": 6.363195896148682, "learning_rate": 1.9876883405951378e-05, "loss": 0.7599, "step": 2046 }, { "epoch": 1.9682692307692307, "grad_norm": 5.945101261138916, "learning_rate": 1.9876688462800692e-05, "loss": 0.4699, "step": 2047 }, { "epoch": 1.9692307692307693, "grad_norm": 5.761539459228516, "learning_rate": 1.9876493366392784e-05, "loss": 0.7868, "step": 2048 }, { "epoch": 1.9701923076923076, "grad_norm": 8.374820709228516, "learning_rate": 1.9876298116730694e-05, "loss": 0.9712, "step": 2049 }, { "epoch": 1.9711538461538463, "grad_norm": 7.333389759063721, "learning_rate": 1.987610271381744e-05, "loss": 0.7061, "step": 2050 }, { "epoch": 1.9721153846153845, "grad_norm": 8.968692779541016, "learning_rate": 1.9875907157656057e-05, "loss": 0.7211, "step": 2051 }, { "epoch": 1.9730769230769232, "grad_norm": 6.070476055145264, "learning_rate": 1.987571144824958e-05, "loss": 0.5944, "step": 2052 }, { "epoch": 1.9740384615384614, "grad_norm": 7.824066162109375, "learning_rate": 1.9875515585601047e-05, "loss": 0.6886, "step": 2053 }, { "epoch": 1.975, "grad_norm": 7.501299858093262, "learning_rate": 1.9875319569713498e-05, "loss": 0.893, "step": 2054 }, { "epoch": 1.9759615384615383, "grad_norm": 6.166316509246826, "learning_rate": 1.987512340058997e-05, "loss": 0.8034, "step": 2055 }, { "epoch": 1.976923076923077, "grad_norm": 5.3480224609375, "learning_rate": 1.9874927078233514e-05, "loss": 0.4992, "step": 2056 }, { "epoch": 1.9778846153846152, "grad_norm": 6.617974758148193, "learning_rate": 1.9874730602647164e-05, "loss": 0.7288, "step": 2057 }, { "epoch": 1.978846153846154, "grad_norm": 7.652027606964111, "learning_rate": 1.9874533973833987e-05, "loss": 1.0967, "step": 2058 }, { "epoch": 1.9798076923076922, "grad_norm": 6.335834503173828, "learning_rate": 1.987433719179702e-05, "loss": 0.6254, "step": 2059 }, { "epoch": 1.9807692307692308, "grad_norm": 5.359956741333008, "learning_rate": 1.987414025653932e-05, "loss": 0.4675, "step": 2060 }, { "epoch": 1.981730769230769, "grad_norm": 6.394716262817383, "learning_rate": 1.987394316806394e-05, "loss": 0.4929, "step": 2061 }, { "epoch": 1.9826923076923078, "grad_norm": 4.776952743530273, "learning_rate": 1.987374592637395e-05, "loss": 0.2734, "step": 2062 }, { "epoch": 1.983653846153846, "grad_norm": 5.067639350891113, "learning_rate": 1.9873548531472395e-05, "loss": 0.2814, "step": 2063 }, { "epoch": 1.9846153846153847, "grad_norm": 6.8142008781433105, "learning_rate": 1.987335098336235e-05, "loss": 0.4436, "step": 2064 }, { "epoch": 1.9855769230769231, "grad_norm": 5.734305381774902, "learning_rate": 1.9873153282046873e-05, "loss": 0.4012, "step": 2065 }, { "epoch": 1.9865384615384616, "grad_norm": 6.340561389923096, "learning_rate": 1.9872955427529036e-05, "loss": 0.5505, "step": 2066 }, { "epoch": 1.9875, "grad_norm": 5.5303497314453125, "learning_rate": 1.9872757419811907e-05, "loss": 0.4778, "step": 2067 }, { "epoch": 1.9884615384615385, "grad_norm": 4.857333183288574, "learning_rate": 1.987255925889856e-05, "loss": 0.393, "step": 2068 }, { "epoch": 1.989423076923077, "grad_norm": 7.884006977081299, "learning_rate": 1.9872360944792066e-05, "loss": 0.9814, "step": 2069 }, { "epoch": 1.9903846153846154, "grad_norm": 7.289398193359375, "learning_rate": 1.987216247749551e-05, "loss": 0.8079, "step": 2070 }, { "epoch": 1.9913461538461539, "grad_norm": 6.309372425079346, "learning_rate": 1.987196385701196e-05, "loss": 0.6326, "step": 2071 }, { "epoch": 1.9923076923076923, "grad_norm": 5.35369873046875, "learning_rate": 1.987176508334451e-05, "loss": 0.4298, "step": 2072 }, { "epoch": 1.9932692307692308, "grad_norm": 5.341714382171631, "learning_rate": 1.9871566156496237e-05, "loss": 0.3332, "step": 2073 }, { "epoch": 1.9942307692307693, "grad_norm": 7.3233842849731445, "learning_rate": 1.987136707647023e-05, "loss": 0.7014, "step": 2074 }, { "epoch": 1.9951923076923077, "grad_norm": 6.488092422485352, "learning_rate": 1.987116784326958e-05, "loss": 0.4191, "step": 2075 }, { "epoch": 1.9961538461538462, "grad_norm": 5.147663116455078, "learning_rate": 1.9870968456897377e-05, "loss": 0.3562, "step": 2076 }, { "epoch": 1.9971153846153846, "grad_norm": 6.963771343231201, "learning_rate": 1.9870768917356715e-05, "loss": 0.4537, "step": 2077 }, { "epoch": 1.998076923076923, "grad_norm": 6.623997688293457, "learning_rate": 1.9870569224650686e-05, "loss": 0.5449, "step": 2078 }, { "epoch": 1.9990384615384615, "grad_norm": 5.1447954177856445, "learning_rate": 1.9870369378782394e-05, "loss": 0.4172, "step": 2079 }, { "epoch": 2.0, "grad_norm": 7.9661335945129395, "learning_rate": 1.9870169379754937e-05, "loss": 0.7317, "step": 2080 }, { "epoch": 2.0009615384615387, "grad_norm": 3.525482654571533, "learning_rate": 1.9869969227571423e-05, "loss": 0.2959, "step": 2081 }, { "epoch": 2.001923076923077, "grad_norm": 3.248424530029297, "learning_rate": 1.986976892223495e-05, "loss": 0.1901, "step": 2082 }, { "epoch": 2.0028846153846156, "grad_norm": 4.2656450271606445, "learning_rate": 1.9869568463748634e-05, "loss": 0.1874, "step": 2083 }, { "epoch": 2.003846153846154, "grad_norm": 5.795900821685791, "learning_rate": 1.986936785211558e-05, "loss": 0.2657, "step": 2084 }, { "epoch": 2.0048076923076925, "grad_norm": 4.017489433288574, "learning_rate": 1.9869167087338908e-05, "loss": 0.1773, "step": 2085 }, { "epoch": 2.0057692307692307, "grad_norm": 7.418359279632568, "learning_rate": 1.9868966169421727e-05, "loss": 0.3319, "step": 2086 }, { "epoch": 2.0067307692307694, "grad_norm": 3.9123785495758057, "learning_rate": 1.9868765098367152e-05, "loss": 0.214, "step": 2087 }, { "epoch": 2.0076923076923077, "grad_norm": 4.232620716094971, "learning_rate": 1.9868563874178314e-05, "loss": 0.1962, "step": 2088 }, { "epoch": 2.0086538461538463, "grad_norm": 4.492551803588867, "learning_rate": 1.9868362496858323e-05, "loss": 0.2454, "step": 2089 }, { "epoch": 2.0096153846153846, "grad_norm": 2.780780076980591, "learning_rate": 1.986816096641031e-05, "loss": 0.083, "step": 2090 }, { "epoch": 2.0105769230769233, "grad_norm": 7.461188316345215, "learning_rate": 1.98679592828374e-05, "loss": 0.2865, "step": 2091 }, { "epoch": 2.0115384615384615, "grad_norm": 5.223734378814697, "learning_rate": 1.9867757446142728e-05, "loss": 0.2685, "step": 2092 }, { "epoch": 2.0125, "grad_norm": 5.683680057525635, "learning_rate": 1.986755545632942e-05, "loss": 0.2689, "step": 2093 }, { "epoch": 2.0134615384615384, "grad_norm": 3.688741683959961, "learning_rate": 1.9867353313400612e-05, "loss": 0.1299, "step": 2094 }, { "epoch": 2.014423076923077, "grad_norm": 6.449171543121338, "learning_rate": 1.9867151017359443e-05, "loss": 0.2003, "step": 2095 }, { "epoch": 2.0153846153846153, "grad_norm": 4.994868755340576, "learning_rate": 1.986694856820905e-05, "loss": 0.2415, "step": 2096 }, { "epoch": 2.016346153846154, "grad_norm": 3.871171712875366, "learning_rate": 1.9866745965952572e-05, "loss": 0.1833, "step": 2097 }, { "epoch": 2.0173076923076922, "grad_norm": 5.62655782699585, "learning_rate": 1.9866543210593154e-05, "loss": 0.2385, "step": 2098 }, { "epoch": 2.018269230769231, "grad_norm": 4.95604133605957, "learning_rate": 1.9866340302133948e-05, "loss": 0.1438, "step": 2099 }, { "epoch": 2.019230769230769, "grad_norm": 5.204801082611084, "learning_rate": 1.9866137240578093e-05, "loss": 0.2253, "step": 2100 }, { "epoch": 2.020192307692308, "grad_norm": 7.344472885131836, "learning_rate": 1.9865934025928748e-05, "loss": 0.2444, "step": 2101 }, { "epoch": 2.021153846153846, "grad_norm": 5.969837188720703, "learning_rate": 1.9865730658189063e-05, "loss": 0.1815, "step": 2102 }, { "epoch": 2.0221153846153848, "grad_norm": 3.3435866832733154, "learning_rate": 1.9865527137362193e-05, "loss": 0.1736, "step": 2103 }, { "epoch": 2.023076923076923, "grad_norm": 5.699297904968262, "learning_rate": 1.9865323463451296e-05, "loss": 0.1313, "step": 2104 }, { "epoch": 2.0240384615384617, "grad_norm": 3.647505283355713, "learning_rate": 1.9865119636459533e-05, "loss": 0.098, "step": 2105 }, { "epoch": 2.025, "grad_norm": 2.0882558822631836, "learning_rate": 1.986491565639007e-05, "loss": 0.0721, "step": 2106 }, { "epoch": 2.0259615384615386, "grad_norm": 3.108933210372925, "learning_rate": 1.9864711523246065e-05, "loss": 0.079, "step": 2107 }, { "epoch": 2.026923076923077, "grad_norm": 4.941086769104004, "learning_rate": 1.9864507237030693e-05, "loss": 0.1691, "step": 2108 }, { "epoch": 2.0278846153846155, "grad_norm": 3.3439278602600098, "learning_rate": 1.986430279774712e-05, "loss": 0.0734, "step": 2109 }, { "epoch": 2.0288461538461537, "grad_norm": 5.294781684875488, "learning_rate": 1.9864098205398517e-05, "loss": 0.2197, "step": 2110 }, { "epoch": 2.0298076923076924, "grad_norm": 3.4782216548919678, "learning_rate": 1.986389345998806e-05, "loss": 0.1258, "step": 2111 }, { "epoch": 2.0307692307692307, "grad_norm": 4.272822856903076, "learning_rate": 1.9863688561518932e-05, "loss": 0.1183, "step": 2112 }, { "epoch": 2.0317307692307693, "grad_norm": 3.954845428466797, "learning_rate": 1.98634835099943e-05, "loss": 0.1456, "step": 2113 }, { "epoch": 2.0326923076923076, "grad_norm": 5.477548599243164, "learning_rate": 1.986327830541736e-05, "loss": 0.1963, "step": 2114 }, { "epoch": 2.0336538461538463, "grad_norm": 6.0302581787109375, "learning_rate": 1.9863072947791284e-05, "loss": 0.2438, "step": 2115 }, { "epoch": 2.0346153846153845, "grad_norm": 4.217291831970215, "learning_rate": 1.9862867437119266e-05, "loss": 0.1432, "step": 2116 }, { "epoch": 2.035576923076923, "grad_norm": 3.237665891647339, "learning_rate": 1.9862661773404492e-05, "loss": 0.0662, "step": 2117 }, { "epoch": 2.0365384615384614, "grad_norm": 6.784976005554199, "learning_rate": 1.9862455956650156e-05, "loss": 0.1255, "step": 2118 }, { "epoch": 2.0375, "grad_norm": 6.269946575164795, "learning_rate": 1.9862249986859444e-05, "loss": 0.2572, "step": 2119 }, { "epoch": 2.0384615384615383, "grad_norm": 7.425948619842529, "learning_rate": 1.9862043864035563e-05, "loss": 0.2941, "step": 2120 }, { "epoch": 2.039423076923077, "grad_norm": 5.253424644470215, "learning_rate": 1.9861837588181703e-05, "loss": 0.1794, "step": 2121 }, { "epoch": 2.0403846153846152, "grad_norm": 6.107443332672119, "learning_rate": 1.9861631159301067e-05, "loss": 0.2189, "step": 2122 }, { "epoch": 2.041346153846154, "grad_norm": 6.572025775909424, "learning_rate": 1.9861424577396863e-05, "loss": 0.2011, "step": 2123 }, { "epoch": 2.042307692307692, "grad_norm": 4.309020519256592, "learning_rate": 1.986121784247229e-05, "loss": 0.1277, "step": 2124 }, { "epoch": 2.043269230769231, "grad_norm": 5.18333625793457, "learning_rate": 1.986101095453056e-05, "loss": 0.1469, "step": 2125 }, { "epoch": 2.044230769230769, "grad_norm": 3.865936279296875, "learning_rate": 1.9860803913574876e-05, "loss": 0.1131, "step": 2126 }, { "epoch": 2.0451923076923078, "grad_norm": 4.2658281326293945, "learning_rate": 1.986059671960846e-05, "loss": 0.1675, "step": 2127 }, { "epoch": 2.046153846153846, "grad_norm": 5.449405193328857, "learning_rate": 1.9860389372634527e-05, "loss": 0.2332, "step": 2128 }, { "epoch": 2.0471153846153847, "grad_norm": 4.518230438232422, "learning_rate": 1.9860181872656288e-05, "loss": 0.1209, "step": 2129 }, { "epoch": 2.048076923076923, "grad_norm": 3.6374998092651367, "learning_rate": 1.9859974219676967e-05, "loss": 0.0859, "step": 2130 }, { "epoch": 2.0490384615384616, "grad_norm": 3.404751777648926, "learning_rate": 1.985976641369978e-05, "loss": 0.0976, "step": 2131 }, { "epoch": 2.05, "grad_norm": 5.017772197723389, "learning_rate": 1.985955845472796e-05, "loss": 0.1422, "step": 2132 }, { "epoch": 2.0509615384615385, "grad_norm": 4.528428077697754, "learning_rate": 1.9859350342764728e-05, "loss": 0.1165, "step": 2133 }, { "epoch": 2.0519230769230767, "grad_norm": 5.300755023956299, "learning_rate": 1.985914207781332e-05, "loss": 0.146, "step": 2134 }, { "epoch": 2.0528846153846154, "grad_norm": 4.168493747711182, "learning_rate": 1.9858933659876963e-05, "loss": 0.0964, "step": 2135 }, { "epoch": 2.0538461538461537, "grad_norm": 6.322251319885254, "learning_rate": 1.985872508895889e-05, "loss": 0.1664, "step": 2136 }, { "epoch": 2.0548076923076923, "grad_norm": 5.7826828956604, "learning_rate": 1.9858516365062334e-05, "loss": 0.1483, "step": 2137 }, { "epoch": 2.0557692307692306, "grad_norm": 4.122310638427734, "learning_rate": 1.985830748819054e-05, "loss": 0.1282, "step": 2138 }, { "epoch": 2.0567307692307693, "grad_norm": 3.1005330085754395, "learning_rate": 1.985809845834675e-05, "loss": 0.0786, "step": 2139 }, { "epoch": 2.0576923076923075, "grad_norm": 4.114658355712891, "learning_rate": 1.9857889275534205e-05, "loss": 0.1224, "step": 2140 }, { "epoch": 2.058653846153846, "grad_norm": 4.626908302307129, "learning_rate": 1.985767993975615e-05, "loss": 0.1555, "step": 2141 }, { "epoch": 2.0596153846153844, "grad_norm": 4.389204502105713, "learning_rate": 1.9857470451015834e-05, "loss": 0.1599, "step": 2142 }, { "epoch": 2.060576923076923, "grad_norm": 5.499892711639404, "learning_rate": 1.985726080931651e-05, "loss": 0.2316, "step": 2143 }, { "epoch": 2.0615384615384613, "grad_norm": 4.738029479980469, "learning_rate": 1.9857051014661428e-05, "loss": 0.2044, "step": 2144 }, { "epoch": 2.0625, "grad_norm": 4.254354476928711, "learning_rate": 1.9856841067053844e-05, "loss": 0.117, "step": 2145 }, { "epoch": 2.0634615384615387, "grad_norm": 5.209582328796387, "learning_rate": 1.9856630966497015e-05, "loss": 0.1788, "step": 2146 }, { "epoch": 2.064423076923077, "grad_norm": 6.641270160675049, "learning_rate": 1.9856420712994205e-05, "loss": 0.1825, "step": 2147 }, { "epoch": 2.0653846153846156, "grad_norm": 4.282842636108398, "learning_rate": 1.9856210306548673e-05, "loss": 0.1217, "step": 2148 }, { "epoch": 2.066346153846154, "grad_norm": 5.440114974975586, "learning_rate": 1.9855999747163685e-05, "loss": 0.1524, "step": 2149 }, { "epoch": 2.0673076923076925, "grad_norm": 6.961729526519775, "learning_rate": 1.9855789034842504e-05, "loss": 0.4797, "step": 2150 }, { "epoch": 2.0682692307692307, "grad_norm": 5.453247547149658, "learning_rate": 1.9855578169588407e-05, "loss": 0.1644, "step": 2151 }, { "epoch": 2.0692307692307694, "grad_norm": 3.921825885772705, "learning_rate": 1.9855367151404662e-05, "loss": 0.0975, "step": 2152 }, { "epoch": 2.0701923076923077, "grad_norm": 4.631995677947998, "learning_rate": 1.9855155980294546e-05, "loss": 0.1912, "step": 2153 }, { "epoch": 2.0711538461538463, "grad_norm": 5.42642879486084, "learning_rate": 1.985494465626133e-05, "loss": 0.1928, "step": 2154 }, { "epoch": 2.0721153846153846, "grad_norm": 5.943675994873047, "learning_rate": 1.9854733179308298e-05, "loss": 0.1881, "step": 2155 }, { "epoch": 2.0730769230769233, "grad_norm": 5.358614921569824, "learning_rate": 1.985452154943873e-05, "loss": 0.1882, "step": 2156 }, { "epoch": 2.0740384615384615, "grad_norm": 3.9063656330108643, "learning_rate": 1.9854309766655913e-05, "loss": 0.1519, "step": 2157 }, { "epoch": 2.075, "grad_norm": 7.260977745056152, "learning_rate": 1.9854097830963125e-05, "loss": 0.2431, "step": 2158 }, { "epoch": 2.0759615384615384, "grad_norm": 6.808504581451416, "learning_rate": 1.9853885742363665e-05, "loss": 0.2829, "step": 2159 }, { "epoch": 2.076923076923077, "grad_norm": 5.26499080657959, "learning_rate": 1.9853673500860816e-05, "loss": 0.1469, "step": 2160 }, { "epoch": 2.0778846153846153, "grad_norm": 4.456763744354248, "learning_rate": 1.9853461106457878e-05, "loss": 0.1551, "step": 2161 }, { "epoch": 2.078846153846154, "grad_norm": 5.048472881317139, "learning_rate": 1.9853248559158144e-05, "loss": 0.1376, "step": 2162 }, { "epoch": 2.0798076923076922, "grad_norm": 4.340219497680664, "learning_rate": 1.9853035858964907e-05, "loss": 0.1477, "step": 2163 }, { "epoch": 2.080769230769231, "grad_norm": 5.556748390197754, "learning_rate": 1.9852823005881472e-05, "loss": 0.2339, "step": 2164 }, { "epoch": 2.081730769230769, "grad_norm": 4.140535831451416, "learning_rate": 1.9852609999911144e-05, "loss": 0.1046, "step": 2165 }, { "epoch": 2.082692307692308, "grad_norm": 4.560685157775879, "learning_rate": 1.9852396841057224e-05, "loss": 0.1536, "step": 2166 }, { "epoch": 2.083653846153846, "grad_norm": 4.957552433013916, "learning_rate": 1.9852183529323025e-05, "loss": 0.2005, "step": 2167 }, { "epoch": 2.0846153846153848, "grad_norm": 7.049039840698242, "learning_rate": 1.985197006471185e-05, "loss": 0.4634, "step": 2168 }, { "epoch": 2.085576923076923, "grad_norm": 5.559741020202637, "learning_rate": 1.9851756447227014e-05, "loss": 0.2565, "step": 2169 }, { "epoch": 2.0865384615384617, "grad_norm": 7.097623825073242, "learning_rate": 1.9851542676871834e-05, "loss": 0.2181, "step": 2170 }, { "epoch": 2.0875, "grad_norm": 4.584753036499023, "learning_rate": 1.9851328753649626e-05, "loss": 0.2148, "step": 2171 }, { "epoch": 2.0884615384615386, "grad_norm": 4.508141040802002, "learning_rate": 1.985111467756371e-05, "loss": 0.2122, "step": 2172 }, { "epoch": 2.089423076923077, "grad_norm": 6.060029029846191, "learning_rate": 1.9850900448617404e-05, "loss": 0.3065, "step": 2173 }, { "epoch": 2.0903846153846155, "grad_norm": 6.564747333526611, "learning_rate": 1.9850686066814035e-05, "loss": 0.2269, "step": 2174 }, { "epoch": 2.0913461538461537, "grad_norm": 5.339322566986084, "learning_rate": 1.9850471532156933e-05, "loss": 0.2492, "step": 2175 }, { "epoch": 2.0923076923076924, "grad_norm": 5.789792537689209, "learning_rate": 1.9850256844649422e-05, "loss": 0.2108, "step": 2176 }, { "epoch": 2.0932692307692307, "grad_norm": 5.06080961227417, "learning_rate": 1.9850042004294833e-05, "loss": 0.231, "step": 2177 }, { "epoch": 2.0942307692307693, "grad_norm": 5.373294830322266, "learning_rate": 1.9849827011096506e-05, "loss": 0.3452, "step": 2178 }, { "epoch": 2.0951923076923076, "grad_norm": 4.40254020690918, "learning_rate": 1.984961186505777e-05, "loss": 0.1548, "step": 2179 }, { "epoch": 2.0961538461538463, "grad_norm": 4.315722465515137, "learning_rate": 1.9849396566181968e-05, "loss": 0.1349, "step": 2180 }, { "epoch": 2.0971153846153845, "grad_norm": 4.2599992752075195, "learning_rate": 1.9849181114472435e-05, "loss": 0.1037, "step": 2181 }, { "epoch": 2.098076923076923, "grad_norm": 5.416462421417236, "learning_rate": 1.984896550993252e-05, "loss": 0.257, "step": 2182 }, { "epoch": 2.0990384615384614, "grad_norm": 5.041589260101318, "learning_rate": 1.9848749752565568e-05, "loss": 0.1884, "step": 2183 }, { "epoch": 2.1, "grad_norm": 3.630223274230957, "learning_rate": 1.9848533842374925e-05, "loss": 0.105, "step": 2184 }, { "epoch": 2.1009615384615383, "grad_norm": 5.540608882904053, "learning_rate": 1.984831777936394e-05, "loss": 0.1576, "step": 2185 }, { "epoch": 2.101923076923077, "grad_norm": 2.9933619499206543, "learning_rate": 1.984810156353597e-05, "loss": 0.0997, "step": 2186 }, { "epoch": 2.1028846153846152, "grad_norm": 4.701622009277344, "learning_rate": 1.9847885194894366e-05, "loss": 0.1811, "step": 2187 }, { "epoch": 2.103846153846154, "grad_norm": 3.1676318645477295, "learning_rate": 1.9847668673442485e-05, "loss": 0.0987, "step": 2188 }, { "epoch": 2.104807692307692, "grad_norm": 4.856916904449463, "learning_rate": 1.9847451999183692e-05, "loss": 0.2075, "step": 2189 }, { "epoch": 2.105769230769231, "grad_norm": 3.7379093170166016, "learning_rate": 1.9847235172121348e-05, "loss": 0.1761, "step": 2190 }, { "epoch": 2.106730769230769, "grad_norm": 5.633196830749512, "learning_rate": 1.984701819225881e-05, "loss": 0.2786, "step": 2191 }, { "epoch": 2.1076923076923078, "grad_norm": 4.8044328689575195, "learning_rate": 1.9846801059599455e-05, "loss": 0.18, "step": 2192 }, { "epoch": 2.108653846153846, "grad_norm": 3.974574327468872, "learning_rate": 1.9846583774146647e-05, "loss": 0.2222, "step": 2193 }, { "epoch": 2.1096153846153847, "grad_norm": 4.693988800048828, "learning_rate": 1.9846366335903753e-05, "loss": 0.1891, "step": 2194 }, { "epoch": 2.110576923076923, "grad_norm": 5.354927062988281, "learning_rate": 1.9846148744874158e-05, "loss": 0.2408, "step": 2195 }, { "epoch": 2.1115384615384616, "grad_norm": 4.119748592376709, "learning_rate": 1.9845931001061228e-05, "loss": 0.155, "step": 2196 }, { "epoch": 2.1125, "grad_norm": 5.759659290313721, "learning_rate": 1.984571310446835e-05, "loss": 0.2904, "step": 2197 }, { "epoch": 2.1134615384615385, "grad_norm": 5.482840538024902, "learning_rate": 1.98454950550989e-05, "loss": 0.2701, "step": 2198 }, { "epoch": 2.1144230769230767, "grad_norm": 5.026867866516113, "learning_rate": 1.9845276852956264e-05, "loss": 0.183, "step": 2199 }, { "epoch": 2.1153846153846154, "grad_norm": 4.26967191696167, "learning_rate": 1.9845058498043825e-05, "loss": 0.1309, "step": 2200 }, { "epoch": 2.1163461538461537, "grad_norm": 3.907182216644287, "learning_rate": 1.9844839990364976e-05, "loss": 0.1554, "step": 2201 }, { "epoch": 2.1173076923076923, "grad_norm": 4.509848117828369, "learning_rate": 1.98446213299231e-05, "loss": 0.0976, "step": 2202 }, { "epoch": 2.1182692307692306, "grad_norm": 4.209320545196533, "learning_rate": 1.98444025167216e-05, "loss": 0.1313, "step": 2203 }, { "epoch": 2.1192307692307693, "grad_norm": 3.625081777572632, "learning_rate": 1.984418355076386e-05, "loss": 0.0855, "step": 2204 }, { "epoch": 2.1201923076923075, "grad_norm": 4.871405601501465, "learning_rate": 1.984396443205329e-05, "loss": 0.2046, "step": 2205 }, { "epoch": 2.121153846153846, "grad_norm": 5.112370491027832, "learning_rate": 1.9843745160593277e-05, "loss": 0.2344, "step": 2206 }, { "epoch": 2.1221153846153844, "grad_norm": 3.932637929916382, "learning_rate": 1.9843525736387235e-05, "loss": 0.0963, "step": 2207 }, { "epoch": 2.123076923076923, "grad_norm": 4.6125335693359375, "learning_rate": 1.9843306159438563e-05, "loss": 0.1764, "step": 2208 }, { "epoch": 2.1240384615384613, "grad_norm": 7.632472038269043, "learning_rate": 1.9843086429750668e-05, "loss": 0.309, "step": 2209 }, { "epoch": 2.125, "grad_norm": 4.6046624183654785, "learning_rate": 1.984286654732696e-05, "loss": 0.1819, "step": 2210 }, { "epoch": 2.1259615384615387, "grad_norm": 4.8147053718566895, "learning_rate": 1.9842646512170853e-05, "loss": 0.1627, "step": 2211 }, { "epoch": 2.126923076923077, "grad_norm": 4.92249870300293, "learning_rate": 1.984242632428576e-05, "loss": 0.1217, "step": 2212 }, { "epoch": 2.127884615384615, "grad_norm": 6.447347164154053, "learning_rate": 1.9842205983675098e-05, "loss": 0.2462, "step": 2213 }, { "epoch": 2.128846153846154, "grad_norm": 6.554574966430664, "learning_rate": 1.9841985490342287e-05, "loss": 0.3032, "step": 2214 }, { "epoch": 2.1298076923076925, "grad_norm": 6.356879234313965, "learning_rate": 1.9841764844290744e-05, "loss": 0.1833, "step": 2215 }, { "epoch": 2.1307692307692307, "grad_norm": 5.4441657066345215, "learning_rate": 1.9841544045523898e-05, "loss": 0.3258, "step": 2216 }, { "epoch": 2.1317307692307694, "grad_norm": 4.585752010345459, "learning_rate": 1.984132309404517e-05, "loss": 0.1485, "step": 2217 }, { "epoch": 2.1326923076923077, "grad_norm": 7.4491167068481445, "learning_rate": 1.9841101989857994e-05, "loss": 0.2705, "step": 2218 }, { "epoch": 2.1336538461538463, "grad_norm": 3.6626012325286865, "learning_rate": 1.98408807329658e-05, "loss": 0.0818, "step": 2219 }, { "epoch": 2.1346153846153846, "grad_norm": 5.549941062927246, "learning_rate": 1.9840659323372015e-05, "loss": 0.2074, "step": 2220 }, { "epoch": 2.1355769230769233, "grad_norm": 4.482202053070068, "learning_rate": 1.9840437761080084e-05, "loss": 0.2207, "step": 2221 }, { "epoch": 2.1365384615384615, "grad_norm": 5.457995414733887, "learning_rate": 1.984021604609344e-05, "loss": 0.2085, "step": 2222 }, { "epoch": 2.1375, "grad_norm": 6.250122547149658, "learning_rate": 1.983999417841552e-05, "loss": 0.1936, "step": 2223 }, { "epoch": 2.1384615384615384, "grad_norm": 5.487456321716309, "learning_rate": 1.983977215804977e-05, "loss": 0.1859, "step": 2224 }, { "epoch": 2.139423076923077, "grad_norm": 3.454246759414673, "learning_rate": 1.9839549984999638e-05, "loss": 0.1203, "step": 2225 }, { "epoch": 2.1403846153846153, "grad_norm": 4.673292636871338, "learning_rate": 1.983932765926857e-05, "loss": 0.108, "step": 2226 }, { "epoch": 2.141346153846154, "grad_norm": 4.8801960945129395, "learning_rate": 1.9839105180860015e-05, "loss": 0.1733, "step": 2227 }, { "epoch": 2.1423076923076922, "grad_norm": 2.9276909828186035, "learning_rate": 1.9838882549777426e-05, "loss": 0.0853, "step": 2228 }, { "epoch": 2.143269230769231, "grad_norm": 8.23082160949707, "learning_rate": 1.983865976602425e-05, "loss": 0.3425, "step": 2229 }, { "epoch": 2.144230769230769, "grad_norm": 4.887610912322998, "learning_rate": 1.983843682960396e-05, "loss": 0.1741, "step": 2230 }, { "epoch": 2.145192307692308, "grad_norm": 4.881590843200684, "learning_rate": 1.9838213740519996e-05, "loss": 0.1824, "step": 2231 }, { "epoch": 2.146153846153846, "grad_norm": 4.7717695236206055, "learning_rate": 1.9837990498775834e-05, "loss": 0.1741, "step": 2232 }, { "epoch": 2.1471153846153848, "grad_norm": 7.28452730178833, "learning_rate": 1.9837767104374935e-05, "loss": 0.33, "step": 2233 }, { "epoch": 2.148076923076923, "grad_norm": 4.473417282104492, "learning_rate": 1.9837543557320763e-05, "loss": 0.1736, "step": 2234 }, { "epoch": 2.1490384615384617, "grad_norm": 4.573214530944824, "learning_rate": 1.983731985761679e-05, "loss": 0.1298, "step": 2235 }, { "epoch": 2.15, "grad_norm": 5.305929660797119, "learning_rate": 1.983709600526648e-05, "loss": 0.2362, "step": 2236 }, { "epoch": 2.1509615384615386, "grad_norm": 5.406174659729004, "learning_rate": 1.9836872000273314e-05, "loss": 0.1887, "step": 2237 }, { "epoch": 2.151923076923077, "grad_norm": 5.551350116729736, "learning_rate": 1.9836647842640767e-05, "loss": 0.1881, "step": 2238 }, { "epoch": 2.1528846153846155, "grad_norm": 2.6238317489624023, "learning_rate": 1.9836423532372315e-05, "loss": 0.0846, "step": 2239 }, { "epoch": 2.1538461538461537, "grad_norm": 5.203509330749512, "learning_rate": 1.983619906947144e-05, "loss": 0.1557, "step": 2240 }, { "epoch": 2.1548076923076924, "grad_norm": 5.77446174621582, "learning_rate": 1.9835974453941623e-05, "loss": 0.2178, "step": 2241 }, { "epoch": 2.1557692307692307, "grad_norm": 4.1042351722717285, "learning_rate": 1.9835749685786346e-05, "loss": 0.0981, "step": 2242 }, { "epoch": 2.1567307692307693, "grad_norm": 4.670960903167725, "learning_rate": 1.9835524765009108e-05, "loss": 0.1893, "step": 2243 }, { "epoch": 2.1576923076923076, "grad_norm": 5.423583030700684, "learning_rate": 1.9835299691613393e-05, "loss": 0.2536, "step": 2244 }, { "epoch": 2.1586538461538463, "grad_norm": 4.5882062911987305, "learning_rate": 1.983507446560269e-05, "loss": 0.154, "step": 2245 }, { "epoch": 2.1596153846153845, "grad_norm": 5.266811847686768, "learning_rate": 1.9834849086980498e-05, "loss": 0.2213, "step": 2246 }, { "epoch": 2.160576923076923, "grad_norm": 4.728659152984619, "learning_rate": 1.9834623555750312e-05, "loss": 0.1546, "step": 2247 }, { "epoch": 2.1615384615384614, "grad_norm": 4.657991409301758, "learning_rate": 1.9834397871915634e-05, "loss": 0.1258, "step": 2248 }, { "epoch": 2.1625, "grad_norm": 7.695667743682861, "learning_rate": 1.9834172035479965e-05, "loss": 0.3141, "step": 2249 }, { "epoch": 2.1634615384615383, "grad_norm": 5.947411060333252, "learning_rate": 1.9833946046446807e-05, "loss": 0.3334, "step": 2250 }, { "epoch": 2.164423076923077, "grad_norm": 6.362522602081299, "learning_rate": 1.9833719904819668e-05, "loss": 0.3533, "step": 2251 }, { "epoch": 2.1653846153846152, "grad_norm": 3.8463711738586426, "learning_rate": 1.983349361060206e-05, "loss": 0.1157, "step": 2252 }, { "epoch": 2.166346153846154, "grad_norm": 4.3210577964782715, "learning_rate": 1.9833267163797495e-05, "loss": 0.1116, "step": 2253 }, { "epoch": 2.167307692307692, "grad_norm": 5.255401611328125, "learning_rate": 1.983304056440948e-05, "loss": 0.1684, "step": 2254 }, { "epoch": 2.168269230769231, "grad_norm": 4.747062683105469, "learning_rate": 1.9832813812441534e-05, "loss": 0.1446, "step": 2255 }, { "epoch": 2.169230769230769, "grad_norm": 4.928251266479492, "learning_rate": 1.9832586907897176e-05, "loss": 0.1932, "step": 2256 }, { "epoch": 2.1701923076923078, "grad_norm": 5.402665138244629, "learning_rate": 1.983235985077993e-05, "loss": 0.2361, "step": 2257 }, { "epoch": 2.171153846153846, "grad_norm": 9.484089851379395, "learning_rate": 1.983213264109332e-05, "loss": 0.2302, "step": 2258 }, { "epoch": 2.1721153846153847, "grad_norm": 6.316507339477539, "learning_rate": 1.983190527884086e-05, "loss": 0.3541, "step": 2259 }, { "epoch": 2.173076923076923, "grad_norm": 5.393300533294678, "learning_rate": 1.9831677764026092e-05, "loss": 0.2535, "step": 2260 }, { "epoch": 2.1740384615384616, "grad_norm": 5.709763526916504, "learning_rate": 1.983145009665254e-05, "loss": 0.2582, "step": 2261 }, { "epoch": 2.175, "grad_norm": 3.854978322982788, "learning_rate": 1.9831222276723737e-05, "loss": 0.1253, "step": 2262 }, { "epoch": 2.1759615384615385, "grad_norm": 3.2109534740448, "learning_rate": 1.983099430424322e-05, "loss": 0.11, "step": 2263 }, { "epoch": 2.1769230769230767, "grad_norm": 3.5666377544403076, "learning_rate": 1.9830766179214523e-05, "loss": 0.1364, "step": 2264 }, { "epoch": 2.1778846153846154, "grad_norm": 5.531325340270996, "learning_rate": 1.9830537901641192e-05, "loss": 0.3211, "step": 2265 }, { "epoch": 2.1788461538461537, "grad_norm": 4.133999347686768, "learning_rate": 1.983030947152676e-05, "loss": 0.1259, "step": 2266 }, { "epoch": 2.1798076923076923, "grad_norm": 6.159808158874512, "learning_rate": 1.983008088887478e-05, "loss": 0.2674, "step": 2267 }, { "epoch": 2.1807692307692306, "grad_norm": 6.769220352172852, "learning_rate": 1.9829852153688792e-05, "loss": 0.1678, "step": 2268 }, { "epoch": 2.1817307692307693, "grad_norm": 4.280147552490234, "learning_rate": 1.982962326597235e-05, "loss": 0.1562, "step": 2269 }, { "epoch": 2.1826923076923075, "grad_norm": 6.4402031898498535, "learning_rate": 1.9829394225729006e-05, "loss": 0.2454, "step": 2270 }, { "epoch": 2.183653846153846, "grad_norm": 3.568971633911133, "learning_rate": 1.982916503296231e-05, "loss": 0.1362, "step": 2271 }, { "epoch": 2.184615384615385, "grad_norm": 4.512812614440918, "learning_rate": 1.9828935687675823e-05, "loss": 0.1689, "step": 2272 }, { "epoch": 2.185576923076923, "grad_norm": 5.386707305908203, "learning_rate": 1.9828706189873103e-05, "loss": 0.102, "step": 2273 }, { "epoch": 2.1865384615384613, "grad_norm": 4.472113609313965, "learning_rate": 1.982847653955771e-05, "loss": 0.1745, "step": 2274 }, { "epoch": 2.1875, "grad_norm": 3.394829511642456, "learning_rate": 1.9828246736733205e-05, "loss": 0.1089, "step": 2275 }, { "epoch": 2.1884615384615387, "grad_norm": 5.648441791534424, "learning_rate": 1.9828016781403155e-05, "loss": 0.178, "step": 2276 }, { "epoch": 2.189423076923077, "grad_norm": 8.01720905303955, "learning_rate": 1.9827786673571132e-05, "loss": 0.2643, "step": 2277 }, { "epoch": 2.190384615384615, "grad_norm": 5.506884574890137, "learning_rate": 1.9827556413240706e-05, "loss": 0.188, "step": 2278 }, { "epoch": 2.191346153846154, "grad_norm": 4.837033271789551, "learning_rate": 1.9827326000415443e-05, "loss": 0.2239, "step": 2279 }, { "epoch": 2.1923076923076925, "grad_norm": 5.916864395141602, "learning_rate": 1.9827095435098926e-05, "loss": 0.2368, "step": 2280 }, { "epoch": 2.1932692307692307, "grad_norm": 4.072029113769531, "learning_rate": 1.982686471729473e-05, "loss": 0.1429, "step": 2281 }, { "epoch": 2.1942307692307694, "grad_norm": 4.820331573486328, "learning_rate": 1.9826633847006436e-05, "loss": 0.2481, "step": 2282 }, { "epoch": 2.1951923076923077, "grad_norm": 6.902980327606201, "learning_rate": 1.9826402824237623e-05, "loss": 0.3489, "step": 2283 }, { "epoch": 2.1961538461538463, "grad_norm": 4.048612117767334, "learning_rate": 1.9826171648991878e-05, "loss": 0.1822, "step": 2284 }, { "epoch": 2.1971153846153846, "grad_norm": 4.748249053955078, "learning_rate": 1.9825940321272792e-05, "loss": 0.2378, "step": 2285 }, { "epoch": 2.1980769230769233, "grad_norm": 2.5668118000030518, "learning_rate": 1.9825708841083948e-05, "loss": 0.0744, "step": 2286 }, { "epoch": 2.1990384615384615, "grad_norm": 4.866230010986328, "learning_rate": 1.9825477208428942e-05, "loss": 0.1476, "step": 2287 }, { "epoch": 2.2, "grad_norm": 5.58302116394043, "learning_rate": 1.9825245423311367e-05, "loss": 0.2158, "step": 2288 }, { "epoch": 2.2009615384615384, "grad_norm": 2.64449143409729, "learning_rate": 1.982501348573482e-05, "loss": 0.0923, "step": 2289 }, { "epoch": 2.201923076923077, "grad_norm": 5.327755451202393, "learning_rate": 1.98247813957029e-05, "loss": 0.1865, "step": 2290 }, { "epoch": 2.2028846153846153, "grad_norm": 5.8152756690979, "learning_rate": 1.9824549153219207e-05, "loss": 0.1609, "step": 2291 }, { "epoch": 2.203846153846154, "grad_norm": 4.9358062744140625, "learning_rate": 1.9824316758287347e-05, "loss": 0.0903, "step": 2292 }, { "epoch": 2.2048076923076922, "grad_norm": 5.16174840927124, "learning_rate": 1.9824084210910924e-05, "loss": 0.1658, "step": 2293 }, { "epoch": 2.205769230769231, "grad_norm": 6.0690226554870605, "learning_rate": 1.982385151109355e-05, "loss": 0.2485, "step": 2294 }, { "epoch": 2.206730769230769, "grad_norm": 4.624405860900879, "learning_rate": 1.982361865883883e-05, "loss": 0.1514, "step": 2295 }, { "epoch": 2.207692307692308, "grad_norm": 3.849137306213379, "learning_rate": 1.9823385654150382e-05, "loss": 0.1168, "step": 2296 }, { "epoch": 2.208653846153846, "grad_norm": 5.406275749206543, "learning_rate": 1.982315249703182e-05, "loss": 0.2037, "step": 2297 }, { "epoch": 2.2096153846153848, "grad_norm": 4.621893405914307, "learning_rate": 1.982291918748676e-05, "loss": 0.1152, "step": 2298 }, { "epoch": 2.210576923076923, "grad_norm": 4.3774285316467285, "learning_rate": 1.9822685725518825e-05, "loss": 0.1098, "step": 2299 }, { "epoch": 2.2115384615384617, "grad_norm": 4.004819393157959, "learning_rate": 1.9822452111131638e-05, "loss": 0.1542, "step": 2300 }, { "epoch": 2.2125, "grad_norm": 6.011551380157471, "learning_rate": 1.9822218344328824e-05, "loss": 0.1941, "step": 2301 }, { "epoch": 2.2134615384615386, "grad_norm": 5.695701599121094, "learning_rate": 1.9821984425114005e-05, "loss": 0.2079, "step": 2302 }, { "epoch": 2.214423076923077, "grad_norm": 5.943024158477783, "learning_rate": 1.9821750353490817e-05, "loss": 0.2358, "step": 2303 }, { "epoch": 2.2153846153846155, "grad_norm": 6.543925762176514, "learning_rate": 1.982151612946289e-05, "loss": 0.2337, "step": 2304 }, { "epoch": 2.2163461538461537, "grad_norm": 5.016531944274902, "learning_rate": 1.982128175303386e-05, "loss": 0.1017, "step": 2305 }, { "epoch": 2.2173076923076924, "grad_norm": 2.764589786529541, "learning_rate": 1.9821047224207362e-05, "loss": 0.0737, "step": 2306 }, { "epoch": 2.2182692307692307, "grad_norm": 4.215437889099121, "learning_rate": 1.9820812542987033e-05, "loss": 0.1987, "step": 2307 }, { "epoch": 2.2192307692307693, "grad_norm": 4.4215216636657715, "learning_rate": 1.982057770937652e-05, "loss": 0.1516, "step": 2308 }, { "epoch": 2.2201923076923076, "grad_norm": 4.607921123504639, "learning_rate": 1.982034272337946e-05, "loss": 0.244, "step": 2309 }, { "epoch": 2.2211538461538463, "grad_norm": 4.336925983428955, "learning_rate": 1.982010758499951e-05, "loss": 0.1962, "step": 2310 }, { "epoch": 2.2221153846153845, "grad_norm": 4.0558953285217285, "learning_rate": 1.9819872294240306e-05, "loss": 0.1062, "step": 2311 }, { "epoch": 2.223076923076923, "grad_norm": 5.041899681091309, "learning_rate": 1.9819636851105506e-05, "loss": 0.235, "step": 2312 }, { "epoch": 2.2240384615384614, "grad_norm": 3.7538065910339355, "learning_rate": 1.9819401255598763e-05, "loss": 0.1231, "step": 2313 }, { "epoch": 2.225, "grad_norm": 5.2985453605651855, "learning_rate": 1.981916550772373e-05, "loss": 0.256, "step": 2314 }, { "epoch": 2.2259615384615383, "grad_norm": 4.808471202850342, "learning_rate": 1.981892960748407e-05, "loss": 0.1381, "step": 2315 }, { "epoch": 2.226923076923077, "grad_norm": 6.154930591583252, "learning_rate": 1.981869355488344e-05, "loss": 0.3792, "step": 2316 }, { "epoch": 2.2278846153846152, "grad_norm": 5.815457344055176, "learning_rate": 1.9818457349925505e-05, "loss": 0.3784, "step": 2317 }, { "epoch": 2.228846153846154, "grad_norm": 4.951245307922363, "learning_rate": 1.9818220992613927e-05, "loss": 0.1979, "step": 2318 }, { "epoch": 2.229807692307692, "grad_norm": 4.463572025299072, "learning_rate": 1.9817984482952378e-05, "loss": 0.1624, "step": 2319 }, { "epoch": 2.230769230769231, "grad_norm": 4.876315593719482, "learning_rate": 1.9817747820944522e-05, "loss": 0.2072, "step": 2320 }, { "epoch": 2.231730769230769, "grad_norm": 4.3576579093933105, "learning_rate": 1.9817511006594038e-05, "loss": 0.2116, "step": 2321 }, { "epoch": 2.2326923076923078, "grad_norm": 6.104149341583252, "learning_rate": 1.9817274039904593e-05, "loss": 0.2107, "step": 2322 }, { "epoch": 2.233653846153846, "grad_norm": 4.155449390411377, "learning_rate": 1.981703692087987e-05, "loss": 0.1276, "step": 2323 }, { "epoch": 2.2346153846153847, "grad_norm": 4.957707405090332, "learning_rate": 1.9816799649523546e-05, "loss": 0.1868, "step": 2324 }, { "epoch": 2.235576923076923, "grad_norm": 5.985584259033203, "learning_rate": 1.9816562225839304e-05, "loss": 0.2039, "step": 2325 }, { "epoch": 2.2365384615384616, "grad_norm": 5.229196548461914, "learning_rate": 1.9816324649830827e-05, "loss": 0.16, "step": 2326 }, { "epoch": 2.2375, "grad_norm": 5.858182430267334, "learning_rate": 1.9816086921501804e-05, "loss": 0.1669, "step": 2327 }, { "epoch": 2.2384615384615385, "grad_norm": 3.483557939529419, "learning_rate": 1.981584904085592e-05, "loss": 0.0892, "step": 2328 }, { "epoch": 2.2394230769230767, "grad_norm": 5.273862838745117, "learning_rate": 1.981561100789687e-05, "loss": 0.1324, "step": 2329 }, { "epoch": 2.2403846153846154, "grad_norm": 4.7940850257873535, "learning_rate": 1.9815372822628343e-05, "loss": 0.1165, "step": 2330 }, { "epoch": 2.2413461538461537, "grad_norm": 5.137730121612549, "learning_rate": 1.9815134485054036e-05, "loss": 0.2752, "step": 2331 }, { "epoch": 2.2423076923076923, "grad_norm": 5.855929851531982, "learning_rate": 1.9814895995177653e-05, "loss": 0.1322, "step": 2332 }, { "epoch": 2.2432692307692306, "grad_norm": 3.9163103103637695, "learning_rate": 1.981465735300289e-05, "loss": 0.1029, "step": 2333 }, { "epoch": 2.2442307692307693, "grad_norm": 6.093039035797119, "learning_rate": 1.981441855853345e-05, "loss": 0.1433, "step": 2334 }, { "epoch": 2.2451923076923075, "grad_norm": 5.452695369720459, "learning_rate": 1.981417961177304e-05, "loss": 0.2878, "step": 2335 }, { "epoch": 2.246153846153846, "grad_norm": 4.675502300262451, "learning_rate": 1.9813940512725363e-05, "loss": 0.1643, "step": 2336 }, { "epoch": 2.247115384615385, "grad_norm": 3.9969770908355713, "learning_rate": 1.9813701261394136e-05, "loss": 0.1452, "step": 2337 }, { "epoch": 2.248076923076923, "grad_norm": 4.700881004333496, "learning_rate": 1.981346185778307e-05, "loss": 0.2231, "step": 2338 }, { "epoch": 2.2490384615384613, "grad_norm": 4.847859859466553, "learning_rate": 1.9813222301895873e-05, "loss": 0.2008, "step": 2339 }, { "epoch": 2.25, "grad_norm": 7.083683967590332, "learning_rate": 1.981298259373627e-05, "loss": 0.3057, "step": 2340 }, { "epoch": 2.2509615384615387, "grad_norm": 2.7536110877990723, "learning_rate": 1.981274273330798e-05, "loss": 0.0765, "step": 2341 }, { "epoch": 2.251923076923077, "grad_norm": 4.812708854675293, "learning_rate": 1.981250272061472e-05, "loss": 0.1226, "step": 2342 }, { "epoch": 2.252884615384615, "grad_norm": 6.351274490356445, "learning_rate": 1.9812262555660216e-05, "loss": 0.2141, "step": 2343 }, { "epoch": 2.253846153846154, "grad_norm": 5.617355823516846, "learning_rate": 1.9812022238448197e-05, "loss": 0.1647, "step": 2344 }, { "epoch": 2.2548076923076925, "grad_norm": 5.278698921203613, "learning_rate": 1.9811781768982392e-05, "loss": 0.2063, "step": 2345 }, { "epoch": 2.2557692307692307, "grad_norm": 6.191963195800781, "learning_rate": 1.981154114726653e-05, "loss": 0.2249, "step": 2346 }, { "epoch": 2.256730769230769, "grad_norm": 4.06395959854126, "learning_rate": 1.9811300373304348e-05, "loss": 0.1201, "step": 2347 }, { "epoch": 2.2576923076923077, "grad_norm": 7.122890949249268, "learning_rate": 1.9811059447099577e-05, "loss": 0.2662, "step": 2348 }, { "epoch": 2.2586538461538463, "grad_norm": 5.552013397216797, "learning_rate": 1.981081836865596e-05, "loss": 0.226, "step": 2349 }, { "epoch": 2.2596153846153846, "grad_norm": 5.492395877838135, "learning_rate": 1.9810577137977236e-05, "loss": 0.2808, "step": 2350 }, { "epoch": 2.2605769230769233, "grad_norm": 6.5810980796813965, "learning_rate": 1.981033575506715e-05, "loss": 0.2364, "step": 2351 }, { "epoch": 2.2615384615384615, "grad_norm": 4.247910499572754, "learning_rate": 1.9810094219929444e-05, "loss": 0.1182, "step": 2352 }, { "epoch": 2.2625, "grad_norm": 4.050240516662598, "learning_rate": 1.980985253256787e-05, "loss": 0.1216, "step": 2353 }, { "epoch": 2.2634615384615384, "grad_norm": 5.84533166885376, "learning_rate": 1.9809610692986174e-05, "loss": 0.2002, "step": 2354 }, { "epoch": 2.264423076923077, "grad_norm": 4.465249538421631, "learning_rate": 1.9809368701188114e-05, "loss": 0.1563, "step": 2355 }, { "epoch": 2.2653846153846153, "grad_norm": 4.321116924285889, "learning_rate": 1.9809126557177437e-05, "loss": 0.1751, "step": 2356 }, { "epoch": 2.266346153846154, "grad_norm": 4.138084411621094, "learning_rate": 1.980888426095791e-05, "loss": 0.17, "step": 2357 }, { "epoch": 2.2673076923076922, "grad_norm": 9.196600914001465, "learning_rate": 1.9808641812533286e-05, "loss": 0.1958, "step": 2358 }, { "epoch": 2.268269230769231, "grad_norm": 4.620643615722656, "learning_rate": 1.980839921190733e-05, "loss": 0.0802, "step": 2359 }, { "epoch": 2.269230769230769, "grad_norm": 3.8093464374542236, "learning_rate": 1.9808156459083805e-05, "loss": 0.1792, "step": 2360 }, { "epoch": 2.270192307692308, "grad_norm": 41.30131149291992, "learning_rate": 1.9807913554066476e-05, "loss": 0.1739, "step": 2361 }, { "epoch": 2.271153846153846, "grad_norm": 7.151562213897705, "learning_rate": 1.9807670496859117e-05, "loss": 0.1323, "step": 2362 }, { "epoch": 2.2721153846153848, "grad_norm": 5.023430347442627, "learning_rate": 1.9807427287465496e-05, "loss": 0.3154, "step": 2363 }, { "epoch": 2.273076923076923, "grad_norm": 5.444788932800293, "learning_rate": 1.980718392588939e-05, "loss": 0.1648, "step": 2364 }, { "epoch": 2.2740384615384617, "grad_norm": 3.0326220989227295, "learning_rate": 1.9806940412134575e-05, "loss": 0.0692, "step": 2365 }, { "epoch": 2.275, "grad_norm": 6.845336437225342, "learning_rate": 1.9806696746204822e-05, "loss": 0.3069, "step": 2366 }, { "epoch": 2.2759615384615386, "grad_norm": 6.808903217315674, "learning_rate": 1.980645292810392e-05, "loss": 0.2365, "step": 2367 }, { "epoch": 2.276923076923077, "grad_norm": 4.580109596252441, "learning_rate": 1.9806208957835653e-05, "loss": 0.1326, "step": 2368 }, { "epoch": 2.2778846153846155, "grad_norm": 6.792446613311768, "learning_rate": 1.9805964835403803e-05, "loss": 0.4199, "step": 2369 }, { "epoch": 2.2788461538461537, "grad_norm": 3.8416144847869873, "learning_rate": 1.9805720560812153e-05, "loss": 0.1342, "step": 2370 }, { "epoch": 2.2798076923076924, "grad_norm": 8.097745895385742, "learning_rate": 1.980547613406451e-05, "loss": 0.2926, "step": 2371 }, { "epoch": 2.2807692307692307, "grad_norm": 3.8633389472961426, "learning_rate": 1.9805231555164644e-05, "loss": 0.2137, "step": 2372 }, { "epoch": 2.2817307692307693, "grad_norm": 2.767080068588257, "learning_rate": 1.980498682411637e-05, "loss": 0.0667, "step": 2373 }, { "epoch": 2.2826923076923076, "grad_norm": 5.6749653816223145, "learning_rate": 1.980474194092348e-05, "loss": 0.1254, "step": 2374 }, { "epoch": 2.2836538461538463, "grad_norm": 5.688784122467041, "learning_rate": 1.9804496905589764e-05, "loss": 0.3744, "step": 2375 }, { "epoch": 2.2846153846153845, "grad_norm": 3.556884527206421, "learning_rate": 1.9804251718119034e-05, "loss": 0.1223, "step": 2376 }, { "epoch": 2.285576923076923, "grad_norm": 4.802083969116211, "learning_rate": 1.9804006378515096e-05, "loss": 0.1643, "step": 2377 }, { "epoch": 2.2865384615384614, "grad_norm": 4.26326847076416, "learning_rate": 1.980376088678175e-05, "loss": 0.1236, "step": 2378 }, { "epoch": 2.2875, "grad_norm": 2.858137607574463, "learning_rate": 1.980351524292281e-05, "loss": 0.0704, "step": 2379 }, { "epoch": 2.2884615384615383, "grad_norm": 5.093818664550781, "learning_rate": 1.9803269446942087e-05, "loss": 0.2406, "step": 2380 }, { "epoch": 2.289423076923077, "grad_norm": 5.01037073135376, "learning_rate": 1.9803023498843394e-05, "loss": 0.1864, "step": 2381 }, { "epoch": 2.2903846153846152, "grad_norm": 5.699233531951904, "learning_rate": 1.9802777398630547e-05, "loss": 0.288, "step": 2382 }, { "epoch": 2.291346153846154, "grad_norm": 5.99445104598999, "learning_rate": 1.9802531146307368e-05, "loss": 0.2731, "step": 2383 }, { "epoch": 2.292307692307692, "grad_norm": 5.08089017868042, "learning_rate": 1.9802284741877674e-05, "loss": 0.2485, "step": 2384 }, { "epoch": 2.293269230769231, "grad_norm": 3.701416254043579, "learning_rate": 1.980203818534529e-05, "loss": 0.0653, "step": 2385 }, { "epoch": 2.294230769230769, "grad_norm": 6.607005596160889, "learning_rate": 1.9801791476714042e-05, "loss": 0.2068, "step": 2386 }, { "epoch": 2.2951923076923078, "grad_norm": 3.949484348297119, "learning_rate": 1.980154461598776e-05, "loss": 0.1207, "step": 2387 }, { "epoch": 2.296153846153846, "grad_norm": 6.1711506843566895, "learning_rate": 1.980129760317027e-05, "loss": 0.2535, "step": 2388 }, { "epoch": 2.2971153846153847, "grad_norm": 4.174452304840088, "learning_rate": 1.9801050438265407e-05, "loss": 0.1525, "step": 2389 }, { "epoch": 2.298076923076923, "grad_norm": 6.075349807739258, "learning_rate": 1.9800803121277012e-05, "loss": 0.2384, "step": 2390 }, { "epoch": 2.2990384615384616, "grad_norm": 4.794215679168701, "learning_rate": 1.9800555652208913e-05, "loss": 0.3403, "step": 2391 }, { "epoch": 2.3, "grad_norm": 5.897273063659668, "learning_rate": 1.9800308031064956e-05, "loss": 0.1981, "step": 2392 }, { "epoch": 2.3009615384615385, "grad_norm": 5.589449882507324, "learning_rate": 1.9800060257848983e-05, "loss": 0.1603, "step": 2393 }, { "epoch": 2.3019230769230767, "grad_norm": 5.739092826843262, "learning_rate": 1.9799812332564836e-05, "loss": 0.1966, "step": 2394 }, { "epoch": 2.3028846153846154, "grad_norm": 7.363352298736572, "learning_rate": 1.9799564255216364e-05, "loss": 0.2166, "step": 2395 }, { "epoch": 2.3038461538461537, "grad_norm": 5.390551567077637, "learning_rate": 1.9799316025807416e-05, "loss": 0.2318, "step": 2396 }, { "epoch": 2.3048076923076923, "grad_norm": 5.186110973358154, "learning_rate": 1.9799067644341844e-05, "loss": 0.1786, "step": 2397 }, { "epoch": 2.305769230769231, "grad_norm": 3.9109480381011963, "learning_rate": 1.9798819110823503e-05, "loss": 0.0802, "step": 2398 }, { "epoch": 2.3067307692307693, "grad_norm": 7.013072490692139, "learning_rate": 1.979857042525625e-05, "loss": 0.2328, "step": 2399 }, { "epoch": 2.3076923076923075, "grad_norm": 5.781711101531982, "learning_rate": 1.9798321587643938e-05, "loss": 0.1661, "step": 2400 }, { "epoch": 2.308653846153846, "grad_norm": 4.248530864715576, "learning_rate": 1.9798072597990434e-05, "loss": 0.1726, "step": 2401 }, { "epoch": 2.309615384615385, "grad_norm": 4.572447299957275, "learning_rate": 1.97978234562996e-05, "loss": 0.1434, "step": 2402 }, { "epoch": 2.310576923076923, "grad_norm": 4.952657222747803, "learning_rate": 1.9797574162575307e-05, "loss": 0.1627, "step": 2403 }, { "epoch": 2.3115384615384613, "grad_norm": 5.1658196449279785, "learning_rate": 1.9797324716821415e-05, "loss": 0.3164, "step": 2404 }, { "epoch": 2.3125, "grad_norm": 4.151769161224365, "learning_rate": 1.97970751190418e-05, "loss": 0.1185, "step": 2405 }, { "epoch": 2.3134615384615387, "grad_norm": 6.863436222076416, "learning_rate": 1.979682536924033e-05, "loss": 0.1779, "step": 2406 }, { "epoch": 2.314423076923077, "grad_norm": 5.319568157196045, "learning_rate": 1.9796575467420886e-05, "loss": 0.2046, "step": 2407 }, { "epoch": 2.315384615384615, "grad_norm": 4.17527961730957, "learning_rate": 1.979632541358734e-05, "loss": 0.1869, "step": 2408 }, { "epoch": 2.316346153846154, "grad_norm": 6.351316928863525, "learning_rate": 1.979607520774358e-05, "loss": 0.321, "step": 2409 }, { "epoch": 2.3173076923076925, "grad_norm": 6.7913031578063965, "learning_rate": 1.9795824849893483e-05, "loss": 0.2219, "step": 2410 }, { "epoch": 2.3182692307692307, "grad_norm": 5.003175735473633, "learning_rate": 1.979557434004093e-05, "loss": 0.1757, "step": 2411 }, { "epoch": 2.319230769230769, "grad_norm": 4.860291004180908, "learning_rate": 1.979532367818982e-05, "loss": 0.2343, "step": 2412 }, { "epoch": 2.3201923076923077, "grad_norm": 4.332314968109131, "learning_rate": 1.979507286434403e-05, "loss": 0.1162, "step": 2413 }, { "epoch": 2.3211538461538463, "grad_norm": 5.302768230438232, "learning_rate": 1.9794821898507458e-05, "loss": 0.179, "step": 2414 }, { "epoch": 2.3221153846153846, "grad_norm": 4.291609764099121, "learning_rate": 1.9794570780684e-05, "loss": 0.4504, "step": 2415 }, { "epoch": 2.3230769230769233, "grad_norm": 4.036319732666016, "learning_rate": 1.9794319510877548e-05, "loss": 0.1347, "step": 2416 }, { "epoch": 2.3240384615384615, "grad_norm": 5.104397773742676, "learning_rate": 1.9794068089092006e-05, "loss": 0.1957, "step": 2417 }, { "epoch": 2.325, "grad_norm": 6.255462646484375, "learning_rate": 1.979381651533127e-05, "loss": 0.2142, "step": 2418 }, { "epoch": 2.3259615384615384, "grad_norm": 4.433906078338623, "learning_rate": 1.979356478959925e-05, "loss": 0.0918, "step": 2419 }, { "epoch": 2.326923076923077, "grad_norm": 4.460216522216797, "learning_rate": 1.9793312911899844e-05, "loss": 0.1555, "step": 2420 }, { "epoch": 2.3278846153846153, "grad_norm": 6.528587341308594, "learning_rate": 1.9793060882236964e-05, "loss": 0.2017, "step": 2421 }, { "epoch": 2.328846153846154, "grad_norm": 3.468989133834839, "learning_rate": 1.9792808700614527e-05, "loss": 0.0755, "step": 2422 }, { "epoch": 2.3298076923076922, "grad_norm": 3.6498968601226807, "learning_rate": 1.9792556367036432e-05, "loss": 0.0981, "step": 2423 }, { "epoch": 2.330769230769231, "grad_norm": 4.1671037673950195, "learning_rate": 1.979230388150661e-05, "loss": 0.2293, "step": 2424 }, { "epoch": 2.331730769230769, "grad_norm": 4.93302583694458, "learning_rate": 1.9792051244028968e-05, "loss": 0.1949, "step": 2425 }, { "epoch": 2.332692307692308, "grad_norm": 6.07304573059082, "learning_rate": 1.979179845460743e-05, "loss": 0.327, "step": 2426 }, { "epoch": 2.333653846153846, "grad_norm": 3.7502830028533936, "learning_rate": 1.979154551324592e-05, "loss": 0.092, "step": 2427 }, { "epoch": 2.3346153846153848, "grad_norm": 4.378740310668945, "learning_rate": 1.9791292419948364e-05, "loss": 0.1525, "step": 2428 }, { "epoch": 2.335576923076923, "grad_norm": 4.180412292480469, "learning_rate": 1.9791039174718686e-05, "loss": 0.1486, "step": 2429 }, { "epoch": 2.3365384615384617, "grad_norm": 5.6397857666015625, "learning_rate": 1.9790785777560812e-05, "loss": 0.2274, "step": 2430 }, { "epoch": 2.3375, "grad_norm": 5.7743611335754395, "learning_rate": 1.979053222847868e-05, "loss": 0.264, "step": 2431 }, { "epoch": 2.3384615384615386, "grad_norm": 4.40526008605957, "learning_rate": 1.9790278527476225e-05, "loss": 0.1232, "step": 2432 }, { "epoch": 2.339423076923077, "grad_norm": 4.463552951812744, "learning_rate": 1.9790024674557382e-05, "loss": 0.1783, "step": 2433 }, { "epoch": 2.3403846153846155, "grad_norm": 6.106057643890381, "learning_rate": 1.9789770669726088e-05, "loss": 0.1801, "step": 2434 }, { "epoch": 2.3413461538461537, "grad_norm": 4.098872184753418, "learning_rate": 1.9789516512986285e-05, "loss": 0.1367, "step": 2435 }, { "epoch": 2.3423076923076924, "grad_norm": 6.0614013671875, "learning_rate": 1.9789262204341918e-05, "loss": 0.2457, "step": 2436 }, { "epoch": 2.3432692307692307, "grad_norm": 5.522771835327148, "learning_rate": 1.9789007743796933e-05, "loss": 0.2085, "step": 2437 }, { "epoch": 2.3442307692307693, "grad_norm": 4.2742719650268555, "learning_rate": 1.978875313135528e-05, "loss": 0.1754, "step": 2438 }, { "epoch": 2.3451923076923076, "grad_norm": 4.677663803100586, "learning_rate": 1.9788498367020904e-05, "loss": 0.1902, "step": 2439 }, { "epoch": 2.3461538461538463, "grad_norm": 5.1254096031188965, "learning_rate": 1.9788243450797764e-05, "loss": 0.1671, "step": 2440 }, { "epoch": 2.3471153846153845, "grad_norm": 4.104305267333984, "learning_rate": 1.9787988382689812e-05, "loss": 0.1379, "step": 2441 }, { "epoch": 2.348076923076923, "grad_norm": 5.618484973907471, "learning_rate": 1.978773316270101e-05, "loss": 0.2262, "step": 2442 }, { "epoch": 2.3490384615384614, "grad_norm": 3.415191173553467, "learning_rate": 1.9787477790835317e-05, "loss": 0.1003, "step": 2443 }, { "epoch": 2.35, "grad_norm": 5.324425220489502, "learning_rate": 1.9787222267096694e-05, "loss": 0.2183, "step": 2444 }, { "epoch": 2.3509615384615383, "grad_norm": 7.177800178527832, "learning_rate": 1.9786966591489107e-05, "loss": 0.2739, "step": 2445 }, { "epoch": 2.351923076923077, "grad_norm": 5.117581844329834, "learning_rate": 1.9786710764016517e-05, "loss": 0.3042, "step": 2446 }, { "epoch": 2.3528846153846152, "grad_norm": 5.603579521179199, "learning_rate": 1.9786454784682908e-05, "loss": 0.2734, "step": 2447 }, { "epoch": 2.353846153846154, "grad_norm": 5.1919708251953125, "learning_rate": 1.978619865349224e-05, "loss": 0.1824, "step": 2448 }, { "epoch": 2.354807692307692, "grad_norm": 3.582406520843506, "learning_rate": 1.978594237044849e-05, "loss": 0.1288, "step": 2449 }, { "epoch": 2.355769230769231, "grad_norm": 5.294240474700928, "learning_rate": 1.9785685935555637e-05, "loss": 0.1827, "step": 2450 }, { "epoch": 2.356730769230769, "grad_norm": 4.633302688598633, "learning_rate": 1.978542934881766e-05, "loss": 0.176, "step": 2451 }, { "epoch": 2.3576923076923078, "grad_norm": 5.038829803466797, "learning_rate": 1.9785172610238536e-05, "loss": 0.2169, "step": 2452 }, { "epoch": 2.358653846153846, "grad_norm": 5.634434223175049, "learning_rate": 1.9784915719822255e-05, "loss": 0.2261, "step": 2453 }, { "epoch": 2.3596153846153847, "grad_norm": 6.37322473526001, "learning_rate": 1.97846586775728e-05, "loss": 0.296, "step": 2454 }, { "epoch": 2.360576923076923, "grad_norm": 5.000021934509277, "learning_rate": 1.978440148349416e-05, "loss": 0.2335, "step": 2455 }, { "epoch": 2.3615384615384616, "grad_norm": 5.531896114349365, "learning_rate": 1.9784144137590324e-05, "loss": 0.2348, "step": 2456 }, { "epoch": 2.3625, "grad_norm": 4.7109694480896, "learning_rate": 1.9783886639865287e-05, "loss": 0.1552, "step": 2457 }, { "epoch": 2.3634615384615385, "grad_norm": 7.282832145690918, "learning_rate": 1.978362899032305e-05, "loss": 0.3366, "step": 2458 }, { "epoch": 2.3644230769230767, "grad_norm": 6.139801979064941, "learning_rate": 1.97833711889676e-05, "loss": 0.2766, "step": 2459 }, { "epoch": 2.3653846153846154, "grad_norm": 6.346157073974609, "learning_rate": 1.9783113235802947e-05, "loss": 0.2284, "step": 2460 }, { "epoch": 2.3663461538461537, "grad_norm": 3.326220750808716, "learning_rate": 1.978285513083309e-05, "loss": 0.1161, "step": 2461 }, { "epoch": 2.3673076923076923, "grad_norm": 5.017039775848389, "learning_rate": 1.9782596874062028e-05, "loss": 0.2308, "step": 2462 }, { "epoch": 2.368269230769231, "grad_norm": 3.965756893157959, "learning_rate": 1.978233846549378e-05, "loss": 0.18, "step": 2463 }, { "epoch": 2.3692307692307693, "grad_norm": 4.885164737701416, "learning_rate": 1.9782079905132344e-05, "loss": 0.198, "step": 2464 }, { "epoch": 2.3701923076923075, "grad_norm": 5.036369800567627, "learning_rate": 1.9781821192981744e-05, "loss": 0.2071, "step": 2465 }, { "epoch": 2.371153846153846, "grad_norm": 5.507899284362793, "learning_rate": 1.9781562329045984e-05, "loss": 0.1892, "step": 2466 }, { "epoch": 2.372115384615385, "grad_norm": 5.721193313598633, "learning_rate": 1.9781303313329086e-05, "loss": 0.1676, "step": 2467 }, { "epoch": 2.373076923076923, "grad_norm": 4.642559051513672, "learning_rate": 1.978104414583507e-05, "loss": 0.2452, "step": 2468 }, { "epoch": 2.3740384615384613, "grad_norm": 4.939530372619629, "learning_rate": 1.9780784826567955e-05, "loss": 0.1876, "step": 2469 }, { "epoch": 2.375, "grad_norm": 5.625223159790039, "learning_rate": 1.9780525355531766e-05, "loss": 0.1895, "step": 2470 }, { "epoch": 2.3759615384615387, "grad_norm": 4.409672737121582, "learning_rate": 1.9780265732730532e-05, "loss": 0.1173, "step": 2471 }, { "epoch": 2.376923076923077, "grad_norm": 4.943048000335693, "learning_rate": 1.9780005958168275e-05, "loss": 0.1626, "step": 2472 }, { "epoch": 2.377884615384615, "grad_norm": 4.768527507781982, "learning_rate": 1.9779746031849027e-05, "loss": 0.1596, "step": 2473 }, { "epoch": 2.378846153846154, "grad_norm": 6.355160236358643, "learning_rate": 1.977948595377683e-05, "loss": 0.2462, "step": 2474 }, { "epoch": 2.3798076923076925, "grad_norm": 4.706120491027832, "learning_rate": 1.977922572395571e-05, "loss": 0.1647, "step": 2475 }, { "epoch": 2.3807692307692307, "grad_norm": 5.383947849273682, "learning_rate": 1.9778965342389707e-05, "loss": 0.1765, "step": 2476 }, { "epoch": 2.381730769230769, "grad_norm": 7.6637468338012695, "learning_rate": 1.9778704809082864e-05, "loss": 0.276, "step": 2477 }, { "epoch": 2.3826923076923077, "grad_norm": 6.485007286071777, "learning_rate": 1.9778444124039224e-05, "loss": 0.3456, "step": 2478 }, { "epoch": 2.3836538461538463, "grad_norm": 6.766197204589844, "learning_rate": 1.9778183287262828e-05, "loss": 0.3719, "step": 2479 }, { "epoch": 2.3846153846153846, "grad_norm": 2.5173583030700684, "learning_rate": 1.977792229875773e-05, "loss": 0.0954, "step": 2480 }, { "epoch": 2.3855769230769233, "grad_norm": 5.594350814819336, "learning_rate": 1.977766115852797e-05, "loss": 0.3551, "step": 2481 }, { "epoch": 2.3865384615384615, "grad_norm": 5.770278453826904, "learning_rate": 1.977739986657761e-05, "loss": 0.2101, "step": 2482 }, { "epoch": 2.3875, "grad_norm": 5.3012261390686035, "learning_rate": 1.97771384229107e-05, "loss": 0.2674, "step": 2483 }, { "epoch": 2.3884615384615384, "grad_norm": 3.445857286453247, "learning_rate": 1.9776876827531296e-05, "loss": 0.0914, "step": 2484 }, { "epoch": 2.389423076923077, "grad_norm": 4.306792736053467, "learning_rate": 1.9776615080443462e-05, "loss": 0.1436, "step": 2485 }, { "epoch": 2.3903846153846153, "grad_norm": 5.098939418792725, "learning_rate": 1.9776353181651254e-05, "loss": 0.1754, "step": 2486 }, { "epoch": 2.391346153846154, "grad_norm": 5.379793167114258, "learning_rate": 1.9776091131158736e-05, "loss": 0.2451, "step": 2487 }, { "epoch": 2.3923076923076922, "grad_norm": 6.776202201843262, "learning_rate": 1.9775828928969976e-05, "loss": 0.4444, "step": 2488 }, { "epoch": 2.393269230769231, "grad_norm": 12.346667289733887, "learning_rate": 1.9775566575089043e-05, "loss": 0.2381, "step": 2489 }, { "epoch": 2.394230769230769, "grad_norm": 3.277831554412842, "learning_rate": 1.977530406952001e-05, "loss": 0.0767, "step": 2490 }, { "epoch": 2.395192307692308, "grad_norm": 5.149238586425781, "learning_rate": 1.9775041412266946e-05, "loss": 0.2443, "step": 2491 }, { "epoch": 2.396153846153846, "grad_norm": 6.274430274963379, "learning_rate": 1.977477860333393e-05, "loss": 0.2867, "step": 2492 }, { "epoch": 2.3971153846153848, "grad_norm": 6.05775785446167, "learning_rate": 1.977451564272504e-05, "loss": 0.2695, "step": 2493 }, { "epoch": 2.398076923076923, "grad_norm": 5.385843276977539, "learning_rate": 1.977425253044435e-05, "loss": 0.2112, "step": 2494 }, { "epoch": 2.3990384615384617, "grad_norm": 4.644103527069092, "learning_rate": 1.9773989266495953e-05, "loss": 0.1575, "step": 2495 }, { "epoch": 2.4, "grad_norm": 5.234340190887451, "learning_rate": 1.9773725850883923e-05, "loss": 0.229, "step": 2496 }, { "epoch": 2.4009615384615386, "grad_norm": 4.5071587562561035, "learning_rate": 1.9773462283612363e-05, "loss": 0.189, "step": 2497 }, { "epoch": 2.401923076923077, "grad_norm": 6.618834972381592, "learning_rate": 1.9773198564685346e-05, "loss": 0.2706, "step": 2498 }, { "epoch": 2.4028846153846155, "grad_norm": 7.0560102462768555, "learning_rate": 1.977293469410697e-05, "loss": 0.5238, "step": 2499 }, { "epoch": 2.4038461538461537, "grad_norm": 5.490859031677246, "learning_rate": 1.9772670671881332e-05, "loss": 0.2183, "step": 2500 }, { "epoch": 2.4048076923076924, "grad_norm": 5.411064147949219, "learning_rate": 1.977240649801253e-05, "loss": 0.2204, "step": 2501 }, { "epoch": 2.4057692307692307, "grad_norm": 5.965962886810303, "learning_rate": 1.977214217250466e-05, "loss": 0.2481, "step": 2502 }, { "epoch": 2.4067307692307693, "grad_norm": 6.318474769592285, "learning_rate": 1.977187769536183e-05, "loss": 0.3625, "step": 2503 }, { "epoch": 2.4076923076923076, "grad_norm": 5.414254188537598, "learning_rate": 1.9771613066588127e-05, "loss": 0.1502, "step": 2504 }, { "epoch": 2.4086538461538463, "grad_norm": 4.67551851272583, "learning_rate": 1.9771348286187675e-05, "loss": 0.1983, "step": 2505 }, { "epoch": 2.4096153846153845, "grad_norm": 5.108248710632324, "learning_rate": 1.9771083354164575e-05, "loss": 0.2559, "step": 2506 }, { "epoch": 2.410576923076923, "grad_norm": 3.50192928314209, "learning_rate": 1.977081827052294e-05, "loss": 0.1042, "step": 2507 }, { "epoch": 2.4115384615384614, "grad_norm": 5.392573356628418, "learning_rate": 1.9770553035266882e-05, "loss": 0.1989, "step": 2508 }, { "epoch": 2.4125, "grad_norm": 6.424921989440918, "learning_rate": 1.9770287648400516e-05, "loss": 0.3755, "step": 2509 }, { "epoch": 2.4134615384615383, "grad_norm": 6.140743255615234, "learning_rate": 1.9770022109927962e-05, "loss": 0.2439, "step": 2510 }, { "epoch": 2.414423076923077, "grad_norm": 5.862917900085449, "learning_rate": 1.976975641985334e-05, "loss": 0.1784, "step": 2511 }, { "epoch": 2.4153846153846152, "grad_norm": 5.07866907119751, "learning_rate": 1.9769490578180773e-05, "loss": 0.17, "step": 2512 }, { "epoch": 2.416346153846154, "grad_norm": 4.572740077972412, "learning_rate": 1.9769224584914383e-05, "loss": 0.1393, "step": 2513 }, { "epoch": 2.417307692307692, "grad_norm": 5.068890571594238, "learning_rate": 1.97689584400583e-05, "loss": 0.1983, "step": 2514 }, { "epoch": 2.418269230769231, "grad_norm": 3.457620143890381, "learning_rate": 1.9768692143616656e-05, "loss": 0.0674, "step": 2515 }, { "epoch": 2.419230769230769, "grad_norm": 7.095941066741943, "learning_rate": 1.976842569559358e-05, "loss": 0.2021, "step": 2516 }, { "epoch": 2.4201923076923078, "grad_norm": 5.529844284057617, "learning_rate": 1.9768159095993207e-05, "loss": 0.2498, "step": 2517 }, { "epoch": 2.421153846153846, "grad_norm": 5.79261589050293, "learning_rate": 1.976789234481967e-05, "loss": 0.3383, "step": 2518 }, { "epoch": 2.4221153846153847, "grad_norm": 4.498996257781982, "learning_rate": 1.976762544207712e-05, "loss": 0.1498, "step": 2519 }, { "epoch": 2.423076923076923, "grad_norm": 7.247810363769531, "learning_rate": 1.9767358387769683e-05, "loss": 0.3617, "step": 2520 }, { "epoch": 2.4240384615384616, "grad_norm": 8.739104270935059, "learning_rate": 1.9767091181901516e-05, "loss": 0.4301, "step": 2521 }, { "epoch": 2.425, "grad_norm": 8.514158248901367, "learning_rate": 1.9766823824476756e-05, "loss": 0.4749, "step": 2522 }, { "epoch": 2.4259615384615385, "grad_norm": 5.203855037689209, "learning_rate": 1.976655631549956e-05, "loss": 0.1282, "step": 2523 }, { "epoch": 2.4269230769230767, "grad_norm": 5.31114387512207, "learning_rate": 1.9766288654974072e-05, "loss": 0.2362, "step": 2524 }, { "epoch": 2.4278846153846154, "grad_norm": 4.38612174987793, "learning_rate": 1.976602084290445e-05, "loss": 0.1495, "step": 2525 }, { "epoch": 2.4288461538461537, "grad_norm": 3.957280158996582, "learning_rate": 1.976575287929484e-05, "loss": 0.125, "step": 2526 }, { "epoch": 2.4298076923076923, "grad_norm": 4.1751227378845215, "learning_rate": 1.9765484764149413e-05, "loss": 0.1824, "step": 2527 }, { "epoch": 2.430769230769231, "grad_norm": 7.158936977386475, "learning_rate": 1.9765216497472325e-05, "loss": 0.257, "step": 2528 }, { "epoch": 2.4317307692307693, "grad_norm": 3.781999349594116, "learning_rate": 1.9764948079267738e-05, "loss": 0.1244, "step": 2529 }, { "epoch": 2.4326923076923075, "grad_norm": 4.814290523529053, "learning_rate": 1.976467950953981e-05, "loss": 0.1696, "step": 2530 }, { "epoch": 2.433653846153846, "grad_norm": 5.527986526489258, "learning_rate": 1.9764410788292724e-05, "loss": 0.1917, "step": 2531 }, { "epoch": 2.434615384615385, "grad_norm": 3.3021116256713867, "learning_rate": 1.9764141915530633e-05, "loss": 0.1297, "step": 2532 }, { "epoch": 2.435576923076923, "grad_norm": 6.434967041015625, "learning_rate": 1.9763872891257722e-05, "loss": 0.5062, "step": 2533 }, { "epoch": 2.4365384615384613, "grad_norm": 4.979333400726318, "learning_rate": 1.976360371547816e-05, "loss": 0.3133, "step": 2534 }, { "epoch": 2.4375, "grad_norm": 5.463548183441162, "learning_rate": 1.9763334388196122e-05, "loss": 0.2924, "step": 2535 }, { "epoch": 2.4384615384615387, "grad_norm": 4.401573181152344, "learning_rate": 1.976306490941579e-05, "loss": 0.2213, "step": 2536 }, { "epoch": 2.439423076923077, "grad_norm": 3.915416717529297, "learning_rate": 1.9762795279141344e-05, "loss": 0.1845, "step": 2537 }, { "epoch": 2.440384615384615, "grad_norm": 4.998812198638916, "learning_rate": 1.976252549737697e-05, "loss": 0.2188, "step": 2538 }, { "epoch": 2.441346153846154, "grad_norm": 4.054767608642578, "learning_rate": 1.9762255564126852e-05, "loss": 0.1229, "step": 2539 }, { "epoch": 2.4423076923076925, "grad_norm": 7.241117000579834, "learning_rate": 1.976198547939518e-05, "loss": 0.4197, "step": 2540 }, { "epoch": 2.4432692307692307, "grad_norm": 6.512776851654053, "learning_rate": 1.9761715243186146e-05, "loss": 0.351, "step": 2541 }, { "epoch": 2.444230769230769, "grad_norm": 5.182671546936035, "learning_rate": 1.9761444855503938e-05, "loss": 0.182, "step": 2542 }, { "epoch": 2.4451923076923077, "grad_norm": 3.9664418697357178, "learning_rate": 1.976117431635276e-05, "loss": 0.1686, "step": 2543 }, { "epoch": 2.4461538461538463, "grad_norm": 3.7793869972229004, "learning_rate": 1.97609036257368e-05, "loss": 0.158, "step": 2544 }, { "epoch": 2.4471153846153846, "grad_norm": 5.152229309082031, "learning_rate": 1.976063278366027e-05, "loss": 0.178, "step": 2545 }, { "epoch": 2.4480769230769233, "grad_norm": 4.557929039001465, "learning_rate": 1.976036179012736e-05, "loss": 0.1978, "step": 2546 }, { "epoch": 2.4490384615384615, "grad_norm": 4.006479263305664, "learning_rate": 1.976009064514229e-05, "loss": 0.1616, "step": 2547 }, { "epoch": 2.45, "grad_norm": 6.884338855743408, "learning_rate": 1.975981934870925e-05, "loss": 0.2935, "step": 2548 }, { "epoch": 2.4509615384615384, "grad_norm": 3.7611606121063232, "learning_rate": 1.9759547900832463e-05, "loss": 0.0737, "step": 2549 }, { "epoch": 2.451923076923077, "grad_norm": 6.819275379180908, "learning_rate": 1.9759276301516133e-05, "loss": 0.2907, "step": 2550 }, { "epoch": 2.4528846153846153, "grad_norm": 5.923386096954346, "learning_rate": 1.975900455076448e-05, "loss": 0.4233, "step": 2551 }, { "epoch": 2.453846153846154, "grad_norm": 5.758503437042236, "learning_rate": 1.975873264858172e-05, "loss": 0.1537, "step": 2552 }, { "epoch": 2.4548076923076922, "grad_norm": 3.197998285293579, "learning_rate": 1.9758460594972068e-05, "loss": 0.1129, "step": 2553 }, { "epoch": 2.455769230769231, "grad_norm": 6.351034164428711, "learning_rate": 1.9758188389939753e-05, "loss": 0.2026, "step": 2554 }, { "epoch": 2.456730769230769, "grad_norm": 2.6844468116760254, "learning_rate": 1.975791603348899e-05, "loss": 0.0953, "step": 2555 }, { "epoch": 2.457692307692308, "grad_norm": 6.24268102645874, "learning_rate": 1.9757643525624008e-05, "loss": 0.2384, "step": 2556 }, { "epoch": 2.458653846153846, "grad_norm": 5.2440571784973145, "learning_rate": 1.975737086634904e-05, "loss": 0.2746, "step": 2557 }, { "epoch": 2.4596153846153848, "grad_norm": 6.970211982727051, "learning_rate": 1.9757098055668312e-05, "loss": 0.2604, "step": 2558 }, { "epoch": 2.460576923076923, "grad_norm": 3.6449708938598633, "learning_rate": 1.975682509358606e-05, "loss": 0.1223, "step": 2559 }, { "epoch": 2.4615384615384617, "grad_norm": 5.384904861450195, "learning_rate": 1.975655198010652e-05, "loss": 0.2413, "step": 2560 }, { "epoch": 2.4625, "grad_norm": 3.6428475379943848, "learning_rate": 1.9756278715233925e-05, "loss": 0.1444, "step": 2561 }, { "epoch": 2.4634615384615386, "grad_norm": 4.280209064483643, "learning_rate": 1.975600529897252e-05, "loss": 0.1074, "step": 2562 }, { "epoch": 2.464423076923077, "grad_norm": 5.162388324737549, "learning_rate": 1.9755731731326545e-05, "loss": 0.193, "step": 2563 }, { "epoch": 2.4653846153846155, "grad_norm": 4.4329423904418945, "learning_rate": 1.975545801230025e-05, "loss": 0.1605, "step": 2564 }, { "epoch": 2.4663461538461537, "grad_norm": 3.6372647285461426, "learning_rate": 1.9755184141897876e-05, "loss": 0.1507, "step": 2565 }, { "epoch": 2.4673076923076924, "grad_norm": 4.998239040374756, "learning_rate": 1.9754910120123675e-05, "loss": 0.193, "step": 2566 }, { "epoch": 2.4682692307692307, "grad_norm": 6.215221881866455, "learning_rate": 1.9754635946981898e-05, "loss": 0.5996, "step": 2567 }, { "epoch": 2.4692307692307693, "grad_norm": 4.841153144836426, "learning_rate": 1.9754361622476807e-05, "loss": 0.1645, "step": 2568 }, { "epoch": 2.4701923076923076, "grad_norm": 4.904017925262451, "learning_rate": 1.9754087146612644e-05, "loss": 0.1853, "step": 2569 }, { "epoch": 2.4711538461538463, "grad_norm": 5.714367389678955, "learning_rate": 1.975381251939368e-05, "loss": 0.2436, "step": 2570 }, { "epoch": 2.4721153846153845, "grad_norm": 7.083236217498779, "learning_rate": 1.9753537740824175e-05, "loss": 0.4039, "step": 2571 }, { "epoch": 2.473076923076923, "grad_norm": 4.72493839263916, "learning_rate": 1.9753262810908387e-05, "loss": 0.3464, "step": 2572 }, { "epoch": 2.4740384615384614, "grad_norm": 4.345181941986084, "learning_rate": 1.975298772965059e-05, "loss": 0.1585, "step": 2573 }, { "epoch": 2.475, "grad_norm": 4.933207035064697, "learning_rate": 1.975271249705504e-05, "loss": 0.119, "step": 2574 }, { "epoch": 2.4759615384615383, "grad_norm": 5.171920299530029, "learning_rate": 1.9752437113126023e-05, "loss": 0.1789, "step": 2575 }, { "epoch": 2.476923076923077, "grad_norm": 6.203938007354736, "learning_rate": 1.9752161577867805e-05, "loss": 0.2173, "step": 2576 }, { "epoch": 2.4778846153846152, "grad_norm": 3.040940284729004, "learning_rate": 1.9751885891284658e-05, "loss": 0.1671, "step": 2577 }, { "epoch": 2.478846153846154, "grad_norm": 4.499283313751221, "learning_rate": 1.9751610053380867e-05, "loss": 0.1318, "step": 2578 }, { "epoch": 2.479807692307692, "grad_norm": 4.684685230255127, "learning_rate": 1.9751334064160708e-05, "loss": 0.2243, "step": 2579 }, { "epoch": 2.480769230769231, "grad_norm": 3.7311012744903564, "learning_rate": 1.9751057923628463e-05, "loss": 0.1545, "step": 2580 }, { "epoch": 2.481730769230769, "grad_norm": 4.014016151428223, "learning_rate": 1.9750781631788416e-05, "loss": 0.1255, "step": 2581 }, { "epoch": 2.4826923076923078, "grad_norm": 6.371394634246826, "learning_rate": 1.9750505188644856e-05, "loss": 0.4661, "step": 2582 }, { "epoch": 2.483653846153846, "grad_norm": 4.2303667068481445, "learning_rate": 1.9750228594202075e-05, "loss": 0.146, "step": 2583 }, { "epoch": 2.4846153846153847, "grad_norm": 4.701770305633545, "learning_rate": 1.9749951848464367e-05, "loss": 0.1482, "step": 2584 }, { "epoch": 2.485576923076923, "grad_norm": 5.634987831115723, "learning_rate": 1.9749674951436016e-05, "loss": 0.1765, "step": 2585 }, { "epoch": 2.4865384615384616, "grad_norm": 6.108672618865967, "learning_rate": 1.9749397903121327e-05, "loss": 0.3019, "step": 2586 }, { "epoch": 2.4875, "grad_norm": 6.007690906524658, "learning_rate": 1.97491207035246e-05, "loss": 0.1669, "step": 2587 }, { "epoch": 2.4884615384615385, "grad_norm": 5.366580963134766, "learning_rate": 1.9748843352650127e-05, "loss": 0.1642, "step": 2588 }, { "epoch": 2.4894230769230767, "grad_norm": 5.927120208740234, "learning_rate": 1.9748565850502218e-05, "loss": 0.1076, "step": 2589 }, { "epoch": 2.4903846153846154, "grad_norm": 4.644710063934326, "learning_rate": 1.9748288197085182e-05, "loss": 0.1512, "step": 2590 }, { "epoch": 2.4913461538461537, "grad_norm": 4.418264389038086, "learning_rate": 1.9748010392403322e-05, "loss": 0.1254, "step": 2591 }, { "epoch": 2.4923076923076923, "grad_norm": 5.187132358551025, "learning_rate": 1.9747732436460955e-05, "loss": 0.1676, "step": 2592 }, { "epoch": 2.493269230769231, "grad_norm": 6.084901809692383, "learning_rate": 1.9747454329262382e-05, "loss": 0.3145, "step": 2593 }, { "epoch": 2.4942307692307693, "grad_norm": 5.632884979248047, "learning_rate": 1.974717607081193e-05, "loss": 0.2762, "step": 2594 }, { "epoch": 2.4951923076923075, "grad_norm": 2.989712953567505, "learning_rate": 1.9746897661113915e-05, "loss": 0.0976, "step": 2595 }, { "epoch": 2.496153846153846, "grad_norm": 3.473109483718872, "learning_rate": 1.9746619100172654e-05, "loss": 0.1202, "step": 2596 }, { "epoch": 2.497115384615385, "grad_norm": 2.44325590133667, "learning_rate": 1.974634038799247e-05, "loss": 0.0544, "step": 2597 }, { "epoch": 2.498076923076923, "grad_norm": 5.256768703460693, "learning_rate": 1.974606152457769e-05, "loss": 0.4239, "step": 2598 }, { "epoch": 2.4990384615384613, "grad_norm": 4.953283786773682, "learning_rate": 1.974578250993264e-05, "loss": 0.1691, "step": 2599 }, { "epoch": 2.5, "grad_norm": 5.0860490798950195, "learning_rate": 1.9745503344061647e-05, "loss": 0.1634, "step": 2600 }, { "epoch": 2.5009615384615387, "grad_norm": 5.726993560791016, "learning_rate": 1.9745224026969042e-05, "loss": 0.2305, "step": 2601 }, { "epoch": 2.501923076923077, "grad_norm": 4.004560470581055, "learning_rate": 1.9744944558659166e-05, "loss": 0.118, "step": 2602 }, { "epoch": 2.502884615384615, "grad_norm": 5.004958152770996, "learning_rate": 1.974466493913635e-05, "loss": 0.2459, "step": 2603 }, { "epoch": 2.503846153846154, "grad_norm": 4.564997673034668, "learning_rate": 1.9744385168404935e-05, "loss": 0.1892, "step": 2604 }, { "epoch": 2.5048076923076925, "grad_norm": 4.288000106811523, "learning_rate": 1.9744105246469264e-05, "loss": 0.1281, "step": 2605 }, { "epoch": 2.5057692307692307, "grad_norm": 6.24931526184082, "learning_rate": 1.9743825173333672e-05, "loss": 0.2348, "step": 2606 }, { "epoch": 2.506730769230769, "grad_norm": 4.6119279861450195, "learning_rate": 1.9743544949002516e-05, "loss": 0.1069, "step": 2607 }, { "epoch": 2.5076923076923077, "grad_norm": 7.229186534881592, "learning_rate": 1.974326457348014e-05, "loss": 0.1442, "step": 2608 }, { "epoch": 2.5086538461538463, "grad_norm": 3.377401113510132, "learning_rate": 1.974298404677089e-05, "loss": 0.1521, "step": 2609 }, { "epoch": 2.5096153846153846, "grad_norm": 4.2120842933654785, "learning_rate": 1.9742703368879126e-05, "loss": 0.1379, "step": 2610 }, { "epoch": 2.510576923076923, "grad_norm": 2.803270101547241, "learning_rate": 1.9742422539809197e-05, "loss": 0.0699, "step": 2611 }, { "epoch": 2.5115384615384615, "grad_norm": 7.127243995666504, "learning_rate": 1.974214155956547e-05, "loss": 0.3242, "step": 2612 }, { "epoch": 2.5125, "grad_norm": 6.2487335205078125, "learning_rate": 1.9741860428152294e-05, "loss": 0.2772, "step": 2613 }, { "epoch": 2.5134615384615384, "grad_norm": 6.838139057159424, "learning_rate": 1.9741579145574037e-05, "loss": 0.3223, "step": 2614 }, { "epoch": 2.5144230769230766, "grad_norm": 6.727719306945801, "learning_rate": 1.9741297711835063e-05, "loss": 0.4846, "step": 2615 }, { "epoch": 2.5153846153846153, "grad_norm": 5.587729454040527, "learning_rate": 1.9741016126939735e-05, "loss": 0.1715, "step": 2616 }, { "epoch": 2.516346153846154, "grad_norm": 7.197773456573486, "learning_rate": 1.9740734390892432e-05, "loss": 0.3245, "step": 2617 }, { "epoch": 2.5173076923076922, "grad_norm": 5.139633655548096, "learning_rate": 1.9740452503697518e-05, "loss": 0.1392, "step": 2618 }, { "epoch": 2.518269230769231, "grad_norm": 4.35609245300293, "learning_rate": 1.974017046535937e-05, "loss": 0.161, "step": 2619 }, { "epoch": 2.519230769230769, "grad_norm": 5.541009902954102, "learning_rate": 1.9739888275882362e-05, "loss": 0.2324, "step": 2620 }, { "epoch": 2.520192307692308, "grad_norm": 5.501104354858398, "learning_rate": 1.9739605935270877e-05, "loss": 0.326, "step": 2621 }, { "epoch": 2.521153846153846, "grad_norm": 5.014784812927246, "learning_rate": 1.973932344352929e-05, "loss": 0.1719, "step": 2622 }, { "epoch": 2.5221153846153848, "grad_norm": 4.682886123657227, "learning_rate": 1.9739040800661988e-05, "loss": 0.174, "step": 2623 }, { "epoch": 2.523076923076923, "grad_norm": 6.483053684234619, "learning_rate": 1.973875800667336e-05, "loss": 0.1548, "step": 2624 }, { "epoch": 2.5240384615384617, "grad_norm": 4.7576093673706055, "learning_rate": 1.9738475061567785e-05, "loss": 0.1817, "step": 2625 }, { "epoch": 2.525, "grad_norm": 5.816281795501709, "learning_rate": 1.973819196534966e-05, "loss": 0.2404, "step": 2626 }, { "epoch": 2.5259615384615386, "grad_norm": 4.256402969360352, "learning_rate": 1.973790871802338e-05, "loss": 0.1493, "step": 2627 }, { "epoch": 2.526923076923077, "grad_norm": 3.9958384037017822, "learning_rate": 1.9737625319593338e-05, "loss": 0.1381, "step": 2628 }, { "epoch": 2.5278846153846155, "grad_norm": 5.380248546600342, "learning_rate": 1.9737341770063928e-05, "loss": 0.1479, "step": 2629 }, { "epoch": 2.5288461538461537, "grad_norm": 4.9945969581604, "learning_rate": 1.973705806943955e-05, "loss": 0.1616, "step": 2630 }, { "epoch": 2.5298076923076924, "grad_norm": 6.513396263122559, "learning_rate": 1.9736774217724614e-05, "loss": 0.2846, "step": 2631 }, { "epoch": 2.5307692307692307, "grad_norm": 5.0898613929748535, "learning_rate": 1.9736490214923517e-05, "loss": 0.1952, "step": 2632 }, { "epoch": 2.5317307692307693, "grad_norm": 5.813454627990723, "learning_rate": 1.973620606104067e-05, "loss": 0.2686, "step": 2633 }, { "epoch": 2.5326923076923076, "grad_norm": 4.0013203620910645, "learning_rate": 1.9735921756080477e-05, "loss": 0.1714, "step": 2634 }, { "epoch": 2.5336538461538463, "grad_norm": 3.8858165740966797, "learning_rate": 1.9735637300047353e-05, "loss": 0.1976, "step": 2635 }, { "epoch": 2.5346153846153845, "grad_norm": 6.373676300048828, "learning_rate": 1.9735352692945714e-05, "loss": 0.3844, "step": 2636 }, { "epoch": 2.535576923076923, "grad_norm": 3.8788180351257324, "learning_rate": 1.9735067934779973e-05, "loss": 0.14, "step": 2637 }, { "epoch": 2.5365384615384614, "grad_norm": 5.21560525894165, "learning_rate": 1.973478302555455e-05, "loss": 0.2202, "step": 2638 }, { "epoch": 2.5375, "grad_norm": 5.892939567565918, "learning_rate": 1.9734497965273865e-05, "loss": 0.2126, "step": 2639 }, { "epoch": 2.5384615384615383, "grad_norm": 4.028469085693359, "learning_rate": 1.9734212753942344e-05, "loss": 0.1654, "step": 2640 }, { "epoch": 2.539423076923077, "grad_norm": 5.3479790687561035, "learning_rate": 1.973392739156441e-05, "loss": 0.1568, "step": 2641 }, { "epoch": 2.5403846153846152, "grad_norm": 6.04403018951416, "learning_rate": 1.973364187814449e-05, "loss": 0.343, "step": 2642 }, { "epoch": 2.541346153846154, "grad_norm": 5.438522815704346, "learning_rate": 1.973335621368702e-05, "loss": 0.2299, "step": 2643 }, { "epoch": 2.542307692307692, "grad_norm": 5.080386161804199, "learning_rate": 1.9733070398196423e-05, "loss": 0.1324, "step": 2644 }, { "epoch": 2.543269230769231, "grad_norm": 4.6586012840271, "learning_rate": 1.9732784431677143e-05, "loss": 0.2319, "step": 2645 }, { "epoch": 2.544230769230769, "grad_norm": 6.661589622497559, "learning_rate": 1.9732498314133616e-05, "loss": 0.3303, "step": 2646 }, { "epoch": 2.5451923076923078, "grad_norm": 4.404206275939941, "learning_rate": 1.9732212045570274e-05, "loss": 0.1687, "step": 2647 }, { "epoch": 2.546153846153846, "grad_norm": 6.513505458831787, "learning_rate": 1.973192562599157e-05, "loss": 0.2852, "step": 2648 }, { "epoch": 2.5471153846153847, "grad_norm": 6.436662197113037, "learning_rate": 1.9731639055401937e-05, "loss": 0.3132, "step": 2649 }, { "epoch": 2.5480769230769234, "grad_norm": 4.686267375946045, "learning_rate": 1.9731352333805833e-05, "loss": 0.4976, "step": 2650 }, { "epoch": 2.5490384615384616, "grad_norm": 5.210356712341309, "learning_rate": 1.97310654612077e-05, "loss": 0.1543, "step": 2651 }, { "epoch": 2.55, "grad_norm": 4.866021633148193, "learning_rate": 1.9730778437611994e-05, "loss": 0.1527, "step": 2652 }, { "epoch": 2.5509615384615385, "grad_norm": 6.34114933013916, "learning_rate": 1.9730491263023163e-05, "loss": 0.2469, "step": 2653 }, { "epoch": 2.551923076923077, "grad_norm": 3.758969306945801, "learning_rate": 1.973020393744567e-05, "loss": 0.1558, "step": 2654 }, { "epoch": 2.5528846153846154, "grad_norm": 4.15219259262085, "learning_rate": 1.9729916460883965e-05, "loss": 0.1648, "step": 2655 }, { "epoch": 2.5538461538461537, "grad_norm": 7.067380428314209, "learning_rate": 1.9729628833342515e-05, "loss": 0.2247, "step": 2656 }, { "epoch": 2.5548076923076923, "grad_norm": 5.113513946533203, "learning_rate": 1.9729341054825783e-05, "loss": 0.1491, "step": 2657 }, { "epoch": 2.555769230769231, "grad_norm": 3.8726518154144287, "learning_rate": 1.9729053125338232e-05, "loss": 0.1311, "step": 2658 }, { "epoch": 2.5567307692307693, "grad_norm": 3.6247594356536865, "learning_rate": 1.9728765044884334e-05, "loss": 0.1108, "step": 2659 }, { "epoch": 2.5576923076923075, "grad_norm": 5.836147308349609, "learning_rate": 1.9728476813468552e-05, "loss": 0.4034, "step": 2660 }, { "epoch": 2.558653846153846, "grad_norm": 3.890964984893799, "learning_rate": 1.9728188431095364e-05, "loss": 0.1463, "step": 2661 }, { "epoch": 2.559615384615385, "grad_norm": 4.620306968688965, "learning_rate": 1.9727899897769244e-05, "loss": 0.1579, "step": 2662 }, { "epoch": 2.560576923076923, "grad_norm": 4.574516773223877, "learning_rate": 1.972761121349467e-05, "loss": 0.1254, "step": 2663 }, { "epoch": 2.5615384615384613, "grad_norm": 5.529510974884033, "learning_rate": 1.9727322378276115e-05, "loss": 0.2296, "step": 2664 }, { "epoch": 2.5625, "grad_norm": 5.989273548126221, "learning_rate": 1.972703339211807e-05, "loss": 0.2681, "step": 2665 }, { "epoch": 2.5634615384615387, "grad_norm": 4.9331746101379395, "learning_rate": 1.9726744255025014e-05, "loss": 0.2435, "step": 2666 }, { "epoch": 2.564423076923077, "grad_norm": 5.25949764251709, "learning_rate": 1.9726454967001435e-05, "loss": 0.1862, "step": 2667 }, { "epoch": 2.565384615384615, "grad_norm": 6.354513645172119, "learning_rate": 1.972616552805182e-05, "loss": 0.1516, "step": 2668 }, { "epoch": 2.566346153846154, "grad_norm": 4.607203483581543, "learning_rate": 1.9725875938180667e-05, "loss": 0.1526, "step": 2669 }, { "epoch": 2.5673076923076925, "grad_norm": 5.7577033042907715, "learning_rate": 1.972558619739246e-05, "loss": 0.2221, "step": 2670 }, { "epoch": 2.5682692307692307, "grad_norm": 5.010351181030273, "learning_rate": 1.9725296305691702e-05, "loss": 0.1421, "step": 2671 }, { "epoch": 2.569230769230769, "grad_norm": 5.24898624420166, "learning_rate": 1.9725006263082885e-05, "loss": 0.1832, "step": 2672 }, { "epoch": 2.5701923076923077, "grad_norm": 4.797828674316406, "learning_rate": 1.972471606957052e-05, "loss": 0.1285, "step": 2673 }, { "epoch": 2.5711538461538463, "grad_norm": 4.979761123657227, "learning_rate": 1.97244257251591e-05, "loss": 0.1868, "step": 2674 }, { "epoch": 2.5721153846153846, "grad_norm": 4.565947532653809, "learning_rate": 1.9724135229853133e-05, "loss": 0.1767, "step": 2675 }, { "epoch": 2.573076923076923, "grad_norm": 5.538753986358643, "learning_rate": 1.9723844583657125e-05, "loss": 0.1854, "step": 2676 }, { "epoch": 2.5740384615384615, "grad_norm": 5.275347709655762, "learning_rate": 1.972355378657559e-05, "loss": 0.2494, "step": 2677 }, { "epoch": 2.575, "grad_norm": 5.434114456176758, "learning_rate": 1.972326283861304e-05, "loss": 0.2696, "step": 2678 }, { "epoch": 2.5759615384615384, "grad_norm": 5.307821750640869, "learning_rate": 1.9722971739773986e-05, "loss": 0.6132, "step": 2679 }, { "epoch": 2.5769230769230766, "grad_norm": 4.697876453399658, "learning_rate": 1.972268049006295e-05, "loss": 0.1849, "step": 2680 }, { "epoch": 2.5778846153846153, "grad_norm": 6.624809741973877, "learning_rate": 1.9722389089484446e-05, "loss": 0.4016, "step": 2681 }, { "epoch": 2.578846153846154, "grad_norm": 4.111186504364014, "learning_rate": 1.9722097538043e-05, "loss": 0.1239, "step": 2682 }, { "epoch": 2.5798076923076922, "grad_norm": 5.289012908935547, "learning_rate": 1.972180583574313e-05, "loss": 0.1337, "step": 2683 }, { "epoch": 2.580769230769231, "grad_norm": 5.934934616088867, "learning_rate": 1.9721513982589372e-05, "loss": 0.2014, "step": 2684 }, { "epoch": 2.581730769230769, "grad_norm": 5.554327011108398, "learning_rate": 1.972122197858625e-05, "loss": 0.2088, "step": 2685 }, { "epoch": 2.582692307692308, "grad_norm": 5.901093482971191, "learning_rate": 1.972092982373829e-05, "loss": 0.2104, "step": 2686 }, { "epoch": 2.583653846153846, "grad_norm": 5.397544860839844, "learning_rate": 1.9720637518050033e-05, "loss": 0.2013, "step": 2687 }, { "epoch": 2.5846153846153848, "grad_norm": 5.240126609802246, "learning_rate": 1.9720345061526008e-05, "loss": 0.1673, "step": 2688 }, { "epoch": 2.585576923076923, "grad_norm": 3.4742515087127686, "learning_rate": 1.972005245417076e-05, "loss": 0.086, "step": 2689 }, { "epoch": 2.5865384615384617, "grad_norm": 4.464771270751953, "learning_rate": 1.9719759695988824e-05, "loss": 0.1584, "step": 2690 }, { "epoch": 2.5875, "grad_norm": 4.819149017333984, "learning_rate": 1.9719466786984742e-05, "loss": 0.224, "step": 2691 }, { "epoch": 2.5884615384615386, "grad_norm": 6.931131362915039, "learning_rate": 1.9719173727163062e-05, "loss": 0.2577, "step": 2692 }, { "epoch": 2.589423076923077, "grad_norm": 3.7326529026031494, "learning_rate": 1.9718880516528335e-05, "loss": 0.144, "step": 2693 }, { "epoch": 2.5903846153846155, "grad_norm": 3.813305616378784, "learning_rate": 1.9718587155085108e-05, "loss": 0.1266, "step": 2694 }, { "epoch": 2.5913461538461537, "grad_norm": 6.30224609375, "learning_rate": 1.971829364283793e-05, "loss": 0.2049, "step": 2695 }, { "epoch": 2.5923076923076924, "grad_norm": 6.147940158843994, "learning_rate": 1.9717999979791356e-05, "loss": 0.2533, "step": 2696 }, { "epoch": 2.5932692307692307, "grad_norm": 5.670370101928711, "learning_rate": 1.9717706165949945e-05, "loss": 0.2571, "step": 2697 }, { "epoch": 2.5942307692307693, "grad_norm": 6.209771156311035, "learning_rate": 1.9717412201318256e-05, "loss": 0.2618, "step": 2698 }, { "epoch": 2.5951923076923076, "grad_norm": 4.900542736053467, "learning_rate": 1.971711808590085e-05, "loss": 0.1937, "step": 2699 }, { "epoch": 2.5961538461538463, "grad_norm": 4.372768878936768, "learning_rate": 1.971682381970229e-05, "loss": 0.1771, "step": 2700 }, { "epoch": 2.5971153846153845, "grad_norm": 4.071897506713867, "learning_rate": 1.9716529402727144e-05, "loss": 0.2086, "step": 2701 }, { "epoch": 2.598076923076923, "grad_norm": 5.888193607330322, "learning_rate": 1.971623483497998e-05, "loss": 0.2319, "step": 2702 }, { "epoch": 2.5990384615384614, "grad_norm": 5.508358001708984, "learning_rate": 1.9715940116465365e-05, "loss": 0.2175, "step": 2703 }, { "epoch": 2.6, "grad_norm": 7.063728332519531, "learning_rate": 1.971564524718788e-05, "loss": 0.3502, "step": 2704 }, { "epoch": 2.6009615384615383, "grad_norm": 3.1751279830932617, "learning_rate": 1.9715350227152093e-05, "loss": 0.0992, "step": 2705 }, { "epoch": 2.601923076923077, "grad_norm": 5.388612270355225, "learning_rate": 1.9715055056362583e-05, "loss": 0.2116, "step": 2706 }, { "epoch": 2.6028846153846152, "grad_norm": 5.9517130851745605, "learning_rate": 1.9714759734823936e-05, "loss": 0.233, "step": 2707 }, { "epoch": 2.603846153846154, "grad_norm": 7.448097229003906, "learning_rate": 1.9714464262540726e-05, "loss": 0.2992, "step": 2708 }, { "epoch": 2.604807692307692, "grad_norm": 3.947622299194336, "learning_rate": 1.9714168639517543e-05, "loss": 0.1851, "step": 2709 }, { "epoch": 2.605769230769231, "grad_norm": 4.9030866622924805, "learning_rate": 1.9713872865758977e-05, "loss": 0.1874, "step": 2710 }, { "epoch": 2.606730769230769, "grad_norm": 5.712357521057129, "learning_rate": 1.9713576941269613e-05, "loss": 0.1705, "step": 2711 }, { "epoch": 2.6076923076923078, "grad_norm": 4.790009498596191, "learning_rate": 1.971328086605404e-05, "loss": 0.1556, "step": 2712 }, { "epoch": 2.608653846153846, "grad_norm": 3.8841965198516846, "learning_rate": 1.971298464011686e-05, "loss": 0.1508, "step": 2713 }, { "epoch": 2.6096153846153847, "grad_norm": 4.273936748504639, "learning_rate": 1.9712688263462665e-05, "loss": 0.1304, "step": 2714 }, { "epoch": 2.6105769230769234, "grad_norm": 5.894554615020752, "learning_rate": 1.9712391736096053e-05, "loss": 0.205, "step": 2715 }, { "epoch": 2.6115384615384616, "grad_norm": 5.827793121337891, "learning_rate": 1.971209505802163e-05, "loss": 0.2851, "step": 2716 }, { "epoch": 2.6125, "grad_norm": 5.66070556640625, "learning_rate": 1.9711798229243993e-05, "loss": 0.2438, "step": 2717 }, { "epoch": 2.6134615384615385, "grad_norm": 7.082469463348389, "learning_rate": 1.971150124976775e-05, "loss": 0.3842, "step": 2718 }, { "epoch": 2.614423076923077, "grad_norm": 5.752699375152588, "learning_rate": 1.9711204119597514e-05, "loss": 0.2221, "step": 2719 }, { "epoch": 2.6153846153846154, "grad_norm": 5.989920139312744, "learning_rate": 1.971090683873789e-05, "loss": 0.2941, "step": 2720 }, { "epoch": 2.6163461538461537, "grad_norm": 4.835052490234375, "learning_rate": 1.9710609407193493e-05, "loss": 0.0919, "step": 2721 }, { "epoch": 2.6173076923076923, "grad_norm": 6.690685272216797, "learning_rate": 1.9710311824968942e-05, "loss": 0.2641, "step": 2722 }, { "epoch": 2.618269230769231, "grad_norm": 6.77543830871582, "learning_rate": 1.9710014092068847e-05, "loss": 0.2503, "step": 2723 }, { "epoch": 2.6192307692307693, "grad_norm": 5.485464572906494, "learning_rate": 1.9709716208497833e-05, "loss": 0.2482, "step": 2724 }, { "epoch": 2.6201923076923075, "grad_norm": 5.704768180847168, "learning_rate": 1.9709418174260523e-05, "loss": 0.2374, "step": 2725 }, { "epoch": 2.621153846153846, "grad_norm": 6.436628341674805, "learning_rate": 1.9709119989361536e-05, "loss": 0.2625, "step": 2726 }, { "epoch": 2.622115384615385, "grad_norm": 6.42435359954834, "learning_rate": 1.9708821653805505e-05, "loss": 0.2997, "step": 2727 }, { "epoch": 2.623076923076923, "grad_norm": 4.853302955627441, "learning_rate": 1.9708523167597055e-05, "loss": 0.2875, "step": 2728 }, { "epoch": 2.6240384615384613, "grad_norm": 5.013129234313965, "learning_rate": 1.9708224530740824e-05, "loss": 0.2274, "step": 2729 }, { "epoch": 2.625, "grad_norm": 7.294846057891846, "learning_rate": 1.970792574324144e-05, "loss": 0.3031, "step": 2730 }, { "epoch": 2.6259615384615387, "grad_norm": 3.8257360458374023, "learning_rate": 1.9707626805103538e-05, "loss": 0.1582, "step": 2731 }, { "epoch": 2.626923076923077, "grad_norm": 4.086266040802002, "learning_rate": 1.970732771633176e-05, "loss": 0.1426, "step": 2732 }, { "epoch": 2.627884615384615, "grad_norm": 3.8983566761016846, "learning_rate": 1.9707028476930747e-05, "loss": 0.117, "step": 2733 }, { "epoch": 2.628846153846154, "grad_norm": 5.256546497344971, "learning_rate": 1.9706729086905145e-05, "loss": 0.2835, "step": 2734 }, { "epoch": 2.6298076923076925, "grad_norm": 4.006194591522217, "learning_rate": 1.9706429546259592e-05, "loss": 0.1209, "step": 2735 }, { "epoch": 2.6307692307692307, "grad_norm": 5.763978004455566, "learning_rate": 1.9706129854998744e-05, "loss": 0.1847, "step": 2736 }, { "epoch": 2.631730769230769, "grad_norm": 5.911851406097412, "learning_rate": 1.9705830013127248e-05, "loss": 0.2844, "step": 2737 }, { "epoch": 2.6326923076923077, "grad_norm": 6.251553058624268, "learning_rate": 1.9705530020649754e-05, "loss": 0.3402, "step": 2738 }, { "epoch": 2.6336538461538463, "grad_norm": 5.490074634552002, "learning_rate": 1.9705229877570916e-05, "loss": 0.2265, "step": 2739 }, { "epoch": 2.6346153846153846, "grad_norm": 4.196887969970703, "learning_rate": 1.9704929583895404e-05, "loss": 0.2024, "step": 2740 }, { "epoch": 2.635576923076923, "grad_norm": 4.697658061981201, "learning_rate": 1.970462913962786e-05, "loss": 0.1841, "step": 2741 }, { "epoch": 2.6365384615384615, "grad_norm": 4.822288990020752, "learning_rate": 1.970432854477296e-05, "loss": 0.1305, "step": 2742 }, { "epoch": 2.6375, "grad_norm": 5.088793754577637, "learning_rate": 1.970402779933536e-05, "loss": 0.2781, "step": 2743 }, { "epoch": 2.6384615384615384, "grad_norm": 5.488014221191406, "learning_rate": 1.970372690331973e-05, "loss": 0.2021, "step": 2744 }, { "epoch": 2.6394230769230766, "grad_norm": 6.957672119140625, "learning_rate": 1.970342585673074e-05, "loss": 0.2878, "step": 2745 }, { "epoch": 2.6403846153846153, "grad_norm": 5.8081889152526855, "learning_rate": 1.970312465957306e-05, "loss": 0.2232, "step": 2746 }, { "epoch": 2.641346153846154, "grad_norm": 4.9656171798706055, "learning_rate": 1.970282331185136e-05, "loss": 0.169, "step": 2747 }, { "epoch": 2.6423076923076922, "grad_norm": 4.947081565856934, "learning_rate": 1.9702521813570322e-05, "loss": 0.2509, "step": 2748 }, { "epoch": 2.643269230769231, "grad_norm": 6.0286383628845215, "learning_rate": 1.9702220164734624e-05, "loss": 0.2507, "step": 2749 }, { "epoch": 2.644230769230769, "grad_norm": 6.877699851989746, "learning_rate": 1.9701918365348943e-05, "loss": 0.2382, "step": 2750 }, { "epoch": 2.645192307692308, "grad_norm": 3.713202953338623, "learning_rate": 1.9701616415417963e-05, "loss": 0.1099, "step": 2751 }, { "epoch": 2.646153846153846, "grad_norm": 3.967100143432617, "learning_rate": 1.9701314314946368e-05, "loss": 0.1333, "step": 2752 }, { "epoch": 2.6471153846153848, "grad_norm": 4.659093379974365, "learning_rate": 1.9701012063938854e-05, "loss": 0.1339, "step": 2753 }, { "epoch": 2.648076923076923, "grad_norm": 5.124791622161865, "learning_rate": 1.97007096624001e-05, "loss": 0.2276, "step": 2754 }, { "epoch": 2.6490384615384617, "grad_norm": 5.692117214202881, "learning_rate": 1.9700407110334804e-05, "loss": 0.2274, "step": 2755 }, { "epoch": 2.65, "grad_norm": 5.326379776000977, "learning_rate": 1.9700104407747663e-05, "loss": 0.2396, "step": 2756 }, { "epoch": 2.6509615384615386, "grad_norm": 5.0015387535095215, "learning_rate": 1.9699801554643367e-05, "loss": 0.2266, "step": 2757 }, { "epoch": 2.651923076923077, "grad_norm": 3.8245739936828613, "learning_rate": 1.9699498551026623e-05, "loss": 0.1517, "step": 2758 }, { "epoch": 2.6528846153846155, "grad_norm": 6.491021156311035, "learning_rate": 1.9699195396902128e-05, "loss": 0.2494, "step": 2759 }, { "epoch": 2.6538461538461537, "grad_norm": 39.23085021972656, "learning_rate": 1.9698892092274584e-05, "loss": 0.0974, "step": 2760 }, { "epoch": 2.6548076923076924, "grad_norm": 5.020315647125244, "learning_rate": 1.9698588637148705e-05, "loss": 0.2575, "step": 2761 }, { "epoch": 2.6557692307692307, "grad_norm": 5.324814796447754, "learning_rate": 1.9698285031529195e-05, "loss": 0.1365, "step": 2762 }, { "epoch": 2.6567307692307693, "grad_norm": 5.811254024505615, "learning_rate": 1.969798127542076e-05, "loss": 0.3196, "step": 2763 }, { "epoch": 2.6576923076923076, "grad_norm": 5.111333847045898, "learning_rate": 1.9697677368828127e-05, "loss": 0.1847, "step": 2764 }, { "epoch": 2.6586538461538463, "grad_norm": 6.315842151641846, "learning_rate": 1.9697373311755997e-05, "loss": 0.58, "step": 2765 }, { "epoch": 2.6596153846153845, "grad_norm": 4.91906213760376, "learning_rate": 1.9697069104209098e-05, "loss": 0.1684, "step": 2766 }, { "epoch": 2.660576923076923, "grad_norm": 4.960872173309326, "learning_rate": 1.9696764746192146e-05, "loss": 0.1784, "step": 2767 }, { "epoch": 2.6615384615384614, "grad_norm": 3.8591248989105225, "learning_rate": 1.9696460237709867e-05, "loss": 0.2234, "step": 2768 }, { "epoch": 2.6625, "grad_norm": 3.9620096683502197, "learning_rate": 1.969615557876698e-05, "loss": 0.2875, "step": 2769 }, { "epoch": 2.6634615384615383, "grad_norm": 3.91888427734375, "learning_rate": 1.9695850769368217e-05, "loss": 0.1406, "step": 2770 }, { "epoch": 2.664423076923077, "grad_norm": 3.6694743633270264, "learning_rate": 1.969554580951831e-05, "loss": 0.1264, "step": 2771 }, { "epoch": 2.6653846153846152, "grad_norm": 4.9203925132751465, "learning_rate": 1.9695240699221988e-05, "loss": 0.2256, "step": 2772 }, { "epoch": 2.666346153846154, "grad_norm": 5.559036731719971, "learning_rate": 1.9694935438483985e-05, "loss": 0.3003, "step": 2773 }, { "epoch": 2.667307692307692, "grad_norm": 4.818363666534424, "learning_rate": 1.9694630027309035e-05, "loss": 0.198, "step": 2774 }, { "epoch": 2.668269230769231, "grad_norm": 6.347158908843994, "learning_rate": 1.9694324465701883e-05, "loss": 0.2686, "step": 2775 }, { "epoch": 2.669230769230769, "grad_norm": 3.911846876144409, "learning_rate": 1.9694018753667268e-05, "loss": 0.1569, "step": 2776 }, { "epoch": 2.6701923076923078, "grad_norm": 5.837467193603516, "learning_rate": 1.9693712891209934e-05, "loss": 0.3718, "step": 2777 }, { "epoch": 2.671153846153846, "grad_norm": 4.780585289001465, "learning_rate": 1.9693406878334623e-05, "loss": 0.1784, "step": 2778 }, { "epoch": 2.6721153846153847, "grad_norm": 4.662821292877197, "learning_rate": 1.969310071504609e-05, "loss": 0.171, "step": 2779 }, { "epoch": 2.6730769230769234, "grad_norm": 6.163466453552246, "learning_rate": 1.9692794401349082e-05, "loss": 0.3203, "step": 2780 }, { "epoch": 2.6740384615384616, "grad_norm": 4.993046283721924, "learning_rate": 1.9692487937248352e-05, "loss": 0.3217, "step": 2781 }, { "epoch": 2.675, "grad_norm": 4.806095123291016, "learning_rate": 1.969218132274866e-05, "loss": 0.2222, "step": 2782 }, { "epoch": 2.6759615384615385, "grad_norm": 4.663135051727295, "learning_rate": 1.9691874557854756e-05, "loss": 0.2393, "step": 2783 }, { "epoch": 2.676923076923077, "grad_norm": 4.035224437713623, "learning_rate": 1.9691567642571407e-05, "loss": 0.1481, "step": 2784 }, { "epoch": 2.6778846153846154, "grad_norm": 5.727306365966797, "learning_rate": 1.9691260576903372e-05, "loss": 0.3511, "step": 2785 }, { "epoch": 2.6788461538461537, "grad_norm": 4.696863174438477, "learning_rate": 1.9690953360855415e-05, "loss": 0.2465, "step": 2786 }, { "epoch": 2.6798076923076923, "grad_norm": 6.710147857666016, "learning_rate": 1.9690645994432307e-05, "loss": 0.2667, "step": 2787 }, { "epoch": 2.680769230769231, "grad_norm": 5.146725177764893, "learning_rate": 1.969033847763881e-05, "loss": 0.2013, "step": 2788 }, { "epoch": 2.6817307692307693, "grad_norm": 7.20049524307251, "learning_rate": 1.9690030810479706e-05, "loss": 0.3545, "step": 2789 }, { "epoch": 2.6826923076923075, "grad_norm": 4.807055473327637, "learning_rate": 1.9689722992959762e-05, "loss": 0.152, "step": 2790 }, { "epoch": 2.683653846153846, "grad_norm": 4.760829448699951, "learning_rate": 1.9689415025083755e-05, "loss": 0.2561, "step": 2791 }, { "epoch": 2.684615384615385, "grad_norm": 6.03800630569458, "learning_rate": 1.9689106906856466e-05, "loss": 0.2496, "step": 2792 }, { "epoch": 2.685576923076923, "grad_norm": 5.289977550506592, "learning_rate": 1.9688798638282675e-05, "loss": 0.2946, "step": 2793 }, { "epoch": 2.6865384615384613, "grad_norm": 6.207324504852295, "learning_rate": 1.9688490219367163e-05, "loss": 0.3452, "step": 2794 }, { "epoch": 2.6875, "grad_norm": 6.679322242736816, "learning_rate": 1.968818165011472e-05, "loss": 0.1563, "step": 2795 }, { "epoch": 2.6884615384615387, "grad_norm": 5.198542594909668, "learning_rate": 1.9687872930530133e-05, "loss": 0.1721, "step": 2796 }, { "epoch": 2.689423076923077, "grad_norm": 4.411172866821289, "learning_rate": 1.9687564060618193e-05, "loss": 0.2194, "step": 2797 }, { "epoch": 2.690384615384615, "grad_norm": 4.97967004776001, "learning_rate": 1.968725504038369e-05, "loss": 0.216, "step": 2798 }, { "epoch": 2.691346153846154, "grad_norm": 6.837647914886475, "learning_rate": 1.968694586983142e-05, "loss": 0.2463, "step": 2799 }, { "epoch": 2.6923076923076925, "grad_norm": 6.432051181793213, "learning_rate": 1.9686636548966177e-05, "loss": 0.399, "step": 2800 }, { "epoch": 2.6932692307692307, "grad_norm": 6.378058910369873, "learning_rate": 1.9686327077792772e-05, "loss": 0.3047, "step": 2801 }, { "epoch": 2.694230769230769, "grad_norm": 4.013583183288574, "learning_rate": 1.9686017456315996e-05, "loss": 0.1264, "step": 2802 }, { "epoch": 2.6951923076923077, "grad_norm": 5.321689605712891, "learning_rate": 1.9685707684540657e-05, "loss": 0.2068, "step": 2803 }, { "epoch": 2.6961538461538463, "grad_norm": 3.26872181892395, "learning_rate": 1.9685397762471562e-05, "loss": 0.1001, "step": 2804 }, { "epoch": 2.6971153846153846, "grad_norm": 4.369649410247803, "learning_rate": 1.968508769011352e-05, "loss": 0.0954, "step": 2805 }, { "epoch": 2.698076923076923, "grad_norm": 2.9532527923583984, "learning_rate": 1.9684777467471346e-05, "loss": 0.0676, "step": 2806 }, { "epoch": 2.6990384615384615, "grad_norm": 4.917516231536865, "learning_rate": 1.9684467094549847e-05, "loss": 0.2043, "step": 2807 }, { "epoch": 2.7, "grad_norm": 4.662053108215332, "learning_rate": 1.9684156571353843e-05, "loss": 0.1771, "step": 2808 }, { "epoch": 2.7009615384615384, "grad_norm": 5.420110702514648, "learning_rate": 1.968384589788815e-05, "loss": 0.2264, "step": 2809 }, { "epoch": 2.7019230769230766, "grad_norm": 5.507479190826416, "learning_rate": 1.968353507415759e-05, "loss": 0.1816, "step": 2810 }, { "epoch": 2.7028846153846153, "grad_norm": 5.838038921356201, "learning_rate": 1.968322410016699e-05, "loss": 0.2184, "step": 2811 }, { "epoch": 2.703846153846154, "grad_norm": 4.703076362609863, "learning_rate": 1.968291297592117e-05, "loss": 0.1315, "step": 2812 }, { "epoch": 2.7048076923076922, "grad_norm": 4.610999584197998, "learning_rate": 1.9682601701424958e-05, "loss": 0.1777, "step": 2813 }, { "epoch": 2.705769230769231, "grad_norm": 4.578296661376953, "learning_rate": 1.968229027668319e-05, "loss": 0.1432, "step": 2814 }, { "epoch": 2.706730769230769, "grad_norm": 4.934652328491211, "learning_rate": 1.968197870170069e-05, "loss": 0.1635, "step": 2815 }, { "epoch": 2.707692307692308, "grad_norm": 7.144440650939941, "learning_rate": 1.9681666976482298e-05, "loss": 0.3577, "step": 2816 }, { "epoch": 2.708653846153846, "grad_norm": 6.961996078491211, "learning_rate": 1.968135510103285e-05, "loss": 0.247, "step": 2817 }, { "epoch": 2.7096153846153848, "grad_norm": 4.915164947509766, "learning_rate": 1.968104307535719e-05, "loss": 0.1632, "step": 2818 }, { "epoch": 2.710576923076923, "grad_norm": 6.871428489685059, "learning_rate": 1.9680730899460148e-05, "loss": 0.2522, "step": 2819 }, { "epoch": 2.7115384615384617, "grad_norm": 5.403188228607178, "learning_rate": 1.9680418573346575e-05, "loss": 0.2984, "step": 2820 }, { "epoch": 2.7125, "grad_norm": 6.62227725982666, "learning_rate": 1.968010609702132e-05, "loss": 0.2742, "step": 2821 }, { "epoch": 2.7134615384615386, "grad_norm": 7.132105350494385, "learning_rate": 1.967979347048923e-05, "loss": 0.336, "step": 2822 }, { "epoch": 2.714423076923077, "grad_norm": 6.0303497314453125, "learning_rate": 1.9679480693755155e-05, "loss": 0.3127, "step": 2823 }, { "epoch": 2.7153846153846155, "grad_norm": 3.329627275466919, "learning_rate": 1.9679167766823948e-05, "loss": 0.0849, "step": 2824 }, { "epoch": 2.7163461538461537, "grad_norm": 4.828161716461182, "learning_rate": 1.9678854689700463e-05, "loss": 0.1737, "step": 2825 }, { "epoch": 2.7173076923076924, "grad_norm": 6.22435188293457, "learning_rate": 1.9678541462389564e-05, "loss": 0.2379, "step": 2826 }, { "epoch": 2.7182692307692307, "grad_norm": 5.782882213592529, "learning_rate": 1.9678228084896104e-05, "loss": 0.239, "step": 2827 }, { "epoch": 2.7192307692307693, "grad_norm": 4.500005722045898, "learning_rate": 1.9677914557224953e-05, "loss": 0.1768, "step": 2828 }, { "epoch": 2.7201923076923076, "grad_norm": 5.813259124755859, "learning_rate": 1.9677600879380972e-05, "loss": 0.2234, "step": 2829 }, { "epoch": 2.7211538461538463, "grad_norm": 4.062171936035156, "learning_rate": 1.9677287051369024e-05, "loss": 0.1428, "step": 2830 }, { "epoch": 2.7221153846153845, "grad_norm": 5.748444557189941, "learning_rate": 1.9676973073193988e-05, "loss": 0.2251, "step": 2831 }, { "epoch": 2.723076923076923, "grad_norm": 4.632853031158447, "learning_rate": 1.9676658944860732e-05, "loss": 0.1339, "step": 2832 }, { "epoch": 2.7240384615384614, "grad_norm": 3.8894288539886475, "learning_rate": 1.967634466637413e-05, "loss": 0.1313, "step": 2833 }, { "epoch": 2.725, "grad_norm": 5.366718769073486, "learning_rate": 1.9676030237739053e-05, "loss": 0.1912, "step": 2834 }, { "epoch": 2.7259615384615383, "grad_norm": 4.987998008728027, "learning_rate": 1.967571565896039e-05, "loss": 0.1938, "step": 2835 }, { "epoch": 2.726923076923077, "grad_norm": 5.649818420410156, "learning_rate": 1.9675400930043018e-05, "loss": 0.2201, "step": 2836 }, { "epoch": 2.7278846153846152, "grad_norm": 5.189840793609619, "learning_rate": 1.967508605099182e-05, "loss": 0.2279, "step": 2837 }, { "epoch": 2.728846153846154, "grad_norm": 5.603961944580078, "learning_rate": 1.9674771021811682e-05, "loss": 0.3741, "step": 2838 }, { "epoch": 2.729807692307692, "grad_norm": 5.427038669586182, "learning_rate": 1.9674455842507494e-05, "loss": 0.2509, "step": 2839 }, { "epoch": 2.730769230769231, "grad_norm": 5.728171348571777, "learning_rate": 1.9674140513084144e-05, "loss": 0.418, "step": 2840 }, { "epoch": 2.731730769230769, "grad_norm": 4.790779113769531, "learning_rate": 1.9673825033546526e-05, "loss": 0.2717, "step": 2841 }, { "epoch": 2.7326923076923078, "grad_norm": 4.940395832061768, "learning_rate": 1.9673509403899538e-05, "loss": 0.2518, "step": 2842 }, { "epoch": 2.733653846153846, "grad_norm": 7.289984703063965, "learning_rate": 1.9673193624148076e-05, "loss": 0.2185, "step": 2843 }, { "epoch": 2.7346153846153847, "grad_norm": 4.968393802642822, "learning_rate": 1.9672877694297037e-05, "loss": 0.2042, "step": 2844 }, { "epoch": 2.7355769230769234, "grad_norm": 5.747406482696533, "learning_rate": 1.9672561614351325e-05, "loss": 0.2102, "step": 2845 }, { "epoch": 2.7365384615384616, "grad_norm": 3.4348855018615723, "learning_rate": 1.9672245384315847e-05, "loss": 0.1031, "step": 2846 }, { "epoch": 2.7375, "grad_norm": 3.2266652584075928, "learning_rate": 1.967192900419551e-05, "loss": 0.0979, "step": 2847 }, { "epoch": 2.7384615384615385, "grad_norm": 4.863850116729736, "learning_rate": 1.967161247399522e-05, "loss": 0.2949, "step": 2848 }, { "epoch": 2.739423076923077, "grad_norm": 5.890815258026123, "learning_rate": 1.9671295793719893e-05, "loss": 0.2462, "step": 2849 }, { "epoch": 2.7403846153846154, "grad_norm": 4.834710597991943, "learning_rate": 1.9670978963374435e-05, "loss": 0.1635, "step": 2850 }, { "epoch": 2.7413461538461537, "grad_norm": 3.1105406284332275, "learning_rate": 1.967066198296377e-05, "loss": 0.106, "step": 2851 }, { "epoch": 2.7423076923076923, "grad_norm": 5.216267108917236, "learning_rate": 1.9670344852492814e-05, "loss": 0.2654, "step": 2852 }, { "epoch": 2.743269230769231, "grad_norm": 4.4570465087890625, "learning_rate": 1.9670027571966486e-05, "loss": 0.1365, "step": 2853 }, { "epoch": 2.7442307692307693, "grad_norm": 6.203767776489258, "learning_rate": 1.9669710141389714e-05, "loss": 0.4542, "step": 2854 }, { "epoch": 2.7451923076923075, "grad_norm": 3.9510726928710938, "learning_rate": 1.9669392560767422e-05, "loss": 0.1051, "step": 2855 }, { "epoch": 2.746153846153846, "grad_norm": 7.019155025482178, "learning_rate": 1.9669074830104533e-05, "loss": 0.245, "step": 2856 }, { "epoch": 2.747115384615385, "grad_norm": 4.5069427490234375, "learning_rate": 1.9668756949405986e-05, "loss": 0.1923, "step": 2857 }, { "epoch": 2.748076923076923, "grad_norm": 4.3341851234436035, "learning_rate": 1.9668438918676705e-05, "loss": 0.143, "step": 2858 }, { "epoch": 2.7490384615384613, "grad_norm": 5.7922892570495605, "learning_rate": 1.9668120737921626e-05, "loss": 0.181, "step": 2859 }, { "epoch": 2.75, "grad_norm": 6.773197650909424, "learning_rate": 1.9667802407145692e-05, "loss": 0.3355, "step": 2860 }, { "epoch": 2.7509615384615387, "grad_norm": 4.666913986206055, "learning_rate": 1.966748392635384e-05, "loss": 0.2698, "step": 2861 }, { "epoch": 2.751923076923077, "grad_norm": 5.752151966094971, "learning_rate": 1.9667165295551013e-05, "loss": 0.4779, "step": 2862 }, { "epoch": 2.752884615384615, "grad_norm": 3.1222949028015137, "learning_rate": 1.9666846514742148e-05, "loss": 0.0672, "step": 2863 }, { "epoch": 2.753846153846154, "grad_norm": 6.136159420013428, "learning_rate": 1.9666527583932197e-05, "loss": 0.2994, "step": 2864 }, { "epoch": 2.7548076923076925, "grad_norm": 5.655993938446045, "learning_rate": 1.9666208503126115e-05, "loss": 0.2879, "step": 2865 }, { "epoch": 2.7557692307692307, "grad_norm": 4.929742336273193, "learning_rate": 1.9665889272328842e-05, "loss": 0.2448, "step": 2866 }, { "epoch": 2.756730769230769, "grad_norm": 6.114670753479004, "learning_rate": 1.9665569891545337e-05, "loss": 0.2894, "step": 2867 }, { "epoch": 2.7576923076923077, "grad_norm": 5.557150363922119, "learning_rate": 1.9665250360780558e-05, "loss": 0.2576, "step": 2868 }, { "epoch": 2.7586538461538463, "grad_norm": 5.353416442871094, "learning_rate": 1.9664930680039457e-05, "loss": 0.2347, "step": 2869 }, { "epoch": 2.7596153846153846, "grad_norm": 4.883788585662842, "learning_rate": 1.9664610849327003e-05, "loss": 0.2284, "step": 2870 }, { "epoch": 2.760576923076923, "grad_norm": 4.769567966461182, "learning_rate": 1.9664290868648147e-05, "loss": 0.1333, "step": 2871 }, { "epoch": 2.7615384615384615, "grad_norm": 5.917158126831055, "learning_rate": 1.966397073800787e-05, "loss": 0.2852, "step": 2872 }, { "epoch": 2.7625, "grad_norm": 5.781335353851318, "learning_rate": 1.9663650457411123e-05, "loss": 0.2608, "step": 2873 }, { "epoch": 2.7634615384615384, "grad_norm": 5.0086669921875, "learning_rate": 1.9663330026862885e-05, "loss": 0.2322, "step": 2874 }, { "epoch": 2.7644230769230766, "grad_norm": 4.859689712524414, "learning_rate": 1.9663009446368128e-05, "loss": 0.2149, "step": 2875 }, { "epoch": 2.7653846153846153, "grad_norm": 5.542873382568359, "learning_rate": 1.9662688715931825e-05, "loss": 0.2002, "step": 2876 }, { "epoch": 2.766346153846154, "grad_norm": 5.229291915893555, "learning_rate": 1.9662367835558954e-05, "loss": 0.2552, "step": 2877 }, { "epoch": 2.7673076923076922, "grad_norm": 2.9662723541259766, "learning_rate": 1.966204680525449e-05, "loss": 0.0545, "step": 2878 }, { "epoch": 2.768269230769231, "grad_norm": 6.317688465118408, "learning_rate": 1.9661725625023416e-05, "loss": 0.1929, "step": 2879 }, { "epoch": 2.769230769230769, "grad_norm": 8.729328155517578, "learning_rate": 1.966140429487072e-05, "loss": 0.3712, "step": 2880 }, { "epoch": 2.770192307692308, "grad_norm": 3.854649543762207, "learning_rate": 1.9661082814801387e-05, "loss": 0.1615, "step": 2881 }, { "epoch": 2.771153846153846, "grad_norm": 4.461411476135254, "learning_rate": 1.96607611848204e-05, "loss": 0.1804, "step": 2882 }, { "epoch": 2.7721153846153848, "grad_norm": 6.445132732391357, "learning_rate": 1.9660439404932753e-05, "loss": 0.2272, "step": 2883 }, { "epoch": 2.773076923076923, "grad_norm": 4.610062599182129, "learning_rate": 1.966011747514344e-05, "loss": 0.1903, "step": 2884 }, { "epoch": 2.7740384615384617, "grad_norm": 4.88641881942749, "learning_rate": 1.965979539545746e-05, "loss": 0.203, "step": 2885 }, { "epoch": 2.775, "grad_norm": 4.970893859863281, "learning_rate": 1.96594731658798e-05, "loss": 0.184, "step": 2886 }, { "epoch": 2.7759615384615386, "grad_norm": 5.163666248321533, "learning_rate": 1.9659150786415474e-05, "loss": 0.2297, "step": 2887 }, { "epoch": 2.776923076923077, "grad_norm": 3.648752450942993, "learning_rate": 1.9658828257069477e-05, "loss": 0.1055, "step": 2888 }, { "epoch": 2.7778846153846155, "grad_norm": 6.739298343658447, "learning_rate": 1.965850557784681e-05, "loss": 0.3544, "step": 2889 }, { "epoch": 2.7788461538461537, "grad_norm": 7.144076347351074, "learning_rate": 1.9658182748752482e-05, "loss": 0.4615, "step": 2890 }, { "epoch": 2.7798076923076924, "grad_norm": 4.475281715393066, "learning_rate": 1.9657859769791506e-05, "loss": 0.1633, "step": 2891 }, { "epoch": 2.7807692307692307, "grad_norm": 5.255269527435303, "learning_rate": 1.9657536640968894e-05, "loss": 0.1415, "step": 2892 }, { "epoch": 2.7817307692307693, "grad_norm": 5.615118503570557, "learning_rate": 1.9657213362289657e-05, "loss": 0.3398, "step": 2893 }, { "epoch": 2.7826923076923076, "grad_norm": 5.495492935180664, "learning_rate": 1.965688993375881e-05, "loss": 0.1851, "step": 2894 }, { "epoch": 2.7836538461538463, "grad_norm": 6.297289848327637, "learning_rate": 1.9656566355381375e-05, "loss": 0.3559, "step": 2895 }, { "epoch": 2.7846153846153845, "grad_norm": 6.038186550140381, "learning_rate": 1.9656242627162373e-05, "loss": 0.2091, "step": 2896 }, { "epoch": 2.785576923076923, "grad_norm": 4.432078838348389, "learning_rate": 1.9655918749106823e-05, "loss": 0.2031, "step": 2897 }, { "epoch": 2.7865384615384614, "grad_norm": 4.9365315437316895, "learning_rate": 1.9655594721219758e-05, "loss": 0.2977, "step": 2898 }, { "epoch": 2.7875, "grad_norm": 5.976475238800049, "learning_rate": 1.96552705435062e-05, "loss": 0.3316, "step": 2899 }, { "epoch": 2.7884615384615383, "grad_norm": 7.349127292633057, "learning_rate": 1.9654946215971182e-05, "loss": 0.3155, "step": 2900 }, { "epoch": 2.789423076923077, "grad_norm": 4.691496849060059, "learning_rate": 1.965462173861973e-05, "loss": 0.1791, "step": 2901 }, { "epoch": 2.7903846153846152, "grad_norm": 5.154839038848877, "learning_rate": 1.965429711145689e-05, "loss": 0.1388, "step": 2902 }, { "epoch": 2.791346153846154, "grad_norm": 5.114126205444336, "learning_rate": 1.9653972334487692e-05, "loss": 0.3019, "step": 2903 }, { "epoch": 2.792307692307692, "grad_norm": 6.139132499694824, "learning_rate": 1.965364740771718e-05, "loss": 0.2921, "step": 2904 }, { "epoch": 2.793269230769231, "grad_norm": 4.392107009887695, "learning_rate": 1.965332233115039e-05, "loss": 0.1835, "step": 2905 }, { "epoch": 2.794230769230769, "grad_norm": 4.811239719390869, "learning_rate": 1.9652997104792373e-05, "loss": 0.2035, "step": 2906 }, { "epoch": 2.7951923076923078, "grad_norm": 5.27248477935791, "learning_rate": 1.965267172864817e-05, "loss": 0.1911, "step": 2907 }, { "epoch": 2.796153846153846, "grad_norm": 3.7075531482696533, "learning_rate": 1.9652346202722832e-05, "loss": 0.142, "step": 2908 }, { "epoch": 2.7971153846153847, "grad_norm": 3.787740468978882, "learning_rate": 1.9652020527021412e-05, "loss": 0.1015, "step": 2909 }, { "epoch": 2.7980769230769234, "grad_norm": 7.271007061004639, "learning_rate": 1.965169470154896e-05, "loss": 0.4691, "step": 2910 }, { "epoch": 2.7990384615384616, "grad_norm": 4.301021099090576, "learning_rate": 1.965136872631053e-05, "loss": 0.2948, "step": 2911 }, { "epoch": 2.8, "grad_norm": 5.582171440124512, "learning_rate": 1.9651042601311193e-05, "loss": 0.1744, "step": 2912 }, { "epoch": 2.8009615384615385, "grad_norm": 4.713108539581299, "learning_rate": 1.965071632655599e-05, "loss": 0.1715, "step": 2913 }, { "epoch": 2.801923076923077, "grad_norm": 4.1910929679870605, "learning_rate": 1.965038990205e-05, "loss": 0.1562, "step": 2914 }, { "epoch": 2.8028846153846154, "grad_norm": 5.967095375061035, "learning_rate": 1.9650063327798283e-05, "loss": 0.2143, "step": 2915 }, { "epoch": 2.8038461538461537, "grad_norm": 6.041947841644287, "learning_rate": 1.9649736603805906e-05, "loss": 0.2381, "step": 2916 }, { "epoch": 2.8048076923076923, "grad_norm": 5.816675662994385, "learning_rate": 1.9649409730077934e-05, "loss": 0.3031, "step": 2917 }, { "epoch": 2.805769230769231, "grad_norm": 6.957763671875, "learning_rate": 1.9649082706619447e-05, "loss": 0.3871, "step": 2918 }, { "epoch": 2.8067307692307693, "grad_norm": 4.7330474853515625, "learning_rate": 1.9648755533435517e-05, "loss": 0.2788, "step": 2919 }, { "epoch": 2.8076923076923075, "grad_norm": 3.9301815032958984, "learning_rate": 1.9648428210531218e-05, "loss": 0.193, "step": 2920 }, { "epoch": 2.808653846153846, "grad_norm": 5.846735954284668, "learning_rate": 1.964810073791163e-05, "loss": 0.2262, "step": 2921 }, { "epoch": 2.809615384615385, "grad_norm": 5.1407647132873535, "learning_rate": 1.9647773115581835e-05, "loss": 0.2353, "step": 2922 }, { "epoch": 2.810576923076923, "grad_norm": 3.1071181297302246, "learning_rate": 1.964744534354692e-05, "loss": 0.1245, "step": 2923 }, { "epoch": 2.8115384615384613, "grad_norm": 6.221703052520752, "learning_rate": 1.964711742181197e-05, "loss": 0.2352, "step": 2924 }, { "epoch": 2.8125, "grad_norm": 5.925057411193848, "learning_rate": 1.9646789350382066e-05, "loss": 0.316, "step": 2925 }, { "epoch": 2.8134615384615387, "grad_norm": 2.9338302612304688, "learning_rate": 1.964646112926231e-05, "loss": 0.1098, "step": 2926 }, { "epoch": 2.814423076923077, "grad_norm": 4.477776527404785, "learning_rate": 1.9646132758457783e-05, "loss": 0.1552, "step": 2927 }, { "epoch": 2.815384615384615, "grad_norm": 4.813665866851807, "learning_rate": 1.964580423797359e-05, "loss": 0.1523, "step": 2928 }, { "epoch": 2.816346153846154, "grad_norm": 6.059589862823486, "learning_rate": 1.9645475567814826e-05, "loss": 0.2317, "step": 2929 }, { "epoch": 2.8173076923076925, "grad_norm": 4.519842624664307, "learning_rate": 1.964514674798659e-05, "loss": 0.1468, "step": 2930 }, { "epoch": 2.8182692307692307, "grad_norm": 6.8209404945373535, "learning_rate": 1.9644817778493984e-05, "loss": 0.1989, "step": 2931 }, { "epoch": 2.819230769230769, "grad_norm": 4.209668159484863, "learning_rate": 1.9644488659342113e-05, "loss": 0.176, "step": 2932 }, { "epoch": 2.8201923076923077, "grad_norm": 6.77686071395874, "learning_rate": 1.9644159390536086e-05, "loss": 0.3396, "step": 2933 }, { "epoch": 2.8211538461538463, "grad_norm": 5.785131931304932, "learning_rate": 1.9643829972081007e-05, "loss": 0.2239, "step": 2934 }, { "epoch": 2.8221153846153846, "grad_norm": 5.003313064575195, "learning_rate": 1.9643500403981996e-05, "loss": 0.1682, "step": 2935 }, { "epoch": 2.823076923076923, "grad_norm": 4.290151596069336, "learning_rate": 1.964317068624416e-05, "loss": 0.1349, "step": 2936 }, { "epoch": 2.8240384615384615, "grad_norm": 8.976717948913574, "learning_rate": 1.9642840818872616e-05, "loss": 0.387, "step": 2937 }, { "epoch": 2.825, "grad_norm": 5.872025966644287, "learning_rate": 1.9642510801872482e-05, "loss": 0.2214, "step": 2938 }, { "epoch": 2.8259615384615384, "grad_norm": 5.152150630950928, "learning_rate": 1.9642180635248884e-05, "loss": 0.2542, "step": 2939 }, { "epoch": 2.8269230769230766, "grad_norm": 7.082061767578125, "learning_rate": 1.9641850319006942e-05, "loss": 0.3952, "step": 2940 }, { "epoch": 2.8278846153846153, "grad_norm": 6.098680019378662, "learning_rate": 1.964151985315178e-05, "loss": 0.2436, "step": 2941 }, { "epoch": 2.828846153846154, "grad_norm": 4.896108627319336, "learning_rate": 1.964118923768853e-05, "loss": 0.1737, "step": 2942 }, { "epoch": 2.8298076923076922, "grad_norm": 5.350537300109863, "learning_rate": 1.9640858472622316e-05, "loss": 0.2045, "step": 2943 }, { "epoch": 2.830769230769231, "grad_norm": 4.70806360244751, "learning_rate": 1.9640527557958278e-05, "loss": 0.1366, "step": 2944 }, { "epoch": 2.831730769230769, "grad_norm": 3.6627984046936035, "learning_rate": 1.9640196493701543e-05, "loss": 0.1983, "step": 2945 }, { "epoch": 2.832692307692308, "grad_norm": 5.081018447875977, "learning_rate": 1.9639865279857256e-05, "loss": 0.1681, "step": 2946 }, { "epoch": 2.833653846153846, "grad_norm": 5.785346031188965, "learning_rate": 1.963953391643055e-05, "loss": 0.27, "step": 2947 }, { "epoch": 2.8346153846153848, "grad_norm": 6.65278434753418, "learning_rate": 1.963920240342657e-05, "loss": 0.3462, "step": 2948 }, { "epoch": 2.835576923076923, "grad_norm": 5.54983377456665, "learning_rate": 1.963887074085046e-05, "loss": 0.1584, "step": 2949 }, { "epoch": 2.8365384615384617, "grad_norm": 4.543923854827881, "learning_rate": 1.9638538928707363e-05, "loss": 0.1426, "step": 2950 }, { "epoch": 2.8375, "grad_norm": 8.178581237792969, "learning_rate": 1.9638206967002433e-05, "loss": 0.4203, "step": 2951 }, { "epoch": 2.8384615384615386, "grad_norm": 5.564050674438477, "learning_rate": 1.9637874855740818e-05, "loss": 0.228, "step": 2952 }, { "epoch": 2.839423076923077, "grad_norm": 5.732683181762695, "learning_rate": 1.9637542594927673e-05, "loss": 0.1693, "step": 2953 }, { "epoch": 2.8403846153846155, "grad_norm": 4.417923927307129, "learning_rate": 1.963721018456815e-05, "loss": 0.1894, "step": 2954 }, { "epoch": 2.8413461538461537, "grad_norm": 3.309131383895874, "learning_rate": 1.9636877624667414e-05, "loss": 0.1106, "step": 2955 }, { "epoch": 2.8423076923076924, "grad_norm": 6.632902145385742, "learning_rate": 1.963654491523062e-05, "loss": 0.2595, "step": 2956 }, { "epoch": 2.8432692307692307, "grad_norm": 5.059811115264893, "learning_rate": 1.963621205626293e-05, "loss": 0.1334, "step": 2957 }, { "epoch": 2.8442307692307693, "grad_norm": 7.115297317504883, "learning_rate": 1.9635879047769513e-05, "loss": 0.2179, "step": 2958 }, { "epoch": 2.8451923076923076, "grad_norm": 5.280583381652832, "learning_rate": 1.9635545889755532e-05, "loss": 0.3399, "step": 2959 }, { "epoch": 2.8461538461538463, "grad_norm": 5.133427619934082, "learning_rate": 1.963521258222616e-05, "loss": 0.1807, "step": 2960 }, { "epoch": 2.8471153846153845, "grad_norm": 5.80424690246582, "learning_rate": 1.9634879125186567e-05, "loss": 0.2176, "step": 2961 }, { "epoch": 2.848076923076923, "grad_norm": 3.3277900218963623, "learning_rate": 1.963454551864193e-05, "loss": 0.0929, "step": 2962 }, { "epoch": 2.8490384615384614, "grad_norm": 6.816668510437012, "learning_rate": 1.963421176259742e-05, "loss": 0.3455, "step": 2963 }, { "epoch": 2.85, "grad_norm": 6.271804332733154, "learning_rate": 1.9633877857058225e-05, "loss": 0.1799, "step": 2964 }, { "epoch": 2.8509615384615383, "grad_norm": 4.926943302154541, "learning_rate": 1.9633543802029513e-05, "loss": 0.2179, "step": 2965 }, { "epoch": 2.851923076923077, "grad_norm": 3.9009366035461426, "learning_rate": 1.963320959751648e-05, "loss": 0.1046, "step": 2966 }, { "epoch": 2.8528846153846152, "grad_norm": 6.511645317077637, "learning_rate": 1.9632875243524308e-05, "loss": 0.4039, "step": 2967 }, { "epoch": 2.853846153846154, "grad_norm": 6.273448944091797, "learning_rate": 1.963254074005818e-05, "loss": 0.1993, "step": 2968 }, { "epoch": 2.854807692307692, "grad_norm": 6.402008056640625, "learning_rate": 1.9632206087123296e-05, "loss": 0.2117, "step": 2969 }, { "epoch": 2.855769230769231, "grad_norm": 4.912503242492676, "learning_rate": 1.9631871284724845e-05, "loss": 0.1734, "step": 2970 }, { "epoch": 2.856730769230769, "grad_norm": 3.248563289642334, "learning_rate": 1.9631536332868016e-05, "loss": 0.0882, "step": 2971 }, { "epoch": 2.8576923076923078, "grad_norm": 4.4413981437683105, "learning_rate": 1.9631201231558014e-05, "loss": 0.1116, "step": 2972 }, { "epoch": 2.858653846153846, "grad_norm": 5.540526390075684, "learning_rate": 1.9630865980800032e-05, "loss": 0.166, "step": 2973 }, { "epoch": 2.8596153846153847, "grad_norm": 4.762707233428955, "learning_rate": 1.963053058059928e-05, "loss": 0.2661, "step": 2974 }, { "epoch": 2.8605769230769234, "grad_norm": 7.097982406616211, "learning_rate": 1.963019503096096e-05, "loss": 0.298, "step": 2975 }, { "epoch": 2.8615384615384616, "grad_norm": 5.349534034729004, "learning_rate": 1.9629859331890275e-05, "loss": 0.2751, "step": 2976 }, { "epoch": 2.8625, "grad_norm": 3.7136991024017334, "learning_rate": 1.9629523483392435e-05, "loss": 0.1513, "step": 2977 }, { "epoch": 2.8634615384615385, "grad_norm": 5.164628505706787, "learning_rate": 1.9629187485472654e-05, "loss": 0.1973, "step": 2978 }, { "epoch": 2.864423076923077, "grad_norm": 5.938715934753418, "learning_rate": 1.9628851338136146e-05, "loss": 0.1647, "step": 2979 }, { "epoch": 2.8653846153846154, "grad_norm": 3.752492904663086, "learning_rate": 1.9628515041388125e-05, "loss": 0.1121, "step": 2980 }, { "epoch": 2.8663461538461537, "grad_norm": 5.1540093421936035, "learning_rate": 1.962817859523381e-05, "loss": 0.2229, "step": 2981 }, { "epoch": 2.8673076923076923, "grad_norm": 5.0233235359191895, "learning_rate": 1.9627841999678422e-05, "loss": 0.1787, "step": 2982 }, { "epoch": 2.868269230769231, "grad_norm": 3.3896639347076416, "learning_rate": 1.9627505254727183e-05, "loss": 0.1404, "step": 2983 }, { "epoch": 2.8692307692307693, "grad_norm": 5.2560858726501465, "learning_rate": 1.962716836038532e-05, "loss": 0.2639, "step": 2984 }, { "epoch": 2.8701923076923075, "grad_norm": 4.449851036071777, "learning_rate": 1.9626831316658058e-05, "loss": 0.1222, "step": 2985 }, { "epoch": 2.871153846153846, "grad_norm": 5.298680305480957, "learning_rate": 1.9626494123550627e-05, "loss": 0.1827, "step": 2986 }, { "epoch": 2.872115384615385, "grad_norm": 7.268467903137207, "learning_rate": 1.9626156781068262e-05, "loss": 0.2908, "step": 2987 }, { "epoch": 2.873076923076923, "grad_norm": 2.758173704147339, "learning_rate": 1.9625819289216194e-05, "loss": 0.0994, "step": 2988 }, { "epoch": 2.8740384615384613, "grad_norm": 4.915393829345703, "learning_rate": 1.9625481647999668e-05, "loss": 0.2855, "step": 2989 }, { "epoch": 2.875, "grad_norm": 3.915201425552368, "learning_rate": 1.962514385742391e-05, "loss": 0.203, "step": 2990 }, { "epoch": 2.8759615384615387, "grad_norm": 4.791874885559082, "learning_rate": 1.9624805917494175e-05, "loss": 0.2338, "step": 2991 }, { "epoch": 2.876923076923077, "grad_norm": 5.4355058670043945, "learning_rate": 1.9624467828215698e-05, "loss": 0.2084, "step": 2992 }, { "epoch": 2.877884615384615, "grad_norm": 8.388391494750977, "learning_rate": 1.962412958959373e-05, "loss": 0.245, "step": 2993 }, { "epoch": 2.878846153846154, "grad_norm": 4.320865631103516, "learning_rate": 1.9623791201633512e-05, "loss": 0.1052, "step": 2994 }, { "epoch": 2.8798076923076925, "grad_norm": 5.7897162437438965, "learning_rate": 1.9623452664340305e-05, "loss": 0.2338, "step": 2995 }, { "epoch": 2.8807692307692307, "grad_norm": 5.95161771774292, "learning_rate": 1.9623113977719354e-05, "loss": 0.1781, "step": 2996 }, { "epoch": 2.881730769230769, "grad_norm": 3.8990132808685303, "learning_rate": 1.962277514177592e-05, "loss": 0.1587, "step": 2997 }, { "epoch": 2.8826923076923077, "grad_norm": 4.395937442779541, "learning_rate": 1.9622436156515255e-05, "loss": 0.1293, "step": 2998 }, { "epoch": 2.8836538461538463, "grad_norm": 5.662482738494873, "learning_rate": 1.9622097021942624e-05, "loss": 0.2332, "step": 2999 }, { "epoch": 2.8846153846153846, "grad_norm": 3.7063260078430176, "learning_rate": 1.962175773806329e-05, "loss": 0.1613, "step": 3000 }, { "epoch": 2.885576923076923, "grad_norm": 4.139437198638916, "learning_rate": 1.9621418304882514e-05, "loss": 0.2132, "step": 3001 }, { "epoch": 2.8865384615384615, "grad_norm": 5.487600803375244, "learning_rate": 1.9621078722405565e-05, "loss": 0.2267, "step": 3002 }, { "epoch": 2.8875, "grad_norm": 6.774466514587402, "learning_rate": 1.9620738990637708e-05, "loss": 0.2933, "step": 3003 }, { "epoch": 2.8884615384615384, "grad_norm": 4.330489635467529, "learning_rate": 1.9620399109584223e-05, "loss": 0.3296, "step": 3004 }, { "epoch": 2.8894230769230766, "grad_norm": 5.130780220031738, "learning_rate": 1.9620059079250378e-05, "loss": 0.1851, "step": 3005 }, { "epoch": 2.8903846153846153, "grad_norm": 3.7240102291107178, "learning_rate": 1.961971889964145e-05, "loss": 0.0889, "step": 3006 }, { "epoch": 2.891346153846154, "grad_norm": 3.673112392425537, "learning_rate": 1.9619378570762716e-05, "loss": 0.1384, "step": 3007 }, { "epoch": 2.8923076923076922, "grad_norm": 6.135677814483643, "learning_rate": 1.9619038092619465e-05, "loss": 0.4266, "step": 3008 }, { "epoch": 2.893269230769231, "grad_norm": 7.169144630432129, "learning_rate": 1.961869746521697e-05, "loss": 0.2753, "step": 3009 }, { "epoch": 2.894230769230769, "grad_norm": 5.942383289337158, "learning_rate": 1.961835668856052e-05, "loss": 0.1855, "step": 3010 }, { "epoch": 2.895192307692308, "grad_norm": 3.8993992805480957, "learning_rate": 1.9618015762655406e-05, "loss": 0.1452, "step": 3011 }, { "epoch": 2.896153846153846, "grad_norm": 5.994568347930908, "learning_rate": 1.9617674687506918e-05, "loss": 0.2863, "step": 3012 }, { "epoch": 2.8971153846153848, "grad_norm": 5.9594597816467285, "learning_rate": 1.9617333463120342e-05, "loss": 0.2281, "step": 3013 }, { "epoch": 2.898076923076923, "grad_norm": 4.744441509246826, "learning_rate": 1.9616992089500978e-05, "loss": 0.1645, "step": 3014 }, { "epoch": 2.8990384615384617, "grad_norm": 4.770723819732666, "learning_rate": 1.9616650566654126e-05, "loss": 0.2475, "step": 3015 }, { "epoch": 2.9, "grad_norm": 5.612180233001709, "learning_rate": 1.9616308894585078e-05, "loss": 0.1394, "step": 3016 }, { "epoch": 2.9009615384615386, "grad_norm": 4.339375972747803, "learning_rate": 1.961596707329914e-05, "loss": 0.098, "step": 3017 }, { "epoch": 2.901923076923077, "grad_norm": 5.541866779327393, "learning_rate": 1.9615625102801618e-05, "loss": 0.2272, "step": 3018 }, { "epoch": 2.9028846153846155, "grad_norm": 5.251184940338135, "learning_rate": 1.9615282983097815e-05, "loss": 0.2549, "step": 3019 }, { "epoch": 2.9038461538461537, "grad_norm": 5.092959403991699, "learning_rate": 1.9614940714193035e-05, "loss": 0.1982, "step": 3020 }, { "epoch": 2.9048076923076924, "grad_norm": 4.892589569091797, "learning_rate": 1.9614598296092603e-05, "loss": 0.1729, "step": 3021 }, { "epoch": 2.9057692307692307, "grad_norm": 7.526634693145752, "learning_rate": 1.9614255728801818e-05, "loss": 0.4618, "step": 3022 }, { "epoch": 2.9067307692307693, "grad_norm": 5.5518598556518555, "learning_rate": 1.9613913012326004e-05, "loss": 0.2662, "step": 3023 }, { "epoch": 2.9076923076923076, "grad_norm": 4.180647373199463, "learning_rate": 1.9613570146670476e-05, "loss": 0.1427, "step": 3024 }, { "epoch": 2.9086538461538463, "grad_norm": 5.612442493438721, "learning_rate": 1.9613227131840554e-05, "loss": 0.1834, "step": 3025 }, { "epoch": 2.9096153846153845, "grad_norm": 5.761976718902588, "learning_rate": 1.9612883967841563e-05, "loss": 0.3369, "step": 3026 }, { "epoch": 2.910576923076923, "grad_norm": 5.557407379150391, "learning_rate": 1.9612540654678828e-05, "loss": 0.2302, "step": 3027 }, { "epoch": 2.9115384615384614, "grad_norm": 5.023488521575928, "learning_rate": 1.9612197192357672e-05, "loss": 0.1335, "step": 3028 }, { "epoch": 2.9125, "grad_norm": 4.176513671875, "learning_rate": 1.9611853580883427e-05, "loss": 0.1094, "step": 3029 }, { "epoch": 2.9134615384615383, "grad_norm": 5.438658237457275, "learning_rate": 1.9611509820261426e-05, "loss": 0.2223, "step": 3030 }, { "epoch": 2.914423076923077, "grad_norm": 4.220186233520508, "learning_rate": 1.9611165910497e-05, "loss": 0.1782, "step": 3031 }, { "epoch": 2.9153846153846152, "grad_norm": 6.089065074920654, "learning_rate": 1.961082185159549e-05, "loss": 0.2436, "step": 3032 }, { "epoch": 2.916346153846154, "grad_norm": 5.078456401824951, "learning_rate": 1.961047764356223e-05, "loss": 0.2317, "step": 3033 }, { "epoch": 2.917307692307692, "grad_norm": 9.855822563171387, "learning_rate": 1.9610133286402565e-05, "loss": 0.4325, "step": 3034 }, { "epoch": 2.918269230769231, "grad_norm": 5.072558879852295, "learning_rate": 1.9609788780121837e-05, "loss": 0.2512, "step": 3035 }, { "epoch": 2.919230769230769, "grad_norm": 4.699074745178223, "learning_rate": 1.960944412472539e-05, "loss": 0.1884, "step": 3036 }, { "epoch": 2.9201923076923078, "grad_norm": 3.7308714389801025, "learning_rate": 1.9609099320218575e-05, "loss": 0.1393, "step": 3037 }, { "epoch": 2.921153846153846, "grad_norm": 4.658520221710205, "learning_rate": 1.960875436660674e-05, "loss": 0.2081, "step": 3038 }, { "epoch": 2.9221153846153847, "grad_norm": 7.683556079864502, "learning_rate": 1.9608409263895237e-05, "loss": 0.2674, "step": 3039 }, { "epoch": 2.9230769230769234, "grad_norm": 5.9235334396362305, "learning_rate": 1.9608064012089424e-05, "loss": 0.2658, "step": 3040 }, { "epoch": 2.9240384615384616, "grad_norm": 4.1959075927734375, "learning_rate": 1.960771861119466e-05, "loss": 0.11, "step": 3041 }, { "epoch": 2.925, "grad_norm": 7.580928325653076, "learning_rate": 1.96073730612163e-05, "loss": 0.5671, "step": 3042 }, { "epoch": 2.9259615384615385, "grad_norm": 3.74802827835083, "learning_rate": 1.9607027362159705e-05, "loss": 0.1654, "step": 3043 }, { "epoch": 2.926923076923077, "grad_norm": 4.006467342376709, "learning_rate": 1.9606681514030243e-05, "loss": 0.2274, "step": 3044 }, { "epoch": 2.9278846153846154, "grad_norm": 4.061321258544922, "learning_rate": 1.9606335516833278e-05, "loss": 0.1674, "step": 3045 }, { "epoch": 2.9288461538461537, "grad_norm": 4.045295715332031, "learning_rate": 1.9605989370574182e-05, "loss": 0.1519, "step": 3046 }, { "epoch": 2.9298076923076923, "grad_norm": 5.840970516204834, "learning_rate": 1.9605643075258323e-05, "loss": 0.3928, "step": 3047 }, { "epoch": 2.930769230769231, "grad_norm": 5.506835460662842, "learning_rate": 1.9605296630891077e-05, "loss": 0.2694, "step": 3048 }, { "epoch": 2.9317307692307693, "grad_norm": 4.0788421630859375, "learning_rate": 1.9604950037477816e-05, "loss": 0.1634, "step": 3049 }, { "epoch": 2.9326923076923075, "grad_norm": 3.6285057067871094, "learning_rate": 1.960460329502392e-05, "loss": 0.164, "step": 3050 }, { "epoch": 2.933653846153846, "grad_norm": 6.41070032119751, "learning_rate": 1.9604256403534774e-05, "loss": 0.3601, "step": 3051 }, { "epoch": 2.934615384615385, "grad_norm": 3.6456077098846436, "learning_rate": 1.9603909363015755e-05, "loss": 0.1187, "step": 3052 }, { "epoch": 2.935576923076923, "grad_norm": 5.226222991943359, "learning_rate": 1.960356217347225e-05, "loss": 0.132, "step": 3053 }, { "epoch": 2.9365384615384613, "grad_norm": 3.465205192565918, "learning_rate": 1.9603214834909646e-05, "loss": 0.1284, "step": 3054 }, { "epoch": 2.9375, "grad_norm": 4.249354839324951, "learning_rate": 1.9602867347333332e-05, "loss": 0.1711, "step": 3055 }, { "epoch": 2.9384615384615387, "grad_norm": 5.752633094787598, "learning_rate": 1.96025197107487e-05, "loss": 0.1729, "step": 3056 }, { "epoch": 2.939423076923077, "grad_norm": 6.1840362548828125, "learning_rate": 1.960217192516115e-05, "loss": 0.3307, "step": 3057 }, { "epoch": 2.940384615384615, "grad_norm": 4.511633396148682, "learning_rate": 1.960182399057607e-05, "loss": 0.2417, "step": 3058 }, { "epoch": 2.941346153846154, "grad_norm": 5.615838050842285, "learning_rate": 1.9601475906998862e-05, "loss": 0.2788, "step": 3059 }, { "epoch": 2.9423076923076925, "grad_norm": 5.969213962554932, "learning_rate": 1.960112767443493e-05, "loss": 0.4004, "step": 3060 }, { "epoch": 2.9432692307692307, "grad_norm": 5.219755172729492, "learning_rate": 1.9600779292889675e-05, "loss": 0.2035, "step": 3061 }, { "epoch": 2.944230769230769, "grad_norm": 4.423344612121582, "learning_rate": 1.9600430762368503e-05, "loss": 0.1088, "step": 3062 }, { "epoch": 2.9451923076923077, "grad_norm": 5.098330974578857, "learning_rate": 1.960008208287682e-05, "loss": 0.1903, "step": 3063 }, { "epoch": 2.9461538461538463, "grad_norm": 4.066972732543945, "learning_rate": 1.9599733254420042e-05, "loss": 0.2343, "step": 3064 }, { "epoch": 2.9471153846153846, "grad_norm": 5.8068976402282715, "learning_rate": 1.9599384277003576e-05, "loss": 0.2456, "step": 3065 }, { "epoch": 2.948076923076923, "grad_norm": 5.091919898986816, "learning_rate": 1.9599035150632845e-05, "loss": 0.157, "step": 3066 }, { "epoch": 2.9490384615384615, "grad_norm": 11.354142189025879, "learning_rate": 1.9598685875313258e-05, "loss": 0.3137, "step": 3067 }, { "epoch": 2.95, "grad_norm": 6.931436061859131, "learning_rate": 1.959833645105024e-05, "loss": 0.1903, "step": 3068 }, { "epoch": 2.9509615384615384, "grad_norm": 5.182626247406006, "learning_rate": 1.9597986877849208e-05, "loss": 0.1872, "step": 3069 }, { "epoch": 2.9519230769230766, "grad_norm": 4.872951507568359, "learning_rate": 1.9597637155715587e-05, "loss": 0.2749, "step": 3070 }, { "epoch": 2.9528846153846153, "grad_norm": 8.365646362304688, "learning_rate": 1.959728728465481e-05, "loss": 0.221, "step": 3071 }, { "epoch": 2.953846153846154, "grad_norm": 5.195140838623047, "learning_rate": 1.9596937264672304e-05, "loss": 0.2304, "step": 3072 }, { "epoch": 2.9548076923076922, "grad_norm": 4.211370944976807, "learning_rate": 1.9596587095773496e-05, "loss": 0.1868, "step": 3073 }, { "epoch": 2.955769230769231, "grad_norm": 7.374955654144287, "learning_rate": 1.9596236777963823e-05, "loss": 0.2406, "step": 3074 }, { "epoch": 2.956730769230769, "grad_norm": 4.6541571617126465, "learning_rate": 1.9595886311248716e-05, "loss": 0.1758, "step": 3075 }, { "epoch": 2.957692307692308, "grad_norm": 4.681119441986084, "learning_rate": 1.959553569563362e-05, "loss": 0.2047, "step": 3076 }, { "epoch": 2.958653846153846, "grad_norm": 8.919179916381836, "learning_rate": 1.9595184931123974e-05, "loss": 0.2035, "step": 3077 }, { "epoch": 2.9596153846153848, "grad_norm": 4.981673240661621, "learning_rate": 1.959483401772522e-05, "loss": 0.2293, "step": 3078 }, { "epoch": 2.960576923076923, "grad_norm": 5.088468551635742, "learning_rate": 1.95944829554428e-05, "loss": 0.2644, "step": 3079 }, { "epoch": 2.9615384615384617, "grad_norm": 4.353606700897217, "learning_rate": 1.9594131744282163e-05, "loss": 0.2051, "step": 3080 }, { "epoch": 2.9625, "grad_norm": 4.360877513885498, "learning_rate": 1.9593780384248762e-05, "loss": 0.1933, "step": 3081 }, { "epoch": 2.9634615384615386, "grad_norm": 4.828239440917969, "learning_rate": 1.9593428875348043e-05, "loss": 0.1574, "step": 3082 }, { "epoch": 2.964423076923077, "grad_norm": 4.640857696533203, "learning_rate": 1.959307721758547e-05, "loss": 0.1402, "step": 3083 }, { "epoch": 2.9653846153846155, "grad_norm": 6.110815525054932, "learning_rate": 1.9592725410966493e-05, "loss": 0.3302, "step": 3084 }, { "epoch": 2.9663461538461537, "grad_norm": 5.153989315032959, "learning_rate": 1.959237345549657e-05, "loss": 0.2273, "step": 3085 }, { "epoch": 2.9673076923076924, "grad_norm": 6.654915809631348, "learning_rate": 1.9592021351181163e-05, "loss": 0.2908, "step": 3086 }, { "epoch": 2.9682692307692307, "grad_norm": 6.274724960327148, "learning_rate": 1.9591669098025737e-05, "loss": 0.2957, "step": 3087 }, { "epoch": 2.9692307692307693, "grad_norm": 5.912987232208252, "learning_rate": 1.959131669603576e-05, "loss": 0.2928, "step": 3088 }, { "epoch": 2.9701923076923076, "grad_norm": 6.073190689086914, "learning_rate": 1.9590964145216698e-05, "loss": 0.3974, "step": 3089 }, { "epoch": 2.9711538461538463, "grad_norm": 4.429744720458984, "learning_rate": 1.959061144557402e-05, "loss": 0.1034, "step": 3090 }, { "epoch": 2.9721153846153845, "grad_norm": 7.630964756011963, "learning_rate": 1.9590258597113202e-05, "loss": 0.313, "step": 3091 }, { "epoch": 2.973076923076923, "grad_norm": 5.347609519958496, "learning_rate": 1.9589905599839715e-05, "loss": 0.1939, "step": 3092 }, { "epoch": 2.9740384615384614, "grad_norm": 4.978479385375977, "learning_rate": 1.958955245375904e-05, "loss": 0.1612, "step": 3093 }, { "epoch": 2.975, "grad_norm": 6.167547702789307, "learning_rate": 1.9589199158876658e-05, "loss": 0.2785, "step": 3094 }, { "epoch": 2.9759615384615383, "grad_norm": 8.828461647033691, "learning_rate": 1.9588845715198047e-05, "loss": 0.1916, "step": 3095 }, { "epoch": 2.976923076923077, "grad_norm": 5.502031326293945, "learning_rate": 1.9588492122728696e-05, "loss": 0.23, "step": 3096 }, { "epoch": 2.9778846153846152, "grad_norm": 5.984594821929932, "learning_rate": 1.9588138381474085e-05, "loss": 0.1359, "step": 3097 }, { "epoch": 2.978846153846154, "grad_norm": 6.2122883796691895, "learning_rate": 1.9587784491439712e-05, "loss": 0.1909, "step": 3098 }, { "epoch": 2.979807692307692, "grad_norm": 5.400017738342285, "learning_rate": 1.958743045263106e-05, "loss": 0.2592, "step": 3099 }, { "epoch": 2.980769230769231, "grad_norm": 4.304708480834961, "learning_rate": 1.958707626505363e-05, "loss": 0.1429, "step": 3100 }, { "epoch": 2.981730769230769, "grad_norm": 5.554794788360596, "learning_rate": 1.958672192871291e-05, "loss": 0.2342, "step": 3101 }, { "epoch": 2.9826923076923078, "grad_norm": 5.769754886627197, "learning_rate": 1.9586367443614406e-05, "loss": 0.1858, "step": 3102 }, { "epoch": 2.983653846153846, "grad_norm": 5.563751220703125, "learning_rate": 1.958601280976361e-05, "loss": 0.1645, "step": 3103 }, { "epoch": 2.9846153846153847, "grad_norm": 6.732911586761475, "learning_rate": 1.9585658027166034e-05, "loss": 0.3018, "step": 3104 }, { "epoch": 2.9855769230769234, "grad_norm": 6.730845928192139, "learning_rate": 1.9585303095827174e-05, "loss": 0.3451, "step": 3105 }, { "epoch": 2.9865384615384616, "grad_norm": 6.310127258300781, "learning_rate": 1.9584948015752547e-05, "loss": 0.2851, "step": 3106 }, { "epoch": 2.9875, "grad_norm": 5.288368225097656, "learning_rate": 1.958459278694766e-05, "loss": 0.2759, "step": 3107 }, { "epoch": 2.9884615384615385, "grad_norm": 5.140648365020752, "learning_rate": 1.958423740941802e-05, "loss": 0.1991, "step": 3108 }, { "epoch": 2.989423076923077, "grad_norm": 5.029825687408447, "learning_rate": 1.9583881883169143e-05, "loss": 0.226, "step": 3109 }, { "epoch": 2.9903846153846154, "grad_norm": 5.283029079437256, "learning_rate": 1.958352620820655e-05, "loss": 0.217, "step": 3110 }, { "epoch": 2.9913461538461537, "grad_norm": 7.757811546325684, "learning_rate": 1.958317038453576e-05, "loss": 0.3392, "step": 3111 }, { "epoch": 2.9923076923076923, "grad_norm": 5.2107720375061035, "learning_rate": 1.9582814412162288e-05, "loss": 0.2346, "step": 3112 }, { "epoch": 2.993269230769231, "grad_norm": 5.899914264678955, "learning_rate": 1.9582458291091664e-05, "loss": 0.2025, "step": 3113 }, { "epoch": 2.9942307692307693, "grad_norm": 5.784294605255127, "learning_rate": 1.958210202132941e-05, "loss": 0.2752, "step": 3114 }, { "epoch": 2.9951923076923075, "grad_norm": 6.699867248535156, "learning_rate": 1.9581745602881056e-05, "loss": 0.3929, "step": 3115 }, { "epoch": 2.996153846153846, "grad_norm": 6.595979690551758, "learning_rate": 1.9581389035752134e-05, "loss": 0.3633, "step": 3116 }, { "epoch": 2.997115384615385, "grad_norm": 4.052805423736572, "learning_rate": 1.958103231994817e-05, "loss": 0.2199, "step": 3117 }, { "epoch": 2.998076923076923, "grad_norm": 4.577149868011475, "learning_rate": 1.958067545547471e-05, "loss": 0.1383, "step": 3118 }, { "epoch": 2.9990384615384613, "grad_norm": 6.135124206542969, "learning_rate": 1.9580318442337284e-05, "loss": 0.2248, "step": 3119 }, { "epoch": 3.0, "grad_norm": 4.6251678466796875, "learning_rate": 1.9579961280541434e-05, "loss": 0.2443, "step": 3120 }, { "epoch": 3.0009615384615387, "grad_norm": 2.0865349769592285, "learning_rate": 1.9579603970092702e-05, "loss": 0.054, "step": 3121 }, { "epoch": 3.001923076923077, "grad_norm": 2.4825563430786133, "learning_rate": 1.9579246510996633e-05, "loss": 0.1005, "step": 3122 }, { "epoch": 3.0028846153846156, "grad_norm": 3.9596855640411377, "learning_rate": 1.957888890325877e-05, "loss": 0.1534, "step": 3123 }, { "epoch": 3.003846153846154, "grad_norm": 5.890209674835205, "learning_rate": 1.9578531146884668e-05, "loss": 0.2294, "step": 3124 }, { "epoch": 3.0048076923076925, "grad_norm": 6.302380084991455, "learning_rate": 1.957817324187987e-05, "loss": 0.2298, "step": 3125 }, { "epoch": 3.0057692307692307, "grad_norm": 3.7534570693969727, "learning_rate": 1.957781518824994e-05, "loss": 0.1246, "step": 3126 }, { "epoch": 3.0067307692307694, "grad_norm": 3.481750249862671, "learning_rate": 1.957745698600043e-05, "loss": 0.1017, "step": 3127 }, { "epoch": 3.0076923076923077, "grad_norm": 3.8703324794769287, "learning_rate": 1.9577098635136892e-05, "loss": 0.1922, "step": 3128 }, { "epoch": 3.0086538461538463, "grad_norm": 3.774289846420288, "learning_rate": 1.9576740135664894e-05, "loss": 0.1313, "step": 3129 }, { "epoch": 3.0096153846153846, "grad_norm": 3.144435405731201, "learning_rate": 1.9576381487589995e-05, "loss": 0.099, "step": 3130 }, { "epoch": 3.0105769230769233, "grad_norm": 4.729006290435791, "learning_rate": 1.9576022690917765e-05, "loss": 0.1627, "step": 3131 }, { "epoch": 3.0115384615384615, "grad_norm": 3.2195234298706055, "learning_rate": 1.9575663745653768e-05, "loss": 0.0997, "step": 3132 }, { "epoch": 3.0125, "grad_norm": 4.8212785720825195, "learning_rate": 1.9575304651803572e-05, "loss": 0.0791, "step": 3133 }, { "epoch": 3.0134615384615384, "grad_norm": 2.3408117294311523, "learning_rate": 1.9574945409372752e-05, "loss": 0.0575, "step": 3134 }, { "epoch": 3.014423076923077, "grad_norm": 2.1569855213165283, "learning_rate": 1.9574586018366882e-05, "loss": 0.0843, "step": 3135 }, { "epoch": 3.0153846153846153, "grad_norm": 3.066580295562744, "learning_rate": 1.9574226478791534e-05, "loss": 0.1067, "step": 3136 }, { "epoch": 3.016346153846154, "grad_norm": 2.860563278198242, "learning_rate": 1.9573866790652296e-05, "loss": 0.0806, "step": 3137 }, { "epoch": 3.0173076923076922, "grad_norm": 4.961119174957275, "learning_rate": 1.957350695395474e-05, "loss": 0.1333, "step": 3138 }, { "epoch": 3.018269230769231, "grad_norm": 3.0769846439361572, "learning_rate": 1.9573146968704456e-05, "loss": 0.0495, "step": 3139 }, { "epoch": 3.019230769230769, "grad_norm": 3.912537097930908, "learning_rate": 1.957278683490703e-05, "loss": 0.1027, "step": 3140 }, { "epoch": 3.020192307692308, "grad_norm": 4.876780033111572, "learning_rate": 1.9572426552568045e-05, "loss": 0.0995, "step": 3141 }, { "epoch": 3.021153846153846, "grad_norm": 2.9083638191223145, "learning_rate": 1.9572066121693097e-05, "loss": 0.1166, "step": 3142 }, { "epoch": 3.0221153846153848, "grad_norm": 2.472947359085083, "learning_rate": 1.9571705542287774e-05, "loss": 0.0347, "step": 3143 }, { "epoch": 3.023076923076923, "grad_norm": 2.475461483001709, "learning_rate": 1.9571344814357676e-05, "loss": 0.0624, "step": 3144 }, { "epoch": 3.0240384615384617, "grad_norm": 3.9751973152160645, "learning_rate": 1.9570983937908398e-05, "loss": 0.1267, "step": 3145 }, { "epoch": 3.025, "grad_norm": 7.997836112976074, "learning_rate": 1.9570622912945536e-05, "loss": 0.1052, "step": 3146 }, { "epoch": 3.0259615384615386, "grad_norm": 6.687990665435791, "learning_rate": 1.95702617394747e-05, "loss": 0.091, "step": 3147 }, { "epoch": 3.026923076923077, "grad_norm": 6.812047958374023, "learning_rate": 1.956990041750149e-05, "loss": 0.1539, "step": 3148 }, { "epoch": 3.0278846153846155, "grad_norm": 5.773587226867676, "learning_rate": 1.956953894703151e-05, "loss": 0.1834, "step": 3149 }, { "epoch": 3.0288461538461537, "grad_norm": 4.5256547927856445, "learning_rate": 1.956917732807037e-05, "loss": 0.0987, "step": 3150 }, { "epoch": 3.0298076923076924, "grad_norm": 4.138901710510254, "learning_rate": 1.956881556062369e-05, "loss": 0.1107, "step": 3151 }, { "epoch": 3.0307692307692307, "grad_norm": 2.277996301651001, "learning_rate": 1.9568453644697073e-05, "loss": 0.0458, "step": 3152 }, { "epoch": 3.0317307692307693, "grad_norm": 4.084400653839111, "learning_rate": 1.9568091580296138e-05, "loss": 0.0899, "step": 3153 }, { "epoch": 3.0326923076923076, "grad_norm": 3.5015413761138916, "learning_rate": 1.9567729367426506e-05, "loss": 0.092, "step": 3154 }, { "epoch": 3.0336538461538463, "grad_norm": 3.2244436740875244, "learning_rate": 1.9567367006093794e-05, "loss": 0.1067, "step": 3155 }, { "epoch": 3.0346153846153845, "grad_norm": 5.25763463973999, "learning_rate": 1.9567004496303626e-05, "loss": 0.1937, "step": 3156 }, { "epoch": 3.035576923076923, "grad_norm": 3.4731013774871826, "learning_rate": 1.9566641838061624e-05, "loss": 0.0917, "step": 3157 }, { "epoch": 3.0365384615384614, "grad_norm": 6.105301380157471, "learning_rate": 1.956627903137342e-05, "loss": 0.1375, "step": 3158 }, { "epoch": 3.0375, "grad_norm": 3.172846555709839, "learning_rate": 1.9565916076244643e-05, "loss": 0.1272, "step": 3159 }, { "epoch": 3.0384615384615383, "grad_norm": 5.949155807495117, "learning_rate": 1.9565552972680927e-05, "loss": 0.1172, "step": 3160 }, { "epoch": 3.039423076923077, "grad_norm": 2.9153892993927, "learning_rate": 1.95651897206879e-05, "loss": 0.0532, "step": 3161 }, { "epoch": 3.0403846153846152, "grad_norm": 3.7678048610687256, "learning_rate": 1.95648263202712e-05, "loss": 0.0748, "step": 3162 }, { "epoch": 3.041346153846154, "grad_norm": 3.761106491088867, "learning_rate": 1.956446277143647e-05, "loss": 0.08, "step": 3163 }, { "epoch": 3.042307692307692, "grad_norm": 4.834085464477539, "learning_rate": 1.956409907418935e-05, "loss": 0.1296, "step": 3164 }, { "epoch": 3.043269230769231, "grad_norm": 4.115090370178223, "learning_rate": 1.9563735228535482e-05, "loss": 0.0965, "step": 3165 }, { "epoch": 3.044230769230769, "grad_norm": 2.7223305702209473, "learning_rate": 1.9563371234480516e-05, "loss": 0.041, "step": 3166 }, { "epoch": 3.0451923076923078, "grad_norm": 2.0768253803253174, "learning_rate": 1.956300709203009e-05, "loss": 0.0523, "step": 3167 }, { "epoch": 3.046153846153846, "grad_norm": 5.427130222320557, "learning_rate": 1.9562642801189867e-05, "loss": 0.135, "step": 3168 }, { "epoch": 3.0471153846153847, "grad_norm": 3.4805383682250977, "learning_rate": 1.9562278361965494e-05, "loss": 0.0952, "step": 3169 }, { "epoch": 3.048076923076923, "grad_norm": 3.511920690536499, "learning_rate": 1.9561913774362618e-05, "loss": 0.1486, "step": 3170 }, { "epoch": 3.0490384615384616, "grad_norm": 2.9547431468963623, "learning_rate": 1.956154903838691e-05, "loss": 0.092, "step": 3171 }, { "epoch": 3.05, "grad_norm": 5.161396503448486, "learning_rate": 1.9561184154044025e-05, "loss": 0.1532, "step": 3172 }, { "epoch": 3.0509615384615385, "grad_norm": 5.993930339813232, "learning_rate": 1.956081912133962e-05, "loss": 0.1355, "step": 3173 }, { "epoch": 3.0519230769230767, "grad_norm": 3.8415822982788086, "learning_rate": 1.9560453940279364e-05, "loss": 0.1302, "step": 3174 }, { "epoch": 3.0528846153846154, "grad_norm": 4.046850204467773, "learning_rate": 1.9560088610868923e-05, "loss": 0.0915, "step": 3175 }, { "epoch": 3.0538461538461537, "grad_norm": 3.8729283809661865, "learning_rate": 1.9559723133113967e-05, "loss": 0.1404, "step": 3176 }, { "epoch": 3.0548076923076923, "grad_norm": 4.917222023010254, "learning_rate": 1.9559357507020163e-05, "loss": 0.1104, "step": 3177 }, { "epoch": 3.0557692307692306, "grad_norm": 2.5254714488983154, "learning_rate": 1.9558991732593188e-05, "loss": 0.1363, "step": 3178 }, { "epoch": 3.0567307692307693, "grad_norm": 3.901068687438965, "learning_rate": 1.9558625809838715e-05, "loss": 0.0958, "step": 3179 }, { "epoch": 3.0576923076923075, "grad_norm": 4.043454170227051, "learning_rate": 1.9558259738762424e-05, "loss": 0.1134, "step": 3180 }, { "epoch": 3.058653846153846, "grad_norm": 4.04932975769043, "learning_rate": 1.9557893519369994e-05, "loss": 0.1068, "step": 3181 }, { "epoch": 3.0596153846153844, "grad_norm": 4.681528091430664, "learning_rate": 1.955752715166711e-05, "loss": 0.1483, "step": 3182 }, { "epoch": 3.060576923076923, "grad_norm": 7.259921550750732, "learning_rate": 1.9557160635659457e-05, "loss": 0.0924, "step": 3183 }, { "epoch": 3.0615384615384613, "grad_norm": 4.475643157958984, "learning_rate": 1.955679397135272e-05, "loss": 0.0817, "step": 3184 }, { "epoch": 3.0625, "grad_norm": 3.4719817638397217, "learning_rate": 1.955642715875259e-05, "loss": 0.1217, "step": 3185 }, { "epoch": 3.0634615384615387, "grad_norm": 2.2137370109558105, "learning_rate": 1.9556060197864756e-05, "loss": 0.0249, "step": 3186 }, { "epoch": 3.064423076923077, "grad_norm": 3.1501691341400146, "learning_rate": 1.9555693088694918e-05, "loss": 0.1353, "step": 3187 }, { "epoch": 3.0653846153846156, "grad_norm": 4.117206573486328, "learning_rate": 1.9555325831248767e-05, "loss": 0.0611, "step": 3188 }, { "epoch": 3.066346153846154, "grad_norm": 2.2491133213043213, "learning_rate": 1.9554958425532002e-05, "loss": 0.0352, "step": 3189 }, { "epoch": 3.0673076923076925, "grad_norm": 4.118116855621338, "learning_rate": 1.955459087155033e-05, "loss": 0.0947, "step": 3190 }, { "epoch": 3.0682692307692307, "grad_norm": 4.111854553222656, "learning_rate": 1.9554223169309444e-05, "loss": 0.1007, "step": 3191 }, { "epoch": 3.0692307692307694, "grad_norm": 4.167990684509277, "learning_rate": 1.955385531881506e-05, "loss": 0.1527, "step": 3192 }, { "epoch": 3.0701923076923077, "grad_norm": 3.362555980682373, "learning_rate": 1.9553487320072883e-05, "loss": 0.0654, "step": 3193 }, { "epoch": 3.0711538461538463, "grad_norm": 5.472823143005371, "learning_rate": 1.9553119173088617e-05, "loss": 0.2084, "step": 3194 }, { "epoch": 3.0721153846153846, "grad_norm": 2.9563465118408203, "learning_rate": 1.9552750877867982e-05, "loss": 0.0508, "step": 3195 }, { "epoch": 3.0730769230769233, "grad_norm": 3.8849475383758545, "learning_rate": 1.9552382434416693e-05, "loss": 0.111, "step": 3196 }, { "epoch": 3.0740384615384615, "grad_norm": 2.6671953201293945, "learning_rate": 1.955201384274046e-05, "loss": 0.0426, "step": 3197 }, { "epoch": 3.075, "grad_norm": 1.6627923250198364, "learning_rate": 1.955164510284501e-05, "loss": 0.0244, "step": 3198 }, { "epoch": 3.0759615384615384, "grad_norm": 1.5880942344665527, "learning_rate": 1.955127621473606e-05, "loss": 0.0243, "step": 3199 }, { "epoch": 3.076923076923077, "grad_norm": 3.5636091232299805, "learning_rate": 1.9550907178419337e-05, "loss": 0.0488, "step": 3200 }, { "epoch": 3.0778846153846153, "grad_norm": 3.798408031463623, "learning_rate": 1.9550537993900566e-05, "loss": 0.0699, "step": 3201 }, { "epoch": 3.078846153846154, "grad_norm": 1.3280463218688965, "learning_rate": 1.9550168661185472e-05, "loss": 0.0184, "step": 3202 }, { "epoch": 3.0798076923076922, "grad_norm": 4.631742000579834, "learning_rate": 1.9549799180279793e-05, "loss": 0.088, "step": 3203 }, { "epoch": 3.080769230769231, "grad_norm": 2.8473618030548096, "learning_rate": 1.9549429551189264e-05, "loss": 0.0507, "step": 3204 }, { "epoch": 3.081730769230769, "grad_norm": 5.206886291503906, "learning_rate": 1.954905977391961e-05, "loss": 0.2256, "step": 3205 }, { "epoch": 3.082692307692308, "grad_norm": 3.958738088607788, "learning_rate": 1.9548689848476574e-05, "loss": 0.1134, "step": 3206 }, { "epoch": 3.083653846153846, "grad_norm": 2.303834915161133, "learning_rate": 1.95483197748659e-05, "loss": 0.0503, "step": 3207 }, { "epoch": 3.0846153846153848, "grad_norm": 4.85396671295166, "learning_rate": 1.9547949553093325e-05, "loss": 0.0954, "step": 3208 }, { "epoch": 3.085576923076923, "grad_norm": 1.4163556098937988, "learning_rate": 1.9547579183164595e-05, "loss": 0.0226, "step": 3209 }, { "epoch": 3.0865384615384617, "grad_norm": 5.1978583335876465, "learning_rate": 1.954720866508546e-05, "loss": 0.1715, "step": 3210 }, { "epoch": 3.0875, "grad_norm": 5.1722588539123535, "learning_rate": 1.9546837998861667e-05, "loss": 0.1391, "step": 3211 }, { "epoch": 3.0884615384615386, "grad_norm": 1.7108672857284546, "learning_rate": 1.9546467184498965e-05, "loss": 0.0242, "step": 3212 }, { "epoch": 3.089423076923077, "grad_norm": 3.562511920928955, "learning_rate": 1.9546096222003112e-05, "loss": 0.1808, "step": 3213 }, { "epoch": 3.0903846153846155, "grad_norm": 5.511242389678955, "learning_rate": 1.9545725111379866e-05, "loss": 0.1053, "step": 3214 }, { "epoch": 3.0913461538461537, "grad_norm": 3.4852511882781982, "learning_rate": 1.9545353852634978e-05, "loss": 0.0508, "step": 3215 }, { "epoch": 3.0923076923076924, "grad_norm": 3.2051806449890137, "learning_rate": 1.9544982445774217e-05, "loss": 0.0543, "step": 3216 }, { "epoch": 3.0932692307692307, "grad_norm": 5.403791427612305, "learning_rate": 1.954461089080334e-05, "loss": 0.0774, "step": 3217 }, { "epoch": 3.0942307692307693, "grad_norm": 2.60874080657959, "learning_rate": 1.9544239187728117e-05, "loss": 0.0348, "step": 3218 }, { "epoch": 3.0951923076923076, "grad_norm": 5.445559501647949, "learning_rate": 1.9543867336554316e-05, "loss": 0.1808, "step": 3219 }, { "epoch": 3.0961538461538463, "grad_norm": 4.439818382263184, "learning_rate": 1.95434953372877e-05, "loss": 0.115, "step": 3220 }, { "epoch": 3.0971153846153845, "grad_norm": 4.773172855377197, "learning_rate": 1.954312318993405e-05, "loss": 0.1309, "step": 3221 }, { "epoch": 3.098076923076923, "grad_norm": 2.1032636165618896, "learning_rate": 1.9542750894499134e-05, "loss": 0.0386, "step": 3222 }, { "epoch": 3.0990384615384614, "grad_norm": 5.956071376800537, "learning_rate": 1.954237845098873e-05, "loss": 0.1792, "step": 3223 }, { "epoch": 3.1, "grad_norm": 7.983707427978516, "learning_rate": 1.9542005859408624e-05, "loss": 0.1234, "step": 3224 }, { "epoch": 3.1009615384615383, "grad_norm": 4.384886741638184, "learning_rate": 1.9541633119764594e-05, "loss": 0.1008, "step": 3225 }, { "epoch": 3.101923076923077, "grad_norm": 3.8126320838928223, "learning_rate": 1.9541260232062415e-05, "loss": 0.1026, "step": 3226 }, { "epoch": 3.1028846153846152, "grad_norm": 3.846466064453125, "learning_rate": 1.9540887196307884e-05, "loss": 0.054, "step": 3227 }, { "epoch": 3.103846153846154, "grad_norm": 4.73018741607666, "learning_rate": 1.954051401250679e-05, "loss": 0.1747, "step": 3228 }, { "epoch": 3.104807692307692, "grad_norm": 4.001744270324707, "learning_rate": 1.9540140680664915e-05, "loss": 0.1057, "step": 3229 }, { "epoch": 3.105769230769231, "grad_norm": 2.768085479736328, "learning_rate": 1.9539767200788054e-05, "loss": 0.07, "step": 3230 }, { "epoch": 3.106730769230769, "grad_norm": 3.4468491077423096, "learning_rate": 1.953939357288201e-05, "loss": 0.0446, "step": 3231 }, { "epoch": 3.1076923076923078, "grad_norm": 2.5772385597229004, "learning_rate": 1.9539019796952577e-05, "loss": 0.0378, "step": 3232 }, { "epoch": 3.108653846153846, "grad_norm": 2.8556690216064453, "learning_rate": 1.953864587300555e-05, "loss": 0.0576, "step": 3233 }, { "epoch": 3.1096153846153847, "grad_norm": 4.325265884399414, "learning_rate": 1.9538271801046735e-05, "loss": 0.0957, "step": 3234 }, { "epoch": 3.110576923076923, "grad_norm": 5.567111015319824, "learning_rate": 1.9537897581081935e-05, "loss": 0.1749, "step": 3235 }, { "epoch": 3.1115384615384616, "grad_norm": 3.748208999633789, "learning_rate": 1.953752321311696e-05, "loss": 0.0685, "step": 3236 }, { "epoch": 3.1125, "grad_norm": 3.8505945205688477, "learning_rate": 1.9537148697157614e-05, "loss": 0.1181, "step": 3237 }, { "epoch": 3.1134615384615385, "grad_norm": 4.1048502922058105, "learning_rate": 1.9536774033209717e-05, "loss": 0.1783, "step": 3238 }, { "epoch": 3.1144230769230767, "grad_norm": 3.160984992980957, "learning_rate": 1.9536399221279072e-05, "loss": 0.1988, "step": 3239 }, { "epoch": 3.1153846153846154, "grad_norm": 4.344096660614014, "learning_rate": 1.95360242613715e-05, "loss": 0.0389, "step": 3240 }, { "epoch": 3.1163461538461537, "grad_norm": 2.6033945083618164, "learning_rate": 1.9535649153492823e-05, "loss": 0.0542, "step": 3241 }, { "epoch": 3.1173076923076923, "grad_norm": 3.7934749126434326, "learning_rate": 1.9535273897648857e-05, "loss": 0.062, "step": 3242 }, { "epoch": 3.1182692307692306, "grad_norm": 3.9595730304718018, "learning_rate": 1.9534898493845422e-05, "loss": 0.089, "step": 3243 }, { "epoch": 3.1192307692307693, "grad_norm": 4.491755485534668, "learning_rate": 1.953452294208835e-05, "loss": 0.1372, "step": 3244 }, { "epoch": 3.1201923076923075, "grad_norm": 4.800078868865967, "learning_rate": 1.9534147242383467e-05, "loss": 0.162, "step": 3245 }, { "epoch": 3.121153846153846, "grad_norm": 3.71752667427063, "learning_rate": 1.95337713947366e-05, "loss": 0.1037, "step": 3246 }, { "epoch": 3.1221153846153844, "grad_norm": 3.911043643951416, "learning_rate": 1.953339539915358e-05, "loss": 0.1106, "step": 3247 }, { "epoch": 3.123076923076923, "grad_norm": 5.055746555328369, "learning_rate": 1.9533019255640245e-05, "loss": 0.1276, "step": 3248 }, { "epoch": 3.1240384615384613, "grad_norm": 7.3017377853393555, "learning_rate": 1.953264296420243e-05, "loss": 0.1549, "step": 3249 }, { "epoch": 3.125, "grad_norm": 5.524139404296875, "learning_rate": 1.9532266524845974e-05, "loss": 0.1355, "step": 3250 }, { "epoch": 3.1259615384615387, "grad_norm": 3.516396999359131, "learning_rate": 1.9531889937576722e-05, "loss": 0.0355, "step": 3251 }, { "epoch": 3.126923076923077, "grad_norm": 4.801011085510254, "learning_rate": 1.953151320240051e-05, "loss": 0.2023, "step": 3252 }, { "epoch": 3.127884615384615, "grad_norm": 5.520909309387207, "learning_rate": 1.953113631932319e-05, "loss": 0.1442, "step": 3253 }, { "epoch": 3.128846153846154, "grad_norm": 4.7429399490356445, "learning_rate": 1.9530759288350606e-05, "loss": 0.0984, "step": 3254 }, { "epoch": 3.1298076923076925, "grad_norm": 2.0056440830230713, "learning_rate": 1.953038210948861e-05, "loss": 0.0624, "step": 3255 }, { "epoch": 3.1307692307692307, "grad_norm": 3.1644327640533447, "learning_rate": 1.9530004782743054e-05, "loss": 0.0687, "step": 3256 }, { "epoch": 3.1317307692307694, "grad_norm": 4.909358978271484, "learning_rate": 1.95296273081198e-05, "loss": 0.0946, "step": 3257 }, { "epoch": 3.1326923076923077, "grad_norm": 3.0741283893585205, "learning_rate": 1.9529249685624693e-05, "loss": 0.0507, "step": 3258 }, { "epoch": 3.1336538461538463, "grad_norm": 6.388713836669922, "learning_rate": 1.9528871915263602e-05, "loss": 0.1788, "step": 3259 }, { "epoch": 3.1346153846153846, "grad_norm": 2.3674864768981934, "learning_rate": 1.9528493997042383e-05, "loss": 0.041, "step": 3260 }, { "epoch": 3.1355769230769233, "grad_norm": 5.459568023681641, "learning_rate": 1.9528115930966907e-05, "loss": 0.1534, "step": 3261 }, { "epoch": 3.1365384615384615, "grad_norm": 3.011629343032837, "learning_rate": 1.9527737717043033e-05, "loss": 0.0656, "step": 3262 }, { "epoch": 3.1375, "grad_norm": 2.7638041973114014, "learning_rate": 1.9527359355276636e-05, "loss": 0.0692, "step": 3263 }, { "epoch": 3.1384615384615384, "grad_norm": 4.4303131103515625, "learning_rate": 1.952698084567358e-05, "loss": 0.1115, "step": 3264 }, { "epoch": 3.139423076923077, "grad_norm": 2.868849754333496, "learning_rate": 1.9526602188239746e-05, "loss": 0.041, "step": 3265 }, { "epoch": 3.1403846153846153, "grad_norm": 5.20855712890625, "learning_rate": 1.9526223382981006e-05, "loss": 0.1402, "step": 3266 }, { "epoch": 3.141346153846154, "grad_norm": 3.280486822128296, "learning_rate": 1.952584442990324e-05, "loss": 0.0749, "step": 3267 }, { "epoch": 3.1423076923076922, "grad_norm": 2.8321995735168457, "learning_rate": 1.9525465329012322e-05, "loss": 0.0747, "step": 3268 }, { "epoch": 3.143269230769231, "grad_norm": 3.6528284549713135, "learning_rate": 1.952508608031414e-05, "loss": 0.07, "step": 3269 }, { "epoch": 3.144230769230769, "grad_norm": 3.339731216430664, "learning_rate": 1.9524706683814583e-05, "loss": 0.054, "step": 3270 }, { "epoch": 3.145192307692308, "grad_norm": 3.338890790939331, "learning_rate": 1.952432713951953e-05, "loss": 0.0721, "step": 3271 }, { "epoch": 3.146153846153846, "grad_norm": 1.562388300895691, "learning_rate": 1.952394744743487e-05, "loss": 0.018, "step": 3272 }, { "epoch": 3.1471153846153848, "grad_norm": 4.113982677459717, "learning_rate": 1.95235676075665e-05, "loss": 0.1106, "step": 3273 }, { "epoch": 3.148076923076923, "grad_norm": 6.584005355834961, "learning_rate": 1.9523187619920315e-05, "loss": 0.2064, "step": 3274 }, { "epoch": 3.1490384615384617, "grad_norm": 5.5661396980285645, "learning_rate": 1.9522807484502203e-05, "loss": 0.166, "step": 3275 }, { "epoch": 3.15, "grad_norm": 3.429112672805786, "learning_rate": 1.9522427201318073e-05, "loss": 0.0657, "step": 3276 }, { "epoch": 3.1509615384615386, "grad_norm": 2.1078619956970215, "learning_rate": 1.952204677037382e-05, "loss": 0.0429, "step": 3277 }, { "epoch": 3.151923076923077, "grad_norm": 3.1636414527893066, "learning_rate": 1.9521666191675343e-05, "loss": 0.0783, "step": 3278 }, { "epoch": 3.1528846153846155, "grad_norm": 4.184540271759033, "learning_rate": 1.9521285465228558e-05, "loss": 0.0746, "step": 3279 }, { "epoch": 3.1538461538461537, "grad_norm": 5.197473049163818, "learning_rate": 1.9520904591039366e-05, "loss": 0.1475, "step": 3280 }, { "epoch": 3.1548076923076924, "grad_norm": 4.100708961486816, "learning_rate": 1.952052356911368e-05, "loss": 0.0713, "step": 3281 }, { "epoch": 3.1557692307692307, "grad_norm": 5.928302764892578, "learning_rate": 1.9520142399457405e-05, "loss": 0.246, "step": 3282 }, { "epoch": 3.1567307692307693, "grad_norm": 3.1266121864318848, "learning_rate": 1.9519761082076465e-05, "loss": 0.0783, "step": 3283 }, { "epoch": 3.1576923076923076, "grad_norm": 2.5611610412597656, "learning_rate": 1.9519379616976773e-05, "loss": 0.0749, "step": 3284 }, { "epoch": 3.1586538461538463, "grad_norm": 4.813133716583252, "learning_rate": 1.951899800416425e-05, "loss": 0.1415, "step": 3285 }, { "epoch": 3.1596153846153845, "grad_norm": 3.198099136352539, "learning_rate": 1.9518616243644813e-05, "loss": 0.0765, "step": 3286 }, { "epoch": 3.160576923076923, "grad_norm": 4.079225063323975, "learning_rate": 1.951823433542439e-05, "loss": 0.1224, "step": 3287 }, { "epoch": 3.1615384615384614, "grad_norm": 2.9172370433807373, "learning_rate": 1.9517852279508903e-05, "loss": 0.0695, "step": 3288 }, { "epoch": 3.1625, "grad_norm": 3.537060022354126, "learning_rate": 1.9517470075904285e-05, "loss": 0.1522, "step": 3289 }, { "epoch": 3.1634615384615383, "grad_norm": 4.395223617553711, "learning_rate": 1.9517087724616467e-05, "loss": 0.1147, "step": 3290 }, { "epoch": 3.164423076923077, "grad_norm": 5.015844821929932, "learning_rate": 1.951670522565138e-05, "loss": 0.1253, "step": 3291 }, { "epoch": 3.1653846153846152, "grad_norm": 5.006269931793213, "learning_rate": 1.9516322579014953e-05, "loss": 0.1509, "step": 3292 }, { "epoch": 3.166346153846154, "grad_norm": 4.631577491760254, "learning_rate": 1.951593978471314e-05, "loss": 0.141, "step": 3293 }, { "epoch": 3.167307692307692, "grad_norm": 3.315913677215576, "learning_rate": 1.9515556842751863e-05, "loss": 0.0754, "step": 3294 }, { "epoch": 3.168269230769231, "grad_norm": 4.8657050132751465, "learning_rate": 1.9515173753137074e-05, "loss": 0.176, "step": 3295 }, { "epoch": 3.169230769230769, "grad_norm": 3.482797622680664, "learning_rate": 1.9514790515874714e-05, "loss": 0.1458, "step": 3296 }, { "epoch": 3.1701923076923078, "grad_norm": 5.409046649932861, "learning_rate": 1.9514407130970728e-05, "loss": 0.1747, "step": 3297 }, { "epoch": 3.171153846153846, "grad_norm": 4.156620502471924, "learning_rate": 1.9514023598431074e-05, "loss": 0.0846, "step": 3298 }, { "epoch": 3.1721153846153847, "grad_norm": 4.345427513122559, "learning_rate": 1.951363991826169e-05, "loss": 0.0333, "step": 3299 }, { "epoch": 3.173076923076923, "grad_norm": 2.6740822792053223, "learning_rate": 1.951325609046854e-05, "loss": 0.0525, "step": 3300 }, { "epoch": 3.1740384615384616, "grad_norm": 4.681272983551025, "learning_rate": 1.951287211505758e-05, "loss": 0.1584, "step": 3301 }, { "epoch": 3.175, "grad_norm": 1.9206680059432983, "learning_rate": 1.951248799203476e-05, "loss": 0.0402, "step": 3302 }, { "epoch": 3.1759615384615385, "grad_norm": 3.922664165496826, "learning_rate": 1.951210372140605e-05, "loss": 0.0935, "step": 3303 }, { "epoch": 3.1769230769230767, "grad_norm": 3.9437737464904785, "learning_rate": 1.9511719303177402e-05, "loss": 0.1179, "step": 3304 }, { "epoch": 3.1778846153846154, "grad_norm": 3.9769325256347656, "learning_rate": 1.951133473735479e-05, "loss": 0.0937, "step": 3305 }, { "epoch": 3.1788461538461537, "grad_norm": 3.0307600498199463, "learning_rate": 1.9510950023944178e-05, "loss": 0.0939, "step": 3306 }, { "epoch": 3.1798076923076923, "grad_norm": 2.408324718475342, "learning_rate": 1.9510565162951538e-05, "loss": 0.0363, "step": 3307 }, { "epoch": 3.1807692307692306, "grad_norm": 4.2474799156188965, "learning_rate": 1.9510180154382838e-05, "loss": 0.2376, "step": 3308 }, { "epoch": 3.1817307692307693, "grad_norm": 4.042513370513916, "learning_rate": 1.9509794998244054e-05, "loss": 0.1271, "step": 3309 }, { "epoch": 3.1826923076923075, "grad_norm": 1.8375616073608398, "learning_rate": 1.9509409694541163e-05, "loss": 0.0486, "step": 3310 }, { "epoch": 3.183653846153846, "grad_norm": 4.672604560852051, "learning_rate": 1.9509024243280142e-05, "loss": 0.1204, "step": 3311 }, { "epoch": 3.184615384615385, "grad_norm": 4.4522247314453125, "learning_rate": 1.9508638644466975e-05, "loss": 0.095, "step": 3312 }, { "epoch": 3.185576923076923, "grad_norm": 4.037006855010986, "learning_rate": 1.950825289810764e-05, "loss": 0.1151, "step": 3313 }, { "epoch": 3.1865384615384613, "grad_norm": 4.583474636077881, "learning_rate": 1.950786700420813e-05, "loss": 0.1285, "step": 3314 }, { "epoch": 3.1875, "grad_norm": 3.394299268722534, "learning_rate": 1.950748096277443e-05, "loss": 0.0846, "step": 3315 }, { "epoch": 3.1884615384615387, "grad_norm": 7.5591936111450195, "learning_rate": 1.9507094773812528e-05, "loss": 0.1446, "step": 3316 }, { "epoch": 3.189423076923077, "grad_norm": 5.4180588722229, "learning_rate": 1.9506708437328423e-05, "loss": 0.1276, "step": 3317 }, { "epoch": 3.190384615384615, "grad_norm": 2.035930871963501, "learning_rate": 1.95063219533281e-05, "loss": 0.0553, "step": 3318 }, { "epoch": 3.191346153846154, "grad_norm": 2.542020320892334, "learning_rate": 1.9505935321817556e-05, "loss": 0.132, "step": 3319 }, { "epoch": 3.1923076923076925, "grad_norm": 4.5901055335998535, "learning_rate": 1.9505548542802805e-05, "loss": 0.171, "step": 3320 }, { "epoch": 3.1932692307692307, "grad_norm": 3.923616409301758, "learning_rate": 1.9505161616289833e-05, "loss": 0.1164, "step": 3321 }, { "epoch": 3.1942307692307694, "grad_norm": 3.3671979904174805, "learning_rate": 1.950477454228465e-05, "loss": 0.0884, "step": 3322 }, { "epoch": 3.1951923076923077, "grad_norm": 5.112796306610107, "learning_rate": 1.9504387320793268e-05, "loss": 0.1962, "step": 3323 }, { "epoch": 3.1961538461538463, "grad_norm": 4.1975483894348145, "learning_rate": 1.9503999951821682e-05, "loss": 0.1234, "step": 3324 }, { "epoch": 3.1971153846153846, "grad_norm": 3.1640241146087646, "learning_rate": 1.9503612435375914e-05, "loss": 0.0902, "step": 3325 }, { "epoch": 3.1980769230769233, "grad_norm": 3.389312267303467, "learning_rate": 1.9503224771461978e-05, "loss": 0.1279, "step": 3326 }, { "epoch": 3.1990384615384615, "grad_norm": 3.922196626663208, "learning_rate": 1.950283696008588e-05, "loss": 0.1174, "step": 3327 }, { "epoch": 3.2, "grad_norm": 3.071601629257202, "learning_rate": 1.9502449001253645e-05, "loss": 0.099, "step": 3328 }, { "epoch": 3.2009615384615384, "grad_norm": 5.03646993637085, "learning_rate": 1.950206089497129e-05, "loss": 0.1318, "step": 3329 }, { "epoch": 3.201923076923077, "grad_norm": 2.680859088897705, "learning_rate": 1.950167264124484e-05, "loss": 0.0761, "step": 3330 }, { "epoch": 3.2028846153846153, "grad_norm": 3.303500175476074, "learning_rate": 1.9501284240080314e-05, "loss": 0.0795, "step": 3331 }, { "epoch": 3.203846153846154, "grad_norm": 3.5195086002349854, "learning_rate": 1.9500895691483747e-05, "loss": 0.0987, "step": 3332 }, { "epoch": 3.2048076923076922, "grad_norm": 3.6052706241607666, "learning_rate": 1.950050699546116e-05, "loss": 0.1367, "step": 3333 }, { "epoch": 3.205769230769231, "grad_norm": 4.942826271057129, "learning_rate": 1.950011815201859e-05, "loss": 0.079, "step": 3334 }, { "epoch": 3.206730769230769, "grad_norm": 2.643024444580078, "learning_rate": 1.9499729161162067e-05, "loss": 0.054, "step": 3335 }, { "epoch": 3.207692307692308, "grad_norm": 4.532960414886475, "learning_rate": 1.949934002289763e-05, "loss": 0.1307, "step": 3336 }, { "epoch": 3.208653846153846, "grad_norm": 3.9136898517608643, "learning_rate": 1.9498950737231318e-05, "loss": 0.1034, "step": 3337 }, { "epoch": 3.2096153846153848, "grad_norm": 3.2164604663848877, "learning_rate": 1.9498561304169165e-05, "loss": 0.093, "step": 3338 }, { "epoch": 3.210576923076923, "grad_norm": 3.199207305908203, "learning_rate": 1.9498171723717222e-05, "loss": 0.08, "step": 3339 }, { "epoch": 3.2115384615384617, "grad_norm": 1.748049020767212, "learning_rate": 1.949778199588153e-05, "loss": 0.0313, "step": 3340 }, { "epoch": 3.2125, "grad_norm": 3.973787307739258, "learning_rate": 1.9497392120668136e-05, "loss": 0.1053, "step": 3341 }, { "epoch": 3.2134615384615386, "grad_norm": 3.397916793823242, "learning_rate": 1.9497002098083094e-05, "loss": 0.1204, "step": 3342 }, { "epoch": 3.214423076923077, "grad_norm": 3.5356624126434326, "learning_rate": 1.9496611928132448e-05, "loss": 0.0733, "step": 3343 }, { "epoch": 3.2153846153846155, "grad_norm": 1.8037984371185303, "learning_rate": 1.949622161082226e-05, "loss": 0.033, "step": 3344 }, { "epoch": 3.2163461538461537, "grad_norm": 2.2866616249084473, "learning_rate": 1.9495831146158583e-05, "loss": 0.0397, "step": 3345 }, { "epoch": 3.2173076923076924, "grad_norm": 4.58430290222168, "learning_rate": 1.949544053414748e-05, "loss": 0.127, "step": 3346 }, { "epoch": 3.2182692307692307, "grad_norm": 4.3769965171813965, "learning_rate": 1.9495049774795006e-05, "loss": 0.0738, "step": 3347 }, { "epoch": 3.2192307692307693, "grad_norm": 3.3616819381713867, "learning_rate": 1.9494658868107225e-05, "loss": 0.0735, "step": 3348 }, { "epoch": 3.2201923076923076, "grad_norm": 2.987802028656006, "learning_rate": 1.9494267814090207e-05, "loss": 0.0986, "step": 3349 }, { "epoch": 3.2211538461538463, "grad_norm": 3.5349044799804688, "learning_rate": 1.9493876612750018e-05, "loss": 0.1274, "step": 3350 }, { "epoch": 3.2221153846153845, "grad_norm": 4.357303142547607, "learning_rate": 1.9493485264092728e-05, "loss": 0.2019, "step": 3351 }, { "epoch": 3.223076923076923, "grad_norm": 4.485270023345947, "learning_rate": 1.949309376812441e-05, "loss": 0.0996, "step": 3352 }, { "epoch": 3.2240384615384614, "grad_norm": 4.256842136383057, "learning_rate": 1.9492702124851135e-05, "loss": 0.1245, "step": 3353 }, { "epoch": 3.225, "grad_norm": 4.006919860839844, "learning_rate": 1.9492310334278988e-05, "loss": 0.0672, "step": 3354 }, { "epoch": 3.2259615384615383, "grad_norm": 2.650634527206421, "learning_rate": 1.9491918396414043e-05, "loss": 0.0841, "step": 3355 }, { "epoch": 3.226923076923077, "grad_norm": 2.6897034645080566, "learning_rate": 1.9491526311262384e-05, "loss": 0.0681, "step": 3356 }, { "epoch": 3.2278846153846152, "grad_norm": 5.491283416748047, "learning_rate": 1.949113407883009e-05, "loss": 0.1754, "step": 3357 }, { "epoch": 3.228846153846154, "grad_norm": 5.382660865783691, "learning_rate": 1.9490741699123256e-05, "loss": 0.1763, "step": 3358 }, { "epoch": 3.229807692307692, "grad_norm": 4.143012523651123, "learning_rate": 1.9490349172147964e-05, "loss": 0.1082, "step": 3359 }, { "epoch": 3.230769230769231, "grad_norm": 3.533517837524414, "learning_rate": 1.9489956497910304e-05, "loss": 0.1449, "step": 3360 }, { "epoch": 3.231730769230769, "grad_norm": 1.7464234828948975, "learning_rate": 1.9489563676416377e-05, "loss": 0.0412, "step": 3361 }, { "epoch": 3.2326923076923078, "grad_norm": 3.406182050704956, "learning_rate": 1.948917070767227e-05, "loss": 0.0625, "step": 3362 }, { "epoch": 3.233653846153846, "grad_norm": 3.6733038425445557, "learning_rate": 1.9488777591684083e-05, "loss": 0.1202, "step": 3363 }, { "epoch": 3.2346153846153847, "grad_norm": 3.9524261951446533, "learning_rate": 1.948838432845792e-05, "loss": 0.0872, "step": 3364 }, { "epoch": 3.235576923076923, "grad_norm": 4.23707914352417, "learning_rate": 1.9487990917999878e-05, "loss": 0.2114, "step": 3365 }, { "epoch": 3.2365384615384616, "grad_norm": 4.439446449279785, "learning_rate": 1.9487597360316067e-05, "loss": 0.1549, "step": 3366 }, { "epoch": 3.2375, "grad_norm": 2.5567498207092285, "learning_rate": 1.9487203655412587e-05, "loss": 0.0509, "step": 3367 }, { "epoch": 3.2384615384615385, "grad_norm": 4.105798244476318, "learning_rate": 1.9486809803295553e-05, "loss": 0.117, "step": 3368 }, { "epoch": 3.2394230769230767, "grad_norm": 3.6776888370513916, "learning_rate": 1.9486415803971074e-05, "loss": 0.1695, "step": 3369 }, { "epoch": 3.2403846153846154, "grad_norm": 2.5002808570861816, "learning_rate": 1.9486021657445263e-05, "loss": 0.0733, "step": 3370 }, { "epoch": 3.2413461538461537, "grad_norm": 4.26901388168335, "learning_rate": 1.948562736372424e-05, "loss": 0.2554, "step": 3371 }, { "epoch": 3.2423076923076923, "grad_norm": 2.470133066177368, "learning_rate": 1.9485232922814117e-05, "loss": 0.0418, "step": 3372 }, { "epoch": 3.2432692307692306, "grad_norm": 2.534027576446533, "learning_rate": 1.9484838334721018e-05, "loss": 0.0455, "step": 3373 }, { "epoch": 3.2442307692307693, "grad_norm": 3.804274082183838, "learning_rate": 1.9484443599451065e-05, "loss": 0.096, "step": 3374 }, { "epoch": 3.2451923076923075, "grad_norm": 5.522318363189697, "learning_rate": 1.9484048717010388e-05, "loss": 0.225, "step": 3375 }, { "epoch": 3.246153846153846, "grad_norm": 3.334137201309204, "learning_rate": 1.9483653687405105e-05, "loss": 0.1273, "step": 3376 }, { "epoch": 3.247115384615385, "grad_norm": 3.6966841220855713, "learning_rate": 1.9483258510641356e-05, "loss": 0.1033, "step": 3377 }, { "epoch": 3.248076923076923, "grad_norm": 6.550156593322754, "learning_rate": 1.9482863186725267e-05, "loss": 0.2024, "step": 3378 }, { "epoch": 3.2490384615384613, "grad_norm": 4.062534809112549, "learning_rate": 1.948246771566297e-05, "loss": 0.0951, "step": 3379 }, { "epoch": 3.25, "grad_norm": 4.280130386352539, "learning_rate": 1.9482072097460608e-05, "loss": 0.1265, "step": 3380 }, { "epoch": 3.2509615384615387, "grad_norm": 1.4394652843475342, "learning_rate": 1.9481676332124317e-05, "loss": 0.0296, "step": 3381 }, { "epoch": 3.251923076923077, "grad_norm": 2.24004864692688, "learning_rate": 1.9481280419660232e-05, "loss": 0.0393, "step": 3382 }, { "epoch": 3.252884615384615, "grad_norm": 1.9473650455474854, "learning_rate": 1.9480884360074507e-05, "loss": 0.0251, "step": 3383 }, { "epoch": 3.253846153846154, "grad_norm": 3.7343850135803223, "learning_rate": 1.9480488153373284e-05, "loss": 0.1128, "step": 3384 }, { "epoch": 3.2548076923076925, "grad_norm": 5.427224159240723, "learning_rate": 1.9480091799562706e-05, "loss": 0.1715, "step": 3385 }, { "epoch": 3.2557692307692307, "grad_norm": 4.601633071899414, "learning_rate": 1.947969529864893e-05, "loss": 0.1236, "step": 3386 }, { "epoch": 3.256730769230769, "grad_norm": 5.477370738983154, "learning_rate": 1.9479298650638102e-05, "loss": 0.1461, "step": 3387 }, { "epoch": 3.2576923076923077, "grad_norm": 3.972316265106201, "learning_rate": 1.9478901855536386e-05, "loss": 0.1003, "step": 3388 }, { "epoch": 3.2586538461538463, "grad_norm": 3.0806570053100586, "learning_rate": 1.9478504913349926e-05, "loss": 0.0858, "step": 3389 }, { "epoch": 3.2596153846153846, "grad_norm": 3.248333692550659, "learning_rate": 1.947810782408489e-05, "loss": 0.0573, "step": 3390 }, { "epoch": 3.2605769230769233, "grad_norm": 3.1754918098449707, "learning_rate": 1.9477710587747447e-05, "loss": 0.0613, "step": 3391 }, { "epoch": 3.2615384615384615, "grad_norm": 6.390472412109375, "learning_rate": 1.9477313204343744e-05, "loss": 0.1997, "step": 3392 }, { "epoch": 3.2625, "grad_norm": 2.4030184745788574, "learning_rate": 1.9476915673879962e-05, "loss": 0.0657, "step": 3393 }, { "epoch": 3.2634615384615384, "grad_norm": 4.029269218444824, "learning_rate": 1.947651799636226e-05, "loss": 0.0809, "step": 3394 }, { "epoch": 3.264423076923077, "grad_norm": 3.9504950046539307, "learning_rate": 1.9476120171796807e-05, "loss": 0.1191, "step": 3395 }, { "epoch": 3.2653846153846153, "grad_norm": 2.976820468902588, "learning_rate": 1.9475722200189787e-05, "loss": 0.0581, "step": 3396 }, { "epoch": 3.266346153846154, "grad_norm": 4.297825336456299, "learning_rate": 1.9475324081547367e-05, "loss": 0.1117, "step": 3397 }, { "epoch": 3.2673076923076922, "grad_norm": 4.0574188232421875, "learning_rate": 1.947492581587573e-05, "loss": 0.1292, "step": 3398 }, { "epoch": 3.268269230769231, "grad_norm": 2.4252736568450928, "learning_rate": 1.947452740318105e-05, "loss": 0.0903, "step": 3399 }, { "epoch": 3.269230769230769, "grad_norm": 3.42997670173645, "learning_rate": 1.9474128843469514e-05, "loss": 0.0552, "step": 3400 }, { "epoch": 3.270192307692308, "grad_norm": 4.384942054748535, "learning_rate": 1.94737301367473e-05, "loss": 0.0925, "step": 3401 }, { "epoch": 3.271153846153846, "grad_norm": 4.890955448150635, "learning_rate": 1.9473331283020603e-05, "loss": 0.1449, "step": 3402 }, { "epoch": 3.2721153846153848, "grad_norm": 3.985548973083496, "learning_rate": 1.947293228229561e-05, "loss": 0.072, "step": 3403 }, { "epoch": 3.273076923076923, "grad_norm": 3.929962396621704, "learning_rate": 1.947253313457851e-05, "loss": 0.0721, "step": 3404 }, { "epoch": 3.2740384615384617, "grad_norm": 2.8355512619018555, "learning_rate": 1.9472133839875495e-05, "loss": 0.0821, "step": 3405 }, { "epoch": 3.275, "grad_norm": 5.320434093475342, "learning_rate": 1.9471734398192765e-05, "loss": 0.1418, "step": 3406 }, { "epoch": 3.2759615384615386, "grad_norm": 3.97235369682312, "learning_rate": 1.9471334809536514e-05, "loss": 0.1331, "step": 3407 }, { "epoch": 3.276923076923077, "grad_norm": 3.52239990234375, "learning_rate": 1.9470935073912948e-05, "loss": 0.124, "step": 3408 }, { "epoch": 3.2778846153846155, "grad_norm": 6.201186656951904, "learning_rate": 1.9470535191328265e-05, "loss": 0.1949, "step": 3409 }, { "epoch": 3.2788461538461537, "grad_norm": 3.7255916595458984, "learning_rate": 1.947013516178867e-05, "loss": 0.0851, "step": 3410 }, { "epoch": 3.2798076923076924, "grad_norm": 4.879457950592041, "learning_rate": 1.9469734985300373e-05, "loss": 0.1897, "step": 3411 }, { "epoch": 3.2807692307692307, "grad_norm": 4.920492172241211, "learning_rate": 1.9469334661869583e-05, "loss": 0.2244, "step": 3412 }, { "epoch": 3.2817307692307693, "grad_norm": 2.8562583923339844, "learning_rate": 1.946893419150251e-05, "loss": 0.0286, "step": 3413 }, { "epoch": 3.2826923076923076, "grad_norm": 4.061267375946045, "learning_rate": 1.946853357420537e-05, "loss": 0.1326, "step": 3414 }, { "epoch": 3.2836538461538463, "grad_norm": 3.5274031162261963, "learning_rate": 1.9468132809984382e-05, "loss": 0.0741, "step": 3415 }, { "epoch": 3.2846153846153845, "grad_norm": 5.5090837478637695, "learning_rate": 1.9467731898845758e-05, "loss": 0.0561, "step": 3416 }, { "epoch": 3.285576923076923, "grad_norm": 4.344176769256592, "learning_rate": 1.9467330840795723e-05, "loss": 0.0915, "step": 3417 }, { "epoch": 3.2865384615384614, "grad_norm": 1.8863168954849243, "learning_rate": 1.9466929635840495e-05, "loss": 0.0388, "step": 3418 }, { "epoch": 3.2875, "grad_norm": 4.076805591583252, "learning_rate": 1.9466528283986312e-05, "loss": 0.0888, "step": 3419 }, { "epoch": 3.2884615384615383, "grad_norm": 3.5251150131225586, "learning_rate": 1.946612678523939e-05, "loss": 0.0656, "step": 3420 }, { "epoch": 3.289423076923077, "grad_norm": 5.896756172180176, "learning_rate": 1.9465725139605965e-05, "loss": 0.2031, "step": 3421 }, { "epoch": 3.2903846153846152, "grad_norm": 4.918638706207275, "learning_rate": 1.9465323347092266e-05, "loss": 0.1165, "step": 3422 }, { "epoch": 3.291346153846154, "grad_norm": 4.509721279144287, "learning_rate": 1.9464921407704528e-05, "loss": 0.0992, "step": 3423 }, { "epoch": 3.292307692307692, "grad_norm": 4.497099876403809, "learning_rate": 1.9464519321448988e-05, "loss": 0.1318, "step": 3424 }, { "epoch": 3.293269230769231, "grad_norm": 2.021934747695923, "learning_rate": 1.946411708833189e-05, "loss": 0.0691, "step": 3425 }, { "epoch": 3.294230769230769, "grad_norm": 3.638639211654663, "learning_rate": 1.946371470835947e-05, "loss": 0.0803, "step": 3426 }, { "epoch": 3.2951923076923078, "grad_norm": 4.2053070068359375, "learning_rate": 1.946331218153797e-05, "loss": 0.1393, "step": 3427 }, { "epoch": 3.296153846153846, "grad_norm": 3.0919008255004883, "learning_rate": 1.9462909507873645e-05, "loss": 0.0874, "step": 3428 }, { "epoch": 3.2971153846153847, "grad_norm": 5.46739387512207, "learning_rate": 1.9462506687372738e-05, "loss": 0.1378, "step": 3429 }, { "epoch": 3.298076923076923, "grad_norm": 3.3835532665252686, "learning_rate": 1.9462103720041495e-05, "loss": 0.0916, "step": 3430 }, { "epoch": 3.2990384615384616, "grad_norm": 4.41173791885376, "learning_rate": 1.9461700605886174e-05, "loss": 0.1031, "step": 3431 }, { "epoch": 3.3, "grad_norm": 2.8370158672332764, "learning_rate": 1.946129734491303e-05, "loss": 0.0678, "step": 3432 }, { "epoch": 3.3009615384615385, "grad_norm": 2.362901449203491, "learning_rate": 1.946089393712832e-05, "loss": 0.0682, "step": 3433 }, { "epoch": 3.3019230769230767, "grad_norm": 2.951328992843628, "learning_rate": 1.9460490382538304e-05, "loss": 0.0726, "step": 3434 }, { "epoch": 3.3028846153846154, "grad_norm": 4.986036777496338, "learning_rate": 1.9460086681149244e-05, "loss": 0.1526, "step": 3435 }, { "epoch": 3.3038461538461537, "grad_norm": 3.6545584201812744, "learning_rate": 1.9459682832967403e-05, "loss": 0.0962, "step": 3436 }, { "epoch": 3.3048076923076923, "grad_norm": 4.522824764251709, "learning_rate": 1.9459278837999048e-05, "loss": 0.1096, "step": 3437 }, { "epoch": 3.305769230769231, "grad_norm": 2.837047815322876, "learning_rate": 1.9458874696250447e-05, "loss": 0.0866, "step": 3438 }, { "epoch": 3.3067307692307693, "grad_norm": 2.3964109420776367, "learning_rate": 1.9458470407727876e-05, "loss": 0.0507, "step": 3439 }, { "epoch": 3.3076923076923075, "grad_norm": 4.325194358825684, "learning_rate": 1.94580659724376e-05, "loss": 0.1008, "step": 3440 }, { "epoch": 3.308653846153846, "grad_norm": 3.295902967453003, "learning_rate": 1.94576613903859e-05, "loss": 0.0449, "step": 3441 }, { "epoch": 3.309615384615385, "grad_norm": 3.3549373149871826, "learning_rate": 1.9457256661579055e-05, "loss": 0.0882, "step": 3442 }, { "epoch": 3.310576923076923, "grad_norm": 3.399564504623413, "learning_rate": 1.9456851786023342e-05, "loss": 0.1158, "step": 3443 }, { "epoch": 3.3115384615384613, "grad_norm": 4.227174758911133, "learning_rate": 1.9456446763725046e-05, "loss": 0.074, "step": 3444 }, { "epoch": 3.3125, "grad_norm": 8.131501197814941, "learning_rate": 1.945604159469045e-05, "loss": 0.3244, "step": 3445 }, { "epoch": 3.3134615384615387, "grad_norm": 4.697490215301514, "learning_rate": 1.945563627892584e-05, "loss": 0.1024, "step": 3446 }, { "epoch": 3.314423076923077, "grad_norm": 3.4905953407287598, "learning_rate": 1.945523081643751e-05, "loss": 0.0717, "step": 3447 }, { "epoch": 3.315384615384615, "grad_norm": 4.3438215255737305, "learning_rate": 1.9454825207231747e-05, "loss": 0.1603, "step": 3448 }, { "epoch": 3.316346153846154, "grad_norm": 4.556911468505859, "learning_rate": 1.9454419451314845e-05, "loss": 0.1025, "step": 3449 }, { "epoch": 3.3173076923076925, "grad_norm": 3.653836727142334, "learning_rate": 1.9454013548693103e-05, "loss": 0.1125, "step": 3450 }, { "epoch": 3.3182692307692307, "grad_norm": 5.230892658233643, "learning_rate": 1.9453607499372816e-05, "loss": 0.1795, "step": 3451 }, { "epoch": 3.319230769230769, "grad_norm": 4.538278579711914, "learning_rate": 1.945320130336029e-05, "loss": 0.1207, "step": 3452 }, { "epoch": 3.3201923076923077, "grad_norm": 3.611401081085205, "learning_rate": 1.9452794960661822e-05, "loss": 0.1354, "step": 3453 }, { "epoch": 3.3211538461538463, "grad_norm": 4.508507251739502, "learning_rate": 1.9452388471283722e-05, "loss": 0.1714, "step": 3454 }, { "epoch": 3.3221153846153846, "grad_norm": 3.074223756790161, "learning_rate": 1.9451981835232292e-05, "loss": 0.0596, "step": 3455 }, { "epoch": 3.3230769230769233, "grad_norm": 3.296912670135498, "learning_rate": 1.945157505251385e-05, "loss": 0.0672, "step": 3456 }, { "epoch": 3.3240384615384615, "grad_norm": 3.0379161834716797, "learning_rate": 1.9451168123134698e-05, "loss": 0.0762, "step": 3457 }, { "epoch": 3.325, "grad_norm": 2.3546864986419678, "learning_rate": 1.9450761047101158e-05, "loss": 0.0372, "step": 3458 }, { "epoch": 3.3259615384615384, "grad_norm": 4.659527778625488, "learning_rate": 1.9450353824419547e-05, "loss": 0.2081, "step": 3459 }, { "epoch": 3.326923076923077, "grad_norm": 3.37202787399292, "learning_rate": 1.9449946455096177e-05, "loss": 0.0596, "step": 3460 }, { "epoch": 3.3278846153846153, "grad_norm": 3.0648069381713867, "learning_rate": 1.9449538939137373e-05, "loss": 0.1335, "step": 3461 }, { "epoch": 3.328846153846154, "grad_norm": 5.376485824584961, "learning_rate": 1.944913127654946e-05, "loss": 0.0748, "step": 3462 }, { "epoch": 3.3298076923076922, "grad_norm": 5.226487159729004, "learning_rate": 1.9448723467338765e-05, "loss": 0.1887, "step": 3463 }, { "epoch": 3.330769230769231, "grad_norm": 2.9618289470672607, "learning_rate": 1.9448315511511612e-05, "loss": 0.0588, "step": 3464 }, { "epoch": 3.331730769230769, "grad_norm": 3.893101692199707, "learning_rate": 1.9447907409074328e-05, "loss": 0.0735, "step": 3465 }, { "epoch": 3.332692307692308, "grad_norm": 4.40017557144165, "learning_rate": 1.9447499160033255e-05, "loss": 0.1272, "step": 3466 }, { "epoch": 3.333653846153846, "grad_norm": 4.095508098602295, "learning_rate": 1.9447090764394724e-05, "loss": 0.0976, "step": 3467 }, { "epoch": 3.3346153846153848, "grad_norm": 4.354423522949219, "learning_rate": 1.944668222216507e-05, "loss": 0.1338, "step": 3468 }, { "epoch": 3.335576923076923, "grad_norm": 4.553107261657715, "learning_rate": 1.9446273533350633e-05, "loss": 0.1475, "step": 3469 }, { "epoch": 3.3365384615384617, "grad_norm": 4.600802421569824, "learning_rate": 1.9445864697957755e-05, "loss": 0.1267, "step": 3470 }, { "epoch": 3.3375, "grad_norm": 4.422018527984619, "learning_rate": 1.9445455715992782e-05, "loss": 0.1496, "step": 3471 }, { "epoch": 3.3384615384615386, "grad_norm": 3.262626886367798, "learning_rate": 1.9445046587462055e-05, "loss": 0.0611, "step": 3472 }, { "epoch": 3.339423076923077, "grad_norm": 3.298787832260132, "learning_rate": 1.9444637312371925e-05, "loss": 0.0576, "step": 3473 }, { "epoch": 3.3403846153846155, "grad_norm": 2.8214473724365234, "learning_rate": 1.9444227890728748e-05, "loss": 0.0782, "step": 3474 }, { "epoch": 3.3413461538461537, "grad_norm": 5.170365333557129, "learning_rate": 1.944381832253887e-05, "loss": 0.2343, "step": 3475 }, { "epoch": 3.3423076923076924, "grad_norm": 6.947196960449219, "learning_rate": 1.944340860780865e-05, "loss": 0.2559, "step": 3476 }, { "epoch": 3.3432692307692307, "grad_norm": 3.3481457233428955, "learning_rate": 1.9442998746544446e-05, "loss": 0.0831, "step": 3477 }, { "epoch": 3.3442307692307693, "grad_norm": 3.0532169342041016, "learning_rate": 1.9442588738752615e-05, "loss": 0.1036, "step": 3478 }, { "epoch": 3.3451923076923076, "grad_norm": 3.519728422164917, "learning_rate": 1.9442178584439518e-05, "loss": 0.1407, "step": 3479 }, { "epoch": 3.3461538461538463, "grad_norm": 4.29414176940918, "learning_rate": 1.9441768283611523e-05, "loss": 0.1093, "step": 3480 }, { "epoch": 3.3471153846153845, "grad_norm": 4.9964704513549805, "learning_rate": 1.9441357836274998e-05, "loss": 0.1835, "step": 3481 }, { "epoch": 3.348076923076923, "grad_norm": 2.843996286392212, "learning_rate": 1.9440947242436307e-05, "loss": 0.0988, "step": 3482 }, { "epoch": 3.3490384615384614, "grad_norm": 3.6487934589385986, "learning_rate": 1.9440536502101825e-05, "loss": 0.1238, "step": 3483 }, { "epoch": 3.35, "grad_norm": 3.9787752628326416, "learning_rate": 1.9440125615277925e-05, "loss": 0.1127, "step": 3484 }, { "epoch": 3.3509615384615383, "grad_norm": 2.6967241764068604, "learning_rate": 1.9439714581970983e-05, "loss": 0.0408, "step": 3485 }, { "epoch": 3.351923076923077, "grad_norm": 4.635261535644531, "learning_rate": 1.943930340218737e-05, "loss": 0.1152, "step": 3486 }, { "epoch": 3.3528846153846152, "grad_norm": 3.6838221549987793, "learning_rate": 1.9438892075933474e-05, "loss": 0.0521, "step": 3487 }, { "epoch": 3.353846153846154, "grad_norm": 3.6796374320983887, "learning_rate": 1.9438480603215678e-05, "loss": 0.0566, "step": 3488 }, { "epoch": 3.354807692307692, "grad_norm": 4.18607234954834, "learning_rate": 1.9438068984040366e-05, "loss": 0.1108, "step": 3489 }, { "epoch": 3.355769230769231, "grad_norm": 4.092803955078125, "learning_rate": 1.943765721841392e-05, "loss": 0.1059, "step": 3490 }, { "epoch": 3.356730769230769, "grad_norm": 2.3929948806762695, "learning_rate": 1.9437245306342737e-05, "loss": 0.0712, "step": 3491 }, { "epoch": 3.3576923076923078, "grad_norm": 4.719000816345215, "learning_rate": 1.9436833247833204e-05, "loss": 0.1695, "step": 3492 }, { "epoch": 3.358653846153846, "grad_norm": 2.281043291091919, "learning_rate": 1.9436421042891712e-05, "loss": 0.0476, "step": 3493 }, { "epoch": 3.3596153846153847, "grad_norm": 2.518747091293335, "learning_rate": 1.9436008691524664e-05, "loss": 0.068, "step": 3494 }, { "epoch": 3.360576923076923, "grad_norm": 3.7291948795318604, "learning_rate": 1.9435596193738458e-05, "loss": 0.0694, "step": 3495 }, { "epoch": 3.3615384615384616, "grad_norm": 4.549144744873047, "learning_rate": 1.9435183549539486e-05, "loss": 0.163, "step": 3496 }, { "epoch": 3.3625, "grad_norm": 3.041379451751709, "learning_rate": 1.9434770758934162e-05, "loss": 0.0523, "step": 3497 }, { "epoch": 3.3634615384615385, "grad_norm": 2.3755218982696533, "learning_rate": 1.9434357821928883e-05, "loss": 0.0638, "step": 3498 }, { "epoch": 3.3644230769230767, "grad_norm": 6.247125148773193, "learning_rate": 1.9433944738530064e-05, "loss": 0.1669, "step": 3499 }, { "epoch": 3.3653846153846154, "grad_norm": 2.5838170051574707, "learning_rate": 1.943353150874411e-05, "loss": 0.0294, "step": 3500 }, { "epoch": 3.3663461538461537, "grad_norm": 2.5771782398223877, "learning_rate": 1.9433118132577432e-05, "loss": 0.1346, "step": 3501 }, { "epoch": 3.3673076923076923, "grad_norm": 3.103875160217285, "learning_rate": 1.9432704610036448e-05, "loss": 0.0933, "step": 3502 }, { "epoch": 3.368269230769231, "grad_norm": 5.797416687011719, "learning_rate": 1.9432290941127574e-05, "loss": 0.1807, "step": 3503 }, { "epoch": 3.3692307692307693, "grad_norm": 1.7546706199645996, "learning_rate": 1.9431877125857227e-05, "loss": 0.0409, "step": 3504 }, { "epoch": 3.3701923076923075, "grad_norm": 2.6293933391571045, "learning_rate": 1.943146316423183e-05, "loss": 0.0396, "step": 3505 }, { "epoch": 3.371153846153846, "grad_norm": 3.6969661712646484, "learning_rate": 1.9431049056257802e-05, "loss": 0.2716, "step": 3506 }, { "epoch": 3.372115384615385, "grad_norm": 4.3724541664123535, "learning_rate": 1.9430634801941575e-05, "loss": 0.0981, "step": 3507 }, { "epoch": 3.373076923076923, "grad_norm": 3.838937759399414, "learning_rate": 1.9430220401289577e-05, "loss": 0.0845, "step": 3508 }, { "epoch": 3.3740384615384613, "grad_norm": 3.8977866172790527, "learning_rate": 1.942980585430823e-05, "loss": 0.0843, "step": 3509 }, { "epoch": 3.375, "grad_norm": 5.337663173675537, "learning_rate": 1.9429391161003978e-05, "loss": 0.1737, "step": 3510 }, { "epoch": 3.3759615384615387, "grad_norm": 3.7263481616973877, "learning_rate": 1.9428976321383247e-05, "loss": 0.1043, "step": 3511 }, { "epoch": 3.376923076923077, "grad_norm": 4.968623638153076, "learning_rate": 1.942856133545248e-05, "loss": 0.2378, "step": 3512 }, { "epoch": 3.377884615384615, "grad_norm": 4.882884979248047, "learning_rate": 1.942814620321811e-05, "loss": 0.1255, "step": 3513 }, { "epoch": 3.378846153846154, "grad_norm": 6.185255527496338, "learning_rate": 1.9427730924686586e-05, "loss": 0.184, "step": 3514 }, { "epoch": 3.3798076923076925, "grad_norm": 4.838923454284668, "learning_rate": 1.9427315499864345e-05, "loss": 0.1522, "step": 3515 }, { "epoch": 3.3807692307692307, "grad_norm": 2.486006021499634, "learning_rate": 1.9426899928757836e-05, "loss": 0.0843, "step": 3516 }, { "epoch": 3.381730769230769, "grad_norm": 2.644202470779419, "learning_rate": 1.942648421137351e-05, "loss": 0.07, "step": 3517 }, { "epoch": 3.3826923076923077, "grad_norm": 5.144990921020508, "learning_rate": 1.9426068347717812e-05, "loss": 0.1732, "step": 3518 }, { "epoch": 3.3836538461538463, "grad_norm": 3.0474648475646973, "learning_rate": 1.94256523377972e-05, "loss": 0.0589, "step": 3519 }, { "epoch": 3.3846153846153846, "grad_norm": 4.117185115814209, "learning_rate": 1.942523618161813e-05, "loss": 0.1786, "step": 3520 }, { "epoch": 3.3855769230769233, "grad_norm": 2.8761515617370605, "learning_rate": 1.9424819879187056e-05, "loss": 0.071, "step": 3521 }, { "epoch": 3.3865384615384615, "grad_norm": 5.1685261726379395, "learning_rate": 1.9424403430510436e-05, "loss": 0.1712, "step": 3522 }, { "epoch": 3.3875, "grad_norm": 4.888338088989258, "learning_rate": 1.942398683559474e-05, "loss": 0.2785, "step": 3523 }, { "epoch": 3.3884615384615384, "grad_norm": 3.5213422775268555, "learning_rate": 1.9423570094446426e-05, "loss": 0.0535, "step": 3524 }, { "epoch": 3.389423076923077, "grad_norm": 4.02394437789917, "learning_rate": 1.9423153207071958e-05, "loss": 0.1088, "step": 3525 }, { "epoch": 3.3903846153846153, "grad_norm": 2.8697376251220703, "learning_rate": 1.9422736173477814e-05, "loss": 0.0512, "step": 3526 }, { "epoch": 3.391346153846154, "grad_norm": 3.529693126678467, "learning_rate": 1.9422318993670456e-05, "loss": 0.094, "step": 3527 }, { "epoch": 3.3923076923076922, "grad_norm": 1.4588494300842285, "learning_rate": 1.9421901667656364e-05, "loss": 0.0238, "step": 3528 }, { "epoch": 3.393269230769231, "grad_norm": 4.4037299156188965, "learning_rate": 1.942148419544201e-05, "loss": 0.1395, "step": 3529 }, { "epoch": 3.394230769230769, "grad_norm": 4.457376956939697, "learning_rate": 1.9421066577033873e-05, "loss": 0.14, "step": 3530 }, { "epoch": 3.395192307692308, "grad_norm": 4.125909328460693, "learning_rate": 1.9420648812438437e-05, "loss": 0.1059, "step": 3531 }, { "epoch": 3.396153846153846, "grad_norm": 4.40237283706665, "learning_rate": 1.9420230901662178e-05, "loss": 0.1215, "step": 3532 }, { "epoch": 3.3971153846153848, "grad_norm": 4.350257873535156, "learning_rate": 1.9419812844711582e-05, "loss": 0.1343, "step": 3533 }, { "epoch": 3.398076923076923, "grad_norm": 5.384673118591309, "learning_rate": 1.9419394641593135e-05, "loss": 0.1237, "step": 3534 }, { "epoch": 3.3990384615384617, "grad_norm": 3.1783735752105713, "learning_rate": 1.9418976292313333e-05, "loss": 0.1059, "step": 3535 }, { "epoch": 3.4, "grad_norm": 3.114515542984009, "learning_rate": 1.9418557796878663e-05, "loss": 0.0465, "step": 3536 }, { "epoch": 3.4009615384615386, "grad_norm": 3.5407493114471436, "learning_rate": 1.941813915529562e-05, "loss": 0.0606, "step": 3537 }, { "epoch": 3.401923076923077, "grad_norm": 3.7724721431732178, "learning_rate": 1.9417720367570693e-05, "loss": 0.1417, "step": 3538 }, { "epoch": 3.4028846153846155, "grad_norm": 2.9586338996887207, "learning_rate": 1.9417301433710394e-05, "loss": 0.0843, "step": 3539 }, { "epoch": 3.4038461538461537, "grad_norm": 3.682062864303589, "learning_rate": 1.941688235372121e-05, "loss": 0.0725, "step": 3540 }, { "epoch": 3.4048076923076924, "grad_norm": 3.4655869007110596, "learning_rate": 1.9416463127609655e-05, "loss": 0.0512, "step": 3541 }, { "epoch": 3.4057692307692307, "grad_norm": 3.4884142875671387, "learning_rate": 1.9416043755382227e-05, "loss": 0.0831, "step": 3542 }, { "epoch": 3.4067307692307693, "grad_norm": 3.348989725112915, "learning_rate": 1.941562423704544e-05, "loss": 0.1011, "step": 3543 }, { "epoch": 3.4076923076923076, "grad_norm": 2.6044921875, "learning_rate": 1.9415204572605796e-05, "loss": 0.0807, "step": 3544 }, { "epoch": 3.4086538461538463, "grad_norm": 3.725374937057495, "learning_rate": 1.941478476206981e-05, "loss": 0.0947, "step": 3545 }, { "epoch": 3.4096153846153845, "grad_norm": 3.805351495742798, "learning_rate": 1.9414364805443995e-05, "loss": 0.0896, "step": 3546 }, { "epoch": 3.410576923076923, "grad_norm": 4.380327224731445, "learning_rate": 1.9413944702734872e-05, "loss": 0.0719, "step": 3547 }, { "epoch": 3.4115384615384614, "grad_norm": 3.6666126251220703, "learning_rate": 1.9413524453948953e-05, "loss": 0.0573, "step": 3548 }, { "epoch": 3.4125, "grad_norm": 3.3709371089935303, "learning_rate": 1.9413104059092765e-05, "loss": 0.0899, "step": 3549 }, { "epoch": 3.4134615384615383, "grad_norm": 3.465445041656494, "learning_rate": 1.941268351817283e-05, "loss": 0.0543, "step": 3550 }, { "epoch": 3.414423076923077, "grad_norm": 6.424888610839844, "learning_rate": 1.9412262831195674e-05, "loss": 0.3005, "step": 3551 }, { "epoch": 3.4153846153846152, "grad_norm": 3.5189285278320312, "learning_rate": 1.941184199816782e-05, "loss": 0.0857, "step": 3552 }, { "epoch": 3.416346153846154, "grad_norm": 4.543825626373291, "learning_rate": 1.94114210190958e-05, "loss": 0.1142, "step": 3553 }, { "epoch": 3.417307692307692, "grad_norm": 3.6243600845336914, "learning_rate": 1.9410999893986157e-05, "loss": 0.0828, "step": 3554 }, { "epoch": 3.418269230769231, "grad_norm": 4.972451210021973, "learning_rate": 1.941057862284541e-05, "loss": 0.2718, "step": 3555 }, { "epoch": 3.419230769230769, "grad_norm": 2.9592580795288086, "learning_rate": 1.94101572056801e-05, "loss": 0.0613, "step": 3556 }, { "epoch": 3.4201923076923078, "grad_norm": 3.4977474212646484, "learning_rate": 1.9409735642496775e-05, "loss": 0.061, "step": 3557 }, { "epoch": 3.421153846153846, "grad_norm": 2.838456869125366, "learning_rate": 1.9409313933301965e-05, "loss": 0.0449, "step": 3558 }, { "epoch": 3.4221153846153847, "grad_norm": 3.6998047828674316, "learning_rate": 1.9408892078102223e-05, "loss": 0.0692, "step": 3559 }, { "epoch": 3.423076923076923, "grad_norm": 4.687284469604492, "learning_rate": 1.9408470076904087e-05, "loss": 0.1428, "step": 3560 }, { "epoch": 3.4240384615384616, "grad_norm": 4.201829433441162, "learning_rate": 1.940804792971411e-05, "loss": 0.1172, "step": 3561 }, { "epoch": 3.425, "grad_norm": 2.7410478591918945, "learning_rate": 1.940762563653884e-05, "loss": 0.0598, "step": 3562 }, { "epoch": 3.4259615384615385, "grad_norm": 2.0511536598205566, "learning_rate": 1.9407203197384837e-05, "loss": 0.0308, "step": 3563 }, { "epoch": 3.4269230769230767, "grad_norm": 2.1081342697143555, "learning_rate": 1.9406780612258644e-05, "loss": 0.0374, "step": 3564 }, { "epoch": 3.4278846153846154, "grad_norm": 3.65653657913208, "learning_rate": 1.9406357881166827e-05, "loss": 0.0432, "step": 3565 }, { "epoch": 3.4288461538461537, "grad_norm": 11.339305877685547, "learning_rate": 1.9405935004115942e-05, "loss": 0.186, "step": 3566 }, { "epoch": 3.4298076923076923, "grad_norm": 3.5959482192993164, "learning_rate": 1.9405511981112553e-05, "loss": 0.0983, "step": 3567 }, { "epoch": 3.430769230769231, "grad_norm": 4.223759174346924, "learning_rate": 1.9405088812163222e-05, "loss": 0.1471, "step": 3568 }, { "epoch": 3.4317307692307693, "grad_norm": 4.486666202545166, "learning_rate": 1.940466549727452e-05, "loss": 0.0855, "step": 3569 }, { "epoch": 3.4326923076923075, "grad_norm": 4.523255348205566, "learning_rate": 1.9404242036453006e-05, "loss": 0.1582, "step": 3570 }, { "epoch": 3.433653846153846, "grad_norm": 4.2336273193359375, "learning_rate": 1.940381842970526e-05, "loss": 0.1207, "step": 3571 }, { "epoch": 3.434615384615385, "grad_norm": 4.268650531768799, "learning_rate": 1.940339467703785e-05, "loss": 0.1004, "step": 3572 }, { "epoch": 3.435576923076923, "grad_norm": 4.329716682434082, "learning_rate": 1.9402970778457355e-05, "loss": 0.1026, "step": 3573 }, { "epoch": 3.4365384615384613, "grad_norm": 2.009066581726074, "learning_rate": 1.940254673397035e-05, "loss": 0.0355, "step": 3574 }, { "epoch": 3.4375, "grad_norm": 2.9279186725616455, "learning_rate": 1.9402122543583416e-05, "loss": 0.0811, "step": 3575 }, { "epoch": 3.4384615384615387, "grad_norm": 5.224893569946289, "learning_rate": 1.9401698207303137e-05, "loss": 0.2644, "step": 3576 }, { "epoch": 3.439423076923077, "grad_norm": 4.666082859039307, "learning_rate": 1.9401273725136093e-05, "loss": 0.1305, "step": 3577 }, { "epoch": 3.440384615384615, "grad_norm": 3.9682860374450684, "learning_rate": 1.940084909708887e-05, "loss": 0.0902, "step": 3578 }, { "epoch": 3.441346153846154, "grad_norm": 6.410824298858643, "learning_rate": 1.9400424323168064e-05, "loss": 0.27, "step": 3579 }, { "epoch": 3.4423076923076925, "grad_norm": 2.7308874130249023, "learning_rate": 1.9399999403380266e-05, "loss": 0.0351, "step": 3580 }, { "epoch": 3.4432692307692307, "grad_norm": 6.213601589202881, "learning_rate": 1.9399574337732062e-05, "loss": 0.1515, "step": 3581 }, { "epoch": 3.444230769230769, "grad_norm": 3.768303871154785, "learning_rate": 1.9399149126230053e-05, "loss": 0.0578, "step": 3582 }, { "epoch": 3.4451923076923077, "grad_norm": 4.503800868988037, "learning_rate": 1.9398723768880833e-05, "loss": 0.1034, "step": 3583 }, { "epoch": 3.4461538461538463, "grad_norm": 4.625235557556152, "learning_rate": 1.9398298265691008e-05, "loss": 0.0963, "step": 3584 }, { "epoch": 3.4471153846153846, "grad_norm": 3.5011656284332275, "learning_rate": 1.9397872616667175e-05, "loss": 0.0629, "step": 3585 }, { "epoch": 3.4480769230769233, "grad_norm": 2.64294171333313, "learning_rate": 1.939744682181594e-05, "loss": 0.0536, "step": 3586 }, { "epoch": 3.4490384615384615, "grad_norm": 4.703904151916504, "learning_rate": 1.9397020881143915e-05, "loss": 0.1471, "step": 3587 }, { "epoch": 3.45, "grad_norm": 4.6743669509887695, "learning_rate": 1.9396594794657705e-05, "loss": 0.0554, "step": 3588 }, { "epoch": 3.4509615384615384, "grad_norm": 3.0036771297454834, "learning_rate": 1.9396168562363925e-05, "loss": 0.031, "step": 3589 }, { "epoch": 3.451923076923077, "grad_norm": 6.820315837860107, "learning_rate": 1.9395742184269185e-05, "loss": 0.112, "step": 3590 }, { "epoch": 3.4528846153846153, "grad_norm": 3.3599138259887695, "learning_rate": 1.93953156603801e-05, "loss": 0.0705, "step": 3591 }, { "epoch": 3.453846153846154, "grad_norm": 3.6966450214385986, "learning_rate": 1.9394888990703295e-05, "loss": 0.0462, "step": 3592 }, { "epoch": 3.4548076923076922, "grad_norm": 2.983020305633545, "learning_rate": 1.9394462175245382e-05, "loss": 0.0417, "step": 3593 }, { "epoch": 3.455769230769231, "grad_norm": 5.376989364624023, "learning_rate": 1.939403521401299e-05, "loss": 0.0939, "step": 3594 }, { "epoch": 3.456730769230769, "grad_norm": 4.666782855987549, "learning_rate": 1.9393608107012742e-05, "loss": 0.128, "step": 3595 }, { "epoch": 3.457692307692308, "grad_norm": 4.496923923492432, "learning_rate": 1.939318085425127e-05, "loss": 0.1147, "step": 3596 }, { "epoch": 3.458653846153846, "grad_norm": 4.248642921447754, "learning_rate": 1.9392753455735194e-05, "loss": 0.0643, "step": 3597 }, { "epoch": 3.4596153846153848, "grad_norm": 4.683475971221924, "learning_rate": 1.9392325911471154e-05, "loss": 0.1569, "step": 3598 }, { "epoch": 3.460576923076923, "grad_norm": 4.229770660400391, "learning_rate": 1.9391898221465784e-05, "loss": 0.0985, "step": 3599 }, { "epoch": 3.4615384615384617, "grad_norm": 5.259716510772705, "learning_rate": 1.9391470385725716e-05, "loss": 0.151, "step": 3600 }, { "epoch": 3.4625, "grad_norm": 5.519083499908447, "learning_rate": 1.9391042404257593e-05, "loss": 0.0483, "step": 3601 }, { "epoch": 3.4634615384615386, "grad_norm": 3.41902232170105, "learning_rate": 1.9390614277068054e-05, "loss": 0.0407, "step": 3602 }, { "epoch": 3.464423076923077, "grad_norm": 4.429779052734375, "learning_rate": 1.939018600416374e-05, "loss": 0.1686, "step": 3603 }, { "epoch": 3.4653846153846155, "grad_norm": 5.188122272491455, "learning_rate": 1.9389757585551302e-05, "loss": 0.1353, "step": 3604 }, { "epoch": 3.4663461538461537, "grad_norm": 5.9515886306762695, "learning_rate": 1.9389329021237383e-05, "loss": 0.1847, "step": 3605 }, { "epoch": 3.4673076923076924, "grad_norm": 3.74760365486145, "learning_rate": 1.9388900311228636e-05, "loss": 0.0951, "step": 3606 }, { "epoch": 3.4682692307692307, "grad_norm": 4.004143714904785, "learning_rate": 1.9388471455531713e-05, "loss": 0.0627, "step": 3607 }, { "epoch": 3.4692307692307693, "grad_norm": 4.657111167907715, "learning_rate": 1.9388042454153265e-05, "loss": 0.1354, "step": 3608 }, { "epoch": 3.4701923076923076, "grad_norm": 1.9099477529525757, "learning_rate": 1.9387613307099956e-05, "loss": 0.0314, "step": 3609 }, { "epoch": 3.4711538461538463, "grad_norm": 2.588890314102173, "learning_rate": 1.9387184014378438e-05, "loss": 0.0438, "step": 3610 }, { "epoch": 3.4721153846153845, "grad_norm": 6.439480304718018, "learning_rate": 1.9386754575995374e-05, "loss": 0.1661, "step": 3611 }, { "epoch": 3.473076923076923, "grad_norm": 3.8920061588287354, "learning_rate": 1.9386324991957432e-05, "loss": 0.0978, "step": 3612 }, { "epoch": 3.4740384615384614, "grad_norm": 5.368133068084717, "learning_rate": 1.9385895262271273e-05, "loss": 0.2032, "step": 3613 }, { "epoch": 3.475, "grad_norm": 3.5910849571228027, "learning_rate": 1.9385465386943566e-05, "loss": 0.1133, "step": 3614 }, { "epoch": 3.4759615384615383, "grad_norm": 4.943270206451416, "learning_rate": 1.9385035365980985e-05, "loss": 0.2287, "step": 3615 }, { "epoch": 3.476923076923077, "grad_norm": 4.495514869689941, "learning_rate": 1.9384605199390195e-05, "loss": 0.1055, "step": 3616 }, { "epoch": 3.4778846153846152, "grad_norm": 5.98748779296875, "learning_rate": 1.938417488717788e-05, "loss": 0.1003, "step": 3617 }, { "epoch": 3.478846153846154, "grad_norm": 3.866994619369507, "learning_rate": 1.9383744429350714e-05, "loss": 0.0783, "step": 3618 }, { "epoch": 3.479807692307692, "grad_norm": 2.3095598220825195, "learning_rate": 1.9383313825915372e-05, "loss": 0.0336, "step": 3619 }, { "epoch": 3.480769230769231, "grad_norm": 4.10612678527832, "learning_rate": 1.938288307687854e-05, "loss": 0.0685, "step": 3620 }, { "epoch": 3.481730769230769, "grad_norm": 5.494812488555908, "learning_rate": 1.9382452182246902e-05, "loss": 0.145, "step": 3621 }, { "epoch": 3.4826923076923078, "grad_norm": 2.815544843673706, "learning_rate": 1.9382021142027143e-05, "loss": 0.0496, "step": 3622 }, { "epoch": 3.483653846153846, "grad_norm": 4.698551654815674, "learning_rate": 1.938158995622595e-05, "loss": 0.149, "step": 3623 }, { "epoch": 3.4846153846153847, "grad_norm": 3.3526771068573, "learning_rate": 1.9381158624850017e-05, "loss": 0.1396, "step": 3624 }, { "epoch": 3.485576923076923, "grad_norm": 6.2575273513793945, "learning_rate": 1.9380727147906037e-05, "loss": 0.163, "step": 3625 }, { "epoch": 3.4865384615384616, "grad_norm": 2.825939416885376, "learning_rate": 1.93802955254007e-05, "loss": 0.1171, "step": 3626 }, { "epoch": 3.4875, "grad_norm": 4.5391364097595215, "learning_rate": 1.9379863757340713e-05, "loss": 0.0808, "step": 3627 }, { "epoch": 3.4884615384615385, "grad_norm": 3.380439281463623, "learning_rate": 1.9379431843732766e-05, "loss": 0.056, "step": 3628 }, { "epoch": 3.4894230769230767, "grad_norm": 4.503337383270264, "learning_rate": 1.9378999784583565e-05, "loss": 0.2056, "step": 3629 }, { "epoch": 3.4903846153846154, "grad_norm": 4.603015422821045, "learning_rate": 1.9378567579899817e-05, "loss": 0.1533, "step": 3630 }, { "epoch": 3.4913461538461537, "grad_norm": 4.200058460235596, "learning_rate": 1.9378135229688223e-05, "loss": 0.0318, "step": 3631 }, { "epoch": 3.4923076923076923, "grad_norm": 2.7115635871887207, "learning_rate": 1.9377702733955493e-05, "loss": 0.0594, "step": 3632 }, { "epoch": 3.493269230769231, "grad_norm": 2.8192334175109863, "learning_rate": 1.9377270092708345e-05, "loss": 0.0596, "step": 3633 }, { "epoch": 3.4942307692307693, "grad_norm": 3.731688976287842, "learning_rate": 1.9376837305953483e-05, "loss": 0.1256, "step": 3634 }, { "epoch": 3.4951923076923075, "grad_norm": 4.307844161987305, "learning_rate": 1.937640437369763e-05, "loss": 0.1071, "step": 3635 }, { "epoch": 3.496153846153846, "grad_norm": 3.939016342163086, "learning_rate": 1.9375971295947497e-05, "loss": 0.1075, "step": 3636 }, { "epoch": 3.497115384615385, "grad_norm": 7.392923355102539, "learning_rate": 1.937553807270981e-05, "loss": 0.2437, "step": 3637 }, { "epoch": 3.498076923076923, "grad_norm": 3.5484228134155273, "learning_rate": 1.9375104703991286e-05, "loss": 0.1079, "step": 3638 }, { "epoch": 3.4990384615384613, "grad_norm": 3.888098955154419, "learning_rate": 1.937467118979866e-05, "loss": 0.1123, "step": 3639 }, { "epoch": 3.5, "grad_norm": 5.776838779449463, "learning_rate": 1.9374237530138643e-05, "loss": 0.2485, "step": 3640 }, { "epoch": 3.5009615384615387, "grad_norm": 4.969135761260986, "learning_rate": 1.9373803725017975e-05, "loss": 0.0868, "step": 3641 }, { "epoch": 3.501923076923077, "grad_norm": 3.027738094329834, "learning_rate": 1.9373369774443388e-05, "loss": 0.1009, "step": 3642 }, { "epoch": 3.502884615384615, "grad_norm": 2.0572128295898438, "learning_rate": 1.937293567842161e-05, "loss": 0.0368, "step": 3643 }, { "epoch": 3.503846153846154, "grad_norm": 4.126258850097656, "learning_rate": 1.9372501436959382e-05, "loss": 0.0923, "step": 3644 }, { "epoch": 3.5048076923076925, "grad_norm": 3.7832741737365723, "learning_rate": 1.937206705006344e-05, "loss": 0.1634, "step": 3645 }, { "epoch": 3.5057692307692307, "grad_norm": 2.3300561904907227, "learning_rate": 1.937163251774052e-05, "loss": 0.0695, "step": 3646 }, { "epoch": 3.506730769230769, "grad_norm": 4.227972984313965, "learning_rate": 1.937119783999737e-05, "loss": 0.1017, "step": 3647 }, { "epoch": 3.5076923076923077, "grad_norm": 3.5502982139587402, "learning_rate": 1.9370763016840736e-05, "loss": 0.0756, "step": 3648 }, { "epoch": 3.5086538461538463, "grad_norm": 2.564502716064453, "learning_rate": 1.937032804827736e-05, "loss": 0.0299, "step": 3649 }, { "epoch": 3.5096153846153846, "grad_norm": 5.580163478851318, "learning_rate": 1.9369892934313994e-05, "loss": 0.1673, "step": 3650 }, { "epoch": 3.510576923076923, "grad_norm": 3.07523250579834, "learning_rate": 1.936945767495739e-05, "loss": 0.0658, "step": 3651 }, { "epoch": 3.5115384615384615, "grad_norm": 4.27701997756958, "learning_rate": 1.9369022270214305e-05, "loss": 0.1314, "step": 3652 }, { "epoch": 3.5125, "grad_norm": 2.800945520401001, "learning_rate": 1.9368586720091492e-05, "loss": 0.0594, "step": 3653 }, { "epoch": 3.5134615384615384, "grad_norm": 3.677671432495117, "learning_rate": 1.936815102459571e-05, "loss": 0.0978, "step": 3654 }, { "epoch": 3.5144230769230766, "grad_norm": 3.209047555923462, "learning_rate": 1.9367715183733717e-05, "loss": 0.1103, "step": 3655 }, { "epoch": 3.5153846153846153, "grad_norm": 4.110738754272461, "learning_rate": 1.9367279197512277e-05, "loss": 0.1137, "step": 3656 }, { "epoch": 3.516346153846154, "grad_norm": 4.489234447479248, "learning_rate": 1.936684306593816e-05, "loss": 0.2073, "step": 3657 }, { "epoch": 3.5173076923076922, "grad_norm": 4.991311073303223, "learning_rate": 1.9366406789018127e-05, "loss": 0.123, "step": 3658 }, { "epoch": 3.518269230769231, "grad_norm": 6.305593013763428, "learning_rate": 1.936597036675895e-05, "loss": 0.1959, "step": 3659 }, { "epoch": 3.519230769230769, "grad_norm": 4.649746417999268, "learning_rate": 1.9365533799167403e-05, "loss": 0.0794, "step": 3660 }, { "epoch": 3.520192307692308, "grad_norm": 2.4406909942626953, "learning_rate": 1.936509708625026e-05, "loss": 0.0735, "step": 3661 }, { "epoch": 3.521153846153846, "grad_norm": 3.602385997772217, "learning_rate": 1.9364660228014295e-05, "loss": 0.0754, "step": 3662 }, { "epoch": 3.5221153846153848, "grad_norm": 3.0172619819641113, "learning_rate": 1.936422322446629e-05, "loss": 0.0604, "step": 3663 }, { "epoch": 3.523076923076923, "grad_norm": 5.028341770172119, "learning_rate": 1.9363786075613023e-05, "loss": 0.1177, "step": 3664 }, { "epoch": 3.5240384615384617, "grad_norm": 2.325596809387207, "learning_rate": 1.936334878146128e-05, "loss": 0.0447, "step": 3665 }, { "epoch": 3.525, "grad_norm": 2.877833843231201, "learning_rate": 1.9362911342017844e-05, "loss": 0.0708, "step": 3666 }, { "epoch": 3.5259615384615386, "grad_norm": 3.7379260063171387, "learning_rate": 1.9362473757289504e-05, "loss": 0.1649, "step": 3667 }, { "epoch": 3.526923076923077, "grad_norm": 4.323224067687988, "learning_rate": 1.936203602728305e-05, "loss": 0.0855, "step": 3668 }, { "epoch": 3.5278846153846155, "grad_norm": 3.4408891201019287, "learning_rate": 1.9361598152005272e-05, "loss": 0.1416, "step": 3669 }, { "epoch": 3.5288461538461537, "grad_norm": 3.2649331092834473, "learning_rate": 1.9361160131462967e-05, "loss": 0.0659, "step": 3670 }, { "epoch": 3.5298076923076924, "grad_norm": 3.941927194595337, "learning_rate": 1.9360721965662934e-05, "loss": 0.0736, "step": 3671 }, { "epoch": 3.5307692307692307, "grad_norm": 2.800661325454712, "learning_rate": 1.936028365461197e-05, "loss": 0.051, "step": 3672 }, { "epoch": 3.5317307692307693, "grad_norm": 3.694948434829712, "learning_rate": 1.935984519831687e-05, "loss": 0.0807, "step": 3673 }, { "epoch": 3.5326923076923076, "grad_norm": 3.125663995742798, "learning_rate": 1.935940659678445e-05, "loss": 0.0734, "step": 3674 }, { "epoch": 3.5336538461538463, "grad_norm": 2.93937087059021, "learning_rate": 1.9358967850021505e-05, "loss": 0.0712, "step": 3675 }, { "epoch": 3.5346153846153845, "grad_norm": 7.014827251434326, "learning_rate": 1.935852895803485e-05, "loss": 0.2145, "step": 3676 }, { "epoch": 3.535576923076923, "grad_norm": 3.0850555896759033, "learning_rate": 1.935808992083129e-05, "loss": 0.0476, "step": 3677 }, { "epoch": 3.5365384615384614, "grad_norm": 2.494147777557373, "learning_rate": 1.935765073841764e-05, "loss": 0.0539, "step": 3678 }, { "epoch": 3.5375, "grad_norm": 3.00850248336792, "learning_rate": 1.935721141080072e-05, "loss": 0.0547, "step": 3679 }, { "epoch": 3.5384615384615383, "grad_norm": 3.1282730102539062, "learning_rate": 1.9356771937987336e-05, "loss": 0.0699, "step": 3680 }, { "epoch": 3.539423076923077, "grad_norm": 3.1923389434814453, "learning_rate": 1.9356332319984315e-05, "loss": 0.0634, "step": 3681 }, { "epoch": 3.5403846153846152, "grad_norm": 4.144084930419922, "learning_rate": 1.9355892556798477e-05, "loss": 0.0882, "step": 3682 }, { "epoch": 3.541346153846154, "grad_norm": 7.010143280029297, "learning_rate": 1.9355452648436647e-05, "loss": 0.2081, "step": 3683 }, { "epoch": 3.542307692307692, "grad_norm": 5.1175103187561035, "learning_rate": 1.9355012594905645e-05, "loss": 0.2252, "step": 3684 }, { "epoch": 3.543269230769231, "grad_norm": 4.035778999328613, "learning_rate": 1.935457239621231e-05, "loss": 0.0868, "step": 3685 }, { "epoch": 3.544230769230769, "grad_norm": 2.7856664657592773, "learning_rate": 1.9354132052363464e-05, "loss": 0.044, "step": 3686 }, { "epoch": 3.5451923076923078, "grad_norm": 2.6042304039001465, "learning_rate": 1.9353691563365946e-05, "loss": 0.0434, "step": 3687 }, { "epoch": 3.546153846153846, "grad_norm": 4.547000885009766, "learning_rate": 1.9353250929226585e-05, "loss": 0.0772, "step": 3688 }, { "epoch": 3.5471153846153847, "grad_norm": 3.5495946407318115, "learning_rate": 1.935281014995222e-05, "loss": 0.1045, "step": 3689 }, { "epoch": 3.5480769230769234, "grad_norm": 4.612799644470215, "learning_rate": 1.935236922554969e-05, "loss": 0.0778, "step": 3690 }, { "epoch": 3.5490384615384616, "grad_norm": 2.446753978729248, "learning_rate": 1.9351928156025844e-05, "loss": 0.104, "step": 3691 }, { "epoch": 3.55, "grad_norm": 3.5047669410705566, "learning_rate": 1.935148694138752e-05, "loss": 0.0961, "step": 3692 }, { "epoch": 3.5509615384615385, "grad_norm": 3.224510431289673, "learning_rate": 1.935104558164156e-05, "loss": 0.0717, "step": 3693 }, { "epoch": 3.551923076923077, "grad_norm": 4.918731689453125, "learning_rate": 1.935060407679482e-05, "loss": 0.091, "step": 3694 }, { "epoch": 3.5528846153846154, "grad_norm": 4.525518894195557, "learning_rate": 1.9350162426854152e-05, "loss": 0.1266, "step": 3695 }, { "epoch": 3.5538461538461537, "grad_norm": 3.7112605571746826, "learning_rate": 1.93497206318264e-05, "loss": 0.1526, "step": 3696 }, { "epoch": 3.5548076923076923, "grad_norm": 5.495625019073486, "learning_rate": 1.9349278691718426e-05, "loss": 0.1731, "step": 3697 }, { "epoch": 3.555769230769231, "grad_norm": 5.190914630889893, "learning_rate": 1.934883660653709e-05, "loss": 0.1557, "step": 3698 }, { "epoch": 3.5567307692307693, "grad_norm": 4.718347549438477, "learning_rate": 1.9348394376289247e-05, "loss": 0.117, "step": 3699 }, { "epoch": 3.5576923076923075, "grad_norm": 5.103327751159668, "learning_rate": 1.934795200098176e-05, "loss": 0.1097, "step": 3700 }, { "epoch": 3.558653846153846, "grad_norm": 2.888885021209717, "learning_rate": 1.9347509480621492e-05, "loss": 0.0899, "step": 3701 }, { "epoch": 3.559615384615385, "grad_norm": 3.073770523071289, "learning_rate": 1.9347066815215313e-05, "loss": 0.0604, "step": 3702 }, { "epoch": 3.560576923076923, "grad_norm": 2.5529122352600098, "learning_rate": 1.934662400477009e-05, "loss": 0.0622, "step": 3703 }, { "epoch": 3.5615384615384613, "grad_norm": 3.4220032691955566, "learning_rate": 1.9346181049292695e-05, "loss": 0.0736, "step": 3704 }, { "epoch": 3.5625, "grad_norm": 4.13116979598999, "learning_rate": 1.934573794879e-05, "loss": 0.158, "step": 3705 }, { "epoch": 3.5634615384615387, "grad_norm": 3.170198440551758, "learning_rate": 1.934529470326888e-05, "loss": 0.0585, "step": 3706 }, { "epoch": 3.564423076923077, "grad_norm": 6.262817859649658, "learning_rate": 1.934485131273622e-05, "loss": 0.1489, "step": 3707 }, { "epoch": 3.565384615384615, "grad_norm": 6.1001691818237305, "learning_rate": 1.934440777719889e-05, "loss": 0.1786, "step": 3708 }, { "epoch": 3.566346153846154, "grad_norm": 5.078117847442627, "learning_rate": 1.9343964096663774e-05, "loss": 0.1184, "step": 3709 }, { "epoch": 3.5673076923076925, "grad_norm": 5.305940628051758, "learning_rate": 1.9343520271137764e-05, "loss": 0.1404, "step": 3710 }, { "epoch": 3.5682692307692307, "grad_norm": 3.5610713958740234, "learning_rate": 1.934307630062774e-05, "loss": 0.0831, "step": 3711 }, { "epoch": 3.569230769230769, "grad_norm": 1.9529554843902588, "learning_rate": 1.9342632185140597e-05, "loss": 0.0522, "step": 3712 }, { "epoch": 3.5701923076923077, "grad_norm": 3.1859500408172607, "learning_rate": 1.934218792468322e-05, "loss": 0.0975, "step": 3713 }, { "epoch": 3.5711538461538463, "grad_norm": 3.8774075508117676, "learning_rate": 1.9341743519262505e-05, "loss": 0.1728, "step": 3714 }, { "epoch": 3.5721153846153846, "grad_norm": 5.260478496551514, "learning_rate": 1.934129896888535e-05, "loss": 0.1049, "step": 3715 }, { "epoch": 3.573076923076923, "grad_norm": 4.546828746795654, "learning_rate": 1.9340854273558652e-05, "loss": 0.1347, "step": 3716 }, { "epoch": 3.5740384615384615, "grad_norm": 3.8854241371154785, "learning_rate": 1.9340409433289306e-05, "loss": 0.1231, "step": 3717 }, { "epoch": 3.575, "grad_norm": 5.317526340484619, "learning_rate": 1.9339964448084223e-05, "loss": 0.17, "step": 3718 }, { "epoch": 3.5759615384615384, "grad_norm": 5.421589374542236, "learning_rate": 1.9339519317950305e-05, "loss": 0.1204, "step": 3719 }, { "epoch": 3.5769230769230766, "grad_norm": 4.787055492401123, "learning_rate": 1.9339074042894458e-05, "loss": 0.1065, "step": 3720 }, { "epoch": 3.5778846153846153, "grad_norm": 3.096785068511963, "learning_rate": 1.933862862292359e-05, "loss": 0.0448, "step": 3721 }, { "epoch": 3.578846153846154, "grad_norm": 4.768808364868164, "learning_rate": 1.9338183058044614e-05, "loss": 0.1533, "step": 3722 }, { "epoch": 3.5798076923076922, "grad_norm": 3.1059200763702393, "learning_rate": 1.9337737348264448e-05, "loss": 0.0666, "step": 3723 }, { "epoch": 3.580769230769231, "grad_norm": 6.243869304656982, "learning_rate": 1.933729149359e-05, "loss": 0.1733, "step": 3724 }, { "epoch": 3.581730769230769, "grad_norm": 5.016406059265137, "learning_rate": 1.9336845494028193e-05, "loss": 0.1471, "step": 3725 }, { "epoch": 3.582692307692308, "grad_norm": 4.315366268157959, "learning_rate": 1.933639934958595e-05, "loss": 0.1512, "step": 3726 }, { "epoch": 3.583653846153846, "grad_norm": 5.403285503387451, "learning_rate": 1.933595306027019e-05, "loss": 0.1789, "step": 3727 }, { "epoch": 3.5846153846153848, "grad_norm": 3.764880895614624, "learning_rate": 1.933550662608784e-05, "loss": 0.1105, "step": 3728 }, { "epoch": 3.585576923076923, "grad_norm": 2.852304458618164, "learning_rate": 1.9335060047045827e-05, "loss": 0.0722, "step": 3729 }, { "epoch": 3.5865384615384617, "grad_norm": 5.981154918670654, "learning_rate": 1.9334613323151074e-05, "loss": 0.0783, "step": 3730 }, { "epoch": 3.5875, "grad_norm": 2.5973243713378906, "learning_rate": 1.9334166454410525e-05, "loss": 0.1043, "step": 3731 }, { "epoch": 3.5884615384615386, "grad_norm": 3.7957584857940674, "learning_rate": 1.9333719440831107e-05, "loss": 0.1505, "step": 3732 }, { "epoch": 3.589423076923077, "grad_norm": 5.073719024658203, "learning_rate": 1.9333272282419758e-05, "loss": 0.1077, "step": 3733 }, { "epoch": 3.5903846153846155, "grad_norm": 3.283339262008667, "learning_rate": 1.9332824979183414e-05, "loss": 0.0483, "step": 3734 }, { "epoch": 3.5913461538461537, "grad_norm": 4.9675750732421875, "learning_rate": 1.933237753112902e-05, "loss": 0.1468, "step": 3735 }, { "epoch": 3.5923076923076924, "grad_norm": 3.4971325397491455, "learning_rate": 1.9331929938263515e-05, "loss": 0.0988, "step": 3736 }, { "epoch": 3.5932692307692307, "grad_norm": 5.424452304840088, "learning_rate": 1.9331482200593848e-05, "loss": 0.1869, "step": 3737 }, { "epoch": 3.5942307692307693, "grad_norm": 4.122259140014648, "learning_rate": 1.9331034318126965e-05, "loss": 0.1008, "step": 3738 }, { "epoch": 3.5951923076923076, "grad_norm": 4.699239253997803, "learning_rate": 1.9330586290869812e-05, "loss": 0.1416, "step": 3739 }, { "epoch": 3.5961538461538463, "grad_norm": 4.279750823974609, "learning_rate": 1.9330138118829348e-05, "loss": 0.0535, "step": 3740 }, { "epoch": 3.5971153846153845, "grad_norm": 5.046252727508545, "learning_rate": 1.9329689802012525e-05, "loss": 0.122, "step": 3741 }, { "epoch": 3.598076923076923, "grad_norm": 4.242335319519043, "learning_rate": 1.9329241340426296e-05, "loss": 0.1133, "step": 3742 }, { "epoch": 3.5990384615384614, "grad_norm": 4.16152286529541, "learning_rate": 1.9328792734077623e-05, "loss": 0.0906, "step": 3743 }, { "epoch": 3.6, "grad_norm": 3.388394355773926, "learning_rate": 1.9328343982973465e-05, "loss": 0.1108, "step": 3744 }, { "epoch": 3.6009615384615383, "grad_norm": 5.580038547515869, "learning_rate": 1.932789508712079e-05, "loss": 0.2113, "step": 3745 }, { "epoch": 3.601923076923077, "grad_norm": 3.2504031658172607, "learning_rate": 1.9327446046526557e-05, "loss": 0.0671, "step": 3746 }, { "epoch": 3.6028846153846152, "grad_norm": 3.8746025562286377, "learning_rate": 1.932699686119774e-05, "loss": 0.1652, "step": 3747 }, { "epoch": 3.603846153846154, "grad_norm": 2.87283992767334, "learning_rate": 1.9326547531141304e-05, "loss": 0.0612, "step": 3748 }, { "epoch": 3.604807692307692, "grad_norm": 3.2366583347320557, "learning_rate": 1.9326098056364224e-05, "loss": 0.1072, "step": 3749 }, { "epoch": 3.605769230769231, "grad_norm": 4.9548726081848145, "learning_rate": 1.9325648436873473e-05, "loss": 0.1908, "step": 3750 }, { "epoch": 3.606730769230769, "grad_norm": 4.015273094177246, "learning_rate": 1.9325198672676033e-05, "loss": 0.1569, "step": 3751 }, { "epoch": 3.6076923076923078, "grad_norm": 7.551737308502197, "learning_rate": 1.9324748763778875e-05, "loss": 0.1949, "step": 3752 }, { "epoch": 3.608653846153846, "grad_norm": 3.6033267974853516, "learning_rate": 1.9324298710188986e-05, "loss": 0.1465, "step": 3753 }, { "epoch": 3.6096153846153847, "grad_norm": 5.25921630859375, "learning_rate": 1.9323848511913344e-05, "loss": 0.1363, "step": 3754 }, { "epoch": 3.6105769230769234, "grad_norm": 4.099277973175049, "learning_rate": 1.932339816895894e-05, "loss": 0.1201, "step": 3755 }, { "epoch": 3.6115384615384616, "grad_norm": 3.1257576942443848, "learning_rate": 1.932294768133276e-05, "loss": 0.0716, "step": 3756 }, { "epoch": 3.6125, "grad_norm": 3.1553032398223877, "learning_rate": 1.9322497049041797e-05, "loss": 0.1188, "step": 3757 }, { "epoch": 3.6134615384615385, "grad_norm": 4.727908611297607, "learning_rate": 1.932204627209304e-05, "loss": 0.095, "step": 3758 }, { "epoch": 3.614423076923077, "grad_norm": 4.103100299835205, "learning_rate": 1.9321595350493483e-05, "loss": 0.114, "step": 3759 }, { "epoch": 3.6153846153846154, "grad_norm": 2.4555423259735107, "learning_rate": 1.9321144284250123e-05, "loss": 0.0526, "step": 3760 }, { "epoch": 3.6163461538461537, "grad_norm": 4.270241737365723, "learning_rate": 1.9320693073369967e-05, "loss": 0.1222, "step": 3761 }, { "epoch": 3.6173076923076923, "grad_norm": 2.2261035442352295, "learning_rate": 1.9320241717860007e-05, "loss": 0.0422, "step": 3762 }, { "epoch": 3.618269230769231, "grad_norm": 3.9180071353912354, "learning_rate": 1.9319790217727248e-05, "loss": 0.169, "step": 3763 }, { "epoch": 3.6192307692307693, "grad_norm": 5.46936559677124, "learning_rate": 1.93193385729787e-05, "loss": 0.2286, "step": 3764 }, { "epoch": 3.6201923076923075, "grad_norm": 4.7891154289245605, "learning_rate": 1.931888678362137e-05, "loss": 0.1282, "step": 3765 }, { "epoch": 3.621153846153846, "grad_norm": 4.506956100463867, "learning_rate": 1.9318434849662267e-05, "loss": 0.1668, "step": 3766 }, { "epoch": 3.622115384615385, "grad_norm": 2.630767822265625, "learning_rate": 1.9317982771108405e-05, "loss": 0.0368, "step": 3767 }, { "epoch": 3.623076923076923, "grad_norm": 9.382490158081055, "learning_rate": 1.9317530547966798e-05, "loss": 0.2984, "step": 3768 }, { "epoch": 3.6240384615384613, "grad_norm": 7.938056468963623, "learning_rate": 1.9317078180244466e-05, "loss": 0.2226, "step": 3769 }, { "epoch": 3.625, "grad_norm": 3.340489625930786, "learning_rate": 1.931662566794842e-05, "loss": 0.0895, "step": 3770 }, { "epoch": 3.6259615384615387, "grad_norm": 4.53651237487793, "learning_rate": 1.9316173011085692e-05, "loss": 0.2141, "step": 3771 }, { "epoch": 3.626923076923077, "grad_norm": 4.953281879425049, "learning_rate": 1.93157202096633e-05, "loss": 0.1704, "step": 3772 }, { "epoch": 3.627884615384615, "grad_norm": 4.351008892059326, "learning_rate": 1.9315267263688273e-05, "loss": 0.1351, "step": 3773 }, { "epoch": 3.628846153846154, "grad_norm": 5.543303489685059, "learning_rate": 1.9314814173167638e-05, "loss": 0.2073, "step": 3774 }, { "epoch": 3.6298076923076925, "grad_norm": 5.357900142669678, "learning_rate": 1.9314360938108427e-05, "loss": 0.1067, "step": 3775 }, { "epoch": 3.6307692307692307, "grad_norm": 4.559517860412598, "learning_rate": 1.9313907558517666e-05, "loss": 0.1388, "step": 3776 }, { "epoch": 3.631730769230769, "grad_norm": 3.121957540512085, "learning_rate": 1.93134540344024e-05, "loss": 0.0841, "step": 3777 }, { "epoch": 3.6326923076923077, "grad_norm": 4.830877304077148, "learning_rate": 1.9313000365769663e-05, "loss": 0.1477, "step": 3778 }, { "epoch": 3.6336538461538463, "grad_norm": 1.8638887405395508, "learning_rate": 1.931254655262649e-05, "loss": 0.0434, "step": 3779 }, { "epoch": 3.6346153846153846, "grad_norm": 3.5999820232391357, "learning_rate": 1.931209259497993e-05, "loss": 0.083, "step": 3780 }, { "epoch": 3.635576923076923, "grad_norm": 4.757231712341309, "learning_rate": 1.931163849283702e-05, "loss": 0.1013, "step": 3781 }, { "epoch": 3.6365384615384615, "grad_norm": 4.517132759094238, "learning_rate": 1.9311184246204812e-05, "loss": 0.1492, "step": 3782 }, { "epoch": 3.6375, "grad_norm": 1.8449066877365112, "learning_rate": 1.9310729855090352e-05, "loss": 0.0433, "step": 3783 }, { "epoch": 3.6384615384615384, "grad_norm": 2.723705768585205, "learning_rate": 1.9310275319500694e-05, "loss": 0.0784, "step": 3784 }, { "epoch": 3.6394230769230766, "grad_norm": 2.801442861557007, "learning_rate": 1.9309820639442886e-05, "loss": 0.0619, "step": 3785 }, { "epoch": 3.6403846153846153, "grad_norm": 3.363722085952759, "learning_rate": 1.9309365814923984e-05, "loss": 0.0684, "step": 3786 }, { "epoch": 3.641346153846154, "grad_norm": 5.034792423248291, "learning_rate": 1.9308910845951047e-05, "loss": 0.1709, "step": 3787 }, { "epoch": 3.6423076923076922, "grad_norm": 3.887385129928589, "learning_rate": 1.930845573253114e-05, "loss": 0.1118, "step": 3788 }, { "epoch": 3.643269230769231, "grad_norm": 3.2778940200805664, "learning_rate": 1.9308000474671316e-05, "loss": 0.091, "step": 3789 }, { "epoch": 3.644230769230769, "grad_norm": 3.782569169998169, "learning_rate": 1.9307545072378644e-05, "loss": 0.0597, "step": 3790 }, { "epoch": 3.645192307692308, "grad_norm": 3.213068962097168, "learning_rate": 1.930708952566019e-05, "loss": 0.107, "step": 3791 }, { "epoch": 3.646153846153846, "grad_norm": 5.084219932556152, "learning_rate": 1.9306633834523022e-05, "loss": 0.1089, "step": 3792 }, { "epoch": 3.6471153846153848, "grad_norm": 3.953843593597412, "learning_rate": 1.930617799897421e-05, "loss": 0.0833, "step": 3793 }, { "epoch": 3.648076923076923, "grad_norm": 4.439852714538574, "learning_rate": 1.9305722019020834e-05, "loss": 0.1487, "step": 3794 }, { "epoch": 3.6490384615384617, "grad_norm": 5.493636608123779, "learning_rate": 1.9305265894669962e-05, "loss": 0.1521, "step": 3795 }, { "epoch": 3.65, "grad_norm": 4.941285610198975, "learning_rate": 1.930480962592867e-05, "loss": 0.1261, "step": 3796 }, { "epoch": 3.6509615384615386, "grad_norm": 2.026075601577759, "learning_rate": 1.9304353212804047e-05, "loss": 0.059, "step": 3797 }, { "epoch": 3.651923076923077, "grad_norm": 3.557154893875122, "learning_rate": 1.9303896655303166e-05, "loss": 0.0857, "step": 3798 }, { "epoch": 3.6528846153846155, "grad_norm": 4.167558193206787, "learning_rate": 1.9303439953433122e-05, "loss": 0.0936, "step": 3799 }, { "epoch": 3.6538461538461537, "grad_norm": 4.940688133239746, "learning_rate": 1.9302983107200986e-05, "loss": 0.1738, "step": 3800 }, { "epoch": 3.6548076923076924, "grad_norm": 4.67951774597168, "learning_rate": 1.9302526116613863e-05, "loss": 0.0748, "step": 3801 }, { "epoch": 3.6557692307692307, "grad_norm": 6.171547889709473, "learning_rate": 1.9302068981678834e-05, "loss": 0.165, "step": 3802 }, { "epoch": 3.6567307692307693, "grad_norm": 4.497546672821045, "learning_rate": 1.9301611702402998e-05, "loss": 0.0867, "step": 3803 }, { "epoch": 3.6576923076923076, "grad_norm": 4.645501136779785, "learning_rate": 1.930115427879345e-05, "loss": 0.0835, "step": 3804 }, { "epoch": 3.6586538461538463, "grad_norm": 5.293346405029297, "learning_rate": 1.9300696710857282e-05, "loss": 0.0503, "step": 3805 }, { "epoch": 3.6596153846153845, "grad_norm": 4.430731296539307, "learning_rate": 1.9300238998601598e-05, "loss": 0.1078, "step": 3806 }, { "epoch": 3.660576923076923, "grad_norm": 4.569632530212402, "learning_rate": 1.9299781142033503e-05, "loss": 0.1446, "step": 3807 }, { "epoch": 3.6615384615384614, "grad_norm": 8.604241371154785, "learning_rate": 1.9299323141160097e-05, "loss": 0.1269, "step": 3808 }, { "epoch": 3.6625, "grad_norm": 3.1860907077789307, "learning_rate": 1.929886499598849e-05, "loss": 0.0505, "step": 3809 }, { "epoch": 3.6634615384615383, "grad_norm": 3.5046756267547607, "learning_rate": 1.9298406706525788e-05, "loss": 0.0506, "step": 3810 }, { "epoch": 3.664423076923077, "grad_norm": 3.498863935470581, "learning_rate": 1.929794827277911e-05, "loss": 0.0692, "step": 3811 }, { "epoch": 3.6653846153846152, "grad_norm": 8.29696273803711, "learning_rate": 1.929748969475556e-05, "loss": 0.1543, "step": 3812 }, { "epoch": 3.666346153846154, "grad_norm": 3.573979616165161, "learning_rate": 1.9297030972462257e-05, "loss": 0.1069, "step": 3813 }, { "epoch": 3.667307692307692, "grad_norm": 6.000795841217041, "learning_rate": 1.9296572105906323e-05, "loss": 0.1919, "step": 3814 }, { "epoch": 3.668269230769231, "grad_norm": 3.2543764114379883, "learning_rate": 1.9296113095094877e-05, "loss": 0.0608, "step": 3815 }, { "epoch": 3.669230769230769, "grad_norm": 4.580420017242432, "learning_rate": 1.929565394003503e-05, "loss": 0.1963, "step": 3816 }, { "epoch": 3.6701923076923078, "grad_norm": 7.272792816162109, "learning_rate": 1.9295194640733924e-05, "loss": 0.1684, "step": 3817 }, { "epoch": 3.671153846153846, "grad_norm": 4.359279632568359, "learning_rate": 1.9294735197198675e-05, "loss": 0.1004, "step": 3818 }, { "epoch": 3.6721153846153847, "grad_norm": 2.6701037883758545, "learning_rate": 1.929427560943642e-05, "loss": 0.0609, "step": 3819 }, { "epoch": 3.6730769230769234, "grad_norm": 6.428361415863037, "learning_rate": 1.9293815877454284e-05, "loss": 0.1979, "step": 3820 }, { "epoch": 3.6740384615384616, "grad_norm": 2.8790600299835205, "learning_rate": 1.92933560012594e-05, "loss": 0.0586, "step": 3821 }, { "epoch": 3.675, "grad_norm": 12.46229362487793, "learning_rate": 1.9292895980858904e-05, "loss": 0.0924, "step": 3822 }, { "epoch": 3.6759615384615385, "grad_norm": 6.171648979187012, "learning_rate": 1.9292435816259943e-05, "loss": 0.2036, "step": 3823 }, { "epoch": 3.676923076923077, "grad_norm": 4.30978536605835, "learning_rate": 1.929197550746965e-05, "loss": 0.1234, "step": 3824 }, { "epoch": 3.6778846153846154, "grad_norm": 7.325469017028809, "learning_rate": 1.9291515054495167e-05, "loss": 0.146, "step": 3825 }, { "epoch": 3.6788461538461537, "grad_norm": 3.8203322887420654, "learning_rate": 1.9291054457343642e-05, "loss": 0.1737, "step": 3826 }, { "epoch": 3.6798076923076923, "grad_norm": 5.028871059417725, "learning_rate": 1.9290593716022218e-05, "loss": 0.109, "step": 3827 }, { "epoch": 3.680769230769231, "grad_norm": 5.762265205383301, "learning_rate": 1.929013283053805e-05, "loss": 0.1657, "step": 3828 }, { "epoch": 3.6817307692307693, "grad_norm": 4.4239654541015625, "learning_rate": 1.9289671800898285e-05, "loss": 0.1472, "step": 3829 }, { "epoch": 3.6826923076923075, "grad_norm": 5.080758571624756, "learning_rate": 1.928921062711008e-05, "loss": 0.1999, "step": 3830 }, { "epoch": 3.683653846153846, "grad_norm": 2.134204626083374, "learning_rate": 1.9288749309180586e-05, "loss": 0.0452, "step": 3831 }, { "epoch": 3.684615384615385, "grad_norm": 4.076379299163818, "learning_rate": 1.928828784711697e-05, "loss": 0.0545, "step": 3832 }, { "epoch": 3.685576923076923, "grad_norm": 4.468896865844727, "learning_rate": 1.9287826240926385e-05, "loss": 0.1035, "step": 3833 }, { "epoch": 3.6865384615384613, "grad_norm": 3.0289642810821533, "learning_rate": 1.9287364490615994e-05, "loss": 0.0584, "step": 3834 }, { "epoch": 3.6875, "grad_norm": 4.065279960632324, "learning_rate": 1.9286902596192968e-05, "loss": 0.1596, "step": 3835 }, { "epoch": 3.6884615384615387, "grad_norm": 2.8911166191101074, "learning_rate": 1.9286440557664468e-05, "loss": 0.0727, "step": 3836 }, { "epoch": 3.689423076923077, "grad_norm": 5.308638095855713, "learning_rate": 1.9285978375037666e-05, "loss": 0.1314, "step": 3837 }, { "epoch": 3.690384615384615, "grad_norm": 4.391465663909912, "learning_rate": 1.9285516048319734e-05, "loss": 0.1701, "step": 3838 }, { "epoch": 3.691346153846154, "grad_norm": 4.152435302734375, "learning_rate": 1.928505357751785e-05, "loss": 0.1378, "step": 3839 }, { "epoch": 3.6923076923076925, "grad_norm": 5.81150484085083, "learning_rate": 1.928459096263918e-05, "loss": 0.1688, "step": 3840 }, { "epoch": 3.6932692307692307, "grad_norm": 3.429494857788086, "learning_rate": 1.928412820369091e-05, "loss": 0.1405, "step": 3841 }, { "epoch": 3.694230769230769, "grad_norm": 3.9018561840057373, "learning_rate": 1.9283665300680217e-05, "loss": 0.1652, "step": 3842 }, { "epoch": 3.6951923076923077, "grad_norm": 7.24711799621582, "learning_rate": 1.928320225361429e-05, "loss": 0.2033, "step": 3843 }, { "epoch": 3.6961538461538463, "grad_norm": 4.824862003326416, "learning_rate": 1.9282739062500306e-05, "loss": 0.1859, "step": 3844 }, { "epoch": 3.6971153846153846, "grad_norm": 3.6024606227874756, "learning_rate": 1.9282275727345455e-05, "loss": 0.1076, "step": 3845 }, { "epoch": 3.698076923076923, "grad_norm": 4.745569229125977, "learning_rate": 1.928181224815693e-05, "loss": 0.1661, "step": 3846 }, { "epoch": 3.6990384615384615, "grad_norm": 3.844993829727173, "learning_rate": 1.9281348624941924e-05, "loss": 0.0539, "step": 3847 }, { "epoch": 3.7, "grad_norm": 5.958569526672363, "learning_rate": 1.9280884857707626e-05, "loss": 0.1823, "step": 3848 }, { "epoch": 3.7009615384615384, "grad_norm": 6.12413215637207, "learning_rate": 1.9280420946461233e-05, "loss": 0.2565, "step": 3849 }, { "epoch": 3.7019230769230766, "grad_norm": 5.338071823120117, "learning_rate": 1.9279956891209942e-05, "loss": 0.1986, "step": 3850 }, { "epoch": 3.7028846153846153, "grad_norm": 4.181559085845947, "learning_rate": 1.9279492691960963e-05, "loss": 0.0834, "step": 3851 }, { "epoch": 3.703846153846154, "grad_norm": 4.295332908630371, "learning_rate": 1.9279028348721485e-05, "loss": 0.0869, "step": 3852 }, { "epoch": 3.7048076923076922, "grad_norm": 5.627783298492432, "learning_rate": 1.9278563861498726e-05, "loss": 0.1523, "step": 3853 }, { "epoch": 3.705769230769231, "grad_norm": 3.2038681507110596, "learning_rate": 1.9278099230299884e-05, "loss": 0.0692, "step": 3854 }, { "epoch": 3.706730769230769, "grad_norm": 6.380153179168701, "learning_rate": 1.9277634455132178e-05, "loss": 0.1672, "step": 3855 }, { "epoch": 3.707692307692308, "grad_norm": 4.974225997924805, "learning_rate": 1.927716953600281e-05, "loss": 0.2306, "step": 3856 }, { "epoch": 3.708653846153846, "grad_norm": 4.990019798278809, "learning_rate": 1.9276704472919e-05, "loss": 0.1329, "step": 3857 }, { "epoch": 3.7096153846153848, "grad_norm": 7.856346607208252, "learning_rate": 1.9276239265887966e-05, "loss": 0.2381, "step": 3858 }, { "epoch": 3.710576923076923, "grad_norm": 3.7483937740325928, "learning_rate": 1.927577391491692e-05, "loss": 0.1022, "step": 3859 }, { "epoch": 3.7115384615384617, "grad_norm": 6.795581340789795, "learning_rate": 1.927530842001309e-05, "loss": 0.1977, "step": 3860 }, { "epoch": 3.7125, "grad_norm": 3.6432101726531982, "learning_rate": 1.9274842781183697e-05, "loss": 0.1453, "step": 3861 }, { "epoch": 3.7134615384615386, "grad_norm": 3.9586305618286133, "learning_rate": 1.9274376998435964e-05, "loss": 0.1169, "step": 3862 }, { "epoch": 3.714423076923077, "grad_norm": 2.9138681888580322, "learning_rate": 1.927391107177712e-05, "loss": 0.0681, "step": 3863 }, { "epoch": 3.7153846153846155, "grad_norm": 2.651895523071289, "learning_rate": 1.9273445001214392e-05, "loss": 0.0725, "step": 3864 }, { "epoch": 3.7163461538461537, "grad_norm": 5.281445503234863, "learning_rate": 1.9272978786755016e-05, "loss": 0.1415, "step": 3865 }, { "epoch": 3.7173076923076924, "grad_norm": 3.9196577072143555, "learning_rate": 1.927251242840623e-05, "loss": 0.1012, "step": 3866 }, { "epoch": 3.7182692307692307, "grad_norm": 4.113994598388672, "learning_rate": 1.927204592617526e-05, "loss": 0.1087, "step": 3867 }, { "epoch": 3.7192307692307693, "grad_norm": 5.350371837615967, "learning_rate": 1.9271579280069353e-05, "loss": 0.1921, "step": 3868 }, { "epoch": 3.7201923076923076, "grad_norm": 5.5757527351379395, "learning_rate": 1.9271112490095745e-05, "loss": 0.1285, "step": 3869 }, { "epoch": 3.7211538461538463, "grad_norm": 3.6984198093414307, "learning_rate": 1.9270645556261686e-05, "loss": 0.165, "step": 3870 }, { "epoch": 3.7221153846153845, "grad_norm": 3.731736660003662, "learning_rate": 1.9270178478574412e-05, "loss": 0.1191, "step": 3871 }, { "epoch": 3.723076923076923, "grad_norm": 2.6068215370178223, "learning_rate": 1.926971125704118e-05, "loss": 0.064, "step": 3872 }, { "epoch": 3.7240384615384614, "grad_norm": 2.9098315238952637, "learning_rate": 1.926924389166923e-05, "loss": 0.0641, "step": 3873 }, { "epoch": 3.725, "grad_norm": 3.19742488861084, "learning_rate": 1.9268776382465827e-05, "loss": 0.0683, "step": 3874 }, { "epoch": 3.7259615384615383, "grad_norm": 5.461662769317627, "learning_rate": 1.9268308729438213e-05, "loss": 0.2067, "step": 3875 }, { "epoch": 3.726923076923077, "grad_norm": 4.100137233734131, "learning_rate": 1.9267840932593648e-05, "loss": 0.2164, "step": 3876 }, { "epoch": 3.7278846153846152, "grad_norm": 4.037464141845703, "learning_rate": 1.9267372991939398e-05, "loss": 0.1701, "step": 3877 }, { "epoch": 3.728846153846154, "grad_norm": 5.772424221038818, "learning_rate": 1.926690490748271e-05, "loss": 0.1459, "step": 3878 }, { "epoch": 3.729807692307692, "grad_norm": 5.5869879722595215, "learning_rate": 1.9266436679230866e-05, "loss": 0.1976, "step": 3879 }, { "epoch": 3.730769230769231, "grad_norm": 5.392168045043945, "learning_rate": 1.926596830719112e-05, "loss": 0.1243, "step": 3880 }, { "epoch": 3.731730769230769, "grad_norm": 2.9183263778686523, "learning_rate": 1.9265499791370734e-05, "loss": 0.0618, "step": 3881 }, { "epoch": 3.7326923076923078, "grad_norm": 3.2585532665252686, "learning_rate": 1.9265031131776992e-05, "loss": 0.0977, "step": 3882 }, { "epoch": 3.733653846153846, "grad_norm": 3.563075065612793, "learning_rate": 1.9264562328417155e-05, "loss": 0.1087, "step": 3883 }, { "epoch": 3.7346153846153847, "grad_norm": 4.645782947540283, "learning_rate": 1.9264093381298507e-05, "loss": 0.1189, "step": 3884 }, { "epoch": 3.7355769230769234, "grad_norm": 5.892427444458008, "learning_rate": 1.9263624290428314e-05, "loss": 0.2278, "step": 3885 }, { "epoch": 3.7365384615384616, "grad_norm": 4.113434314727783, "learning_rate": 1.926315505581386e-05, "loss": 0.0965, "step": 3886 }, { "epoch": 3.7375, "grad_norm": 4.320422172546387, "learning_rate": 1.926268567746243e-05, "loss": 0.0768, "step": 3887 }, { "epoch": 3.7384615384615385, "grad_norm": 4.587891101837158, "learning_rate": 1.9262216155381303e-05, "loss": 0.1138, "step": 3888 }, { "epoch": 3.739423076923077, "grad_norm": 5.419760227203369, "learning_rate": 1.9261746489577767e-05, "loss": 0.1055, "step": 3889 }, { "epoch": 3.7403846153846154, "grad_norm": 3.5364818572998047, "learning_rate": 1.9261276680059107e-05, "loss": 0.0416, "step": 3890 }, { "epoch": 3.7413461538461537, "grad_norm": 6.247872352600098, "learning_rate": 1.9260806726832616e-05, "loss": 0.2451, "step": 3891 }, { "epoch": 3.7423076923076923, "grad_norm": 4.6446213722229, "learning_rate": 1.926033662990558e-05, "loss": 0.1952, "step": 3892 }, { "epoch": 3.743269230769231, "grad_norm": 3.1226484775543213, "learning_rate": 1.9259866389285303e-05, "loss": 0.063, "step": 3893 }, { "epoch": 3.7442307692307693, "grad_norm": 6.659451961517334, "learning_rate": 1.9259396004979076e-05, "loss": 0.245, "step": 3894 }, { "epoch": 3.7451923076923075, "grad_norm": 2.1804120540618896, "learning_rate": 1.9258925476994198e-05, "loss": 0.0641, "step": 3895 }, { "epoch": 3.746153846153846, "grad_norm": 2.393233060836792, "learning_rate": 1.925845480533797e-05, "loss": 0.0378, "step": 3896 }, { "epoch": 3.747115384615385, "grad_norm": 5.891981601715088, "learning_rate": 1.92579839900177e-05, "loss": 0.2337, "step": 3897 }, { "epoch": 3.748076923076923, "grad_norm": 4.6527910232543945, "learning_rate": 1.925751303104069e-05, "loss": 0.1407, "step": 3898 }, { "epoch": 3.7490384615384613, "grad_norm": 3.5364573001861572, "learning_rate": 1.9257041928414244e-05, "loss": 0.119, "step": 3899 }, { "epoch": 3.75, "grad_norm": 5.644574165344238, "learning_rate": 1.925657068214568e-05, "loss": 0.1691, "step": 3900 }, { "epoch": 3.7509615384615387, "grad_norm": 4.792076587677002, "learning_rate": 1.925609929224231e-05, "loss": 0.1073, "step": 3901 }, { "epoch": 3.751923076923077, "grad_norm": 5.047701835632324, "learning_rate": 1.9255627758711436e-05, "loss": 0.1041, "step": 3902 }, { "epoch": 3.752884615384615, "grad_norm": 4.6594557762146, "learning_rate": 1.9255156081560394e-05, "loss": 0.1382, "step": 3903 }, { "epoch": 3.753846153846154, "grad_norm": 4.597367286682129, "learning_rate": 1.9254684260796484e-05, "loss": 0.101, "step": 3904 }, { "epoch": 3.7548076923076925, "grad_norm": 3.6701302528381348, "learning_rate": 1.9254212296427043e-05, "loss": 0.1155, "step": 3905 }, { "epoch": 3.7557692307692307, "grad_norm": 3.8556101322174072, "learning_rate": 1.9253740188459387e-05, "loss": 0.1396, "step": 3906 }, { "epoch": 3.756730769230769, "grad_norm": 5.019867897033691, "learning_rate": 1.9253267936900843e-05, "loss": 0.1172, "step": 3907 }, { "epoch": 3.7576923076923077, "grad_norm": 7.624783039093018, "learning_rate": 1.9252795541758735e-05, "loss": 0.1142, "step": 3908 }, { "epoch": 3.7586538461538463, "grad_norm": 4.170985221862793, "learning_rate": 1.92523230030404e-05, "loss": 0.1354, "step": 3909 }, { "epoch": 3.7596153846153846, "grad_norm": 6.149711608886719, "learning_rate": 1.9251850320753167e-05, "loss": 0.1975, "step": 3910 }, { "epoch": 3.760576923076923, "grad_norm": 3.9025473594665527, "learning_rate": 1.925137749490437e-05, "loss": 0.1007, "step": 3911 }, { "epoch": 3.7615384615384615, "grad_norm": 6.05863094329834, "learning_rate": 1.9250904525501344e-05, "loss": 0.1643, "step": 3912 }, { "epoch": 3.7625, "grad_norm": 4.98308801651001, "learning_rate": 1.9250431412551435e-05, "loss": 0.1143, "step": 3913 }, { "epoch": 3.7634615384615384, "grad_norm": 4.732117652893066, "learning_rate": 1.924995815606198e-05, "loss": 0.1311, "step": 3914 }, { "epoch": 3.7644230769230766, "grad_norm": 3.3285396099090576, "learning_rate": 1.9249484756040323e-05, "loss": 0.0698, "step": 3915 }, { "epoch": 3.7653846153846153, "grad_norm": 4.764423847198486, "learning_rate": 1.9249011212493805e-05, "loss": 0.1243, "step": 3916 }, { "epoch": 3.766346153846154, "grad_norm": 4.106700420379639, "learning_rate": 1.9248537525429785e-05, "loss": 0.1193, "step": 3917 }, { "epoch": 3.7673076923076922, "grad_norm": 4.9732346534729, "learning_rate": 1.9248063694855603e-05, "loss": 0.0607, "step": 3918 }, { "epoch": 3.768269230769231, "grad_norm": 5.066559314727783, "learning_rate": 1.9247589720778616e-05, "loss": 0.1351, "step": 3919 }, { "epoch": 3.769230769230769, "grad_norm": 3.57185959815979, "learning_rate": 1.9247115603206177e-05, "loss": 0.0864, "step": 3920 }, { "epoch": 3.770192307692308, "grad_norm": 3.3088409900665283, "learning_rate": 1.9246641342145646e-05, "loss": 0.0984, "step": 3921 }, { "epoch": 3.771153846153846, "grad_norm": 3.7581706047058105, "learning_rate": 1.9246166937604377e-05, "loss": 0.0933, "step": 3922 }, { "epoch": 3.7721153846153848, "grad_norm": 5.533938884735107, "learning_rate": 1.9245692389589737e-05, "loss": 0.1364, "step": 3923 }, { "epoch": 3.773076923076923, "grad_norm": 4.5212483406066895, "learning_rate": 1.9245217698109085e-05, "loss": 0.1117, "step": 3924 }, { "epoch": 3.7740384615384617, "grad_norm": 3.399165153503418, "learning_rate": 1.9244742863169786e-05, "loss": 0.067, "step": 3925 }, { "epoch": 3.775, "grad_norm": 3.7991464138031006, "learning_rate": 1.9244267884779217e-05, "loss": 0.1213, "step": 3926 }, { "epoch": 3.7759615384615386, "grad_norm": 3.353093385696411, "learning_rate": 1.9243792762944736e-05, "loss": 0.1347, "step": 3927 }, { "epoch": 3.776923076923077, "grad_norm": 5.050859451293945, "learning_rate": 1.9243317497673726e-05, "loss": 0.1254, "step": 3928 }, { "epoch": 3.7778846153846155, "grad_norm": 5.5861687660217285, "learning_rate": 1.9242842088973557e-05, "loss": 0.2664, "step": 3929 }, { "epoch": 3.7788461538461537, "grad_norm": 3.7841720581054688, "learning_rate": 1.9242366536851604e-05, "loss": 0.0663, "step": 3930 }, { "epoch": 3.7798076923076924, "grad_norm": 4.086886405944824, "learning_rate": 1.924189084131525e-05, "loss": 0.0827, "step": 3931 }, { "epoch": 3.7807692307692307, "grad_norm": 3.1660726070404053, "learning_rate": 1.9241415002371875e-05, "loss": 0.0453, "step": 3932 }, { "epoch": 3.7817307692307693, "grad_norm": 3.8848774433135986, "learning_rate": 1.924093902002886e-05, "loss": 0.1569, "step": 3933 }, { "epoch": 3.7826923076923076, "grad_norm": 2.9026591777801514, "learning_rate": 1.9240462894293594e-05, "loss": 0.0636, "step": 3934 }, { "epoch": 3.7836538461538463, "grad_norm": 3.3228514194488525, "learning_rate": 1.9239986625173467e-05, "loss": 0.0975, "step": 3935 }, { "epoch": 3.7846153846153845, "grad_norm": 3.4440648555755615, "learning_rate": 1.9239510212675863e-05, "loss": 0.0667, "step": 3936 }, { "epoch": 3.785576923076923, "grad_norm": 3.783468008041382, "learning_rate": 1.923903365680818e-05, "loss": 0.0898, "step": 3937 }, { "epoch": 3.7865384615384614, "grad_norm": 5.449286937713623, "learning_rate": 1.923855695757781e-05, "loss": 0.1934, "step": 3938 }, { "epoch": 3.7875, "grad_norm": 4.4329023361206055, "learning_rate": 1.9238080114992153e-05, "loss": 0.094, "step": 3939 }, { "epoch": 3.7884615384615383, "grad_norm": 4.838362693786621, "learning_rate": 1.9237603129058604e-05, "loss": 0.1985, "step": 3940 }, { "epoch": 3.789423076923077, "grad_norm": 4.2743940353393555, "learning_rate": 1.9237125999784566e-05, "loss": 0.109, "step": 3941 }, { "epoch": 3.7903846153846152, "grad_norm": 2.920656681060791, "learning_rate": 1.9236648727177446e-05, "loss": 0.055, "step": 3942 }, { "epoch": 3.791346153846154, "grad_norm": 4.860262870788574, "learning_rate": 1.9236171311244643e-05, "loss": 0.2004, "step": 3943 }, { "epoch": 3.792307692307692, "grad_norm": 4.765750408172607, "learning_rate": 1.923569375199357e-05, "loss": 0.1284, "step": 3944 }, { "epoch": 3.793269230769231, "grad_norm": 4.343521595001221, "learning_rate": 1.9235216049431642e-05, "loss": 0.185, "step": 3945 }, { "epoch": 3.794230769230769, "grad_norm": 2.112783432006836, "learning_rate": 1.9234738203566257e-05, "loss": 0.0478, "step": 3946 }, { "epoch": 3.7951923076923078, "grad_norm": 2.8358521461486816, "learning_rate": 1.9234260214404846e-05, "loss": 0.0926, "step": 3947 }, { "epoch": 3.796153846153846, "grad_norm": 5.133622169494629, "learning_rate": 1.9233782081954818e-05, "loss": 0.2179, "step": 3948 }, { "epoch": 3.7971153846153847, "grad_norm": 3.6638095378875732, "learning_rate": 1.923330380622359e-05, "loss": 0.1405, "step": 3949 }, { "epoch": 3.7980769230769234, "grad_norm": 3.9099061489105225, "learning_rate": 1.923282538721859e-05, "loss": 0.1187, "step": 3950 }, { "epoch": 3.7990384615384616, "grad_norm": 5.540530204772949, "learning_rate": 1.9232346824947232e-05, "loss": 0.2812, "step": 3951 }, { "epoch": 3.8, "grad_norm": 3.162179470062256, "learning_rate": 1.923186811941695e-05, "loss": 0.121, "step": 3952 }, { "epoch": 3.8009615384615385, "grad_norm": 4.964500904083252, "learning_rate": 1.9231389270635172e-05, "loss": 0.1681, "step": 3953 }, { "epoch": 3.801923076923077, "grad_norm": 3.616546869277954, "learning_rate": 1.9230910278609326e-05, "loss": 0.1018, "step": 3954 }, { "epoch": 3.8028846153846154, "grad_norm": 3.634730339050293, "learning_rate": 1.9230431143346844e-05, "loss": 0.1532, "step": 3955 }, { "epoch": 3.8038461538461537, "grad_norm": 4.423173427581787, "learning_rate": 1.9229951864855164e-05, "loss": 0.0687, "step": 3956 }, { "epoch": 3.8048076923076923, "grad_norm": 4.998879432678223, "learning_rate": 1.922947244314172e-05, "loss": 0.1071, "step": 3957 }, { "epoch": 3.805769230769231, "grad_norm": 2.090623617172241, "learning_rate": 1.9228992878213948e-05, "loss": 0.0382, "step": 3958 }, { "epoch": 3.8067307692307693, "grad_norm": 4.831691741943359, "learning_rate": 1.9228513170079296e-05, "loss": 0.1428, "step": 3959 }, { "epoch": 3.8076923076923075, "grad_norm": 4.191644191741943, "learning_rate": 1.9228033318745206e-05, "loss": 0.1378, "step": 3960 }, { "epoch": 3.808653846153846, "grad_norm": 3.7459843158721924, "learning_rate": 1.922755332421912e-05, "loss": 0.0686, "step": 3961 }, { "epoch": 3.809615384615385, "grad_norm": 5.996288299560547, "learning_rate": 1.922707318650849e-05, "loss": 0.1414, "step": 3962 }, { "epoch": 3.810576923076923, "grad_norm": 3.694542169570923, "learning_rate": 1.9226592905620767e-05, "loss": 0.0737, "step": 3963 }, { "epoch": 3.8115384615384613, "grad_norm": 3.4618442058563232, "learning_rate": 1.92261124815634e-05, "loss": 0.0456, "step": 3964 }, { "epoch": 3.8125, "grad_norm": 4.449311256408691, "learning_rate": 1.9225631914343846e-05, "loss": 0.1241, "step": 3965 }, { "epoch": 3.8134615384615387, "grad_norm": 3.466498374938965, "learning_rate": 1.922515120396956e-05, "loss": 0.0839, "step": 3966 }, { "epoch": 3.814423076923077, "grad_norm": 6.5593976974487305, "learning_rate": 1.9224670350448007e-05, "loss": 0.1956, "step": 3967 }, { "epoch": 3.815384615384615, "grad_norm": 4.172738075256348, "learning_rate": 1.9224189353786642e-05, "loss": 0.1745, "step": 3968 }, { "epoch": 3.816346153846154, "grad_norm": 5.870425224304199, "learning_rate": 1.9223708213992932e-05, "loss": 0.2366, "step": 3969 }, { "epoch": 3.8173076923076925, "grad_norm": 4.060753345489502, "learning_rate": 1.922322693107434e-05, "loss": 0.0981, "step": 3970 }, { "epoch": 3.8182692307692307, "grad_norm": 3.1148316860198975, "learning_rate": 1.9222745505038338e-05, "loss": 0.067, "step": 3971 }, { "epoch": 3.819230769230769, "grad_norm": 5.569246768951416, "learning_rate": 1.922226393589239e-05, "loss": 0.1746, "step": 3972 }, { "epoch": 3.8201923076923077, "grad_norm": 4.57521915435791, "learning_rate": 1.9221782223643976e-05, "loss": 0.124, "step": 3973 }, { "epoch": 3.8211538461538463, "grad_norm": 4.423630714416504, "learning_rate": 1.9221300368300566e-05, "loss": 0.1111, "step": 3974 }, { "epoch": 3.8221153846153846, "grad_norm": 1.914996862411499, "learning_rate": 1.9220818369869642e-05, "loss": 0.0436, "step": 3975 }, { "epoch": 3.823076923076923, "grad_norm": 2.913084030151367, "learning_rate": 1.9220336228358675e-05, "loss": 0.059, "step": 3976 }, { "epoch": 3.8240384615384615, "grad_norm": 2.1094141006469727, "learning_rate": 1.9219853943775155e-05, "loss": 0.0401, "step": 3977 }, { "epoch": 3.825, "grad_norm": 3.2288718223571777, "learning_rate": 1.921937151612656e-05, "loss": 0.0614, "step": 3978 }, { "epoch": 3.8259615384615384, "grad_norm": 4.90403413772583, "learning_rate": 1.9218888945420375e-05, "loss": 0.1791, "step": 3979 }, { "epoch": 3.8269230769230766, "grad_norm": 4.420261383056641, "learning_rate": 1.9218406231664096e-05, "loss": 0.3386, "step": 3980 }, { "epoch": 3.8278846153846153, "grad_norm": 4.9714274406433105, "learning_rate": 1.9217923374865202e-05, "loss": 0.1002, "step": 3981 }, { "epoch": 3.828846153846154, "grad_norm": 3.4752004146575928, "learning_rate": 1.92174403750312e-05, "loss": 0.0611, "step": 3982 }, { "epoch": 3.8298076923076922, "grad_norm": 4.807642459869385, "learning_rate": 1.9216957232169567e-05, "loss": 0.1629, "step": 3983 }, { "epoch": 3.830769230769231, "grad_norm": 3.284860372543335, "learning_rate": 1.9216473946287815e-05, "loss": 0.1319, "step": 3984 }, { "epoch": 3.831730769230769, "grad_norm": 5.119232654571533, "learning_rate": 1.9215990517393436e-05, "loss": 0.1284, "step": 3985 }, { "epoch": 3.832692307692308, "grad_norm": 4.089437484741211, "learning_rate": 1.9215506945493933e-05, "loss": 0.0846, "step": 3986 }, { "epoch": 3.833653846153846, "grad_norm": 5.641073226928711, "learning_rate": 1.9215023230596806e-05, "loss": 0.1373, "step": 3987 }, { "epoch": 3.8346153846153848, "grad_norm": 4.106673717498779, "learning_rate": 1.9214539372709568e-05, "loss": 0.1493, "step": 3988 }, { "epoch": 3.835576923076923, "grad_norm": 4.616686820983887, "learning_rate": 1.9214055371839722e-05, "loss": 0.1431, "step": 3989 }, { "epoch": 3.8365384615384617, "grad_norm": 4.434209823608398, "learning_rate": 1.9213571227994777e-05, "loss": 0.1316, "step": 3990 }, { "epoch": 3.8375, "grad_norm": 3.567739486694336, "learning_rate": 1.9213086941182254e-05, "loss": 0.0579, "step": 3991 }, { "epoch": 3.8384615384615386, "grad_norm": 3.3575804233551025, "learning_rate": 1.9212602511409656e-05, "loss": 0.0584, "step": 3992 }, { "epoch": 3.839423076923077, "grad_norm": 3.847990036010742, "learning_rate": 1.9212117938684513e-05, "loss": 0.0742, "step": 3993 }, { "epoch": 3.8403846153846155, "grad_norm": 2.6901841163635254, "learning_rate": 1.921163322301433e-05, "loss": 0.0513, "step": 3994 }, { "epoch": 3.8413461538461537, "grad_norm": 3.763958215713501, "learning_rate": 1.9211148364406636e-05, "loss": 0.1188, "step": 3995 }, { "epoch": 3.8423076923076924, "grad_norm": 4.665992736816406, "learning_rate": 1.9210663362868956e-05, "loss": 0.2391, "step": 3996 }, { "epoch": 3.8432692307692307, "grad_norm": 4.030921459197998, "learning_rate": 1.9210178218408812e-05, "loss": 0.1146, "step": 3997 }, { "epoch": 3.8442307692307693, "grad_norm": 2.991931915283203, "learning_rate": 1.9209692931033735e-05, "loss": 0.0572, "step": 3998 }, { "epoch": 3.8451923076923076, "grad_norm": 5.830111980438232, "learning_rate": 1.9209207500751253e-05, "loss": 0.2118, "step": 3999 }, { "epoch": 3.8461538461538463, "grad_norm": 3.5543737411499023, "learning_rate": 1.92087219275689e-05, "loss": 0.0857, "step": 4000 }, { "epoch": 3.8471153846153845, "grad_norm": 6.535910129547119, "learning_rate": 1.920823621149421e-05, "loss": 0.096, "step": 4001 }, { "epoch": 3.848076923076923, "grad_norm": 3.3953733444213867, "learning_rate": 1.920775035253472e-05, "loss": 0.0645, "step": 4002 }, { "epoch": 3.8490384615384614, "grad_norm": 4.54937219619751, "learning_rate": 1.9207264350697966e-05, "loss": 0.1182, "step": 4003 }, { "epoch": 3.85, "grad_norm": 4.673548698425293, "learning_rate": 1.9206778205991495e-05, "loss": 0.0987, "step": 4004 }, { "epoch": 3.8509615384615383, "grad_norm": 6.400561332702637, "learning_rate": 1.9206291918422847e-05, "loss": 0.1418, "step": 4005 }, { "epoch": 3.851923076923077, "grad_norm": 2.776106119155884, "learning_rate": 1.920580548799957e-05, "loss": 0.0448, "step": 4006 }, { "epoch": 3.8528846153846152, "grad_norm": 5.027676582336426, "learning_rate": 1.9205318914729206e-05, "loss": 0.1549, "step": 4007 }, { "epoch": 3.853846153846154, "grad_norm": 3.6472926139831543, "learning_rate": 1.9204832198619313e-05, "loss": 0.1093, "step": 4008 }, { "epoch": 3.854807692307692, "grad_norm": 2.918107271194458, "learning_rate": 1.9204345339677442e-05, "loss": 0.0373, "step": 4009 }, { "epoch": 3.855769230769231, "grad_norm": 4.453318119049072, "learning_rate": 1.9203858337911146e-05, "loss": 0.1196, "step": 4010 }, { "epoch": 3.856730769230769, "grad_norm": 4.182651042938232, "learning_rate": 1.9203371193327976e-05, "loss": 0.1289, "step": 4011 }, { "epoch": 3.8576923076923078, "grad_norm": 4.321978569030762, "learning_rate": 1.9202883905935502e-05, "loss": 0.1147, "step": 4012 }, { "epoch": 3.858653846153846, "grad_norm": 5.390964508056641, "learning_rate": 1.920239647574128e-05, "loss": 0.1794, "step": 4013 }, { "epoch": 3.8596153846153847, "grad_norm": 4.556804656982422, "learning_rate": 1.9201908902752867e-05, "loss": 0.1227, "step": 4014 }, { "epoch": 3.8605769230769234, "grad_norm": 6.063300132751465, "learning_rate": 1.9201421186977844e-05, "loss": 0.1138, "step": 4015 }, { "epoch": 3.8615384615384616, "grad_norm": 2.952651262283325, "learning_rate": 1.9200933328423766e-05, "loss": 0.0604, "step": 4016 }, { "epoch": 3.8625, "grad_norm": 3.9118165969848633, "learning_rate": 1.9200445327098205e-05, "loss": 0.1037, "step": 4017 }, { "epoch": 3.8634615384615385, "grad_norm": 2.735775947570801, "learning_rate": 1.9199957183008737e-05, "loss": 0.0758, "step": 4018 }, { "epoch": 3.864423076923077, "grad_norm": 4.920133113861084, "learning_rate": 1.9199468896162936e-05, "loss": 0.2411, "step": 4019 }, { "epoch": 3.8653846153846154, "grad_norm": 6.2729811668396, "learning_rate": 1.9198980466568376e-05, "loss": 0.2599, "step": 4020 }, { "epoch": 3.8663461538461537, "grad_norm": 3.572064161300659, "learning_rate": 1.919849189423264e-05, "loss": 0.0776, "step": 4021 }, { "epoch": 3.8673076923076923, "grad_norm": 3.992091417312622, "learning_rate": 1.9198003179163308e-05, "loss": 0.0568, "step": 4022 }, { "epoch": 3.868269230769231, "grad_norm": 5.001494884490967, "learning_rate": 1.9197514321367958e-05, "loss": 0.165, "step": 4023 }, { "epoch": 3.8692307692307693, "grad_norm": 6.1822075843811035, "learning_rate": 1.9197025320854184e-05, "loss": 0.114, "step": 4024 }, { "epoch": 3.8701923076923075, "grad_norm": 6.464167594909668, "learning_rate": 1.919653617762957e-05, "loss": 0.2353, "step": 4025 }, { "epoch": 3.871153846153846, "grad_norm": 3.564584493637085, "learning_rate": 1.9196046891701704e-05, "loss": 0.0928, "step": 4026 }, { "epoch": 3.872115384615385, "grad_norm": 4.929354190826416, "learning_rate": 1.919555746307818e-05, "loss": 0.1937, "step": 4027 }, { "epoch": 3.873076923076923, "grad_norm": 3.2010884284973145, "learning_rate": 1.9195067891766595e-05, "loss": 0.0602, "step": 4028 }, { "epoch": 3.8740384615384613, "grad_norm": 4.694282531738281, "learning_rate": 1.919457817777454e-05, "loss": 0.1261, "step": 4029 }, { "epoch": 3.875, "grad_norm": 4.248343467712402, "learning_rate": 1.919408832110962e-05, "loss": 0.0894, "step": 4030 }, { "epoch": 3.8759615384615387, "grad_norm": 4.0979084968566895, "learning_rate": 1.9193598321779434e-05, "loss": 0.1275, "step": 4031 }, { "epoch": 3.876923076923077, "grad_norm": 3.767259120941162, "learning_rate": 1.9193108179791586e-05, "loss": 0.1048, "step": 4032 }, { "epoch": 3.877884615384615, "grad_norm": 3.8967132568359375, "learning_rate": 1.9192617895153678e-05, "loss": 0.0734, "step": 4033 }, { "epoch": 3.878846153846154, "grad_norm": 4.663073539733887, "learning_rate": 1.9192127467873322e-05, "loss": 0.1545, "step": 4034 }, { "epoch": 3.8798076923076925, "grad_norm": 6.378917694091797, "learning_rate": 1.9191636897958123e-05, "loss": 0.1803, "step": 4035 }, { "epoch": 3.8807692307692307, "grad_norm": 2.2505295276641846, "learning_rate": 1.91911461854157e-05, "loss": 0.0321, "step": 4036 }, { "epoch": 3.881730769230769, "grad_norm": 3.2796175479888916, "learning_rate": 1.9190655330253663e-05, "loss": 0.0464, "step": 4037 }, { "epoch": 3.8826923076923077, "grad_norm": 3.602072238922119, "learning_rate": 1.919016433247963e-05, "loss": 0.0807, "step": 4038 }, { "epoch": 3.8836538461538463, "grad_norm": 3.6699721813201904, "learning_rate": 1.9189673192101217e-05, "loss": 0.0648, "step": 4039 }, { "epoch": 3.8846153846153846, "grad_norm": 3.707106828689575, "learning_rate": 1.918918190912605e-05, "loss": 0.0831, "step": 4040 }, { "epoch": 3.885576923076923, "grad_norm": 6.119410037994385, "learning_rate": 1.9188690483561746e-05, "loss": 0.1978, "step": 4041 }, { "epoch": 3.8865384615384615, "grad_norm": 4.981754779815674, "learning_rate": 1.9188198915415938e-05, "loss": 0.1189, "step": 4042 }, { "epoch": 3.8875, "grad_norm": 2.4052321910858154, "learning_rate": 1.9187707204696247e-05, "loss": 0.0456, "step": 4043 }, { "epoch": 3.8884615384615384, "grad_norm": 4.06748628616333, "learning_rate": 1.918721535141031e-05, "loss": 0.1365, "step": 4044 }, { "epoch": 3.8894230769230766, "grad_norm": 6.101851463317871, "learning_rate": 1.9186723355565753e-05, "loss": 0.1505, "step": 4045 }, { "epoch": 3.8903846153846153, "grad_norm": 3.9729385375976562, "learning_rate": 1.9186231217170212e-05, "loss": 0.1708, "step": 4046 }, { "epoch": 3.891346153846154, "grad_norm": 4.88426399230957, "learning_rate": 1.9185738936231323e-05, "loss": 0.1404, "step": 4047 }, { "epoch": 3.8923076923076922, "grad_norm": 4.4279704093933105, "learning_rate": 1.9185246512756727e-05, "loss": 0.1404, "step": 4048 }, { "epoch": 3.893269230769231, "grad_norm": 2.875290632247925, "learning_rate": 1.9184753946754063e-05, "loss": 0.0969, "step": 4049 }, { "epoch": 3.894230769230769, "grad_norm": 4.110661029815674, "learning_rate": 1.9184261238230976e-05, "loss": 0.0859, "step": 4050 }, { "epoch": 3.895192307692308, "grad_norm": 4.599233150482178, "learning_rate": 1.9183768387195105e-05, "loss": 0.1278, "step": 4051 }, { "epoch": 3.896153846153846, "grad_norm": 3.7117416858673096, "learning_rate": 1.9183275393654105e-05, "loss": 0.1054, "step": 4052 }, { "epoch": 3.8971153846153848, "grad_norm": 3.5462887287139893, "learning_rate": 1.9182782257615624e-05, "loss": 0.1028, "step": 4053 }, { "epoch": 3.898076923076923, "grad_norm": 1.9182807207107544, "learning_rate": 1.9182288979087316e-05, "loss": 0.0328, "step": 4054 }, { "epoch": 3.8990384615384617, "grad_norm": 4.314310073852539, "learning_rate": 1.918179555807683e-05, "loss": 0.1202, "step": 4055 }, { "epoch": 3.9, "grad_norm": 3.6846811771392822, "learning_rate": 1.9181301994591824e-05, "loss": 0.0871, "step": 4056 }, { "epoch": 3.9009615384615386, "grad_norm": 3.6757185459136963, "learning_rate": 1.918080828863996e-05, "loss": 0.1063, "step": 4057 }, { "epoch": 3.901923076923077, "grad_norm": 2.9590859413146973, "learning_rate": 1.9180314440228896e-05, "loss": 0.0622, "step": 4058 }, { "epoch": 3.9028846153846155, "grad_norm": 4.744538307189941, "learning_rate": 1.91798204493663e-05, "loss": 0.1086, "step": 4059 }, { "epoch": 3.9038461538461537, "grad_norm": 5.090108871459961, "learning_rate": 1.9179326316059826e-05, "loss": 0.1909, "step": 4060 }, { "epoch": 3.9048076923076924, "grad_norm": 3.5069146156311035, "learning_rate": 1.9178832040317153e-05, "loss": 0.0801, "step": 4061 }, { "epoch": 3.9057692307692307, "grad_norm": 4.565255165100098, "learning_rate": 1.9178337622145947e-05, "loss": 0.1312, "step": 4062 }, { "epoch": 3.9067307692307693, "grad_norm": 4.632129192352295, "learning_rate": 1.9177843061553876e-05, "loss": 0.1509, "step": 4063 }, { "epoch": 3.9076923076923076, "grad_norm": 2.5044827461242676, "learning_rate": 1.9177348358548618e-05, "loss": 0.0516, "step": 4064 }, { "epoch": 3.9086538461538463, "grad_norm": 3.7054996490478516, "learning_rate": 1.917685351313785e-05, "loss": 0.1277, "step": 4065 }, { "epoch": 3.9096153846153845, "grad_norm": 3.7028863430023193, "learning_rate": 1.9176358525329248e-05, "loss": 0.0631, "step": 4066 }, { "epoch": 3.910576923076923, "grad_norm": 5.173243522644043, "learning_rate": 1.9175863395130495e-05, "loss": 0.1762, "step": 4067 }, { "epoch": 3.9115384615384614, "grad_norm": 7.081678867340088, "learning_rate": 1.917536812254927e-05, "loss": 0.1386, "step": 4068 }, { "epoch": 3.9125, "grad_norm": 6.8328728675842285, "learning_rate": 1.9174872707593267e-05, "loss": 0.2077, "step": 4069 }, { "epoch": 3.9134615384615383, "grad_norm": 3.6351144313812256, "learning_rate": 1.9174377150270165e-05, "loss": 0.0619, "step": 4070 }, { "epoch": 3.914423076923077, "grad_norm": 5.582037448883057, "learning_rate": 1.9173881450587654e-05, "loss": 0.1327, "step": 4071 }, { "epoch": 3.9153846153846152, "grad_norm": 4.424735069274902, "learning_rate": 1.9173385608553427e-05, "loss": 0.1647, "step": 4072 }, { "epoch": 3.916346153846154, "grad_norm": 1.9863417148590088, "learning_rate": 1.9172889624175185e-05, "loss": 0.0532, "step": 4073 }, { "epoch": 3.917307692307692, "grad_norm": 6.215144634246826, "learning_rate": 1.917239349746061e-05, "loss": 0.1717, "step": 4074 }, { "epoch": 3.918269230769231, "grad_norm": 3.0931596755981445, "learning_rate": 1.9171897228417413e-05, "loss": 0.0879, "step": 4075 }, { "epoch": 3.919230769230769, "grad_norm": 3.1456398963928223, "learning_rate": 1.917140081705329e-05, "loss": 0.0852, "step": 4076 }, { "epoch": 3.9201923076923078, "grad_norm": 2.9583897590637207, "learning_rate": 1.9170904263375948e-05, "loss": 0.1091, "step": 4077 }, { "epoch": 3.921153846153846, "grad_norm": 5.2059550285339355, "learning_rate": 1.917040756739308e-05, "loss": 0.2017, "step": 4078 }, { "epoch": 3.9221153846153847, "grad_norm": 6.016389846801758, "learning_rate": 1.9169910729112407e-05, "loss": 0.1383, "step": 4079 }, { "epoch": 3.9230769230769234, "grad_norm": 4.164958477020264, "learning_rate": 1.9169413748541632e-05, "loss": 0.1564, "step": 4080 }, { "epoch": 3.9240384615384616, "grad_norm": 4.522045612335205, "learning_rate": 1.916891662568847e-05, "loss": 0.1103, "step": 4081 }, { "epoch": 3.925, "grad_norm": 4.942970275878906, "learning_rate": 1.9168419360560627e-05, "loss": 0.122, "step": 4082 }, { "epoch": 3.9259615384615385, "grad_norm": 3.257619619369507, "learning_rate": 1.9167921953165827e-05, "loss": 0.0765, "step": 4083 }, { "epoch": 3.926923076923077, "grad_norm": 5.156679630279541, "learning_rate": 1.9167424403511786e-05, "loss": 0.1348, "step": 4084 }, { "epoch": 3.9278846153846154, "grad_norm": 3.03448748588562, "learning_rate": 1.9166926711606222e-05, "loss": 0.0659, "step": 4085 }, { "epoch": 3.9288461538461537, "grad_norm": 4.4470624923706055, "learning_rate": 1.9166428877456863e-05, "loss": 0.1567, "step": 4086 }, { "epoch": 3.9298076923076923, "grad_norm": 4.343269348144531, "learning_rate": 1.916593090107143e-05, "loss": 0.1722, "step": 4087 }, { "epoch": 3.930769230769231, "grad_norm": 3.6469831466674805, "learning_rate": 1.916543278245765e-05, "loss": 0.0985, "step": 4088 }, { "epoch": 3.9317307692307693, "grad_norm": 2.44948673248291, "learning_rate": 1.9164934521623255e-05, "loss": 0.0784, "step": 4089 }, { "epoch": 3.9326923076923075, "grad_norm": 4.636016368865967, "learning_rate": 1.9164436118575974e-05, "loss": 0.0861, "step": 4090 }, { "epoch": 3.933653846153846, "grad_norm": 2.218951463699341, "learning_rate": 1.916393757332354e-05, "loss": 0.0463, "step": 4091 }, { "epoch": 3.934615384615385, "grad_norm": 5.907419204711914, "learning_rate": 1.9163438885873695e-05, "loss": 0.1897, "step": 4092 }, { "epoch": 3.935576923076923, "grad_norm": 4.74219274520874, "learning_rate": 1.916294005623417e-05, "loss": 0.155, "step": 4093 }, { "epoch": 3.9365384615384613, "grad_norm": 3.347557783126831, "learning_rate": 1.9162441084412712e-05, "loss": 0.075, "step": 4094 }, { "epoch": 3.9375, "grad_norm": 3.865903615951538, "learning_rate": 1.9161941970417054e-05, "loss": 0.0719, "step": 4095 }, { "epoch": 3.9384615384615387, "grad_norm": 3.496537208557129, "learning_rate": 1.916144271425495e-05, "loss": 0.0738, "step": 4096 }, { "epoch": 3.939423076923077, "grad_norm": 3.9193971157073975, "learning_rate": 1.9160943315934144e-05, "loss": 0.1012, "step": 4097 }, { "epoch": 3.940384615384615, "grad_norm": 4.329198837280273, "learning_rate": 1.916044377546238e-05, "loss": 0.1221, "step": 4098 }, { "epoch": 3.941346153846154, "grad_norm": 6.647226333618164, "learning_rate": 1.915994409284742e-05, "loss": 0.1438, "step": 4099 }, { "epoch": 3.9423076923076925, "grad_norm": 2.5412139892578125, "learning_rate": 1.9159444268097012e-05, "loss": 0.0539, "step": 4100 }, { "epoch": 3.9432692307692307, "grad_norm": 3.7931008338928223, "learning_rate": 1.915894430121891e-05, "loss": 0.0912, "step": 4101 }, { "epoch": 3.944230769230769, "grad_norm": 3.020368814468384, "learning_rate": 1.915844419222087e-05, "loss": 0.0815, "step": 4102 }, { "epoch": 3.9451923076923077, "grad_norm": 7.2987213134765625, "learning_rate": 1.9157943941110663e-05, "loss": 0.111, "step": 4103 }, { "epoch": 3.9461538461538463, "grad_norm": 3.9991261959075928, "learning_rate": 1.9157443547896037e-05, "loss": 0.1941, "step": 4104 }, { "epoch": 3.9471153846153846, "grad_norm": 4.9805707931518555, "learning_rate": 1.915694301258477e-05, "loss": 0.2487, "step": 4105 }, { "epoch": 3.948076923076923, "grad_norm": 5.789901256561279, "learning_rate": 1.915644233518462e-05, "loss": 0.172, "step": 4106 }, { "epoch": 3.9490384615384615, "grad_norm": 4.748423099517822, "learning_rate": 1.9155941515703355e-05, "loss": 0.2149, "step": 4107 }, { "epoch": 3.95, "grad_norm": 5.9869842529296875, "learning_rate": 1.9155440554148753e-05, "loss": 0.1619, "step": 4108 }, { "epoch": 3.9509615384615384, "grad_norm": 3.4793543815612793, "learning_rate": 1.9154939450528587e-05, "loss": 0.0857, "step": 4109 }, { "epoch": 3.9519230769230766, "grad_norm": 6.101382255554199, "learning_rate": 1.9154438204850627e-05, "loss": 0.2042, "step": 4110 }, { "epoch": 3.9528846153846153, "grad_norm": 11.972803115844727, "learning_rate": 1.9153936817122657e-05, "loss": 0.2967, "step": 4111 }, { "epoch": 3.953846153846154, "grad_norm": 5.64384651184082, "learning_rate": 1.9153435287352452e-05, "loss": 0.0909, "step": 4112 }, { "epoch": 3.9548076923076922, "grad_norm": 4.897755146026611, "learning_rate": 1.91529336155478e-05, "loss": 0.1583, "step": 4113 }, { "epoch": 3.955769230769231, "grad_norm": 5.8226847648620605, "learning_rate": 1.9152431801716472e-05, "loss": 0.139, "step": 4114 }, { "epoch": 3.956730769230769, "grad_norm": 4.203507423400879, "learning_rate": 1.9151929845866273e-05, "loss": 0.1109, "step": 4115 }, { "epoch": 3.957692307692308, "grad_norm": 4.354581832885742, "learning_rate": 1.9151427748004982e-05, "loss": 0.1125, "step": 4116 }, { "epoch": 3.958653846153846, "grad_norm": 5.827463626861572, "learning_rate": 1.915092550814039e-05, "loss": 0.1758, "step": 4117 }, { "epoch": 3.9596153846153848, "grad_norm": 3.5503480434417725, "learning_rate": 1.915042312628029e-05, "loss": 0.0885, "step": 4118 }, { "epoch": 3.960576923076923, "grad_norm": 3.7996816635131836, "learning_rate": 1.9149920602432484e-05, "loss": 0.1384, "step": 4119 }, { "epoch": 3.9615384615384617, "grad_norm": 4.651414394378662, "learning_rate": 1.9149417936604764e-05, "loss": 0.0892, "step": 4120 }, { "epoch": 3.9625, "grad_norm": 3.6376497745513916, "learning_rate": 1.914891512880493e-05, "loss": 0.087, "step": 4121 }, { "epoch": 3.9634615384615386, "grad_norm": 4.37576150894165, "learning_rate": 1.9148412179040784e-05, "loss": 0.1361, "step": 4122 }, { "epoch": 3.964423076923077, "grad_norm": 4.602898120880127, "learning_rate": 1.9147909087320133e-05, "loss": 0.1238, "step": 4123 }, { "epoch": 3.9653846153846155, "grad_norm": 3.6527016162872314, "learning_rate": 1.914740585365078e-05, "loss": 0.1755, "step": 4124 }, { "epoch": 3.9663461538461537, "grad_norm": 3.730829954147339, "learning_rate": 1.9146902478040537e-05, "loss": 0.0878, "step": 4125 }, { "epoch": 3.9673076923076924, "grad_norm": 3.4503395557403564, "learning_rate": 1.9146398960497213e-05, "loss": 0.119, "step": 4126 }, { "epoch": 3.9682692307692307, "grad_norm": 3.826580286026001, "learning_rate": 1.914589530102862e-05, "loss": 0.0964, "step": 4127 }, { "epoch": 3.9692307692307693, "grad_norm": 4.61676025390625, "learning_rate": 1.9145391499642577e-05, "loss": 0.1084, "step": 4128 }, { "epoch": 3.9701923076923076, "grad_norm": 12.142142295837402, "learning_rate": 1.9144887556346898e-05, "loss": 0.1101, "step": 4129 }, { "epoch": 3.9711538461538463, "grad_norm": 2.9109575748443604, "learning_rate": 1.91443834711494e-05, "loss": 0.0851, "step": 4130 }, { "epoch": 3.9721153846153845, "grad_norm": 4.305755138397217, "learning_rate": 1.9143879244057918e-05, "loss": 0.1497, "step": 4131 }, { "epoch": 3.973076923076923, "grad_norm": 5.42864990234375, "learning_rate": 1.914337487508026e-05, "loss": 0.1748, "step": 4132 }, { "epoch": 3.9740384615384614, "grad_norm": 5.1052961349487305, "learning_rate": 1.9142870364224263e-05, "loss": 0.2689, "step": 4133 }, { "epoch": 3.975, "grad_norm": 6.623252868652344, "learning_rate": 1.914236571149775e-05, "loss": 0.1712, "step": 4134 }, { "epoch": 3.9759615384615383, "grad_norm": 5.039732456207275, "learning_rate": 1.9141860916908555e-05, "loss": 0.2478, "step": 4135 }, { "epoch": 3.976923076923077, "grad_norm": 4.696238040924072, "learning_rate": 1.9141355980464505e-05, "loss": 0.1139, "step": 4136 }, { "epoch": 3.9778846153846152, "grad_norm": 4.277779579162598, "learning_rate": 1.9140850902173445e-05, "loss": 0.197, "step": 4137 }, { "epoch": 3.978846153846154, "grad_norm": 5.395687103271484, "learning_rate": 1.9140345682043208e-05, "loss": 0.1919, "step": 4138 }, { "epoch": 3.979807692307692, "grad_norm": 3.42673397064209, "learning_rate": 1.913984032008163e-05, "loss": 0.113, "step": 4139 }, { "epoch": 3.980769230769231, "grad_norm": 5.769042491912842, "learning_rate": 1.9139334816296556e-05, "loss": 0.1684, "step": 4140 }, { "epoch": 3.981730769230769, "grad_norm": 3.4929144382476807, "learning_rate": 1.9138829170695828e-05, "loss": 0.1007, "step": 4141 }, { "epoch": 3.9826923076923078, "grad_norm": 5.914389610290527, "learning_rate": 1.9138323383287293e-05, "loss": 0.174, "step": 4142 }, { "epoch": 3.983653846153846, "grad_norm": 5.345120429992676, "learning_rate": 1.9137817454078803e-05, "loss": 0.1043, "step": 4143 }, { "epoch": 3.9846153846153847, "grad_norm": 3.261636972427368, "learning_rate": 1.9137311383078203e-05, "loss": 0.0801, "step": 4144 }, { "epoch": 3.9855769230769234, "grad_norm": 3.8187265396118164, "learning_rate": 1.9136805170293348e-05, "loss": 0.1248, "step": 4145 }, { "epoch": 3.9865384615384616, "grad_norm": 4.0022292137146, "learning_rate": 1.9136298815732094e-05, "loss": 0.0788, "step": 4146 }, { "epoch": 3.9875, "grad_norm": 4.854584217071533, "learning_rate": 1.9135792319402296e-05, "loss": 0.2232, "step": 4147 }, { "epoch": 3.9884615384615385, "grad_norm": 3.6507046222686768, "learning_rate": 1.9135285681311814e-05, "loss": 0.1299, "step": 4148 }, { "epoch": 3.989423076923077, "grad_norm": 3.146085262298584, "learning_rate": 1.913477890146851e-05, "loss": 0.116, "step": 4149 }, { "epoch": 3.9903846153846154, "grad_norm": 3.645538330078125, "learning_rate": 1.913427197988025e-05, "loss": 0.1141, "step": 4150 }, { "epoch": 3.9913461538461537, "grad_norm": 5.237228870391846, "learning_rate": 1.91337649165549e-05, "loss": 0.1736, "step": 4151 }, { "epoch": 3.9923076923076923, "grad_norm": 3.6737282276153564, "learning_rate": 1.9133257711500318e-05, "loss": 0.074, "step": 4152 }, { "epoch": 3.993269230769231, "grad_norm": 3.1957173347473145, "learning_rate": 1.9132750364724385e-05, "loss": 0.0656, "step": 4153 }, { "epoch": 3.9942307692307693, "grad_norm": 4.070751190185547, "learning_rate": 1.913224287623497e-05, "loss": 0.1015, "step": 4154 }, { "epoch": 3.9951923076923075, "grad_norm": 4.028691291809082, "learning_rate": 1.913173524603995e-05, "loss": 0.184, "step": 4155 }, { "epoch": 3.996153846153846, "grad_norm": 2.8358983993530273, "learning_rate": 1.9131227474147205e-05, "loss": 0.0874, "step": 4156 }, { "epoch": 3.997115384615385, "grad_norm": 3.3128793239593506, "learning_rate": 1.9130719560564604e-05, "loss": 0.1035, "step": 4157 }, { "epoch": 3.998076923076923, "grad_norm": 5.9521074295043945, "learning_rate": 1.9130211505300034e-05, "loss": 0.1666, "step": 4158 }, { "epoch": 3.9990384615384613, "grad_norm": 4.24595832824707, "learning_rate": 1.9129703308361378e-05, "loss": 0.1507, "step": 4159 }, { "epoch": 4.0, "grad_norm": 5.363455295562744, "learning_rate": 1.9129194969756525e-05, "loss": 0.1638, "step": 4160 }, { "epoch": 4.000961538461539, "grad_norm": 1.4186363220214844, "learning_rate": 1.9128686489493358e-05, "loss": 0.0223, "step": 4161 }, { "epoch": 4.001923076923077, "grad_norm": 3.7518672943115234, "learning_rate": 1.9128177867579766e-05, "loss": 0.1059, "step": 4162 }, { "epoch": 4.002884615384615, "grad_norm": 3.9146173000335693, "learning_rate": 1.912766910402365e-05, "loss": 0.1379, "step": 4163 }, { "epoch": 4.003846153846154, "grad_norm": 4.3795552253723145, "learning_rate": 1.9127160198832896e-05, "loss": 0.0713, "step": 4164 }, { "epoch": 4.0048076923076925, "grad_norm": 2.77838134765625, "learning_rate": 1.9126651152015404e-05, "loss": 0.0551, "step": 4165 }, { "epoch": 4.005769230769231, "grad_norm": 4.608756065368652, "learning_rate": 1.912614196357907e-05, "loss": 0.0898, "step": 4166 }, { "epoch": 4.006730769230769, "grad_norm": 1.9234265089035034, "learning_rate": 1.91256326335318e-05, "loss": 0.012, "step": 4167 }, { "epoch": 4.007692307692308, "grad_norm": 1.2230052947998047, "learning_rate": 1.9125123161881495e-05, "loss": 0.0216, "step": 4168 }, { "epoch": 4.008653846153846, "grad_norm": 3.9927444458007812, "learning_rate": 1.9124613548636063e-05, "loss": 0.0914, "step": 4169 }, { "epoch": 4.009615384615385, "grad_norm": 2.5787222385406494, "learning_rate": 1.9124103793803405e-05, "loss": 0.0392, "step": 4170 }, { "epoch": 4.010576923076923, "grad_norm": 2.1458353996276855, "learning_rate": 1.9123593897391436e-05, "loss": 0.0272, "step": 4171 }, { "epoch": 4.0115384615384615, "grad_norm": 2.0210041999816895, "learning_rate": 1.9123083859408068e-05, "loss": 0.0291, "step": 4172 }, { "epoch": 4.0125, "grad_norm": 4.134536266326904, "learning_rate": 1.9122573679861215e-05, "loss": 0.058, "step": 4173 }, { "epoch": 4.013461538461539, "grad_norm": 3.880343198776245, "learning_rate": 1.9122063358758792e-05, "loss": 0.0346, "step": 4174 }, { "epoch": 4.014423076923077, "grad_norm": 2.841628074645996, "learning_rate": 1.9121552896108717e-05, "loss": 0.0945, "step": 4175 }, { "epoch": 4.015384615384615, "grad_norm": 5.645694255828857, "learning_rate": 1.9121042291918914e-05, "loss": 0.0799, "step": 4176 }, { "epoch": 4.016346153846154, "grad_norm": 1.1016196012496948, "learning_rate": 1.9120531546197306e-05, "loss": 0.012, "step": 4177 }, { "epoch": 4.017307692307693, "grad_norm": 1.576000452041626, "learning_rate": 1.9120020658951814e-05, "loss": 0.0407, "step": 4178 }, { "epoch": 4.0182692307692305, "grad_norm": 1.9732179641723633, "learning_rate": 1.911950963019037e-05, "loss": 0.0342, "step": 4179 }, { "epoch": 4.019230769230769, "grad_norm": 0.7292008996009827, "learning_rate": 1.91189984599209e-05, "loss": 0.0089, "step": 4180 }, { "epoch": 4.020192307692308, "grad_norm": 3.9013991355895996, "learning_rate": 1.9118487148151342e-05, "loss": 0.0872, "step": 4181 }, { "epoch": 4.0211538461538465, "grad_norm": 4.596673011779785, "learning_rate": 1.9117975694889625e-05, "loss": 0.0523, "step": 4182 }, { "epoch": 4.022115384615384, "grad_norm": 3.610842704772949, "learning_rate": 1.9117464100143685e-05, "loss": 0.1183, "step": 4183 }, { "epoch": 4.023076923076923, "grad_norm": 5.435871124267578, "learning_rate": 1.911695236392146e-05, "loss": 0.1828, "step": 4184 }, { "epoch": 4.024038461538462, "grad_norm": 1.6229790449142456, "learning_rate": 1.9116440486230893e-05, "loss": 0.0171, "step": 4185 }, { "epoch": 4.025, "grad_norm": 1.306430697441101, "learning_rate": 1.9115928467079925e-05, "loss": 0.0158, "step": 4186 }, { "epoch": 4.025961538461538, "grad_norm": 6.5027265548706055, "learning_rate": 1.9115416306476504e-05, "loss": 0.1515, "step": 4187 }, { "epoch": 4.026923076923077, "grad_norm": 1.5085411071777344, "learning_rate": 1.9114904004428576e-05, "loss": 0.02, "step": 4188 }, { "epoch": 4.0278846153846155, "grad_norm": 3.4597208499908447, "learning_rate": 1.9114391560944086e-05, "loss": 0.0973, "step": 4189 }, { "epoch": 4.028846153846154, "grad_norm": 3.1437249183654785, "learning_rate": 1.9113878976030996e-05, "loss": 0.046, "step": 4190 }, { "epoch": 4.029807692307692, "grad_norm": 2.5356879234313965, "learning_rate": 1.911336624969725e-05, "loss": 0.0381, "step": 4191 }, { "epoch": 4.030769230769231, "grad_norm": 3.1089508533477783, "learning_rate": 1.9112853381950804e-05, "loss": 0.0693, "step": 4192 }, { "epoch": 4.031730769230769, "grad_norm": 1.4684842824935913, "learning_rate": 1.9112340372799623e-05, "loss": 0.0204, "step": 4193 }, { "epoch": 4.032692307692308, "grad_norm": 2.055389404296875, "learning_rate": 1.9111827222251663e-05, "loss": 0.0124, "step": 4194 }, { "epoch": 4.033653846153846, "grad_norm": 3.6837754249572754, "learning_rate": 1.9111313930314887e-05, "loss": 0.0406, "step": 4195 }, { "epoch": 4.0346153846153845, "grad_norm": 4.239334583282471, "learning_rate": 1.9110800496997263e-05, "loss": 0.1404, "step": 4196 }, { "epoch": 4.035576923076923, "grad_norm": 2.9541122913360596, "learning_rate": 1.911028692230675e-05, "loss": 0.0396, "step": 4197 }, { "epoch": 4.036538461538462, "grad_norm": 4.005491256713867, "learning_rate": 1.9109773206251327e-05, "loss": 0.0939, "step": 4198 }, { "epoch": 4.0375, "grad_norm": 2.4012978076934814, "learning_rate": 1.9109259348838957e-05, "loss": 0.0256, "step": 4199 }, { "epoch": 4.038461538461538, "grad_norm": 3.625948667526245, "learning_rate": 1.910874535007762e-05, "loss": 0.0486, "step": 4200 }, { "epoch": 4.039423076923077, "grad_norm": 21.591163635253906, "learning_rate": 1.9108231209975286e-05, "loss": 0.1088, "step": 4201 }, { "epoch": 4.040384615384616, "grad_norm": 2.9716107845306396, "learning_rate": 1.910771692853994e-05, "loss": 0.0323, "step": 4202 }, { "epoch": 4.0413461538461535, "grad_norm": 2.382800579071045, "learning_rate": 1.9107202505779556e-05, "loss": 0.018, "step": 4203 }, { "epoch": 4.042307692307692, "grad_norm": 3.098299264907837, "learning_rate": 1.910668794170212e-05, "loss": 0.058, "step": 4204 }, { "epoch": 4.043269230769231, "grad_norm": 3.0486114025115967, "learning_rate": 1.9106173236315612e-05, "loss": 0.0805, "step": 4205 }, { "epoch": 4.0442307692307695, "grad_norm": 7.588644027709961, "learning_rate": 1.910565838962802e-05, "loss": 0.1786, "step": 4206 }, { "epoch": 4.045192307692307, "grad_norm": 1.8620649576187134, "learning_rate": 1.910514340164734e-05, "loss": 0.0196, "step": 4207 }, { "epoch": 4.046153846153846, "grad_norm": 6.09047269821167, "learning_rate": 1.9104628272381557e-05, "loss": 0.0377, "step": 4208 }, { "epoch": 4.047115384615385, "grad_norm": 3.9502549171447754, "learning_rate": 1.9104113001838662e-05, "loss": 0.1023, "step": 4209 }, { "epoch": 4.048076923076923, "grad_norm": 5.304384708404541, "learning_rate": 1.9103597590026654e-05, "loss": 0.1005, "step": 4210 }, { "epoch": 4.049038461538461, "grad_norm": 7.346252918243408, "learning_rate": 1.9103082036953534e-05, "loss": 0.0485, "step": 4211 }, { "epoch": 4.05, "grad_norm": 1.8071941137313843, "learning_rate": 1.910256634262729e-05, "loss": 0.0279, "step": 4212 }, { "epoch": 4.0509615384615385, "grad_norm": 3.344494342803955, "learning_rate": 1.910205050705594e-05, "loss": 0.1279, "step": 4213 }, { "epoch": 4.051923076923077, "grad_norm": 2.4574191570281982, "learning_rate": 1.9101534530247476e-05, "loss": 0.0407, "step": 4214 }, { "epoch": 4.052884615384615, "grad_norm": 1.5987919569015503, "learning_rate": 1.910101841220991e-05, "loss": 0.0156, "step": 4215 }, { "epoch": 4.053846153846154, "grad_norm": 6.3409905433654785, "learning_rate": 1.910050215295125e-05, "loss": 0.1445, "step": 4216 }, { "epoch": 4.054807692307692, "grad_norm": 4.107359886169434, "learning_rate": 1.9099985752479505e-05, "loss": 0.057, "step": 4217 }, { "epoch": 4.055769230769231, "grad_norm": 3.923069715499878, "learning_rate": 1.9099469210802693e-05, "loss": 0.0547, "step": 4218 }, { "epoch": 4.056730769230769, "grad_norm": 1.2204337120056152, "learning_rate": 1.909895252792882e-05, "loss": 0.0122, "step": 4219 }, { "epoch": 4.0576923076923075, "grad_norm": 5.410459518432617, "learning_rate": 1.9098435703865913e-05, "loss": 0.1559, "step": 4220 }, { "epoch": 4.058653846153846, "grad_norm": 3.0729401111602783, "learning_rate": 1.9097918738621984e-05, "loss": 0.0459, "step": 4221 }, { "epoch": 4.059615384615385, "grad_norm": 4.741631031036377, "learning_rate": 1.909740163220506e-05, "loss": 0.0973, "step": 4222 }, { "epoch": 4.060576923076923, "grad_norm": 4.845014572143555, "learning_rate": 1.9096884384623163e-05, "loss": 0.159, "step": 4223 }, { "epoch": 4.061538461538461, "grad_norm": 3.9595530033111572, "learning_rate": 1.909636699588432e-05, "loss": 0.1374, "step": 4224 }, { "epoch": 4.0625, "grad_norm": 3.9579458236694336, "learning_rate": 1.909584946599656e-05, "loss": 0.0535, "step": 4225 }, { "epoch": 4.063461538461539, "grad_norm": 3.058152437210083, "learning_rate": 1.909533179496791e-05, "loss": 0.0301, "step": 4226 }, { "epoch": 4.064423076923077, "grad_norm": 2.9969959259033203, "learning_rate": 1.9094813982806407e-05, "loss": 0.0537, "step": 4227 }, { "epoch": 4.065384615384615, "grad_norm": 3.0755205154418945, "learning_rate": 1.9094296029520084e-05, "loss": 0.0857, "step": 4228 }, { "epoch": 4.066346153846154, "grad_norm": 3.2879128456115723, "learning_rate": 1.9093777935116975e-05, "loss": 0.0835, "step": 4229 }, { "epoch": 4.0673076923076925, "grad_norm": 1.9010541439056396, "learning_rate": 1.9093259699605125e-05, "loss": 0.0286, "step": 4230 }, { "epoch": 4.068269230769231, "grad_norm": 3.976060628890991, "learning_rate": 1.909274132299257e-05, "loss": 0.097, "step": 4231 }, { "epoch": 4.069230769230769, "grad_norm": 1.1232370138168335, "learning_rate": 1.909222280528736e-05, "loss": 0.0118, "step": 4232 }, { "epoch": 4.070192307692308, "grad_norm": 3.4167873859405518, "learning_rate": 1.9091704146497533e-05, "loss": 0.1346, "step": 4233 }, { "epoch": 4.071153846153846, "grad_norm": 3.1499571800231934, "learning_rate": 1.9091185346631147e-05, "loss": 0.093, "step": 4234 }, { "epoch": 4.072115384615385, "grad_norm": 2.2090020179748535, "learning_rate": 1.9090666405696242e-05, "loss": 0.0443, "step": 4235 }, { "epoch": 4.073076923076923, "grad_norm": 3.4478657245635986, "learning_rate": 1.909014732370088e-05, "loss": 0.0883, "step": 4236 }, { "epoch": 4.0740384615384615, "grad_norm": 3.559880018234253, "learning_rate": 1.9089628100653106e-05, "loss": 0.0935, "step": 4237 }, { "epoch": 4.075, "grad_norm": 10.516866683959961, "learning_rate": 1.9089108736560984e-05, "loss": 0.0506, "step": 4238 }, { "epoch": 4.075961538461539, "grad_norm": 4.007575988769531, "learning_rate": 1.908858923143257e-05, "loss": 0.0896, "step": 4239 }, { "epoch": 4.076923076923077, "grad_norm": 2.625732660293579, "learning_rate": 1.9088069585275927e-05, "loss": 0.0531, "step": 4240 }, { "epoch": 4.077884615384615, "grad_norm": 4.040773868560791, "learning_rate": 1.9087549798099116e-05, "loss": 0.0608, "step": 4241 }, { "epoch": 4.078846153846154, "grad_norm": 4.179868221282959, "learning_rate": 1.9087029869910204e-05, "loss": 0.0493, "step": 4242 }, { "epoch": 4.079807692307693, "grad_norm": 4.586161136627197, "learning_rate": 1.908650980071726e-05, "loss": 0.0889, "step": 4243 }, { "epoch": 4.0807692307692305, "grad_norm": 3.8312084674835205, "learning_rate": 1.908598959052835e-05, "loss": 0.0821, "step": 4244 }, { "epoch": 4.081730769230769, "grad_norm": 3.672555923461914, "learning_rate": 1.9085469239351552e-05, "loss": 0.0755, "step": 4245 }, { "epoch": 4.082692307692308, "grad_norm": 1.9249508380889893, "learning_rate": 1.908494874719493e-05, "loss": 0.027, "step": 4246 }, { "epoch": 4.0836538461538465, "grad_norm": 1.4635883569717407, "learning_rate": 1.9084428114066576e-05, "loss": 0.0144, "step": 4247 }, { "epoch": 4.084615384615384, "grad_norm": 1.1078195571899414, "learning_rate": 1.9083907339974554e-05, "loss": 0.0187, "step": 4248 }, { "epoch": 4.085576923076923, "grad_norm": 3.023036003112793, "learning_rate": 1.9083386424926952e-05, "loss": 0.0564, "step": 4249 }, { "epoch": 4.086538461538462, "grad_norm": 2.3554091453552246, "learning_rate": 1.9082865368931855e-05, "loss": 0.0244, "step": 4250 }, { "epoch": 4.0875, "grad_norm": 3.549203872680664, "learning_rate": 1.9082344171997344e-05, "loss": 0.0447, "step": 4251 }, { "epoch": 4.088461538461538, "grad_norm": 7.070263862609863, "learning_rate": 1.9081822834131508e-05, "loss": 0.3194, "step": 4252 }, { "epoch": 4.089423076923077, "grad_norm": 2.0671308040618896, "learning_rate": 1.9081301355342433e-05, "loss": 0.0386, "step": 4253 }, { "epoch": 4.0903846153846155, "grad_norm": 3.7090067863464355, "learning_rate": 1.908077973563822e-05, "loss": 0.0826, "step": 4254 }, { "epoch": 4.091346153846154, "grad_norm": 2.629523754119873, "learning_rate": 1.9080257975026954e-05, "loss": 0.026, "step": 4255 }, { "epoch": 4.092307692307692, "grad_norm": 1.6799230575561523, "learning_rate": 1.9079736073516735e-05, "loss": 0.0174, "step": 4256 }, { "epoch": 4.093269230769231, "grad_norm": 1.0732684135437012, "learning_rate": 1.907921403111566e-05, "loss": 0.0162, "step": 4257 }, { "epoch": 4.094230769230769, "grad_norm": 5.046568870544434, "learning_rate": 1.9078691847831834e-05, "loss": 0.1526, "step": 4258 }, { "epoch": 4.095192307692308, "grad_norm": 3.3183882236480713, "learning_rate": 1.9078169523673354e-05, "loss": 0.0732, "step": 4259 }, { "epoch": 4.096153846153846, "grad_norm": 3.4178378582000732, "learning_rate": 1.9077647058648324e-05, "loss": 0.0365, "step": 4260 }, { "epoch": 4.0971153846153845, "grad_norm": 1.2335808277130127, "learning_rate": 1.9077124452764856e-05, "loss": 0.0123, "step": 4261 }, { "epoch": 4.098076923076923, "grad_norm": 3.8940958976745605, "learning_rate": 1.9076601706031058e-05, "loss": 0.0544, "step": 4262 }, { "epoch": 4.099038461538462, "grad_norm": 3.687415838241577, "learning_rate": 1.9076078818455043e-05, "loss": 0.0431, "step": 4263 }, { "epoch": 4.1, "grad_norm": 3.7200355529785156, "learning_rate": 1.907555579004492e-05, "loss": 0.0581, "step": 4264 }, { "epoch": 4.100961538461538, "grad_norm": 1.7835088968276978, "learning_rate": 1.9075032620808808e-05, "loss": 0.0229, "step": 4265 }, { "epoch": 4.101923076923077, "grad_norm": 1.4801199436187744, "learning_rate": 1.9074509310754826e-05, "loss": 0.015, "step": 4266 }, { "epoch": 4.102884615384616, "grad_norm": 2.952310562133789, "learning_rate": 1.9073985859891093e-05, "loss": 0.0198, "step": 4267 }, { "epoch": 4.1038461538461535, "grad_norm": 4.32480525970459, "learning_rate": 1.907346226822573e-05, "loss": 0.0765, "step": 4268 }, { "epoch": 4.104807692307692, "grad_norm": 1.5507683753967285, "learning_rate": 1.9072938535766864e-05, "loss": 0.0196, "step": 4269 }, { "epoch": 4.105769230769231, "grad_norm": 3.560149908065796, "learning_rate": 1.907241466252262e-05, "loss": 0.0877, "step": 4270 }, { "epoch": 4.1067307692307695, "grad_norm": 3.293332099914551, "learning_rate": 1.907189064850113e-05, "loss": 0.1074, "step": 4271 }, { "epoch": 4.107692307692307, "grad_norm": 3.229257583618164, "learning_rate": 1.9071366493710516e-05, "loss": 0.032, "step": 4272 }, { "epoch": 4.108653846153846, "grad_norm": 4.256449222564697, "learning_rate": 1.9070842198158925e-05, "loss": 0.1963, "step": 4273 }, { "epoch": 4.109615384615385, "grad_norm": 0.7040908932685852, "learning_rate": 1.9070317761854483e-05, "loss": 0.0044, "step": 4274 }, { "epoch": 4.110576923076923, "grad_norm": 3.612339496612549, "learning_rate": 1.906979318480533e-05, "loss": 0.0363, "step": 4275 }, { "epoch": 4.111538461538461, "grad_norm": 2.0514864921569824, "learning_rate": 1.9069268467019604e-05, "loss": 0.0379, "step": 4276 }, { "epoch": 4.1125, "grad_norm": 3.3831706047058105, "learning_rate": 1.9068743608505454e-05, "loss": 0.0476, "step": 4277 }, { "epoch": 4.1134615384615385, "grad_norm": 3.1844322681427, "learning_rate": 1.906821860927102e-05, "loss": 0.0489, "step": 4278 }, { "epoch": 4.114423076923077, "grad_norm": 0.9005810618400574, "learning_rate": 1.9067693469324447e-05, "loss": 0.0118, "step": 4279 }, { "epoch": 4.115384615384615, "grad_norm": 2.5490002632141113, "learning_rate": 1.906716818867388e-05, "loss": 0.0513, "step": 4280 }, { "epoch": 4.116346153846154, "grad_norm": 2.338301181793213, "learning_rate": 1.9066642767327482e-05, "loss": 0.0164, "step": 4281 }, { "epoch": 4.117307692307692, "grad_norm": 4.837584972381592, "learning_rate": 1.9066117205293393e-05, "loss": 0.1069, "step": 4282 }, { "epoch": 4.118269230769231, "grad_norm": 3.4592909812927246, "learning_rate": 1.9065591502579777e-05, "loss": 0.0569, "step": 4283 }, { "epoch": 4.119230769230769, "grad_norm": 2.9131875038146973, "learning_rate": 1.9065065659194784e-05, "loss": 0.0658, "step": 4284 }, { "epoch": 4.1201923076923075, "grad_norm": 3.5611913204193115, "learning_rate": 1.9064539675146584e-05, "loss": 0.0764, "step": 4285 }, { "epoch": 4.121153846153846, "grad_norm": 4.488061904907227, "learning_rate": 1.906401355044333e-05, "loss": 0.1357, "step": 4286 }, { "epoch": 4.122115384615385, "grad_norm": 0.39120522141456604, "learning_rate": 1.9063487285093183e-05, "loss": 0.004, "step": 4287 }, { "epoch": 4.123076923076923, "grad_norm": 3.843154191970825, "learning_rate": 1.906296087910432e-05, "loss": 0.0879, "step": 4288 }, { "epoch": 4.124038461538461, "grad_norm": 3.8391709327697754, "learning_rate": 1.90624343324849e-05, "loss": 0.0667, "step": 4289 }, { "epoch": 4.125, "grad_norm": 1.142021894454956, "learning_rate": 1.90619076452431e-05, "loss": 0.0091, "step": 4290 }, { "epoch": 4.125961538461539, "grad_norm": 2.613630533218384, "learning_rate": 1.9061380817387088e-05, "loss": 0.048, "step": 4291 }, { "epoch": 4.126923076923077, "grad_norm": 4.066956043243408, "learning_rate": 1.9060853848925043e-05, "loss": 0.0639, "step": 4292 }, { "epoch": 4.127884615384615, "grad_norm": 3.2000679969787598, "learning_rate": 1.9060326739865135e-05, "loss": 0.0269, "step": 4293 }, { "epoch": 4.128846153846154, "grad_norm": 6.162613391876221, "learning_rate": 1.9059799490215552e-05, "loss": 0.0676, "step": 4294 }, { "epoch": 4.1298076923076925, "grad_norm": 2.335418224334717, "learning_rate": 1.905927209998447e-05, "loss": 0.025, "step": 4295 }, { "epoch": 4.130769230769231, "grad_norm": 4.282700061798096, "learning_rate": 1.9058744569180074e-05, "loss": 0.1024, "step": 4296 }, { "epoch": 4.131730769230769, "grad_norm": 2.1069798469543457, "learning_rate": 1.9058216897810547e-05, "loss": 0.0291, "step": 4297 }, { "epoch": 4.132692307692308, "grad_norm": 3.5027124881744385, "learning_rate": 1.905768908588408e-05, "loss": 0.055, "step": 4298 }, { "epoch": 4.133653846153846, "grad_norm": 4.605428218841553, "learning_rate": 1.9057161133408866e-05, "loss": 0.0745, "step": 4299 }, { "epoch": 4.134615384615385, "grad_norm": 1.9318650960922241, "learning_rate": 1.905663304039309e-05, "loss": 0.0141, "step": 4300 }, { "epoch": 4.135576923076923, "grad_norm": 2.198620080947876, "learning_rate": 1.905610480684495e-05, "loss": 0.0422, "step": 4301 }, { "epoch": 4.1365384615384615, "grad_norm": 4.954405307769775, "learning_rate": 1.9055576432772645e-05, "loss": 0.1828, "step": 4302 }, { "epoch": 4.1375, "grad_norm": 3.3609583377838135, "learning_rate": 1.905504791818437e-05, "loss": 0.0355, "step": 4303 }, { "epoch": 4.138461538461539, "grad_norm": 3.7140817642211914, "learning_rate": 1.9054519263088326e-05, "loss": 0.0883, "step": 4304 }, { "epoch": 4.139423076923077, "grad_norm": 1.232276439666748, "learning_rate": 1.9053990467492724e-05, "loss": 0.0079, "step": 4305 }, { "epoch": 4.140384615384615, "grad_norm": 4.407572269439697, "learning_rate": 1.9053461531405757e-05, "loss": 0.0359, "step": 4306 }, { "epoch": 4.141346153846154, "grad_norm": 7.518306255340576, "learning_rate": 1.9052932454835638e-05, "loss": 0.0518, "step": 4307 }, { "epoch": 4.142307692307693, "grad_norm": 1.9457544088363647, "learning_rate": 1.905240323779058e-05, "loss": 0.0254, "step": 4308 }, { "epoch": 4.1432692307692305, "grad_norm": 3.262720823287964, "learning_rate": 1.9051873880278796e-05, "loss": 0.0901, "step": 4309 }, { "epoch": 4.144230769230769, "grad_norm": 4.983520984649658, "learning_rate": 1.905134438230849e-05, "loss": 0.0694, "step": 4310 }, { "epoch": 4.145192307692308, "grad_norm": 2.13716459274292, "learning_rate": 1.9050814743887887e-05, "loss": 0.0284, "step": 4311 }, { "epoch": 4.1461538461538465, "grad_norm": 2.8682339191436768, "learning_rate": 1.9050284965025202e-05, "loss": 0.1404, "step": 4312 }, { "epoch": 4.147115384615384, "grad_norm": 4.134939193725586, "learning_rate": 1.9049755045728658e-05, "loss": 0.0886, "step": 4313 }, { "epoch": 4.148076923076923, "grad_norm": 4.142866134643555, "learning_rate": 1.9049224986006476e-05, "loss": 0.0732, "step": 4314 }, { "epoch": 4.149038461538462, "grad_norm": 1.7879680395126343, "learning_rate": 1.9048694785866882e-05, "loss": 0.0192, "step": 4315 }, { "epoch": 4.15, "grad_norm": 2.3747100830078125, "learning_rate": 1.90481644453181e-05, "loss": 0.0656, "step": 4316 }, { "epoch": 4.150961538461538, "grad_norm": 3.7592086791992188, "learning_rate": 1.9047633964368363e-05, "loss": 0.0832, "step": 4317 }, { "epoch": 4.151923076923077, "grad_norm": 4.840105056762695, "learning_rate": 1.9047103343025905e-05, "loss": 0.1217, "step": 4318 }, { "epoch": 4.1528846153846155, "grad_norm": 5.253194332122803, "learning_rate": 1.9046572581298954e-05, "loss": 0.1178, "step": 4319 }, { "epoch": 4.153846153846154, "grad_norm": 4.056741714477539, "learning_rate": 1.9046041679195743e-05, "loss": 0.0548, "step": 4320 }, { "epoch": 4.154807692307692, "grad_norm": 4.166906356811523, "learning_rate": 1.904551063672452e-05, "loss": 0.0311, "step": 4321 }, { "epoch": 4.155769230769231, "grad_norm": 2.5989208221435547, "learning_rate": 1.904497945389352e-05, "loss": 0.057, "step": 4322 }, { "epoch": 4.156730769230769, "grad_norm": 2.2421326637268066, "learning_rate": 1.9044448130710985e-05, "loss": 0.0158, "step": 4323 }, { "epoch": 4.157692307692308, "grad_norm": 3.3725287914276123, "learning_rate": 1.904391666718516e-05, "loss": 0.0615, "step": 4324 }, { "epoch": 4.158653846153846, "grad_norm": 5.653958320617676, "learning_rate": 1.9043385063324292e-05, "loss": 0.1134, "step": 4325 }, { "epoch": 4.1596153846153845, "grad_norm": 3.3591151237487793, "learning_rate": 1.9042853319136626e-05, "loss": 0.0352, "step": 4326 }, { "epoch": 4.160576923076923, "grad_norm": 3.6845080852508545, "learning_rate": 1.9042321434630423e-05, "loss": 0.1258, "step": 4327 }, { "epoch": 4.161538461538462, "grad_norm": 4.462794780731201, "learning_rate": 1.9041789409813928e-05, "loss": 0.1275, "step": 4328 }, { "epoch": 4.1625, "grad_norm": 4.297878265380859, "learning_rate": 1.9041257244695397e-05, "loss": 0.0812, "step": 4329 }, { "epoch": 4.163461538461538, "grad_norm": 2.845524311065674, "learning_rate": 1.904072493928309e-05, "loss": 0.0307, "step": 4330 }, { "epoch": 4.164423076923077, "grad_norm": 3.965973377227783, "learning_rate": 1.9040192493585268e-05, "loss": 0.0264, "step": 4331 }, { "epoch": 4.165384615384616, "grad_norm": 2.3581769466400146, "learning_rate": 1.903965990761019e-05, "loss": 0.0248, "step": 4332 }, { "epoch": 4.1663461538461535, "grad_norm": 1.1850016117095947, "learning_rate": 1.903912718136612e-05, "loss": 0.0103, "step": 4333 }, { "epoch": 4.167307692307692, "grad_norm": 3.9043490886688232, "learning_rate": 1.9038594314861328e-05, "loss": 0.0321, "step": 4334 }, { "epoch": 4.168269230769231, "grad_norm": 3.496800661087036, "learning_rate": 1.9038061308104077e-05, "loss": 0.0795, "step": 4335 }, { "epoch": 4.1692307692307695, "grad_norm": 4.577810287475586, "learning_rate": 1.9037528161102646e-05, "loss": 0.0966, "step": 4336 }, { "epoch": 4.170192307692307, "grad_norm": 3.917909860610962, "learning_rate": 1.90369948738653e-05, "loss": 0.0243, "step": 4337 }, { "epoch": 4.171153846153846, "grad_norm": 5.4368157386779785, "learning_rate": 1.9036461446400315e-05, "loss": 0.1358, "step": 4338 }, { "epoch": 4.172115384615385, "grad_norm": 2.2342138290405273, "learning_rate": 1.9035927878715973e-05, "loss": 0.0484, "step": 4339 }, { "epoch": 4.173076923076923, "grad_norm": 3.5160646438598633, "learning_rate": 1.903539417082055e-05, "loss": 0.0464, "step": 4340 }, { "epoch": 4.174038461538461, "grad_norm": 3.6448662281036377, "learning_rate": 1.9034860322722325e-05, "loss": 0.0339, "step": 4341 }, { "epoch": 4.175, "grad_norm": 3.5974671840667725, "learning_rate": 1.9034326334429586e-05, "loss": 0.0645, "step": 4342 }, { "epoch": 4.1759615384615385, "grad_norm": 2.270153522491455, "learning_rate": 1.9033792205950622e-05, "loss": 0.0519, "step": 4343 }, { "epoch": 4.176923076923077, "grad_norm": 2.9078195095062256, "learning_rate": 1.9033257937293712e-05, "loss": 0.0721, "step": 4344 }, { "epoch": 4.177884615384615, "grad_norm": 1.422836422920227, "learning_rate": 1.9032723528467154e-05, "loss": 0.0062, "step": 4345 }, { "epoch": 4.178846153846154, "grad_norm": 4.265561580657959, "learning_rate": 1.9032188979479235e-05, "loss": 0.1129, "step": 4346 }, { "epoch": 4.179807692307692, "grad_norm": 3.3400635719299316, "learning_rate": 1.9031654290338256e-05, "loss": 0.0567, "step": 4347 }, { "epoch": 4.180769230769231, "grad_norm": 2.5311224460601807, "learning_rate": 1.9031119461052505e-05, "loss": 0.0441, "step": 4348 }, { "epoch": 4.181730769230769, "grad_norm": 5.848202705383301, "learning_rate": 1.903058449163029e-05, "loss": 0.0539, "step": 4349 }, { "epoch": 4.1826923076923075, "grad_norm": 5.75766134262085, "learning_rate": 1.9030049382079906e-05, "loss": 0.1513, "step": 4350 }, { "epoch": 4.183653846153846, "grad_norm": 5.380906105041504, "learning_rate": 1.902951413240966e-05, "loss": 0.119, "step": 4351 }, { "epoch": 4.184615384615385, "grad_norm": 2.7401347160339355, "learning_rate": 1.9028978742627853e-05, "loss": 0.047, "step": 4352 }, { "epoch": 4.185576923076923, "grad_norm": 2.8776161670684814, "learning_rate": 1.9028443212742797e-05, "loss": 0.0399, "step": 4353 }, { "epoch": 4.186538461538461, "grad_norm": 1.4205151796340942, "learning_rate": 1.9027907542762802e-05, "loss": 0.0151, "step": 4354 }, { "epoch": 4.1875, "grad_norm": 2.259019136428833, "learning_rate": 1.9027371732696175e-05, "loss": 0.0452, "step": 4355 }, { "epoch": 4.188461538461539, "grad_norm": 3.971576452255249, "learning_rate": 1.9026835782551237e-05, "loss": 0.0587, "step": 4356 }, { "epoch": 4.189423076923077, "grad_norm": 2.7881388664245605, "learning_rate": 1.90262996923363e-05, "loss": 0.0561, "step": 4357 }, { "epoch": 4.190384615384615, "grad_norm": 3.5842909812927246, "learning_rate": 1.9025763462059687e-05, "loss": 0.0475, "step": 4358 }, { "epoch": 4.191346153846154, "grad_norm": 1.980154275894165, "learning_rate": 1.902522709172971e-05, "loss": 0.0304, "step": 4359 }, { "epoch": 4.1923076923076925, "grad_norm": 2.824033260345459, "learning_rate": 1.90246905813547e-05, "loss": 0.0519, "step": 4360 }, { "epoch": 4.193269230769231, "grad_norm": 4.7998762130737305, "learning_rate": 1.902415393094298e-05, "loss": 0.0625, "step": 4361 }, { "epoch": 4.194230769230769, "grad_norm": 1.410693645477295, "learning_rate": 1.9023617140502876e-05, "loss": 0.0158, "step": 4362 }, { "epoch": 4.195192307692308, "grad_norm": 5.051226615905762, "learning_rate": 1.9023080210042717e-05, "loss": 0.1088, "step": 4363 }, { "epoch": 4.196153846153846, "grad_norm": 3.5006465911865234, "learning_rate": 1.9022543139570837e-05, "loss": 0.086, "step": 4364 }, { "epoch": 4.197115384615385, "grad_norm": 1.4570698738098145, "learning_rate": 1.9022005929095568e-05, "loss": 0.0237, "step": 4365 }, { "epoch": 4.198076923076923, "grad_norm": 3.3142619132995605, "learning_rate": 1.9021468578625245e-05, "loss": 0.0626, "step": 4366 }, { "epoch": 4.1990384615384615, "grad_norm": 4.039090156555176, "learning_rate": 1.902093108816821e-05, "loss": 0.0526, "step": 4367 }, { "epoch": 4.2, "grad_norm": 3.343493700027466, "learning_rate": 1.90203934577328e-05, "loss": 0.0428, "step": 4368 }, { "epoch": 4.200961538461539, "grad_norm": 3.813060998916626, "learning_rate": 1.9019855687327352e-05, "loss": 0.0526, "step": 4369 }, { "epoch": 4.201923076923077, "grad_norm": 4.146040916442871, "learning_rate": 1.901931777696022e-05, "loss": 0.0698, "step": 4370 }, { "epoch": 4.202884615384615, "grad_norm": 1.3208656311035156, "learning_rate": 1.901877972663975e-05, "loss": 0.013, "step": 4371 }, { "epoch": 4.203846153846154, "grad_norm": 2.3693125247955322, "learning_rate": 1.901824153637429e-05, "loss": 0.0535, "step": 4372 }, { "epoch": 4.204807692307693, "grad_norm": 4.063128471374512, "learning_rate": 1.9017703206172187e-05, "loss": 0.0965, "step": 4373 }, { "epoch": 4.2057692307692305, "grad_norm": 2.751282215118408, "learning_rate": 1.9017164736041795e-05, "loss": 0.0232, "step": 4374 }, { "epoch": 4.206730769230769, "grad_norm": 4.724580764770508, "learning_rate": 1.9016626125991477e-05, "loss": 0.2427, "step": 4375 }, { "epoch": 4.207692307692308, "grad_norm": 3.896214485168457, "learning_rate": 1.901608737602958e-05, "loss": 0.0594, "step": 4376 }, { "epoch": 4.2086538461538465, "grad_norm": 7.059468746185303, "learning_rate": 1.901554848616447e-05, "loss": 0.1245, "step": 4377 }, { "epoch": 4.209615384615384, "grad_norm": 3.6255438327789307, "learning_rate": 1.9015009456404504e-05, "loss": 0.0382, "step": 4378 }, { "epoch": 4.210576923076923, "grad_norm": 6.628666877746582, "learning_rate": 1.9014470286758054e-05, "loss": 0.1092, "step": 4379 }, { "epoch": 4.211538461538462, "grad_norm": 5.215757846832275, "learning_rate": 1.9013930977233485e-05, "loss": 0.1438, "step": 4380 }, { "epoch": 4.2125, "grad_norm": 3.5740716457366943, "learning_rate": 1.901339152783916e-05, "loss": 0.0851, "step": 4381 }, { "epoch": 4.213461538461538, "grad_norm": 2.4401752948760986, "learning_rate": 1.901285193858345e-05, "loss": 0.0353, "step": 4382 }, { "epoch": 4.214423076923077, "grad_norm": 3.009415626525879, "learning_rate": 1.9012312209474735e-05, "loss": 0.021, "step": 4383 }, { "epoch": 4.2153846153846155, "grad_norm": 3.370878219604492, "learning_rate": 1.9011772340521383e-05, "loss": 0.0675, "step": 4384 }, { "epoch": 4.216346153846154, "grad_norm": 3.5584323406219482, "learning_rate": 1.9011232331731773e-05, "loss": 0.0426, "step": 4385 }, { "epoch": 4.217307692307692, "grad_norm": 3.278733015060425, "learning_rate": 1.9010692183114285e-05, "loss": 0.0553, "step": 4386 }, { "epoch": 4.218269230769231, "grad_norm": 4.168430328369141, "learning_rate": 1.9010151894677303e-05, "loss": 0.0633, "step": 4387 }, { "epoch": 4.219230769230769, "grad_norm": 3.566399335861206, "learning_rate": 1.9009611466429204e-05, "loss": 0.1102, "step": 4388 }, { "epoch": 4.220192307692308, "grad_norm": 1.7781338691711426, "learning_rate": 1.900907089837838e-05, "loss": 0.0234, "step": 4389 }, { "epoch": 4.221153846153846, "grad_norm": 4.623775005340576, "learning_rate": 1.900853019053322e-05, "loss": 0.0923, "step": 4390 }, { "epoch": 4.2221153846153845, "grad_norm": 2.819844961166382, "learning_rate": 1.9007989342902103e-05, "loss": 0.026, "step": 4391 }, { "epoch": 4.223076923076923, "grad_norm": 5.811221599578857, "learning_rate": 1.9007448355493438e-05, "loss": 0.1339, "step": 4392 }, { "epoch": 4.224038461538462, "grad_norm": 3.733670949935913, "learning_rate": 1.9006907228315605e-05, "loss": 0.0784, "step": 4393 }, { "epoch": 4.225, "grad_norm": 3.6262028217315674, "learning_rate": 1.900636596137701e-05, "loss": 0.1138, "step": 4394 }, { "epoch": 4.225961538461538, "grad_norm": 2.951230525970459, "learning_rate": 1.9005824554686047e-05, "loss": 0.0196, "step": 4395 }, { "epoch": 4.226923076923077, "grad_norm": 3.9108996391296387, "learning_rate": 1.900528300825112e-05, "loss": 0.0565, "step": 4396 }, { "epoch": 4.227884615384616, "grad_norm": 3.465954542160034, "learning_rate": 1.9004741322080633e-05, "loss": 0.0542, "step": 4397 }, { "epoch": 4.2288461538461535, "grad_norm": 1.679966926574707, "learning_rate": 1.9004199496182987e-05, "loss": 0.015, "step": 4398 }, { "epoch": 4.229807692307692, "grad_norm": 2.6952264308929443, "learning_rate": 1.900365753056659e-05, "loss": 0.0603, "step": 4399 }, { "epoch": 4.230769230769231, "grad_norm": 2.006560802459717, "learning_rate": 1.9003115425239856e-05, "loss": 0.022, "step": 4400 }, { "epoch": 4.2317307692307695, "grad_norm": 2.964083194732666, "learning_rate": 1.9002573180211192e-05, "loss": 0.0439, "step": 4401 }, { "epoch": 4.232692307692307, "grad_norm": 2.6468684673309326, "learning_rate": 1.9002030795489018e-05, "loss": 0.0523, "step": 4402 }, { "epoch": 4.233653846153846, "grad_norm": 1.8299405574798584, "learning_rate": 1.9001488271081743e-05, "loss": 0.0214, "step": 4403 }, { "epoch": 4.234615384615385, "grad_norm": 4.118523120880127, "learning_rate": 1.9000945606997792e-05, "loss": 0.0581, "step": 4404 }, { "epoch": 4.235576923076923, "grad_norm": 4.607207775115967, "learning_rate": 1.900040280324558e-05, "loss": 0.0596, "step": 4405 }, { "epoch": 4.236538461538461, "grad_norm": 3.1883909702301025, "learning_rate": 1.8999859859833535e-05, "loss": 0.0895, "step": 4406 }, { "epoch": 4.2375, "grad_norm": 3.1983866691589355, "learning_rate": 1.8999316776770077e-05, "loss": 0.0956, "step": 4407 }, { "epoch": 4.2384615384615385, "grad_norm": 4.979126453399658, "learning_rate": 1.8998773554063636e-05, "loss": 0.0735, "step": 4408 }, { "epoch": 4.239423076923077, "grad_norm": 3.4029734134674072, "learning_rate": 1.8998230191722644e-05, "loss": 0.0814, "step": 4409 }, { "epoch": 4.240384615384615, "grad_norm": 3.4928576946258545, "learning_rate": 1.8997686689755522e-05, "loss": 0.0491, "step": 4410 }, { "epoch": 4.241346153846154, "grad_norm": 0.8051495552062988, "learning_rate": 1.8997143048170715e-05, "loss": 0.0073, "step": 4411 }, { "epoch": 4.242307692307692, "grad_norm": 2.8623480796813965, "learning_rate": 1.8996599266976658e-05, "loss": 0.0278, "step": 4412 }, { "epoch": 4.243269230769231, "grad_norm": 4.924260139465332, "learning_rate": 1.8996055346181776e-05, "loss": 0.1532, "step": 4413 }, { "epoch": 4.244230769230769, "grad_norm": 3.4676854610443115, "learning_rate": 1.8995511285794522e-05, "loss": 0.0623, "step": 4414 }, { "epoch": 4.2451923076923075, "grad_norm": 3.7194976806640625, "learning_rate": 1.899496708582334e-05, "loss": 0.0797, "step": 4415 }, { "epoch": 4.246153846153846, "grad_norm": 2.754344940185547, "learning_rate": 1.8994422746276663e-05, "loss": 0.0315, "step": 4416 }, { "epoch": 4.247115384615385, "grad_norm": 4.035604953765869, "learning_rate": 1.899387826716294e-05, "loss": 0.0669, "step": 4417 }, { "epoch": 4.248076923076923, "grad_norm": 2.398153066635132, "learning_rate": 1.899333364849063e-05, "loss": 0.0526, "step": 4418 }, { "epoch": 4.249038461538461, "grad_norm": 5.725468635559082, "learning_rate": 1.8992788890268175e-05, "loss": 0.162, "step": 4419 }, { "epoch": 4.25, "grad_norm": 4.954188823699951, "learning_rate": 1.899224399250403e-05, "loss": 0.0609, "step": 4420 }, { "epoch": 4.250961538461539, "grad_norm": 3.1579434871673584, "learning_rate": 1.8991698955206645e-05, "loss": 0.0514, "step": 4421 }, { "epoch": 4.251923076923077, "grad_norm": 3.2579331398010254, "learning_rate": 1.8991153778384488e-05, "loss": 0.0524, "step": 4422 }, { "epoch": 4.252884615384615, "grad_norm": 3.8389506340026855, "learning_rate": 1.8990608462046013e-05, "loss": 0.0449, "step": 4423 }, { "epoch": 4.253846153846154, "grad_norm": 1.3877421617507935, "learning_rate": 1.899006300619968e-05, "loss": 0.0117, "step": 4424 }, { "epoch": 4.2548076923076925, "grad_norm": 5.474865913391113, "learning_rate": 1.8989517410853956e-05, "loss": 0.1225, "step": 4425 }, { "epoch": 4.25576923076923, "grad_norm": 3.6528754234313965, "learning_rate": 1.89889716760173e-05, "loss": 0.115, "step": 4426 }, { "epoch": 4.256730769230769, "grad_norm": 1.0332144498825073, "learning_rate": 1.8988425801698197e-05, "loss": 0.0092, "step": 4427 }, { "epoch": 4.257692307692308, "grad_norm": 5.857419967651367, "learning_rate": 1.8987879787905098e-05, "loss": 0.0731, "step": 4428 }, { "epoch": 4.258653846153846, "grad_norm": 2.246748685836792, "learning_rate": 1.8987333634646487e-05, "loss": 0.0288, "step": 4429 }, { "epoch": 4.259615384615385, "grad_norm": 2.3622615337371826, "learning_rate": 1.8986787341930833e-05, "loss": 0.0414, "step": 4430 }, { "epoch": 4.260576923076923, "grad_norm": 2.541978120803833, "learning_rate": 1.8986240909766617e-05, "loss": 0.0754, "step": 4431 }, { "epoch": 4.2615384615384615, "grad_norm": 3.816014289855957, "learning_rate": 1.8985694338162316e-05, "loss": 0.0511, "step": 4432 }, { "epoch": 4.2625, "grad_norm": 3.675713062286377, "learning_rate": 1.8985147627126414e-05, "loss": 0.0406, "step": 4433 }, { "epoch": 4.263461538461539, "grad_norm": 7.982803821563721, "learning_rate": 1.8984600776667387e-05, "loss": 0.1089, "step": 4434 }, { "epoch": 4.264423076923077, "grad_norm": 6.697623252868652, "learning_rate": 1.8984053786793727e-05, "loss": 0.1502, "step": 4435 }, { "epoch": 4.265384615384615, "grad_norm": 3.5968503952026367, "learning_rate": 1.898350665751392e-05, "loss": 0.1128, "step": 4436 }, { "epoch": 4.266346153846154, "grad_norm": 3.8995680809020996, "learning_rate": 1.898295938883646e-05, "loss": 0.0809, "step": 4437 }, { "epoch": 4.267307692307693, "grad_norm": 3.5032525062561035, "learning_rate": 1.898241198076983e-05, "loss": 0.0612, "step": 4438 }, { "epoch": 4.2682692307692305, "grad_norm": 5.504586219787598, "learning_rate": 1.8981864433322534e-05, "loss": 0.213, "step": 4439 }, { "epoch": 4.269230769230769, "grad_norm": 3.289231777191162, "learning_rate": 1.8981316746503057e-05, "loss": 0.0366, "step": 4440 }, { "epoch": 4.270192307692308, "grad_norm": 3.9631638526916504, "learning_rate": 1.898076892031991e-05, "loss": 0.0524, "step": 4441 }, { "epoch": 4.2711538461538465, "grad_norm": 2.1639530658721924, "learning_rate": 1.8980220954781586e-05, "loss": 0.0347, "step": 4442 }, { "epoch": 4.272115384615384, "grad_norm": 6.821115493774414, "learning_rate": 1.8979672849896587e-05, "loss": 0.1964, "step": 4443 }, { "epoch": 4.273076923076923, "grad_norm": 4.5858988761901855, "learning_rate": 1.897912460567342e-05, "loss": 0.1326, "step": 4444 }, { "epoch": 4.274038461538462, "grad_norm": 4.776206016540527, "learning_rate": 1.8978576222120597e-05, "loss": 0.0237, "step": 4445 }, { "epoch": 4.275, "grad_norm": 2.2489686012268066, "learning_rate": 1.897802769924662e-05, "loss": 0.043, "step": 4446 }, { "epoch": 4.275961538461538, "grad_norm": 3.647252082824707, "learning_rate": 1.8977479037060004e-05, "loss": 0.0561, "step": 4447 }, { "epoch": 4.276923076923077, "grad_norm": 2.3396453857421875, "learning_rate": 1.897693023556926e-05, "loss": 0.027, "step": 4448 }, { "epoch": 4.2778846153846155, "grad_norm": 3.8867597579956055, "learning_rate": 1.8976381294782907e-05, "loss": 0.1225, "step": 4449 }, { "epoch": 4.278846153846154, "grad_norm": 3.176083564758301, "learning_rate": 1.8975832214709462e-05, "loss": 0.077, "step": 4450 }, { "epoch": 4.279807692307692, "grad_norm": 1.351235270500183, "learning_rate": 1.8975282995357448e-05, "loss": 0.019, "step": 4451 }, { "epoch": 4.280769230769231, "grad_norm": 4.089657783508301, "learning_rate": 1.8974733636735378e-05, "loss": 0.1089, "step": 4452 }, { "epoch": 4.281730769230769, "grad_norm": 2.527036666870117, "learning_rate": 1.897418413885179e-05, "loss": 0.0228, "step": 4453 }, { "epoch": 4.282692307692308, "grad_norm": 4.964471817016602, "learning_rate": 1.8973634501715196e-05, "loss": 0.1272, "step": 4454 }, { "epoch": 4.283653846153846, "grad_norm": 4.0578227043151855, "learning_rate": 1.8973084725334133e-05, "loss": 0.0978, "step": 4455 }, { "epoch": 4.2846153846153845, "grad_norm": 3.3502562046051025, "learning_rate": 1.8972534809717132e-05, "loss": 0.1053, "step": 4456 }, { "epoch": 4.285576923076923, "grad_norm": 2.715024709701538, "learning_rate": 1.8971984754872724e-05, "loss": 0.0624, "step": 4457 }, { "epoch": 4.286538461538462, "grad_norm": 1.2363669872283936, "learning_rate": 1.8971434560809444e-05, "loss": 0.0191, "step": 4458 }, { "epoch": 4.2875, "grad_norm": 3.9404146671295166, "learning_rate": 1.8970884227535836e-05, "loss": 0.0758, "step": 4459 }, { "epoch": 4.288461538461538, "grad_norm": 1.814285159111023, "learning_rate": 1.897033375506043e-05, "loss": 0.0152, "step": 4460 }, { "epoch": 4.289423076923077, "grad_norm": 1.4436851739883423, "learning_rate": 1.896978314339177e-05, "loss": 0.0267, "step": 4461 }, { "epoch": 4.290384615384616, "grad_norm": 2.749413013458252, "learning_rate": 1.8969232392538407e-05, "loss": 0.0741, "step": 4462 }, { "epoch": 4.2913461538461535, "grad_norm": 2.3572957515716553, "learning_rate": 1.8968681502508877e-05, "loss": 0.0379, "step": 4463 }, { "epoch": 4.292307692307692, "grad_norm": 4.393311977386475, "learning_rate": 1.8968130473311732e-05, "loss": 0.1582, "step": 4464 }, { "epoch": 4.293269230769231, "grad_norm": 2.068350315093994, "learning_rate": 1.8967579304955527e-05, "loss": 0.0272, "step": 4465 }, { "epoch": 4.2942307692307695, "grad_norm": 2.1585962772369385, "learning_rate": 1.896702799744881e-05, "loss": 0.0397, "step": 4466 }, { "epoch": 4.295192307692307, "grad_norm": 2.483020067214966, "learning_rate": 1.8966476550800135e-05, "loss": 0.0389, "step": 4467 }, { "epoch": 4.296153846153846, "grad_norm": 4.209465026855469, "learning_rate": 1.896592496501806e-05, "loss": 0.1282, "step": 4468 }, { "epoch": 4.297115384615385, "grad_norm": 3.7069976329803467, "learning_rate": 1.8965373240111146e-05, "loss": 0.0795, "step": 4469 }, { "epoch": 4.298076923076923, "grad_norm": 2.2464213371276855, "learning_rate": 1.896482137608795e-05, "loss": 0.0398, "step": 4470 }, { "epoch": 4.299038461538461, "grad_norm": 3.2857820987701416, "learning_rate": 1.896426937295704e-05, "loss": 0.0438, "step": 4471 }, { "epoch": 4.3, "grad_norm": 1.7379379272460938, "learning_rate": 1.8963717230726976e-05, "loss": 0.0208, "step": 4472 }, { "epoch": 4.3009615384615385, "grad_norm": 5.859293460845947, "learning_rate": 1.896316494940633e-05, "loss": 0.1239, "step": 4473 }, { "epoch": 4.301923076923077, "grad_norm": 2.4074158668518066, "learning_rate": 1.8962612529003673e-05, "loss": 0.0644, "step": 4474 }, { "epoch": 4.302884615384615, "grad_norm": 4.610363483428955, "learning_rate": 1.8962059969527573e-05, "loss": 0.0825, "step": 4475 }, { "epoch": 4.303846153846154, "grad_norm": 5.763433456420898, "learning_rate": 1.8961507270986606e-05, "loss": 0.0956, "step": 4476 }, { "epoch": 4.304807692307692, "grad_norm": 3.485363721847534, "learning_rate": 1.896095443338935e-05, "loss": 0.0766, "step": 4477 }, { "epoch": 4.305769230769231, "grad_norm": 2.017859697341919, "learning_rate": 1.896040145674438e-05, "loss": 0.0579, "step": 4478 }, { "epoch": 4.30673076923077, "grad_norm": 3.371511697769165, "learning_rate": 1.8959848341060275e-05, "loss": 0.1028, "step": 4479 }, { "epoch": 4.3076923076923075, "grad_norm": 3.7259809970855713, "learning_rate": 1.895929508634562e-05, "loss": 0.0553, "step": 4480 }, { "epoch": 4.308653846153846, "grad_norm": 2.667144536972046, "learning_rate": 1.8958741692609003e-05, "loss": 0.0406, "step": 4481 }, { "epoch": 4.309615384615385, "grad_norm": 5.619309902191162, "learning_rate": 1.8958188159859008e-05, "loss": 0.0546, "step": 4482 }, { "epoch": 4.310576923076923, "grad_norm": 3.191246747970581, "learning_rate": 1.8957634488104222e-05, "loss": 0.0542, "step": 4483 }, { "epoch": 4.311538461538461, "grad_norm": 2.0644161701202393, "learning_rate": 1.8957080677353244e-05, "loss": 0.0293, "step": 4484 }, { "epoch": 4.3125, "grad_norm": 3.3294503688812256, "learning_rate": 1.8956526727614663e-05, "loss": 0.0501, "step": 4485 }, { "epoch": 4.313461538461539, "grad_norm": 2.2718307971954346, "learning_rate": 1.895597263889707e-05, "loss": 0.0359, "step": 4486 }, { "epoch": 4.314423076923077, "grad_norm": 3.939702033996582, "learning_rate": 1.895541841120907e-05, "loss": 0.0938, "step": 4487 }, { "epoch": 4.315384615384615, "grad_norm": 3.172686815261841, "learning_rate": 1.8954864044559258e-05, "loss": 0.0331, "step": 4488 }, { "epoch": 4.316346153846154, "grad_norm": 4.463747024536133, "learning_rate": 1.895430953895624e-05, "loss": 0.1566, "step": 4489 }, { "epoch": 4.3173076923076925, "grad_norm": 2.8746790885925293, "learning_rate": 1.8953754894408617e-05, "loss": 0.0441, "step": 4490 }, { "epoch": 4.31826923076923, "grad_norm": 3.4582667350769043, "learning_rate": 1.8953200110925e-05, "loss": 0.0439, "step": 4491 }, { "epoch": 4.319230769230769, "grad_norm": 4.365861415863037, "learning_rate": 1.895264518851399e-05, "loss": 0.0681, "step": 4492 }, { "epoch": 4.320192307692308, "grad_norm": 2.572927713394165, "learning_rate": 1.8952090127184206e-05, "loss": 0.0439, "step": 4493 }, { "epoch": 4.321153846153846, "grad_norm": 2.834040403366089, "learning_rate": 1.8951534926944254e-05, "loss": 0.0335, "step": 4494 }, { "epoch": 4.322115384615385, "grad_norm": 4.244302272796631, "learning_rate": 1.8950979587802757e-05, "loss": 0.1481, "step": 4495 }, { "epoch": 4.323076923076923, "grad_norm": 1.7524067163467407, "learning_rate": 1.8950424109768324e-05, "loss": 0.0307, "step": 4496 }, { "epoch": 4.3240384615384615, "grad_norm": 0.8257541060447693, "learning_rate": 1.894986849284958e-05, "loss": 0.008, "step": 4497 }, { "epoch": 4.325, "grad_norm": 3.8785791397094727, "learning_rate": 1.8949312737055143e-05, "loss": 0.0518, "step": 4498 }, { "epoch": 4.325961538461539, "grad_norm": 4.565455913543701, "learning_rate": 1.894875684239364e-05, "loss": 0.0425, "step": 4499 }, { "epoch": 4.326923076923077, "grad_norm": 4.570747375488281, "learning_rate": 1.8948200808873692e-05, "loss": 0.0928, "step": 4500 }, { "epoch": 4.327884615384615, "grad_norm": 4.39660120010376, "learning_rate": 1.8947644636503933e-05, "loss": 0.1211, "step": 4501 }, { "epoch": 4.328846153846154, "grad_norm": 2.0258827209472656, "learning_rate": 1.894708832529299e-05, "loss": 0.0255, "step": 4502 }, { "epoch": 4.329807692307693, "grad_norm": 4.584596157073975, "learning_rate": 1.8946531875249496e-05, "loss": 0.0655, "step": 4503 }, { "epoch": 4.3307692307692305, "grad_norm": 3.2300665378570557, "learning_rate": 1.894597528638208e-05, "loss": 0.0418, "step": 4504 }, { "epoch": 4.331730769230769, "grad_norm": 2.8888652324676514, "learning_rate": 1.8945418558699387e-05, "loss": 0.0478, "step": 4505 }, { "epoch": 4.332692307692308, "grad_norm": 5.277583599090576, "learning_rate": 1.8944861692210055e-05, "loss": 0.0737, "step": 4506 }, { "epoch": 4.3336538461538465, "grad_norm": 3.7968549728393555, "learning_rate": 1.8944304686922722e-05, "loss": 0.0506, "step": 4507 }, { "epoch": 4.334615384615384, "grad_norm": 1.815014362335205, "learning_rate": 1.8943747542846026e-05, "loss": 0.0225, "step": 4508 }, { "epoch": 4.335576923076923, "grad_norm": 3.2465150356292725, "learning_rate": 1.8943190259988623e-05, "loss": 0.0339, "step": 4509 }, { "epoch": 4.336538461538462, "grad_norm": 4.5491743087768555, "learning_rate": 1.8942632838359155e-05, "loss": 0.1035, "step": 4510 }, { "epoch": 4.3375, "grad_norm": 4.125491619110107, "learning_rate": 1.894207527796627e-05, "loss": 0.133, "step": 4511 }, { "epoch": 4.338461538461538, "grad_norm": 3.850799083709717, "learning_rate": 1.894151757881862e-05, "loss": 0.0919, "step": 4512 }, { "epoch": 4.339423076923077, "grad_norm": 3.1579368114471436, "learning_rate": 1.8940959740924864e-05, "loss": 0.0393, "step": 4513 }, { "epoch": 4.3403846153846155, "grad_norm": 3.643484354019165, "learning_rate": 1.8940401764293653e-05, "loss": 0.0803, "step": 4514 }, { "epoch": 4.341346153846154, "grad_norm": 2.351945161819458, "learning_rate": 1.8939843648933644e-05, "loss": 0.0208, "step": 4515 }, { "epoch": 4.342307692307692, "grad_norm": 5.539327621459961, "learning_rate": 1.89392853948535e-05, "loss": 0.2498, "step": 4516 }, { "epoch": 4.343269230769231, "grad_norm": 1.6494313478469849, "learning_rate": 1.8938727002061888e-05, "loss": 0.0203, "step": 4517 }, { "epoch": 4.344230769230769, "grad_norm": 3.424334764480591, "learning_rate": 1.8938168470567463e-05, "loss": 0.0333, "step": 4518 }, { "epoch": 4.345192307692308, "grad_norm": 4.929286479949951, "learning_rate": 1.8937609800378895e-05, "loss": 0.059, "step": 4519 }, { "epoch": 4.346153846153846, "grad_norm": 3.293811559677124, "learning_rate": 1.893705099150486e-05, "loss": 0.1071, "step": 4520 }, { "epoch": 4.3471153846153845, "grad_norm": 3.925962448120117, "learning_rate": 1.893649204395402e-05, "loss": 0.0786, "step": 4521 }, { "epoch": 4.348076923076923, "grad_norm": 2.5420854091644287, "learning_rate": 1.893593295773505e-05, "loss": 0.0835, "step": 4522 }, { "epoch": 4.349038461538462, "grad_norm": 5.355295658111572, "learning_rate": 1.893537373285663e-05, "loss": 0.0598, "step": 4523 }, { "epoch": 4.35, "grad_norm": 2.9736502170562744, "learning_rate": 1.8934814369327438e-05, "loss": 0.0511, "step": 4524 }, { "epoch": 4.350961538461538, "grad_norm": 3.411997079849243, "learning_rate": 1.8934254867156146e-05, "loss": 0.0949, "step": 4525 }, { "epoch": 4.351923076923077, "grad_norm": 2.7394583225250244, "learning_rate": 1.893369522635144e-05, "loss": 0.0387, "step": 4526 }, { "epoch": 4.352884615384616, "grad_norm": 3.0308260917663574, "learning_rate": 1.8933135446922006e-05, "loss": 0.0635, "step": 4527 }, { "epoch": 4.3538461538461535, "grad_norm": 2.9453232288360596, "learning_rate": 1.8932575528876527e-05, "loss": 0.0662, "step": 4528 }, { "epoch": 4.354807692307692, "grad_norm": 4.431835651397705, "learning_rate": 1.8932015472223692e-05, "loss": 0.0476, "step": 4529 }, { "epoch": 4.355769230769231, "grad_norm": 2.969420909881592, "learning_rate": 1.8931455276972195e-05, "loss": 0.0645, "step": 4530 }, { "epoch": 4.3567307692307695, "grad_norm": 0.989112138748169, "learning_rate": 1.893089494313072e-05, "loss": 0.0086, "step": 4531 }, { "epoch": 4.357692307692307, "grad_norm": 4.819379806518555, "learning_rate": 1.8930334470707973e-05, "loss": 0.0984, "step": 4532 }, { "epoch": 4.358653846153846, "grad_norm": 4.4299492835998535, "learning_rate": 1.8929773859712643e-05, "loss": 0.0792, "step": 4533 }, { "epoch": 4.359615384615385, "grad_norm": 3.047163963317871, "learning_rate": 1.8929213110153434e-05, "loss": 0.0954, "step": 4534 }, { "epoch": 4.360576923076923, "grad_norm": 3.5170366764068604, "learning_rate": 1.8928652222039037e-05, "loss": 0.1011, "step": 4535 }, { "epoch": 4.361538461538461, "grad_norm": 3.773634433746338, "learning_rate": 1.892809119537817e-05, "loss": 0.066, "step": 4536 }, { "epoch": 4.3625, "grad_norm": 2.8532958030700684, "learning_rate": 1.892753003017953e-05, "loss": 0.0443, "step": 4537 }, { "epoch": 4.3634615384615385, "grad_norm": 3.8993418216705322, "learning_rate": 1.8926968726451823e-05, "loss": 0.1252, "step": 4538 }, { "epoch": 4.364423076923077, "grad_norm": 3.0236544609069824, "learning_rate": 1.8926407284203763e-05, "loss": 0.033, "step": 4539 }, { "epoch": 4.365384615384615, "grad_norm": 3.7091476917266846, "learning_rate": 1.8925845703444065e-05, "loss": 0.0478, "step": 4540 }, { "epoch": 4.366346153846154, "grad_norm": 3.8753323554992676, "learning_rate": 1.8925283984181432e-05, "loss": 0.0985, "step": 4541 }, { "epoch": 4.367307692307692, "grad_norm": 5.119933128356934, "learning_rate": 1.892472212642459e-05, "loss": 0.0757, "step": 4542 }, { "epoch": 4.368269230769231, "grad_norm": 2.704716205596924, "learning_rate": 1.8924160130182257e-05, "loss": 0.0472, "step": 4543 }, { "epoch": 4.36923076923077, "grad_norm": 3.4140219688415527, "learning_rate": 1.8923597995463147e-05, "loss": 0.0336, "step": 4544 }, { "epoch": 4.3701923076923075, "grad_norm": 2.582109212875366, "learning_rate": 1.892303572227599e-05, "loss": 0.0366, "step": 4545 }, { "epoch": 4.371153846153846, "grad_norm": 4.008936882019043, "learning_rate": 1.8922473310629503e-05, "loss": 0.0293, "step": 4546 }, { "epoch": 4.372115384615385, "grad_norm": 4.254825592041016, "learning_rate": 1.892191076053242e-05, "loss": 0.1216, "step": 4547 }, { "epoch": 4.373076923076923, "grad_norm": 3.003325939178467, "learning_rate": 1.8921348071993466e-05, "loss": 0.0487, "step": 4548 }, { "epoch": 4.374038461538461, "grad_norm": 3.8801732063293457, "learning_rate": 1.8920785245021376e-05, "loss": 0.0417, "step": 4549 }, { "epoch": 4.375, "grad_norm": 5.0744194984436035, "learning_rate": 1.8920222279624877e-05, "loss": 0.123, "step": 4550 }, { "epoch": 4.375961538461539, "grad_norm": 3.665045738220215, "learning_rate": 1.8919659175812716e-05, "loss": 0.0277, "step": 4551 }, { "epoch": 4.376923076923077, "grad_norm": 2.37943696975708, "learning_rate": 1.8919095933593615e-05, "loss": 0.0258, "step": 4552 }, { "epoch": 4.377884615384615, "grad_norm": 4.541469573974609, "learning_rate": 1.8918532552976328e-05, "loss": 0.0647, "step": 4553 }, { "epoch": 4.378846153846154, "grad_norm": 2.3360726833343506, "learning_rate": 1.8917969033969588e-05, "loss": 0.0304, "step": 4554 }, { "epoch": 4.3798076923076925, "grad_norm": 2.8511550426483154, "learning_rate": 1.8917405376582144e-05, "loss": 0.1487, "step": 4555 }, { "epoch": 4.38076923076923, "grad_norm": 3.5366311073303223, "learning_rate": 1.8916841580822744e-05, "loss": 0.0809, "step": 4556 }, { "epoch": 4.381730769230769, "grad_norm": 2.663055181503296, "learning_rate": 1.891627764670013e-05, "loss": 0.0345, "step": 4557 }, { "epoch": 4.382692307692308, "grad_norm": 5.255126476287842, "learning_rate": 1.8915713574223057e-05, "loss": 0.1006, "step": 4558 }, { "epoch": 4.383653846153846, "grad_norm": 3.1897106170654297, "learning_rate": 1.8915149363400274e-05, "loss": 0.1304, "step": 4559 }, { "epoch": 4.384615384615385, "grad_norm": 5.1243133544921875, "learning_rate": 1.891458501424054e-05, "loss": 0.0275, "step": 4560 }, { "epoch": 4.385576923076923, "grad_norm": 1.8555001020431519, "learning_rate": 1.8914020526752612e-05, "loss": 0.0303, "step": 4561 }, { "epoch": 4.3865384615384615, "grad_norm": 5.031430244445801, "learning_rate": 1.8913455900945247e-05, "loss": 0.1187, "step": 4562 }, { "epoch": 4.3875, "grad_norm": 4.940237998962402, "learning_rate": 1.8912891136827207e-05, "loss": 0.1304, "step": 4563 }, { "epoch": 4.388461538461539, "grad_norm": 3.5871975421905518, "learning_rate": 1.8912326234407254e-05, "loss": 0.046, "step": 4564 }, { "epoch": 4.389423076923077, "grad_norm": 4.077315330505371, "learning_rate": 1.891176119369416e-05, "loss": 0.0574, "step": 4565 }, { "epoch": 4.390384615384615, "grad_norm": 5.633830547332764, "learning_rate": 1.891119601469668e-05, "loss": 0.1287, "step": 4566 }, { "epoch": 4.391346153846154, "grad_norm": 4.467406749725342, "learning_rate": 1.89106306974236e-05, "loss": 0.1054, "step": 4567 }, { "epoch": 4.392307692307693, "grad_norm": 3.8226566314697266, "learning_rate": 1.891006524188368e-05, "loss": 0.0607, "step": 4568 }, { "epoch": 4.3932692307692305, "grad_norm": 2.7367753982543945, "learning_rate": 1.8909499648085697e-05, "loss": 0.0342, "step": 4569 }, { "epoch": 4.394230769230769, "grad_norm": 3.7613685131073, "learning_rate": 1.8908933916038433e-05, "loss": 0.0714, "step": 4570 }, { "epoch": 4.395192307692308, "grad_norm": 3.198545455932617, "learning_rate": 1.890836804575066e-05, "loss": 0.0316, "step": 4571 }, { "epoch": 4.3961538461538465, "grad_norm": 3.8764312267303467, "learning_rate": 1.8907802037231158e-05, "loss": 0.1306, "step": 4572 }, { "epoch": 4.397115384615384, "grad_norm": 6.313948154449463, "learning_rate": 1.8907235890488716e-05, "loss": 0.1263, "step": 4573 }, { "epoch": 4.398076923076923, "grad_norm": 3.855311155319214, "learning_rate": 1.8906669605532112e-05, "loss": 0.0381, "step": 4574 }, { "epoch": 4.399038461538462, "grad_norm": 5.467794895172119, "learning_rate": 1.890610318237014e-05, "loss": 0.081, "step": 4575 }, { "epoch": 4.4, "grad_norm": 2.1563186645507812, "learning_rate": 1.8905536621011583e-05, "loss": 0.0378, "step": 4576 }, { "epoch": 4.400961538461538, "grad_norm": 1.5417033433914185, "learning_rate": 1.8904969921465234e-05, "loss": 0.0224, "step": 4577 }, { "epoch": 4.401923076923077, "grad_norm": 5.03914213180542, "learning_rate": 1.890440308373989e-05, "loss": 0.1708, "step": 4578 }, { "epoch": 4.4028846153846155, "grad_norm": 4.725558757781982, "learning_rate": 1.8903836107844345e-05, "loss": 0.0826, "step": 4579 }, { "epoch": 4.403846153846154, "grad_norm": 5.90656042098999, "learning_rate": 1.8903268993787397e-05, "loss": 0.1184, "step": 4580 }, { "epoch": 4.404807692307692, "grad_norm": 3.784036636352539, "learning_rate": 1.8902701741577844e-05, "loss": 0.0744, "step": 4581 }, { "epoch": 4.405769230769231, "grad_norm": 2.468109369277954, "learning_rate": 1.8902134351224485e-05, "loss": 0.035, "step": 4582 }, { "epoch": 4.406730769230769, "grad_norm": 6.514894485473633, "learning_rate": 1.890156682273613e-05, "loss": 0.1214, "step": 4583 }, { "epoch": 4.407692307692308, "grad_norm": 4.951172828674316, "learning_rate": 1.8900999156121587e-05, "loss": 0.1288, "step": 4584 }, { "epoch": 4.408653846153846, "grad_norm": 4.978479385375977, "learning_rate": 1.8900431351389657e-05, "loss": 0.0937, "step": 4585 }, { "epoch": 4.4096153846153845, "grad_norm": 5.689270973205566, "learning_rate": 1.8899863408549158e-05, "loss": 0.1737, "step": 4586 }, { "epoch": 4.410576923076923, "grad_norm": 1.8636670112609863, "learning_rate": 1.88992953276089e-05, "loss": 0.0299, "step": 4587 }, { "epoch": 4.411538461538462, "grad_norm": 2.345287322998047, "learning_rate": 1.8898727108577697e-05, "loss": 0.0262, "step": 4588 }, { "epoch": 4.4125, "grad_norm": 3.5226950645446777, "learning_rate": 1.8898158751464368e-05, "loss": 0.1119, "step": 4589 }, { "epoch": 4.413461538461538, "grad_norm": 5.1113057136535645, "learning_rate": 1.8897590256277726e-05, "loss": 0.152, "step": 4590 }, { "epoch": 4.414423076923077, "grad_norm": 4.998522758483887, "learning_rate": 1.88970216230266e-05, "loss": 0.1025, "step": 4591 }, { "epoch": 4.415384615384616, "grad_norm": 3.623325824737549, "learning_rate": 1.8896452851719814e-05, "loss": 0.0584, "step": 4592 }, { "epoch": 4.4163461538461535, "grad_norm": 3.053974151611328, "learning_rate": 1.889588394236619e-05, "loss": 0.0701, "step": 4593 }, { "epoch": 4.417307692307692, "grad_norm": 2.490384578704834, "learning_rate": 1.889531489497455e-05, "loss": 0.0562, "step": 4594 }, { "epoch": 4.418269230769231, "grad_norm": 4.290225505828857, "learning_rate": 1.8894745709553737e-05, "loss": 0.0327, "step": 4595 }, { "epoch": 4.4192307692307695, "grad_norm": 4.169550895690918, "learning_rate": 1.8894176386112573e-05, "loss": 0.1381, "step": 4596 }, { "epoch": 4.420192307692307, "grad_norm": 3.013545036315918, "learning_rate": 1.8893606924659896e-05, "loss": 0.0869, "step": 4597 }, { "epoch": 4.421153846153846, "grad_norm": 2.6402428150177, "learning_rate": 1.8893037325204546e-05, "loss": 0.0469, "step": 4598 }, { "epoch": 4.422115384615385, "grad_norm": 3.235414981842041, "learning_rate": 1.889246758775535e-05, "loss": 0.1053, "step": 4599 }, { "epoch": 4.423076923076923, "grad_norm": 3.9027209281921387, "learning_rate": 1.8891897712321162e-05, "loss": 0.0858, "step": 4600 }, { "epoch": 4.424038461538461, "grad_norm": 3.1706125736236572, "learning_rate": 1.889132769891082e-05, "loss": 0.0475, "step": 4601 }, { "epoch": 4.425, "grad_norm": 4.681237697601318, "learning_rate": 1.889075754753316e-05, "loss": 0.132, "step": 4602 }, { "epoch": 4.4259615384615385, "grad_norm": 4.480448246002197, "learning_rate": 1.889018725819704e-05, "loss": 0.0697, "step": 4603 }, { "epoch": 4.426923076923077, "grad_norm": 3.582240104675293, "learning_rate": 1.888961683091131e-05, "loss": 0.1081, "step": 4604 }, { "epoch": 4.427884615384615, "grad_norm": 3.6383216381073, "learning_rate": 1.8889046265684817e-05, "loss": 0.0572, "step": 4605 }, { "epoch": 4.428846153846154, "grad_norm": 2.254560708999634, "learning_rate": 1.888847556252641e-05, "loss": 0.0418, "step": 4606 }, { "epoch": 4.429807692307692, "grad_norm": 3.7486467361450195, "learning_rate": 1.8887904721444955e-05, "loss": 0.0894, "step": 4607 }, { "epoch": 4.430769230769231, "grad_norm": 1.3280671834945679, "learning_rate": 1.88873337424493e-05, "loss": 0.0237, "step": 4608 }, { "epoch": 4.43173076923077, "grad_norm": 3.724924087524414, "learning_rate": 1.8886762625548315e-05, "loss": 0.086, "step": 4609 }, { "epoch": 4.4326923076923075, "grad_norm": 3.9198272228240967, "learning_rate": 1.8886191370750852e-05, "loss": 0.0532, "step": 4610 }, { "epoch": 4.433653846153846, "grad_norm": 1.744076132774353, "learning_rate": 1.8885619978065783e-05, "loss": 0.0236, "step": 4611 }, { "epoch": 4.434615384615385, "grad_norm": 4.567208766937256, "learning_rate": 1.8885048447501968e-05, "loss": 0.1132, "step": 4612 }, { "epoch": 4.435576923076923, "grad_norm": 2.005631446838379, "learning_rate": 1.8884476779068277e-05, "loss": 0.035, "step": 4613 }, { "epoch": 4.436538461538461, "grad_norm": 1.3187295198440552, "learning_rate": 1.8883904972773584e-05, "loss": 0.0174, "step": 4614 }, { "epoch": 4.4375, "grad_norm": 2.150477170944214, "learning_rate": 1.888333302862676e-05, "loss": 0.0473, "step": 4615 }, { "epoch": 4.438461538461539, "grad_norm": 3.6244912147521973, "learning_rate": 1.888276094663668e-05, "loss": 0.0551, "step": 4616 }, { "epoch": 4.439423076923077, "grad_norm": 5.007544040679932, "learning_rate": 1.8882188726812217e-05, "loss": 0.0577, "step": 4617 }, { "epoch": 4.440384615384615, "grad_norm": 3.3871538639068604, "learning_rate": 1.888161636916226e-05, "loss": 0.0435, "step": 4618 }, { "epoch": 4.441346153846154, "grad_norm": 3.375621795654297, "learning_rate": 1.888104387369568e-05, "loss": 0.0598, "step": 4619 }, { "epoch": 4.4423076923076925, "grad_norm": 3.3702502250671387, "learning_rate": 1.8880471240421365e-05, "loss": 0.0616, "step": 4620 }, { "epoch": 4.44326923076923, "grad_norm": 2.2440364360809326, "learning_rate": 1.8879898469348203e-05, "loss": 0.0289, "step": 4621 }, { "epoch": 4.444230769230769, "grad_norm": 2.6664915084838867, "learning_rate": 1.8879325560485075e-05, "loss": 0.0401, "step": 4622 }, { "epoch": 4.445192307692308, "grad_norm": 1.6142034530639648, "learning_rate": 1.8878752513840877e-05, "loss": 0.0201, "step": 4623 }, { "epoch": 4.446153846153846, "grad_norm": 1.852532982826233, "learning_rate": 1.88781793294245e-05, "loss": 0.0206, "step": 4624 }, { "epoch": 4.447115384615385, "grad_norm": 4.285954475402832, "learning_rate": 1.8877606007244835e-05, "loss": 0.0475, "step": 4625 }, { "epoch": 4.448076923076923, "grad_norm": 4.71354866027832, "learning_rate": 1.8877032547310783e-05, "loss": 0.1114, "step": 4626 }, { "epoch": 4.4490384615384615, "grad_norm": 3.6679272651672363, "learning_rate": 1.8876458949631236e-05, "loss": 0.0607, "step": 4627 }, { "epoch": 4.45, "grad_norm": 5.003118515014648, "learning_rate": 1.88758852142151e-05, "loss": 0.1975, "step": 4628 }, { "epoch": 4.450961538461539, "grad_norm": 4.297549247741699, "learning_rate": 1.8875311341071273e-05, "loss": 0.1164, "step": 4629 }, { "epoch": 4.451923076923077, "grad_norm": 5.269557476043701, "learning_rate": 1.8874737330208668e-05, "loss": 0.2014, "step": 4630 }, { "epoch": 4.452884615384615, "grad_norm": 3.905547618865967, "learning_rate": 1.887416318163618e-05, "loss": 0.0582, "step": 4631 }, { "epoch": 4.453846153846154, "grad_norm": 3.983501672744751, "learning_rate": 1.8873588895362732e-05, "loss": 0.0838, "step": 4632 }, { "epoch": 4.454807692307693, "grad_norm": 4.199418544769287, "learning_rate": 1.8873014471397225e-05, "loss": 0.0439, "step": 4633 }, { "epoch": 4.4557692307692305, "grad_norm": 2.660740852355957, "learning_rate": 1.8872439909748574e-05, "loss": 0.0414, "step": 4634 }, { "epoch": 4.456730769230769, "grad_norm": 0.5973412394523621, "learning_rate": 1.8871865210425697e-05, "loss": 0.0054, "step": 4635 }, { "epoch": 4.457692307692308, "grad_norm": 1.8397271633148193, "learning_rate": 1.8871290373437513e-05, "loss": 0.0269, "step": 4636 }, { "epoch": 4.4586538461538465, "grad_norm": 3.78164005279541, "learning_rate": 1.8870715398792934e-05, "loss": 0.071, "step": 4637 }, { "epoch": 4.459615384615384, "grad_norm": 2.250032424926758, "learning_rate": 1.887014028650089e-05, "loss": 0.0761, "step": 4638 }, { "epoch": 4.460576923076923, "grad_norm": 3.990870475769043, "learning_rate": 1.8869565036570303e-05, "loss": 0.097, "step": 4639 }, { "epoch": 4.461538461538462, "grad_norm": 2.213804006576538, "learning_rate": 1.88689896490101e-05, "loss": 0.053, "step": 4640 }, { "epoch": 4.4625, "grad_norm": 2.405945301055908, "learning_rate": 1.8868414123829206e-05, "loss": 0.028, "step": 4641 }, { "epoch": 4.463461538461538, "grad_norm": 4.536802291870117, "learning_rate": 1.8867838461036553e-05, "loss": 0.1303, "step": 4642 }, { "epoch": 4.464423076923077, "grad_norm": 3.459235191345215, "learning_rate": 1.886726266064107e-05, "loss": 0.0881, "step": 4643 }, { "epoch": 4.4653846153846155, "grad_norm": 3.8674538135528564, "learning_rate": 1.88666867226517e-05, "loss": 0.1032, "step": 4644 }, { "epoch": 4.466346153846154, "grad_norm": 0.39924073219299316, "learning_rate": 1.8866110647077375e-05, "loss": 0.0035, "step": 4645 }, { "epoch": 4.467307692307692, "grad_norm": 3.334717035293579, "learning_rate": 1.8865534433927034e-05, "loss": 0.0448, "step": 4646 }, { "epoch": 4.468269230769231, "grad_norm": 4.261463642120361, "learning_rate": 1.8864958083209618e-05, "loss": 0.0647, "step": 4647 }, { "epoch": 4.469230769230769, "grad_norm": 2.7286815643310547, "learning_rate": 1.8864381594934075e-05, "loss": 0.0668, "step": 4648 }, { "epoch": 4.470192307692308, "grad_norm": 4.380082607269287, "learning_rate": 1.8863804969109343e-05, "loss": 0.0528, "step": 4649 }, { "epoch": 4.471153846153846, "grad_norm": 1.4531282186508179, "learning_rate": 1.8863228205744373e-05, "loss": 0.0216, "step": 4650 }, { "epoch": 4.4721153846153845, "grad_norm": 3.231480360031128, "learning_rate": 1.8862651304848114e-05, "loss": 0.0588, "step": 4651 }, { "epoch": 4.473076923076923, "grad_norm": 1.1963040828704834, "learning_rate": 1.886207426642952e-05, "loss": 0.0075, "step": 4652 }, { "epoch": 4.474038461538462, "grad_norm": 2.365717649459839, "learning_rate": 1.886149709049754e-05, "loss": 0.0418, "step": 4653 }, { "epoch": 4.475, "grad_norm": 2.4634532928466797, "learning_rate": 1.8860919777061134e-05, "loss": 0.0782, "step": 4654 }, { "epoch": 4.475961538461538, "grad_norm": 2.7510924339294434, "learning_rate": 1.8860342326129265e-05, "loss": 0.0422, "step": 4655 }, { "epoch": 4.476923076923077, "grad_norm": 5.5787353515625, "learning_rate": 1.885976473771088e-05, "loss": 0.2979, "step": 4656 }, { "epoch": 4.477884615384616, "grad_norm": 4.23190450668335, "learning_rate": 1.8859187011814956e-05, "loss": 0.0394, "step": 4657 }, { "epoch": 4.4788461538461535, "grad_norm": 5.588977813720703, "learning_rate": 1.885860914845045e-05, "loss": 0.1278, "step": 4658 }, { "epoch": 4.479807692307692, "grad_norm": 3.2456021308898926, "learning_rate": 1.8858031147626326e-05, "loss": 0.0689, "step": 4659 }, { "epoch": 4.480769230769231, "grad_norm": 6.7825469970703125, "learning_rate": 1.8857453009351557e-05, "loss": 0.1978, "step": 4660 }, { "epoch": 4.4817307692307695, "grad_norm": 3.751573085784912, "learning_rate": 1.8856874733635115e-05, "loss": 0.0816, "step": 4661 }, { "epoch": 4.482692307692307, "grad_norm": 2.8887126445770264, "learning_rate": 1.8856296320485975e-05, "loss": 0.0643, "step": 4662 }, { "epoch": 4.483653846153846, "grad_norm": 4.329010963439941, "learning_rate": 1.8855717769913104e-05, "loss": 0.0849, "step": 4663 }, { "epoch": 4.484615384615385, "grad_norm": 4.444439888000488, "learning_rate": 1.8855139081925486e-05, "loss": 0.1504, "step": 4664 }, { "epoch": 4.485576923076923, "grad_norm": 3.544752836227417, "learning_rate": 1.8854560256532098e-05, "loss": 0.0897, "step": 4665 }, { "epoch": 4.486538461538461, "grad_norm": 3.4045298099517822, "learning_rate": 1.8853981293741926e-05, "loss": 0.0528, "step": 4666 }, { "epoch": 4.4875, "grad_norm": 4.848966121673584, "learning_rate": 1.885340219356395e-05, "loss": 0.0766, "step": 4667 }, { "epoch": 4.4884615384615385, "grad_norm": 4.699055194854736, "learning_rate": 1.885282295600715e-05, "loss": 0.1181, "step": 4668 }, { "epoch": 4.489423076923077, "grad_norm": 2.1051695346832275, "learning_rate": 1.885224358108053e-05, "loss": 0.0365, "step": 4669 }, { "epoch": 4.490384615384615, "grad_norm": 1.2634649276733398, "learning_rate": 1.8851664068793064e-05, "loss": 0.0133, "step": 4670 }, { "epoch": 4.491346153846154, "grad_norm": 4.26308012008667, "learning_rate": 1.8851084419153752e-05, "loss": 0.1247, "step": 4671 }, { "epoch": 4.492307692307692, "grad_norm": 1.0991181135177612, "learning_rate": 1.885050463217159e-05, "loss": 0.0115, "step": 4672 }, { "epoch": 4.493269230769231, "grad_norm": 3.566012144088745, "learning_rate": 1.884992470785557e-05, "loss": 0.073, "step": 4673 }, { "epoch": 4.49423076923077, "grad_norm": 2.8481485843658447, "learning_rate": 1.8849344646214693e-05, "loss": 0.023, "step": 4674 }, { "epoch": 4.4951923076923075, "grad_norm": 2.5741705894470215, "learning_rate": 1.884876444725796e-05, "loss": 0.0334, "step": 4675 }, { "epoch": 4.496153846153846, "grad_norm": 1.7362979650497437, "learning_rate": 1.8848184110994374e-05, "loss": 0.0361, "step": 4676 }, { "epoch": 4.497115384615385, "grad_norm": 2.805006742477417, "learning_rate": 1.8847603637432937e-05, "loss": 0.0554, "step": 4677 }, { "epoch": 4.498076923076923, "grad_norm": 5.056220054626465, "learning_rate": 1.8847023026582663e-05, "loss": 0.1212, "step": 4678 }, { "epoch": 4.499038461538461, "grad_norm": 3.326076030731201, "learning_rate": 1.8846442278452555e-05, "loss": 0.1281, "step": 4679 }, { "epoch": 4.5, "grad_norm": 4.58369255065918, "learning_rate": 1.8845861393051628e-05, "loss": 0.0757, "step": 4680 }, { "epoch": 4.500961538461539, "grad_norm": 3.4181206226348877, "learning_rate": 1.8845280370388893e-05, "loss": 0.0706, "step": 4681 }, { "epoch": 4.501923076923077, "grad_norm": 1.5324958562850952, "learning_rate": 1.8844699210473366e-05, "loss": 0.0256, "step": 4682 }, { "epoch": 4.502884615384615, "grad_norm": 3.0725250244140625, "learning_rate": 1.884411791331407e-05, "loss": 0.0603, "step": 4683 }, { "epoch": 4.503846153846154, "grad_norm": 1.354372501373291, "learning_rate": 1.884353647892002e-05, "loss": 0.0084, "step": 4684 }, { "epoch": 4.5048076923076925, "grad_norm": 2.6960530281066895, "learning_rate": 1.8842954907300236e-05, "loss": 0.0393, "step": 4685 }, { "epoch": 4.50576923076923, "grad_norm": 3.4777989387512207, "learning_rate": 1.884237319846375e-05, "loss": 0.1126, "step": 4686 }, { "epoch": 4.506730769230769, "grad_norm": 2.8516290187835693, "learning_rate": 1.8841791352419583e-05, "loss": 0.0677, "step": 4687 }, { "epoch": 4.507692307692308, "grad_norm": 1.2281807661056519, "learning_rate": 1.8841209369176762e-05, "loss": 0.0126, "step": 4688 }, { "epoch": 4.508653846153846, "grad_norm": 4.780460357666016, "learning_rate": 1.8840627248744325e-05, "loss": 0.11, "step": 4689 }, { "epoch": 4.509615384615385, "grad_norm": 5.107330799102783, "learning_rate": 1.8840044991131293e-05, "loss": 0.2256, "step": 4690 }, { "epoch": 4.510576923076923, "grad_norm": 6.0565667152404785, "learning_rate": 1.883946259634671e-05, "loss": 0.1038, "step": 4691 }, { "epoch": 4.5115384615384615, "grad_norm": 3.0915331840515137, "learning_rate": 1.8838880064399616e-05, "loss": 0.0507, "step": 4692 }, { "epoch": 4.5125, "grad_norm": 2.9881858825683594, "learning_rate": 1.8838297395299042e-05, "loss": 0.0599, "step": 4693 }, { "epoch": 4.513461538461538, "grad_norm": 3.8930184841156006, "learning_rate": 1.8837714589054033e-05, "loss": 0.1199, "step": 4694 }, { "epoch": 4.514423076923077, "grad_norm": 2.7678916454315186, "learning_rate": 1.8837131645673633e-05, "loss": 0.0327, "step": 4695 }, { "epoch": 4.515384615384615, "grad_norm": 3.8094708919525146, "learning_rate": 1.8836548565166884e-05, "loss": 0.0593, "step": 4696 }, { "epoch": 4.516346153846154, "grad_norm": 3.2867467403411865, "learning_rate": 1.8835965347542834e-05, "loss": 0.0917, "step": 4697 }, { "epoch": 4.517307692307693, "grad_norm": 4.489847183227539, "learning_rate": 1.883538199281054e-05, "loss": 0.1444, "step": 4698 }, { "epoch": 4.5182692307692305, "grad_norm": 1.7274315357208252, "learning_rate": 1.883479850097905e-05, "loss": 0.0221, "step": 4699 }, { "epoch": 4.519230769230769, "grad_norm": 2.562270402908325, "learning_rate": 1.8834214872057413e-05, "loss": 0.0345, "step": 4700 }, { "epoch": 4.520192307692308, "grad_norm": 4.708662509918213, "learning_rate": 1.883363110605469e-05, "loss": 0.1058, "step": 4701 }, { "epoch": 4.5211538461538465, "grad_norm": 2.3729798793792725, "learning_rate": 1.883304720297994e-05, "loss": 0.0447, "step": 4702 }, { "epoch": 4.522115384615384, "grad_norm": 3.3956758975982666, "learning_rate": 1.883246316284222e-05, "loss": 0.0472, "step": 4703 }, { "epoch": 4.523076923076923, "grad_norm": 1.313652753829956, "learning_rate": 1.88318789856506e-05, "loss": 0.0173, "step": 4704 }, { "epoch": 4.524038461538462, "grad_norm": 1.2758413553237915, "learning_rate": 1.8831294671414134e-05, "loss": 0.0124, "step": 4705 }, { "epoch": 4.525, "grad_norm": 4.866003036499023, "learning_rate": 1.8830710220141894e-05, "loss": 0.0656, "step": 4706 }, { "epoch": 4.525961538461538, "grad_norm": 3.4674630165100098, "learning_rate": 1.8830125631842953e-05, "loss": 0.0772, "step": 4707 }, { "epoch": 4.526923076923077, "grad_norm": 5.065834045410156, "learning_rate": 1.8829540906526376e-05, "loss": 0.0885, "step": 4708 }, { "epoch": 4.5278846153846155, "grad_norm": 4.227726459503174, "learning_rate": 1.8828956044201237e-05, "loss": 0.046, "step": 4709 }, { "epoch": 4.528846153846154, "grad_norm": 3.1113228797912598, "learning_rate": 1.8828371044876617e-05, "loss": 0.0334, "step": 4710 }, { "epoch": 4.529807692307692, "grad_norm": 2.7885563373565674, "learning_rate": 1.8827785908561585e-05, "loss": 0.0213, "step": 4711 }, { "epoch": 4.530769230769231, "grad_norm": 0.9422145485877991, "learning_rate": 1.882720063526523e-05, "loss": 0.0092, "step": 4712 }, { "epoch": 4.531730769230769, "grad_norm": 3.7675650119781494, "learning_rate": 1.882661522499662e-05, "loss": 0.0551, "step": 4713 }, { "epoch": 4.532692307692308, "grad_norm": 1.789058804512024, "learning_rate": 1.8826029677764856e-05, "loss": 0.0153, "step": 4714 }, { "epoch": 4.533653846153846, "grad_norm": 3.3691625595092773, "learning_rate": 1.8825443993579014e-05, "loss": 0.0311, "step": 4715 }, { "epoch": 4.5346153846153845, "grad_norm": 7.730826377868652, "learning_rate": 1.882485817244818e-05, "loss": 0.1738, "step": 4716 }, { "epoch": 4.535576923076923, "grad_norm": 3.391741991043091, "learning_rate": 1.8824272214381453e-05, "loss": 0.05, "step": 4717 }, { "epoch": 4.536538461538462, "grad_norm": 5.668505668640137, "learning_rate": 1.8823686119387917e-05, "loss": 0.0824, "step": 4718 }, { "epoch": 4.5375, "grad_norm": 3.5441079139709473, "learning_rate": 1.8823099887476666e-05, "loss": 0.0938, "step": 4719 }, { "epoch": 4.538461538461538, "grad_norm": 3.1360764503479004, "learning_rate": 1.8822513518656807e-05, "loss": 0.06, "step": 4720 }, { "epoch": 4.539423076923077, "grad_norm": 3.1296322345733643, "learning_rate": 1.882192701293743e-05, "loss": 0.0656, "step": 4721 }, { "epoch": 4.540384615384616, "grad_norm": 1.714515209197998, "learning_rate": 1.8821340370327634e-05, "loss": 0.0209, "step": 4722 }, { "epoch": 4.5413461538461535, "grad_norm": 4.515488147735596, "learning_rate": 1.8820753590836532e-05, "loss": 0.0725, "step": 4723 }, { "epoch": 4.542307692307692, "grad_norm": 1.6307461261749268, "learning_rate": 1.8820166674473217e-05, "loss": 0.0244, "step": 4724 }, { "epoch": 4.543269230769231, "grad_norm": 5.381567478179932, "learning_rate": 1.8819579621246802e-05, "loss": 0.1118, "step": 4725 }, { "epoch": 4.5442307692307695, "grad_norm": 4.904841423034668, "learning_rate": 1.88189924311664e-05, "loss": 0.1181, "step": 4726 }, { "epoch": 4.545192307692307, "grad_norm": 2.8690497875213623, "learning_rate": 1.881840510424112e-05, "loss": 0.023, "step": 4727 }, { "epoch": 4.546153846153846, "grad_norm": 3.5485963821411133, "learning_rate": 1.8817817640480075e-05, "loss": 0.0401, "step": 4728 }, { "epoch": 4.547115384615385, "grad_norm": 3.0058538913726807, "learning_rate": 1.8817230039892377e-05, "loss": 0.0271, "step": 4729 }, { "epoch": 4.548076923076923, "grad_norm": 2.6329288482666016, "learning_rate": 1.8816642302487148e-05, "loss": 0.0386, "step": 4730 }, { "epoch": 4.549038461538462, "grad_norm": 1.8917226791381836, "learning_rate": 1.8816054428273504e-05, "loss": 0.0221, "step": 4731 }, { "epoch": 4.55, "grad_norm": 3.2484142780303955, "learning_rate": 1.8815466417260573e-05, "loss": 0.0354, "step": 4732 }, { "epoch": 4.5509615384615385, "grad_norm": 2.5548882484436035, "learning_rate": 1.8814878269457476e-05, "loss": 0.0621, "step": 4733 }, { "epoch": 4.551923076923077, "grad_norm": 5.562215805053711, "learning_rate": 1.881428998487334e-05, "loss": 0.1437, "step": 4734 }, { "epoch": 4.552884615384615, "grad_norm": 4.88317346572876, "learning_rate": 1.8813701563517293e-05, "loss": 0.0802, "step": 4735 }, { "epoch": 4.553846153846154, "grad_norm": 3.5392584800720215, "learning_rate": 1.8813113005398463e-05, "loss": 0.0983, "step": 4736 }, { "epoch": 4.554807692307692, "grad_norm": 0.967250406742096, "learning_rate": 1.881252431052599e-05, "loss": 0.0091, "step": 4737 }, { "epoch": 4.555769230769231, "grad_norm": 3.993856191635132, "learning_rate": 1.8811935478909e-05, "loss": 0.0541, "step": 4738 }, { "epoch": 4.55673076923077, "grad_norm": 3.2200517654418945, "learning_rate": 1.8811346510556637e-05, "loss": 0.0431, "step": 4739 }, { "epoch": 4.5576923076923075, "grad_norm": 3.5379598140716553, "learning_rate": 1.8810757405478038e-05, "loss": 0.0385, "step": 4740 }, { "epoch": 4.558653846153846, "grad_norm": 2.8507888317108154, "learning_rate": 1.8810168163682338e-05, "loss": 0.0262, "step": 4741 }, { "epoch": 4.559615384615385, "grad_norm": 4.504162311553955, "learning_rate": 1.880957878517869e-05, "loss": 0.0731, "step": 4742 }, { "epoch": 4.560576923076923, "grad_norm": 1.8080928325653076, "learning_rate": 1.8808989269976233e-05, "loss": 0.0244, "step": 4743 }, { "epoch": 4.561538461538461, "grad_norm": 3.066317558288574, "learning_rate": 1.8808399618084117e-05, "loss": 0.0503, "step": 4744 }, { "epoch": 4.5625, "grad_norm": 4.728444576263428, "learning_rate": 1.8807809829511496e-05, "loss": 0.137, "step": 4745 }, { "epoch": 4.563461538461539, "grad_norm": 3.69966197013855, "learning_rate": 1.880721990426751e-05, "loss": 0.0627, "step": 4746 }, { "epoch": 4.564423076923077, "grad_norm": 3.906958818435669, "learning_rate": 1.8806629842361323e-05, "loss": 0.0265, "step": 4747 }, { "epoch": 4.565384615384615, "grad_norm": 3.850543737411499, "learning_rate": 1.880603964380209e-05, "loss": 0.1135, "step": 4748 }, { "epoch": 4.566346153846154, "grad_norm": 3.3653461933135986, "learning_rate": 1.8805449308598963e-05, "loss": 0.0698, "step": 4749 }, { "epoch": 4.5673076923076925, "grad_norm": 3.1000404357910156, "learning_rate": 1.880485883676111e-05, "loss": 0.0574, "step": 4750 }, { "epoch": 4.56826923076923, "grad_norm": 2.540040969848633, "learning_rate": 1.8804268228297688e-05, "loss": 0.0308, "step": 4751 }, { "epoch": 4.569230769230769, "grad_norm": 2.8216607570648193, "learning_rate": 1.8803677483217864e-05, "loss": 0.0425, "step": 4752 }, { "epoch": 4.570192307692308, "grad_norm": 4.836695194244385, "learning_rate": 1.8803086601530803e-05, "loss": 0.1234, "step": 4753 }, { "epoch": 4.571153846153846, "grad_norm": 4.433686256408691, "learning_rate": 1.8802495583245676e-05, "loss": 0.1236, "step": 4754 }, { "epoch": 4.572115384615385, "grad_norm": 1.8185561895370483, "learning_rate": 1.880190442837165e-05, "loss": 0.0269, "step": 4755 }, { "epoch": 4.573076923076923, "grad_norm": 3.7902894020080566, "learning_rate": 1.8801313136917903e-05, "loss": 0.0643, "step": 4756 }, { "epoch": 4.5740384615384615, "grad_norm": 3.193732261657715, "learning_rate": 1.880072170889361e-05, "loss": 0.0576, "step": 4757 }, { "epoch": 4.575, "grad_norm": 2.2709617614746094, "learning_rate": 1.880013014430794e-05, "loss": 0.0812, "step": 4758 }, { "epoch": 4.575961538461538, "grad_norm": 4.18374490737915, "learning_rate": 1.8799538443170083e-05, "loss": 0.11, "step": 4759 }, { "epoch": 4.576923076923077, "grad_norm": 3.924344301223755, "learning_rate": 1.8798946605489213e-05, "loss": 0.1447, "step": 4760 }, { "epoch": 4.577884615384615, "grad_norm": 2.471095323562622, "learning_rate": 1.8798354631274516e-05, "loss": 0.0474, "step": 4761 }, { "epoch": 4.578846153846154, "grad_norm": 4.083654880523682, "learning_rate": 1.8797762520535178e-05, "loss": 0.0416, "step": 4762 }, { "epoch": 4.579807692307693, "grad_norm": 2.926185131072998, "learning_rate": 1.879717027328039e-05, "loss": 0.0631, "step": 4763 }, { "epoch": 4.5807692307692305, "grad_norm": 2.3060271739959717, "learning_rate": 1.8796577889519337e-05, "loss": 0.0309, "step": 4764 }, { "epoch": 4.581730769230769, "grad_norm": 3.442296266555786, "learning_rate": 1.879598536926121e-05, "loss": 0.0728, "step": 4765 }, { "epoch": 4.582692307692308, "grad_norm": 4.579873561859131, "learning_rate": 1.8795392712515212e-05, "loss": 0.1226, "step": 4766 }, { "epoch": 4.5836538461538465, "grad_norm": 4.1666107177734375, "learning_rate": 1.879479991929053e-05, "loss": 0.0607, "step": 4767 }, { "epoch": 4.584615384615384, "grad_norm": 3.749009609222412, "learning_rate": 1.8794206989596365e-05, "loss": 0.0521, "step": 4768 }, { "epoch": 4.585576923076923, "grad_norm": 5.767457962036133, "learning_rate": 1.8793613923441917e-05, "loss": 0.1564, "step": 4769 }, { "epoch": 4.586538461538462, "grad_norm": 4.257750511169434, "learning_rate": 1.879302072083639e-05, "loss": 0.0656, "step": 4770 }, { "epoch": 4.5875, "grad_norm": 2.951741933822632, "learning_rate": 1.879242738178899e-05, "loss": 0.0433, "step": 4771 }, { "epoch": 4.588461538461538, "grad_norm": 3.7454097270965576, "learning_rate": 1.8791833906308925e-05, "loss": 0.0734, "step": 4772 }, { "epoch": 4.589423076923077, "grad_norm": 3.659437656402588, "learning_rate": 1.87912402944054e-05, "loss": 0.0711, "step": 4773 }, { "epoch": 4.5903846153846155, "grad_norm": 3.289337158203125, "learning_rate": 1.8790646546087625e-05, "loss": 0.043, "step": 4774 }, { "epoch": 4.591346153846154, "grad_norm": 5.149374485015869, "learning_rate": 1.8790052661364816e-05, "loss": 0.0939, "step": 4775 }, { "epoch": 4.592307692307692, "grad_norm": 15.497516632080078, "learning_rate": 1.8789458640246193e-05, "loss": 0.1438, "step": 4776 }, { "epoch": 4.593269230769231, "grad_norm": 2.982285261154175, "learning_rate": 1.8788864482740964e-05, "loss": 0.0395, "step": 4777 }, { "epoch": 4.594230769230769, "grad_norm": 5.117213726043701, "learning_rate": 1.8788270188858354e-05, "loss": 0.1086, "step": 4778 }, { "epoch": 4.595192307692308, "grad_norm": 3.3922905921936035, "learning_rate": 1.8787675758607588e-05, "loss": 0.0997, "step": 4779 }, { "epoch": 4.596153846153846, "grad_norm": 5.575203895568848, "learning_rate": 1.878708119199788e-05, "loss": 0.1283, "step": 4780 }, { "epoch": 4.5971153846153845, "grad_norm": 3.980496406555176, "learning_rate": 1.8786486489038466e-05, "loss": 0.0712, "step": 4781 }, { "epoch": 4.598076923076923, "grad_norm": 3.550327777862549, "learning_rate": 1.8785891649738565e-05, "loss": 0.064, "step": 4782 }, { "epoch": 4.599038461538462, "grad_norm": 4.547229766845703, "learning_rate": 1.8785296674107414e-05, "loss": 0.1093, "step": 4783 }, { "epoch": 4.6, "grad_norm": 2.9931089878082275, "learning_rate": 1.8784701562154245e-05, "loss": 0.0384, "step": 4784 }, { "epoch": 4.600961538461538, "grad_norm": 2.432849884033203, "learning_rate": 1.878410631388829e-05, "loss": 0.0199, "step": 4785 }, { "epoch": 4.601923076923077, "grad_norm": 3.6197574138641357, "learning_rate": 1.8783510929318784e-05, "loss": 0.1237, "step": 4786 }, { "epoch": 4.602884615384616, "grad_norm": 3.457503318786621, "learning_rate": 1.8782915408454968e-05, "loss": 0.0889, "step": 4787 }, { "epoch": 4.6038461538461535, "grad_norm": 2.9796195030212402, "learning_rate": 1.8782319751306085e-05, "loss": 0.128, "step": 4788 }, { "epoch": 4.604807692307692, "grad_norm": 2.026778221130371, "learning_rate": 1.8781723957881374e-05, "loss": 0.0349, "step": 4789 }, { "epoch": 4.605769230769231, "grad_norm": 2.29025936126709, "learning_rate": 1.8781128028190082e-05, "loss": 0.0352, "step": 4790 }, { "epoch": 4.6067307692307695, "grad_norm": 2.653512954711914, "learning_rate": 1.878053196224145e-05, "loss": 0.0513, "step": 4791 }, { "epoch": 4.607692307692307, "grad_norm": 4.227165222167969, "learning_rate": 1.877993576004474e-05, "loss": 0.1474, "step": 4792 }, { "epoch": 4.608653846153846, "grad_norm": 3.239269971847534, "learning_rate": 1.8779339421609194e-05, "loss": 0.0444, "step": 4793 }, { "epoch": 4.609615384615385, "grad_norm": 4.011670112609863, "learning_rate": 1.8778742946944063e-05, "loss": 0.0583, "step": 4794 }, { "epoch": 4.610576923076923, "grad_norm": 2.9762539863586426, "learning_rate": 1.8778146336058612e-05, "loss": 0.0506, "step": 4795 }, { "epoch": 4.611538461538462, "grad_norm": 3.42341947555542, "learning_rate": 1.8777549588962088e-05, "loss": 0.0665, "step": 4796 }, { "epoch": 4.6125, "grad_norm": 4.185304164886475, "learning_rate": 1.877695270566376e-05, "loss": 0.22, "step": 4797 }, { "epoch": 4.6134615384615385, "grad_norm": 3.5836803913116455, "learning_rate": 1.8776355686172885e-05, "loss": 0.0638, "step": 4798 }, { "epoch": 4.614423076923077, "grad_norm": 4.834570407867432, "learning_rate": 1.877575853049873e-05, "loss": 0.0872, "step": 4799 }, { "epoch": 4.615384615384615, "grad_norm": 3.021895170211792, "learning_rate": 1.8775161238650556e-05, "loss": 0.0497, "step": 4800 }, { "epoch": 4.616346153846154, "grad_norm": 3.807351589202881, "learning_rate": 1.8774563810637636e-05, "loss": 0.0616, "step": 4801 }, { "epoch": 4.617307692307692, "grad_norm": 1.7562519311904907, "learning_rate": 1.8773966246469238e-05, "loss": 0.0295, "step": 4802 }, { "epoch": 4.618269230769231, "grad_norm": 2.9354772567749023, "learning_rate": 1.8773368546154635e-05, "loss": 0.0651, "step": 4803 }, { "epoch": 4.61923076923077, "grad_norm": 4.0989274978637695, "learning_rate": 1.8772770709703105e-05, "loss": 0.1387, "step": 4804 }, { "epoch": 4.6201923076923075, "grad_norm": 2.7376441955566406, "learning_rate": 1.8772172737123913e-05, "loss": 0.0602, "step": 4805 }, { "epoch": 4.621153846153846, "grad_norm": 4.572730541229248, "learning_rate": 1.8771574628426355e-05, "loss": 0.0953, "step": 4806 }, { "epoch": 4.622115384615385, "grad_norm": 2.9838693141937256, "learning_rate": 1.8770976383619702e-05, "loss": 0.0481, "step": 4807 }, { "epoch": 4.623076923076923, "grad_norm": 0.7246330380439758, "learning_rate": 1.8770378002713232e-05, "loss": 0.0085, "step": 4808 }, { "epoch": 4.624038461538461, "grad_norm": 4.30530309677124, "learning_rate": 1.876977948571624e-05, "loss": 0.1302, "step": 4809 }, { "epoch": 4.625, "grad_norm": 1.3238641023635864, "learning_rate": 1.8769180832638014e-05, "loss": 0.0068, "step": 4810 }, { "epoch": 4.625961538461539, "grad_norm": 3.6417484283447266, "learning_rate": 1.876858204348783e-05, "loss": 0.1172, "step": 4811 }, { "epoch": 4.626923076923077, "grad_norm": 3.8464224338531494, "learning_rate": 1.8767983118274992e-05, "loss": 0.0608, "step": 4812 }, { "epoch": 4.627884615384615, "grad_norm": 2.406527519226074, "learning_rate": 1.8767384057008792e-05, "loss": 0.025, "step": 4813 }, { "epoch": 4.628846153846154, "grad_norm": 4.4237961769104, "learning_rate": 1.876678485969852e-05, "loss": 0.0702, "step": 4814 }, { "epoch": 4.6298076923076925, "grad_norm": 4.5722737312316895, "learning_rate": 1.876618552635348e-05, "loss": 0.1154, "step": 4815 }, { "epoch": 4.63076923076923, "grad_norm": 3.944523334503174, "learning_rate": 1.8765586056982966e-05, "loss": 0.077, "step": 4816 }, { "epoch": 4.631730769230769, "grad_norm": 3.5340096950531006, "learning_rate": 1.8764986451596286e-05, "loss": 0.0817, "step": 4817 }, { "epoch": 4.632692307692308, "grad_norm": 4.004303932189941, "learning_rate": 1.876438671020274e-05, "loss": 0.0827, "step": 4818 }, { "epoch": 4.633653846153846, "grad_norm": 3.5887691974639893, "learning_rate": 1.8763786832811633e-05, "loss": 0.0633, "step": 4819 }, { "epoch": 4.634615384615385, "grad_norm": 2.8320653438568115, "learning_rate": 1.876318681943228e-05, "loss": 0.0415, "step": 4820 }, { "epoch": 4.635576923076923, "grad_norm": 2.8846473693847656, "learning_rate": 1.8762586670073982e-05, "loss": 0.0478, "step": 4821 }, { "epoch": 4.6365384615384615, "grad_norm": 4.74794340133667, "learning_rate": 1.8761986384746064e-05, "loss": 0.0519, "step": 4822 }, { "epoch": 4.6375, "grad_norm": 4.687629699707031, "learning_rate": 1.8761385963457828e-05, "loss": 0.053, "step": 4823 }, { "epoch": 4.638461538461538, "grad_norm": 2.843303680419922, "learning_rate": 1.8760785406218598e-05, "loss": 0.0418, "step": 4824 }, { "epoch": 4.639423076923077, "grad_norm": 3.6771881580352783, "learning_rate": 1.876018471303769e-05, "loss": 0.0454, "step": 4825 }, { "epoch": 4.640384615384615, "grad_norm": 1.8174129724502563, "learning_rate": 1.8759583883924426e-05, "loss": 0.0237, "step": 4826 }, { "epoch": 4.641346153846154, "grad_norm": 4.774555206298828, "learning_rate": 1.875898291888813e-05, "loss": 0.0385, "step": 4827 }, { "epoch": 4.642307692307693, "grad_norm": 3.953590154647827, "learning_rate": 1.8758381817938126e-05, "loss": 0.094, "step": 4828 }, { "epoch": 4.6432692307692305, "grad_norm": 6.3522772789001465, "learning_rate": 1.8757780581083742e-05, "loss": 0.1666, "step": 4829 }, { "epoch": 4.644230769230769, "grad_norm": 3.582336902618408, "learning_rate": 1.875717920833431e-05, "loss": 0.0406, "step": 4830 }, { "epoch": 4.645192307692308, "grad_norm": 4.376293182373047, "learning_rate": 1.8756577699699155e-05, "loss": 0.0704, "step": 4831 }, { "epoch": 4.6461538461538465, "grad_norm": 7.837477684020996, "learning_rate": 1.8755976055187614e-05, "loss": 0.2593, "step": 4832 }, { "epoch": 4.647115384615384, "grad_norm": 2.6779849529266357, "learning_rate": 1.8755374274809027e-05, "loss": 0.0536, "step": 4833 }, { "epoch": 4.648076923076923, "grad_norm": 3.2935547828674316, "learning_rate": 1.8754772358572725e-05, "loss": 0.051, "step": 4834 }, { "epoch": 4.649038461538462, "grad_norm": 2.377913475036621, "learning_rate": 1.8754170306488052e-05, "loss": 0.0599, "step": 4835 }, { "epoch": 4.65, "grad_norm": 4.568455696105957, "learning_rate": 1.8753568118564347e-05, "loss": 0.063, "step": 4836 }, { "epoch": 4.650961538461538, "grad_norm": 1.8128947019577026, "learning_rate": 1.8752965794810956e-05, "loss": 0.0207, "step": 4837 }, { "epoch": 4.651923076923077, "grad_norm": 4.928226947784424, "learning_rate": 1.875236333523723e-05, "loss": 0.1029, "step": 4838 }, { "epoch": 4.6528846153846155, "grad_norm": 3.2418313026428223, "learning_rate": 1.8751760739852508e-05, "loss": 0.0265, "step": 4839 }, { "epoch": 4.653846153846154, "grad_norm": 2.142670154571533, "learning_rate": 1.875115800866615e-05, "loss": 0.0345, "step": 4840 }, { "epoch": 4.654807692307692, "grad_norm": 1.7126107215881348, "learning_rate": 1.87505551416875e-05, "loss": 0.0235, "step": 4841 }, { "epoch": 4.655769230769231, "grad_norm": 3.0046026706695557, "learning_rate": 1.874995213892592e-05, "loss": 0.0976, "step": 4842 }, { "epoch": 4.656730769230769, "grad_norm": 3.322000741958618, "learning_rate": 1.874934900039076e-05, "loss": 0.1042, "step": 4843 }, { "epoch": 4.657692307692308, "grad_norm": 2.782235622406006, "learning_rate": 1.8748745726091383e-05, "loss": 0.0826, "step": 4844 }, { "epoch": 4.658653846153846, "grad_norm": 3.8930318355560303, "learning_rate": 1.8748142316037153e-05, "loss": 0.0727, "step": 4845 }, { "epoch": 4.6596153846153845, "grad_norm": 2.1559982299804688, "learning_rate": 1.8747538770237427e-05, "loss": 0.0609, "step": 4846 }, { "epoch": 4.660576923076923, "grad_norm": 2.5422751903533936, "learning_rate": 1.8746935088701576e-05, "loss": 0.0387, "step": 4847 }, { "epoch": 4.661538461538462, "grad_norm": 4.062628746032715, "learning_rate": 1.874633127143896e-05, "loss": 0.0642, "step": 4848 }, { "epoch": 4.6625, "grad_norm": 3.331784725189209, "learning_rate": 1.8745727318458956e-05, "loss": 0.109, "step": 4849 }, { "epoch": 4.663461538461538, "grad_norm": 3.6104013919830322, "learning_rate": 1.874512322977093e-05, "loss": 0.0954, "step": 4850 }, { "epoch": 4.664423076923077, "grad_norm": 2.514517068862915, "learning_rate": 1.874451900538426e-05, "loss": 0.0467, "step": 4851 }, { "epoch": 4.665384615384616, "grad_norm": 4.57091760635376, "learning_rate": 1.8743914645308317e-05, "loss": 0.1366, "step": 4852 }, { "epoch": 4.6663461538461535, "grad_norm": 4.114323139190674, "learning_rate": 1.8743310149552487e-05, "loss": 0.1168, "step": 4853 }, { "epoch": 4.667307692307692, "grad_norm": 3.0902745723724365, "learning_rate": 1.874270551812614e-05, "loss": 0.0597, "step": 4854 }, { "epoch": 4.668269230769231, "grad_norm": 6.522744655609131, "learning_rate": 1.8742100751038664e-05, "loss": 0.1996, "step": 4855 }, { "epoch": 4.6692307692307695, "grad_norm": 2.5284643173217773, "learning_rate": 1.8741495848299448e-05, "loss": 0.0237, "step": 4856 }, { "epoch": 4.670192307692307, "grad_norm": 1.2887688875198364, "learning_rate": 1.8740890809917866e-05, "loss": 0.0154, "step": 4857 }, { "epoch": 4.671153846153846, "grad_norm": 1.9241749048233032, "learning_rate": 1.8740285635903314e-05, "loss": 0.0493, "step": 4858 }, { "epoch": 4.672115384615385, "grad_norm": 5.4297776222229, "learning_rate": 1.873968032626518e-05, "loss": 0.1611, "step": 4859 }, { "epoch": 4.673076923076923, "grad_norm": 2.655590534210205, "learning_rate": 1.8739074881012864e-05, "loss": 0.0528, "step": 4860 }, { "epoch": 4.674038461538462, "grad_norm": 2.6236190795898438, "learning_rate": 1.873846930015575e-05, "loss": 0.0383, "step": 4861 }, { "epoch": 4.675, "grad_norm": 2.121237277984619, "learning_rate": 1.8737863583703236e-05, "loss": 0.0393, "step": 4862 }, { "epoch": 4.6759615384615385, "grad_norm": 6.063166618347168, "learning_rate": 1.873725773166473e-05, "loss": 0.1696, "step": 4863 }, { "epoch": 4.676923076923077, "grad_norm": 3.147883653640747, "learning_rate": 1.8736651744049627e-05, "loss": 0.1112, "step": 4864 }, { "epoch": 4.677884615384615, "grad_norm": 4.1881585121154785, "learning_rate": 1.873604562086733e-05, "loss": 0.1024, "step": 4865 }, { "epoch": 4.678846153846154, "grad_norm": 2.422581672668457, "learning_rate": 1.8735439362127243e-05, "loss": 0.0304, "step": 4866 }, { "epoch": 4.679807692307692, "grad_norm": 4.664104461669922, "learning_rate": 1.8734832967838775e-05, "loss": 0.0716, "step": 4867 }, { "epoch": 4.680769230769231, "grad_norm": 2.513564348220825, "learning_rate": 1.873422643801134e-05, "loss": 0.0629, "step": 4868 }, { "epoch": 4.68173076923077, "grad_norm": 3.142120838165283, "learning_rate": 1.8733619772654342e-05, "loss": 0.0366, "step": 4869 }, { "epoch": 4.6826923076923075, "grad_norm": 3.4509899616241455, "learning_rate": 1.87330129717772e-05, "loss": 0.0856, "step": 4870 }, { "epoch": 4.683653846153846, "grad_norm": 4.158478260040283, "learning_rate": 1.8732406035389325e-05, "loss": 0.0628, "step": 4871 }, { "epoch": 4.684615384615385, "grad_norm": 1.0295326709747314, "learning_rate": 1.873179896350014e-05, "loss": 0.012, "step": 4872 }, { "epoch": 4.685576923076923, "grad_norm": 3.8328065872192383, "learning_rate": 1.873119175611906e-05, "loss": 0.0694, "step": 4873 }, { "epoch": 4.686538461538461, "grad_norm": 4.350440502166748, "learning_rate": 1.8730584413255513e-05, "loss": 0.0923, "step": 4874 }, { "epoch": 4.6875, "grad_norm": 4.0332722663879395, "learning_rate": 1.8729976934918917e-05, "loss": 0.0568, "step": 4875 }, { "epoch": 4.688461538461539, "grad_norm": 2.3453643321990967, "learning_rate": 1.8729369321118704e-05, "loss": 0.0455, "step": 4876 }, { "epoch": 4.689423076923077, "grad_norm": 2.5380380153656006, "learning_rate": 1.87287615718643e-05, "loss": 0.0609, "step": 4877 }, { "epoch": 4.690384615384615, "grad_norm": 5.984764575958252, "learning_rate": 1.872815368716513e-05, "loss": 0.093, "step": 4878 }, { "epoch": 4.691346153846154, "grad_norm": 0.8300832509994507, "learning_rate": 1.872754566703064e-05, "loss": 0.0053, "step": 4879 }, { "epoch": 4.6923076923076925, "grad_norm": 6.776527404785156, "learning_rate": 1.8726937511470247e-05, "loss": 0.1632, "step": 4880 }, { "epoch": 4.69326923076923, "grad_norm": 3.8058676719665527, "learning_rate": 1.8726329220493403e-05, "loss": 0.0825, "step": 4881 }, { "epoch": 4.694230769230769, "grad_norm": 4.979808330535889, "learning_rate": 1.872572079410954e-05, "loss": 0.1663, "step": 4882 }, { "epoch": 4.695192307692308, "grad_norm": 2.8751883506774902, "learning_rate": 1.8725112232328097e-05, "loss": 0.0442, "step": 4883 }, { "epoch": 4.696153846153846, "grad_norm": 3.5335938930511475, "learning_rate": 1.872450353515852e-05, "loss": 0.0515, "step": 4884 }, { "epoch": 4.697115384615385, "grad_norm": 1.2628445625305176, "learning_rate": 1.8723894702610258e-05, "loss": 0.0199, "step": 4885 }, { "epoch": 4.698076923076923, "grad_norm": 2.414693593978882, "learning_rate": 1.8723285734692753e-05, "loss": 0.026, "step": 4886 }, { "epoch": 4.6990384615384615, "grad_norm": 3.838958263397217, "learning_rate": 1.8722676631415456e-05, "loss": 0.0828, "step": 4887 }, { "epoch": 4.7, "grad_norm": 5.958524703979492, "learning_rate": 1.872206739278782e-05, "loss": 0.1857, "step": 4888 }, { "epoch": 4.700961538461538, "grad_norm": 5.56052827835083, "learning_rate": 1.8721458018819295e-05, "loss": 0.1406, "step": 4889 }, { "epoch": 4.701923076923077, "grad_norm": 3.7249879837036133, "learning_rate": 1.872084850951934e-05, "loss": 0.0596, "step": 4890 }, { "epoch": 4.702884615384615, "grad_norm": 5.725547790527344, "learning_rate": 1.872023886489741e-05, "loss": 0.1749, "step": 4891 }, { "epoch": 4.703846153846154, "grad_norm": 3.519321918487549, "learning_rate": 1.8719629084962967e-05, "loss": 0.0869, "step": 4892 }, { "epoch": 4.704807692307693, "grad_norm": 4.897353172302246, "learning_rate": 1.871901916972547e-05, "loss": 0.1079, "step": 4893 }, { "epoch": 4.7057692307692305, "grad_norm": 5.053989410400391, "learning_rate": 1.871840911919439e-05, "loss": 0.0741, "step": 4894 }, { "epoch": 4.706730769230769, "grad_norm": 3.6703200340270996, "learning_rate": 1.8717798933379184e-05, "loss": 0.1134, "step": 4895 }, { "epoch": 4.707692307692308, "grad_norm": 2.130075454711914, "learning_rate": 1.871718861228933e-05, "loss": 0.0237, "step": 4896 }, { "epoch": 4.7086538461538465, "grad_norm": 5.082350254058838, "learning_rate": 1.871657815593429e-05, "loss": 0.1998, "step": 4897 }, { "epoch": 4.709615384615384, "grad_norm": 4.498843193054199, "learning_rate": 1.871596756432354e-05, "loss": 0.0592, "step": 4898 }, { "epoch": 4.710576923076923, "grad_norm": 4.163650035858154, "learning_rate": 1.8715356837466557e-05, "loss": 0.1054, "step": 4899 }, { "epoch": 4.711538461538462, "grad_norm": 4.539283752441406, "learning_rate": 1.8714745975372813e-05, "loss": 0.0632, "step": 4900 }, { "epoch": 4.7125, "grad_norm": 1.6051701307296753, "learning_rate": 1.871413497805179e-05, "loss": 0.0171, "step": 4901 }, { "epoch": 4.713461538461538, "grad_norm": 4.07218074798584, "learning_rate": 1.8713523845512966e-05, "loss": 0.0661, "step": 4902 }, { "epoch": 4.714423076923077, "grad_norm": 5.791037559509277, "learning_rate": 1.8712912577765826e-05, "loss": 0.1002, "step": 4903 }, { "epoch": 4.7153846153846155, "grad_norm": 2.7652628421783447, "learning_rate": 1.8712301174819858e-05, "loss": 0.0331, "step": 4904 }, { "epoch": 4.716346153846154, "grad_norm": 3.5614850521087646, "learning_rate": 1.8711689636684545e-05, "loss": 0.027, "step": 4905 }, { "epoch": 4.717307692307692, "grad_norm": 3.7824056148529053, "learning_rate": 1.8711077963369377e-05, "loss": 0.0712, "step": 4906 }, { "epoch": 4.718269230769231, "grad_norm": 3.095519781112671, "learning_rate": 1.871046615488384e-05, "loss": 0.0797, "step": 4907 }, { "epoch": 4.719230769230769, "grad_norm": 4.582278728485107, "learning_rate": 1.8709854211237443e-05, "loss": 0.0578, "step": 4908 }, { "epoch": 4.720192307692308, "grad_norm": 1.8497531414031982, "learning_rate": 1.8709242132439665e-05, "loss": 0.0247, "step": 4909 }, { "epoch": 4.721153846153846, "grad_norm": 3.2259891033172607, "learning_rate": 1.8708629918500012e-05, "loss": 0.0744, "step": 4910 }, { "epoch": 4.7221153846153845, "grad_norm": 3.7840816974639893, "learning_rate": 1.8708017569427983e-05, "loss": 0.0441, "step": 4911 }, { "epoch": 4.723076923076923, "grad_norm": 3.6427652835845947, "learning_rate": 1.870740508523308e-05, "loss": 0.052, "step": 4912 }, { "epoch": 4.724038461538462, "grad_norm": 2.800835371017456, "learning_rate": 1.8706792465924804e-05, "loss": 0.0457, "step": 4913 }, { "epoch": 4.725, "grad_norm": 4.927961826324463, "learning_rate": 1.8706179711512663e-05, "loss": 0.0598, "step": 4914 }, { "epoch": 4.725961538461538, "grad_norm": 5.4550604820251465, "learning_rate": 1.8705566822006166e-05, "loss": 0.0823, "step": 4915 }, { "epoch": 4.726923076923077, "grad_norm": 2.1422057151794434, "learning_rate": 1.870495379741482e-05, "loss": 0.0441, "step": 4916 }, { "epoch": 4.727884615384616, "grad_norm": 4.025485992431641, "learning_rate": 1.8704340637748144e-05, "loss": 0.0756, "step": 4917 }, { "epoch": 4.7288461538461535, "grad_norm": 3.5188727378845215, "learning_rate": 1.8703727343015647e-05, "loss": 0.049, "step": 4918 }, { "epoch": 4.729807692307692, "grad_norm": 3.7460854053497314, "learning_rate": 1.8703113913226847e-05, "loss": 0.06, "step": 4919 }, { "epoch": 4.730769230769231, "grad_norm": 4.336733341217041, "learning_rate": 1.870250034839126e-05, "loss": 0.1203, "step": 4920 }, { "epoch": 4.7317307692307695, "grad_norm": 6.111994743347168, "learning_rate": 1.870188664851841e-05, "loss": 0.1124, "step": 4921 }, { "epoch": 4.732692307692307, "grad_norm": 4.889878273010254, "learning_rate": 1.8701272813617823e-05, "loss": 0.0629, "step": 4922 }, { "epoch": 4.733653846153846, "grad_norm": 3.156322479248047, "learning_rate": 1.8700658843699013e-05, "loss": 0.0657, "step": 4923 }, { "epoch": 4.734615384615385, "grad_norm": 3.466214418411255, "learning_rate": 1.8700044738771516e-05, "loss": 0.0962, "step": 4924 }, { "epoch": 4.735576923076923, "grad_norm": 3.912356376647949, "learning_rate": 1.8699430498844863e-05, "loss": 0.0767, "step": 4925 }, { "epoch": 4.736538461538462, "grad_norm": 5.069802761077881, "learning_rate": 1.869881612392858e-05, "loss": 0.1081, "step": 4926 }, { "epoch": 4.7375, "grad_norm": 4.202798366546631, "learning_rate": 1.8698201614032198e-05, "loss": 0.0795, "step": 4927 }, { "epoch": 4.7384615384615385, "grad_norm": 2.292862892150879, "learning_rate": 1.869758696916526e-05, "loss": 0.0438, "step": 4928 }, { "epoch": 4.739423076923077, "grad_norm": 2.7127442359924316, "learning_rate": 1.8696972189337296e-05, "loss": 0.0429, "step": 4929 }, { "epoch": 4.740384615384615, "grad_norm": 3.50346302986145, "learning_rate": 1.8696357274557847e-05, "loss": 0.0731, "step": 4930 }, { "epoch": 4.741346153846154, "grad_norm": 3.9068801403045654, "learning_rate": 1.8695742224836462e-05, "loss": 0.0801, "step": 4931 }, { "epoch": 4.742307692307692, "grad_norm": 2.2739949226379395, "learning_rate": 1.8695127040182678e-05, "loss": 0.0415, "step": 4932 }, { "epoch": 4.743269230769231, "grad_norm": 5.277772903442383, "learning_rate": 1.8694511720606038e-05, "loss": 0.124, "step": 4933 }, { "epoch": 4.74423076923077, "grad_norm": 2.8264408111572266, "learning_rate": 1.8693896266116098e-05, "loss": 0.0521, "step": 4934 }, { "epoch": 4.7451923076923075, "grad_norm": 6.045731067657471, "learning_rate": 1.8693280676722406e-05, "loss": 0.1642, "step": 4935 }, { "epoch": 4.746153846153846, "grad_norm": 4.722403049468994, "learning_rate": 1.8692664952434508e-05, "loss": 0.0849, "step": 4936 }, { "epoch": 4.747115384615385, "grad_norm": 3.686495304107666, "learning_rate": 1.8692049093261967e-05, "loss": 0.0774, "step": 4937 }, { "epoch": 4.748076923076923, "grad_norm": 3.8039419651031494, "learning_rate": 1.869143309921433e-05, "loss": 0.0804, "step": 4938 }, { "epoch": 4.749038461538461, "grad_norm": 2.8280649185180664, "learning_rate": 1.8690816970301165e-05, "loss": 0.0716, "step": 4939 }, { "epoch": 4.75, "grad_norm": 4.034535884857178, "learning_rate": 1.8690200706532024e-05, "loss": 0.1014, "step": 4940 }, { "epoch": 4.750961538461539, "grad_norm": 2.5483450889587402, "learning_rate": 1.8689584307916477e-05, "loss": 0.0568, "step": 4941 }, { "epoch": 4.751923076923077, "grad_norm": 2.424194812774658, "learning_rate": 1.868896777446408e-05, "loss": 0.0266, "step": 4942 }, { "epoch": 4.752884615384615, "grad_norm": 2.0806782245635986, "learning_rate": 1.868835110618441e-05, "loss": 0.0336, "step": 4943 }, { "epoch": 4.753846153846154, "grad_norm": 9.408628463745117, "learning_rate": 1.868773430308703e-05, "loss": 0.1362, "step": 4944 }, { "epoch": 4.7548076923076925, "grad_norm": 4.502102375030518, "learning_rate": 1.8687117365181514e-05, "loss": 0.0774, "step": 4945 }, { "epoch": 4.75576923076923, "grad_norm": 3.042036771774292, "learning_rate": 1.868650029247743e-05, "loss": 0.0691, "step": 4946 }, { "epoch": 4.756730769230769, "grad_norm": 2.718710422515869, "learning_rate": 1.8685883084984354e-05, "loss": 0.0666, "step": 4947 }, { "epoch": 4.757692307692308, "grad_norm": 3.8626415729522705, "learning_rate": 1.8685265742711868e-05, "loss": 0.0797, "step": 4948 }, { "epoch": 4.758653846153846, "grad_norm": 4.367475986480713, "learning_rate": 1.8684648265669548e-05, "loss": 0.1179, "step": 4949 }, { "epoch": 4.759615384615385, "grad_norm": 0.7655346393585205, "learning_rate": 1.8684030653866977e-05, "loss": 0.0084, "step": 4950 }, { "epoch": 4.760576923076923, "grad_norm": 4.106597900390625, "learning_rate": 1.8683412907313735e-05, "loss": 0.0635, "step": 4951 }, { "epoch": 4.7615384615384615, "grad_norm": 3.001643419265747, "learning_rate": 1.8682795026019412e-05, "loss": 0.0781, "step": 4952 }, { "epoch": 4.7625, "grad_norm": 5.940906524658203, "learning_rate": 1.8682177009993592e-05, "loss": 0.1001, "step": 4953 }, { "epoch": 4.763461538461538, "grad_norm": 2.7199628353118896, "learning_rate": 1.868155885924587e-05, "loss": 0.0508, "step": 4954 }, { "epoch": 4.764423076923077, "grad_norm": 5.984686374664307, "learning_rate": 1.868094057378583e-05, "loss": 0.1434, "step": 4955 }, { "epoch": 4.765384615384615, "grad_norm": 3.1621904373168945, "learning_rate": 1.8680322153623077e-05, "loss": 0.0487, "step": 4956 }, { "epoch": 4.766346153846154, "grad_norm": 2.9487156867980957, "learning_rate": 1.8679703598767196e-05, "loss": 0.039, "step": 4957 }, { "epoch": 4.767307692307693, "grad_norm": 2.780784845352173, "learning_rate": 1.867908490922779e-05, "loss": 0.0463, "step": 4958 }, { "epoch": 4.7682692307692305, "grad_norm": 2.0327816009521484, "learning_rate": 1.867846608501446e-05, "loss": 0.0219, "step": 4959 }, { "epoch": 4.769230769230769, "grad_norm": 3.2224490642547607, "learning_rate": 1.8677847126136807e-05, "loss": 0.0735, "step": 4960 }, { "epoch": 4.770192307692308, "grad_norm": 2.338956594467163, "learning_rate": 1.8677228032604437e-05, "loss": 0.0304, "step": 4961 }, { "epoch": 4.7711538461538465, "grad_norm": 4.093987941741943, "learning_rate": 1.867660880442695e-05, "loss": 0.099, "step": 4962 }, { "epoch": 4.772115384615384, "grad_norm": 3.4725725650787354, "learning_rate": 1.8675989441613968e-05, "loss": 0.0753, "step": 4963 }, { "epoch": 4.773076923076923, "grad_norm": 3.6335675716400146, "learning_rate": 1.867536994417509e-05, "loss": 0.0758, "step": 4964 }, { "epoch": 4.774038461538462, "grad_norm": 2.805640697479248, "learning_rate": 1.867475031211993e-05, "loss": 0.0409, "step": 4965 }, { "epoch": 4.775, "grad_norm": 2.8944711685180664, "learning_rate": 1.8674130545458108e-05, "loss": 0.0492, "step": 4966 }, { "epoch": 4.775961538461538, "grad_norm": 2.547844886779785, "learning_rate": 1.867351064419924e-05, "loss": 0.0538, "step": 4967 }, { "epoch": 4.776923076923077, "grad_norm": 4.708627700805664, "learning_rate": 1.867289060835294e-05, "loss": 0.0974, "step": 4968 }, { "epoch": 4.7778846153846155, "grad_norm": 4.754507064819336, "learning_rate": 1.8672270437928833e-05, "loss": 0.1299, "step": 4969 }, { "epoch": 4.778846153846154, "grad_norm": 4.816864013671875, "learning_rate": 1.8671650132936542e-05, "loss": 0.1937, "step": 4970 }, { "epoch": 4.779807692307692, "grad_norm": 4.237277507781982, "learning_rate": 1.867102969338569e-05, "loss": 0.1118, "step": 4971 }, { "epoch": 4.780769230769231, "grad_norm": 2.7627694606781006, "learning_rate": 1.867040911928591e-05, "loss": 0.0585, "step": 4972 }, { "epoch": 4.781730769230769, "grad_norm": 3.971947431564331, "learning_rate": 1.8669788410646824e-05, "loss": 0.1176, "step": 4973 }, { "epoch": 4.782692307692308, "grad_norm": 4.2183308601379395, "learning_rate": 1.8669167567478072e-05, "loss": 0.0948, "step": 4974 }, { "epoch": 4.783653846153846, "grad_norm": 1.4342577457427979, "learning_rate": 1.8668546589789277e-05, "loss": 0.0145, "step": 4975 }, { "epoch": 4.7846153846153845, "grad_norm": 1.846125602722168, "learning_rate": 1.8667925477590084e-05, "loss": 0.0411, "step": 4976 }, { "epoch": 4.785576923076923, "grad_norm": 3.465862989425659, "learning_rate": 1.866730423089013e-05, "loss": 0.053, "step": 4977 }, { "epoch": 4.786538461538462, "grad_norm": 2.261587142944336, "learning_rate": 1.866668284969905e-05, "loss": 0.0597, "step": 4978 }, { "epoch": 4.7875, "grad_norm": 3.908156156539917, "learning_rate": 1.8666061334026488e-05, "loss": 0.075, "step": 4979 }, { "epoch": 4.788461538461538, "grad_norm": 3.871582269668579, "learning_rate": 1.866543968388209e-05, "loss": 0.0833, "step": 4980 }, { "epoch": 4.789423076923077, "grad_norm": 3.157053232192993, "learning_rate": 1.86648178992755e-05, "loss": 0.0491, "step": 4981 }, { "epoch": 4.790384615384616, "grad_norm": 0.922480583190918, "learning_rate": 1.866419598021637e-05, "loss": 0.006, "step": 4982 }, { "epoch": 4.7913461538461535, "grad_norm": 3.4655187129974365, "learning_rate": 1.8663573926714344e-05, "loss": 0.1214, "step": 4983 }, { "epoch": 4.792307692307692, "grad_norm": 4.239721775054932, "learning_rate": 1.8662951738779077e-05, "loss": 0.1052, "step": 4984 }, { "epoch": 4.793269230769231, "grad_norm": 5.025671005249023, "learning_rate": 1.866232941642023e-05, "loss": 0.1153, "step": 4985 }, { "epoch": 4.7942307692307695, "grad_norm": 2.8004870414733887, "learning_rate": 1.866170695964745e-05, "loss": 0.0527, "step": 4986 }, { "epoch": 4.795192307692307, "grad_norm": 3.859574556350708, "learning_rate": 1.86610843684704e-05, "loss": 0.0787, "step": 4987 }, { "epoch": 4.796153846153846, "grad_norm": 2.1924307346343994, "learning_rate": 1.866046164289874e-05, "loss": 0.0282, "step": 4988 }, { "epoch": 4.797115384615385, "grad_norm": 2.6720306873321533, "learning_rate": 1.8659838782942136e-05, "loss": 0.0282, "step": 4989 }, { "epoch": 4.798076923076923, "grad_norm": 4.6389875411987305, "learning_rate": 1.865921578861025e-05, "loss": 0.1561, "step": 4990 }, { "epoch": 4.799038461538462, "grad_norm": 3.284445285797119, "learning_rate": 1.8658592659912743e-05, "loss": 0.0247, "step": 4991 }, { "epoch": 4.8, "grad_norm": 5.761175632476807, "learning_rate": 1.8657969396859298e-05, "loss": 0.1554, "step": 4992 }, { "epoch": 4.8009615384615385, "grad_norm": 2.3638432025909424, "learning_rate": 1.8657345999459577e-05, "loss": 0.0628, "step": 4993 }, { "epoch": 4.801923076923077, "grad_norm": 4.746992111206055, "learning_rate": 1.8656722467723253e-05, "loss": 0.1343, "step": 4994 }, { "epoch": 4.802884615384615, "grad_norm": 3.1245529651641846, "learning_rate": 1.8656098801660008e-05, "loss": 0.0403, "step": 4995 }, { "epoch": 4.803846153846154, "grad_norm": 4.46199369430542, "learning_rate": 1.865547500127951e-05, "loss": 0.0928, "step": 4996 }, { "epoch": 4.804807692307692, "grad_norm": 3.5978100299835205, "learning_rate": 1.865485106659145e-05, "loss": 0.0629, "step": 4997 }, { "epoch": 4.805769230769231, "grad_norm": 3.63820219039917, "learning_rate": 1.8654226997605494e-05, "loss": 0.0603, "step": 4998 }, { "epoch": 4.80673076923077, "grad_norm": 5.196817398071289, "learning_rate": 1.865360279433134e-05, "loss": 0.1594, "step": 4999 }, { "epoch": 4.8076923076923075, "grad_norm": 1.9935804605484009, "learning_rate": 1.865297845677867e-05, "loss": 0.0284, "step": 5000 }, { "epoch": 4.808653846153846, "grad_norm": 0.7150020599365234, "learning_rate": 1.8652353984957168e-05, "loss": 0.0041, "step": 5001 }, { "epoch": 4.809615384615385, "grad_norm": 4.315548896789551, "learning_rate": 1.865172937887653e-05, "loss": 0.1337, "step": 5002 }, { "epoch": 4.810576923076923, "grad_norm": 4.034842014312744, "learning_rate": 1.865110463854644e-05, "loss": 0.1386, "step": 5003 }, { "epoch": 4.811538461538461, "grad_norm": 2.2792985439300537, "learning_rate": 1.86504797639766e-05, "loss": 0.0173, "step": 5004 }, { "epoch": 4.8125, "grad_norm": 4.166052341461182, "learning_rate": 1.86498547551767e-05, "loss": 0.0528, "step": 5005 }, { "epoch": 4.813461538461539, "grad_norm": 2.7637619972229004, "learning_rate": 1.8649229612156444e-05, "loss": 0.0658, "step": 5006 }, { "epoch": 4.814423076923077, "grad_norm": 3.701442003250122, "learning_rate": 1.8648604334925525e-05, "loss": 0.0485, "step": 5007 }, { "epoch": 4.815384615384615, "grad_norm": 3.0701773166656494, "learning_rate": 1.8647978923493654e-05, "loss": 0.0585, "step": 5008 }, { "epoch": 4.816346153846154, "grad_norm": 2.4025797843933105, "learning_rate": 1.864735337787053e-05, "loss": 0.0572, "step": 5009 }, { "epoch": 4.8173076923076925, "grad_norm": 3.021573305130005, "learning_rate": 1.8646727698065865e-05, "loss": 0.0302, "step": 5010 }, { "epoch": 4.81826923076923, "grad_norm": 2.409166097640991, "learning_rate": 1.864610188408936e-05, "loss": 0.039, "step": 5011 }, { "epoch": 4.819230769230769, "grad_norm": 3.0626227855682373, "learning_rate": 1.8645475935950734e-05, "loss": 0.0682, "step": 5012 }, { "epoch": 4.820192307692308, "grad_norm": 3.5791542530059814, "learning_rate": 1.864484985365969e-05, "loss": 0.1241, "step": 5013 }, { "epoch": 4.821153846153846, "grad_norm": 3.313664436340332, "learning_rate": 1.8644223637225952e-05, "loss": 0.0453, "step": 5014 }, { "epoch": 4.822115384615385, "grad_norm": 3.6467034816741943, "learning_rate": 1.8643597286659233e-05, "loss": 0.0255, "step": 5015 }, { "epoch": 4.823076923076923, "grad_norm": 4.346382141113281, "learning_rate": 1.8642970801969255e-05, "loss": 0.0787, "step": 5016 }, { "epoch": 4.8240384615384615, "grad_norm": 3.1832029819488525, "learning_rate": 1.864234418316573e-05, "loss": 0.0503, "step": 5017 }, { "epoch": 4.825, "grad_norm": 2.3244245052337646, "learning_rate": 1.8641717430258396e-05, "loss": 0.03, "step": 5018 }, { "epoch": 4.825961538461538, "grad_norm": 5.044416904449463, "learning_rate": 1.8641090543256967e-05, "loss": 0.1251, "step": 5019 }, { "epoch": 4.826923076923077, "grad_norm": 3.8556792736053467, "learning_rate": 1.8640463522171174e-05, "loss": 0.0804, "step": 5020 }, { "epoch": 4.827884615384615, "grad_norm": 1.8183000087738037, "learning_rate": 1.863983636701075e-05, "loss": 0.0147, "step": 5021 }, { "epoch": 4.828846153846154, "grad_norm": 4.140280246734619, "learning_rate": 1.8639209077785415e-05, "loss": 0.0903, "step": 5022 }, { "epoch": 4.829807692307693, "grad_norm": 3.709829330444336, "learning_rate": 1.863858165450492e-05, "loss": 0.0669, "step": 5023 }, { "epoch": 4.8307692307692305, "grad_norm": 4.602888107299805, "learning_rate": 1.8637954097178986e-05, "loss": 0.0746, "step": 5024 }, { "epoch": 4.831730769230769, "grad_norm": 6.003120422363281, "learning_rate": 1.863732640581736e-05, "loss": 0.1498, "step": 5025 }, { "epoch": 4.832692307692308, "grad_norm": 3.7353742122650146, "learning_rate": 1.8636698580429776e-05, "loss": 0.1359, "step": 5026 }, { "epoch": 4.8336538461538465, "grad_norm": 4.803210735321045, "learning_rate": 1.863607062102598e-05, "loss": 0.1017, "step": 5027 }, { "epoch": 4.834615384615384, "grad_norm": 3.95397686958313, "learning_rate": 1.863544252761571e-05, "loss": 0.0817, "step": 5028 }, { "epoch": 4.835576923076923, "grad_norm": 2.694772958755493, "learning_rate": 1.8634814300208722e-05, "loss": 0.0347, "step": 5029 }, { "epoch": 4.836538461538462, "grad_norm": 4.922035217285156, "learning_rate": 1.8634185938814758e-05, "loss": 0.1073, "step": 5030 }, { "epoch": 4.8375, "grad_norm": 5.069565296173096, "learning_rate": 1.8633557443443567e-05, "loss": 0.0664, "step": 5031 }, { "epoch": 4.838461538461538, "grad_norm": 2.1246135234832764, "learning_rate": 1.863292881410491e-05, "loss": 0.0208, "step": 5032 }, { "epoch": 4.839423076923077, "grad_norm": 2.2173056602478027, "learning_rate": 1.863230005080853e-05, "loss": 0.0354, "step": 5033 }, { "epoch": 4.8403846153846155, "grad_norm": 2.9623334407806396, "learning_rate": 1.8631671153564187e-05, "loss": 0.0523, "step": 5034 }, { "epoch": 4.841346153846154, "grad_norm": 3.5694663524627686, "learning_rate": 1.8631042122381644e-05, "loss": 0.0395, "step": 5035 }, { "epoch": 4.842307692307692, "grad_norm": 3.7444968223571777, "learning_rate": 1.863041295727066e-05, "loss": 0.0626, "step": 5036 }, { "epoch": 4.843269230769231, "grad_norm": 3.267496347427368, "learning_rate": 1.8629783658240995e-05, "loss": 0.0448, "step": 5037 }, { "epoch": 4.844230769230769, "grad_norm": 5.557301998138428, "learning_rate": 1.8629154225302418e-05, "loss": 0.1464, "step": 5038 }, { "epoch": 4.845192307692308, "grad_norm": 1.1912027597427368, "learning_rate": 1.862852465846469e-05, "loss": 0.0119, "step": 5039 }, { "epoch": 4.846153846153846, "grad_norm": 3.9493398666381836, "learning_rate": 1.8627894957737584e-05, "loss": 0.0999, "step": 5040 }, { "epoch": 4.8471153846153845, "grad_norm": 3.1202080249786377, "learning_rate": 1.8627265123130877e-05, "loss": 0.0386, "step": 5041 }, { "epoch": 4.848076923076923, "grad_norm": 7.789299011230469, "learning_rate": 1.8626635154654326e-05, "loss": 0.1695, "step": 5042 }, { "epoch": 4.849038461538462, "grad_norm": 2.9854393005371094, "learning_rate": 1.8626005052317723e-05, "loss": 0.0658, "step": 5043 }, { "epoch": 4.85, "grad_norm": 3.7870733737945557, "learning_rate": 1.862537481613084e-05, "loss": 0.0931, "step": 5044 }, { "epoch": 4.850961538461538, "grad_norm": 3.1593220233917236, "learning_rate": 1.8624744446103453e-05, "loss": 0.0276, "step": 5045 }, { "epoch": 4.851923076923077, "grad_norm": 4.87758731842041, "learning_rate": 1.8624113942245345e-05, "loss": 0.1047, "step": 5046 }, { "epoch": 4.852884615384616, "grad_norm": 3.9320361614227295, "learning_rate": 1.8623483304566298e-05, "loss": 0.069, "step": 5047 }, { "epoch": 4.8538461538461535, "grad_norm": 3.2464208602905273, "learning_rate": 1.8622852533076102e-05, "loss": 0.0509, "step": 5048 }, { "epoch": 4.854807692307692, "grad_norm": 4.417942047119141, "learning_rate": 1.862222162778454e-05, "loss": 0.0573, "step": 5049 }, { "epoch": 4.855769230769231, "grad_norm": 3.9153642654418945, "learning_rate": 1.8621590588701408e-05, "loss": 0.1223, "step": 5050 }, { "epoch": 4.8567307692307695, "grad_norm": 4.653913497924805, "learning_rate": 1.862095941583649e-05, "loss": 0.0628, "step": 5051 }, { "epoch": 4.857692307692307, "grad_norm": 4.363727569580078, "learning_rate": 1.862032810919959e-05, "loss": 0.0623, "step": 5052 }, { "epoch": 4.858653846153846, "grad_norm": 1.693716287612915, "learning_rate": 1.8619696668800494e-05, "loss": 0.0195, "step": 5053 }, { "epoch": 4.859615384615385, "grad_norm": 6.6782450675964355, "learning_rate": 1.8619065094649003e-05, "loss": 0.0962, "step": 5054 }, { "epoch": 4.860576923076923, "grad_norm": 3.189131259918213, "learning_rate": 1.8618433386754923e-05, "loss": 0.0713, "step": 5055 }, { "epoch": 4.861538461538462, "grad_norm": 3.336411237716675, "learning_rate": 1.861780154512805e-05, "loss": 0.0534, "step": 5056 }, { "epoch": 4.8625, "grad_norm": 2.7606618404388428, "learning_rate": 1.861716956977819e-05, "loss": 0.0304, "step": 5057 }, { "epoch": 4.8634615384615385, "grad_norm": 3.534590482711792, "learning_rate": 1.8616537460715143e-05, "loss": 0.0697, "step": 5058 }, { "epoch": 4.864423076923077, "grad_norm": 3.7355031967163086, "learning_rate": 1.8615905217948732e-05, "loss": 0.0921, "step": 5059 }, { "epoch": 4.865384615384615, "grad_norm": 3.921323537826538, "learning_rate": 1.8615272841488755e-05, "loss": 0.0709, "step": 5060 }, { "epoch": 4.866346153846154, "grad_norm": 5.630016326904297, "learning_rate": 1.861464033134503e-05, "loss": 0.1292, "step": 5061 }, { "epoch": 4.867307692307692, "grad_norm": 4.256146430969238, "learning_rate": 1.8614007687527374e-05, "loss": 0.0566, "step": 5062 }, { "epoch": 4.868269230769231, "grad_norm": 3.3435475826263428, "learning_rate": 1.8613374910045598e-05, "loss": 0.0506, "step": 5063 }, { "epoch": 4.86923076923077, "grad_norm": 4.401789665222168, "learning_rate": 1.8612741998909523e-05, "loss": 0.0946, "step": 5064 }, { "epoch": 4.8701923076923075, "grad_norm": 6.053213596343994, "learning_rate": 1.861210895412897e-05, "loss": 0.0742, "step": 5065 }, { "epoch": 4.871153846153846, "grad_norm": 1.0988041162490845, "learning_rate": 1.861147577571376e-05, "loss": 0.01, "step": 5066 }, { "epoch": 4.872115384615385, "grad_norm": 3.7388105392456055, "learning_rate": 1.8610842463673723e-05, "loss": 0.0525, "step": 5067 }, { "epoch": 4.873076923076923, "grad_norm": 4.288512229919434, "learning_rate": 1.8610209018018686e-05, "loss": 0.0624, "step": 5068 }, { "epoch": 4.874038461538461, "grad_norm": 6.595183849334717, "learning_rate": 1.8609575438758473e-05, "loss": 0.2551, "step": 5069 }, { "epoch": 4.875, "grad_norm": 5.418453693389893, "learning_rate": 1.860894172590292e-05, "loss": 0.1085, "step": 5070 }, { "epoch": 4.875961538461539, "grad_norm": 2.526057481765747, "learning_rate": 1.8608307879461853e-05, "loss": 0.0317, "step": 5071 }, { "epoch": 4.876923076923077, "grad_norm": 2.427637815475464, "learning_rate": 1.8607673899445118e-05, "loss": 0.0331, "step": 5072 }, { "epoch": 4.877884615384615, "grad_norm": 6.8600382804870605, "learning_rate": 1.8607039785862545e-05, "loss": 0.1291, "step": 5073 }, { "epoch": 4.878846153846154, "grad_norm": 5.0348381996154785, "learning_rate": 1.860640553872398e-05, "loss": 0.0898, "step": 5074 }, { "epoch": 4.8798076923076925, "grad_norm": 3.870743751525879, "learning_rate": 1.8605771158039253e-05, "loss": 0.0935, "step": 5075 }, { "epoch": 4.88076923076923, "grad_norm": 5.335267543792725, "learning_rate": 1.860513664381822e-05, "loss": 0.1138, "step": 5076 }, { "epoch": 4.881730769230769, "grad_norm": 5.094538688659668, "learning_rate": 1.860450199607072e-05, "loss": 0.1249, "step": 5077 }, { "epoch": 4.882692307692308, "grad_norm": 3.4834702014923096, "learning_rate": 1.8603867214806606e-05, "loss": 0.0584, "step": 5078 }, { "epoch": 4.883653846153846, "grad_norm": 6.022697925567627, "learning_rate": 1.8603232300035722e-05, "loss": 0.0969, "step": 5079 }, { "epoch": 4.884615384615385, "grad_norm": 4.902624130249023, "learning_rate": 1.8602597251767925e-05, "loss": 0.0804, "step": 5080 }, { "epoch": 4.885576923076923, "grad_norm": 5.40745210647583, "learning_rate": 1.8601962070013065e-05, "loss": 0.1965, "step": 5081 }, { "epoch": 4.8865384615384615, "grad_norm": 5.754362106323242, "learning_rate": 1.8601326754781002e-05, "loss": 0.0575, "step": 5082 }, { "epoch": 4.8875, "grad_norm": 3.6265838146209717, "learning_rate": 1.860069130608159e-05, "loss": 0.056, "step": 5083 }, { "epoch": 4.888461538461538, "grad_norm": 5.563102722167969, "learning_rate": 1.8600055723924696e-05, "loss": 0.0856, "step": 5084 }, { "epoch": 4.889423076923077, "grad_norm": 4.29029655456543, "learning_rate": 1.8599420008320176e-05, "loss": 0.1188, "step": 5085 }, { "epoch": 4.890384615384615, "grad_norm": 5.318110942840576, "learning_rate": 1.8598784159277893e-05, "loss": 0.0992, "step": 5086 }, { "epoch": 4.891346153846154, "grad_norm": 2.7160868644714355, "learning_rate": 1.859814817680772e-05, "loss": 0.0777, "step": 5087 }, { "epoch": 4.892307692307693, "grad_norm": 3.921815872192383, "learning_rate": 1.8597512060919523e-05, "loss": 0.0972, "step": 5088 }, { "epoch": 4.8932692307692305, "grad_norm": 1.4284287691116333, "learning_rate": 1.859687581162317e-05, "loss": 0.0146, "step": 5089 }, { "epoch": 4.894230769230769, "grad_norm": 2.7978365421295166, "learning_rate": 1.8596239428928536e-05, "loss": 0.0323, "step": 5090 }, { "epoch": 4.895192307692308, "grad_norm": 2.81632137298584, "learning_rate": 1.85956029128455e-05, "loss": 0.0411, "step": 5091 }, { "epoch": 4.8961538461538465, "grad_norm": 2.955803394317627, "learning_rate": 1.859496626338393e-05, "loss": 0.0514, "step": 5092 }, { "epoch": 4.897115384615384, "grad_norm": 2.6737430095672607, "learning_rate": 1.8594329480553712e-05, "loss": 0.0438, "step": 5093 }, { "epoch": 4.898076923076923, "grad_norm": 3.557391881942749, "learning_rate": 1.8593692564364723e-05, "loss": 0.0414, "step": 5094 }, { "epoch": 4.899038461538462, "grad_norm": 5.684519290924072, "learning_rate": 1.859305551482685e-05, "loss": 0.124, "step": 5095 }, { "epoch": 4.9, "grad_norm": 1.786947250366211, "learning_rate": 1.8592418331949973e-05, "loss": 0.016, "step": 5096 }, { "epoch": 4.900961538461538, "grad_norm": 3.8613908290863037, "learning_rate": 1.8591781015743983e-05, "loss": 0.0751, "step": 5097 }, { "epoch": 4.901923076923077, "grad_norm": 3.665626287460327, "learning_rate": 1.859114356621877e-05, "loss": 0.0625, "step": 5098 }, { "epoch": 4.9028846153846155, "grad_norm": 5.263118743896484, "learning_rate": 1.859050598338422e-05, "loss": 0.1282, "step": 5099 }, { "epoch": 4.903846153846154, "grad_norm": 4.884466648101807, "learning_rate": 1.858986826725023e-05, "loss": 0.1626, "step": 5100 }, { "epoch": 4.904807692307692, "grad_norm": 5.051808834075928, "learning_rate": 1.85892304178267e-05, "loss": 0.0677, "step": 5101 }, { "epoch": 4.905769230769231, "grad_norm": 4.804536819458008, "learning_rate": 1.858859243512352e-05, "loss": 0.0629, "step": 5102 }, { "epoch": 4.906730769230769, "grad_norm": 2.6877546310424805, "learning_rate": 1.8587954319150593e-05, "loss": 0.0705, "step": 5103 }, { "epoch": 4.907692307692308, "grad_norm": 3.5125908851623535, "learning_rate": 1.858731606991782e-05, "loss": 0.094, "step": 5104 }, { "epoch": 4.908653846153846, "grad_norm": 3.104090690612793, "learning_rate": 1.8586677687435104e-05, "loss": 0.0468, "step": 5105 }, { "epoch": 4.9096153846153845, "grad_norm": 4.798995494842529, "learning_rate": 1.8586039171712353e-05, "loss": 0.1013, "step": 5106 }, { "epoch": 4.910576923076923, "grad_norm": 4.255552768707275, "learning_rate": 1.8585400522759475e-05, "loss": 0.0809, "step": 5107 }, { "epoch": 4.911538461538462, "grad_norm": 7.217440128326416, "learning_rate": 1.8584761740586376e-05, "loss": 0.13, "step": 5108 }, { "epoch": 4.9125, "grad_norm": 2.9338111877441406, "learning_rate": 1.8584122825202974e-05, "loss": 0.0379, "step": 5109 }, { "epoch": 4.913461538461538, "grad_norm": 2.9627878665924072, "learning_rate": 1.858348377661918e-05, "loss": 0.0572, "step": 5110 }, { "epoch": 4.914423076923077, "grad_norm": 4.409082889556885, "learning_rate": 1.8582844594844904e-05, "loss": 0.0817, "step": 5111 }, { "epoch": 4.915384615384616, "grad_norm": 5.2390055656433105, "learning_rate": 1.8582205279890074e-05, "loss": 0.0917, "step": 5112 }, { "epoch": 4.9163461538461535, "grad_norm": 3.9611873626708984, "learning_rate": 1.858156583176461e-05, "loss": 0.0439, "step": 5113 }, { "epoch": 4.917307692307692, "grad_norm": 3.322413921356201, "learning_rate": 1.8580926250478425e-05, "loss": 0.0314, "step": 5114 }, { "epoch": 4.918269230769231, "grad_norm": 2.246777057647705, "learning_rate": 1.858028653604145e-05, "loss": 0.0234, "step": 5115 }, { "epoch": 4.9192307692307695, "grad_norm": 3.548863649368286, "learning_rate": 1.8579646688463616e-05, "loss": 0.0404, "step": 5116 }, { "epoch": 4.920192307692307, "grad_norm": 4.672939300537109, "learning_rate": 1.8579006707754845e-05, "loss": 0.089, "step": 5117 }, { "epoch": 4.921153846153846, "grad_norm": 4.8156208992004395, "learning_rate": 1.8578366593925064e-05, "loss": 0.1684, "step": 5118 }, { "epoch": 4.922115384615385, "grad_norm": 4.206363201141357, "learning_rate": 1.8577726346984216e-05, "loss": 0.0552, "step": 5119 }, { "epoch": 4.923076923076923, "grad_norm": 3.874181032180786, "learning_rate": 1.857708596694223e-05, "loss": 0.0724, "step": 5120 }, { "epoch": 4.924038461538462, "grad_norm": 4.7837066650390625, "learning_rate": 1.857644545380904e-05, "loss": 0.1204, "step": 5121 }, { "epoch": 4.925, "grad_norm": 5.810371398925781, "learning_rate": 1.857580480759459e-05, "loss": 0.2039, "step": 5122 }, { "epoch": 4.9259615384615385, "grad_norm": 5.375856399536133, "learning_rate": 1.8575164028308816e-05, "loss": 0.1144, "step": 5123 }, { "epoch": 4.926923076923077, "grad_norm": 3.889460325241089, "learning_rate": 1.857452311596167e-05, "loss": 0.1043, "step": 5124 }, { "epoch": 4.927884615384615, "grad_norm": 1.905392050743103, "learning_rate": 1.857388207056309e-05, "loss": 0.0217, "step": 5125 }, { "epoch": 4.928846153846154, "grad_norm": 5.5413923263549805, "learning_rate": 1.8573240892123018e-05, "loss": 0.1175, "step": 5126 }, { "epoch": 4.929807692307692, "grad_norm": 4.470119953155518, "learning_rate": 1.8572599580651415e-05, "loss": 0.1301, "step": 5127 }, { "epoch": 4.930769230769231, "grad_norm": 7.913911819458008, "learning_rate": 1.8571958136158225e-05, "loss": 0.1491, "step": 5128 }, { "epoch": 4.93173076923077, "grad_norm": 2.9961681365966797, "learning_rate": 1.8571316558653404e-05, "loss": 0.0595, "step": 5129 }, { "epoch": 4.9326923076923075, "grad_norm": 3.5949597358703613, "learning_rate": 1.8570674848146904e-05, "loss": 0.0395, "step": 5130 }, { "epoch": 4.933653846153846, "grad_norm": 2.952425956726074, "learning_rate": 1.857003300464869e-05, "loss": 0.0268, "step": 5131 }, { "epoch": 4.934615384615385, "grad_norm": 3.5018258094787598, "learning_rate": 1.8569391028168712e-05, "loss": 0.0449, "step": 5132 }, { "epoch": 4.935576923076923, "grad_norm": 7.246274471282959, "learning_rate": 1.8568748918716938e-05, "loss": 0.2024, "step": 5133 }, { "epoch": 4.936538461538461, "grad_norm": 5.449563503265381, "learning_rate": 1.8568106676303332e-05, "loss": 0.2135, "step": 5134 }, { "epoch": 4.9375, "grad_norm": 7.144450664520264, "learning_rate": 1.8567464300937853e-05, "loss": 0.1791, "step": 5135 }, { "epoch": 4.938461538461539, "grad_norm": 3.69325590133667, "learning_rate": 1.8566821792630475e-05, "loss": 0.0661, "step": 5136 }, { "epoch": 4.939423076923077, "grad_norm": 4.031312465667725, "learning_rate": 1.856617915139117e-05, "loss": 0.0695, "step": 5137 }, { "epoch": 4.940384615384615, "grad_norm": 3.8561153411865234, "learning_rate": 1.85655363772299e-05, "loss": 0.0672, "step": 5138 }, { "epoch": 4.941346153846154, "grad_norm": 6.381619930267334, "learning_rate": 1.8564893470156645e-05, "loss": 0.1456, "step": 5139 }, { "epoch": 4.9423076923076925, "grad_norm": 5.80166482925415, "learning_rate": 1.8564250430181387e-05, "loss": 0.2283, "step": 5140 }, { "epoch": 4.94326923076923, "grad_norm": 3.648017644882202, "learning_rate": 1.8563607257314093e-05, "loss": 0.0941, "step": 5141 }, { "epoch": 4.944230769230769, "grad_norm": 4.68932580947876, "learning_rate": 1.8562963951564752e-05, "loss": 0.0997, "step": 5142 }, { "epoch": 4.945192307692308, "grad_norm": 2.5776941776275635, "learning_rate": 1.856232051294334e-05, "loss": 0.0657, "step": 5143 }, { "epoch": 4.946153846153846, "grad_norm": 3.4262478351593018, "learning_rate": 1.8561676941459845e-05, "loss": 0.0519, "step": 5144 }, { "epoch": 4.947115384615385, "grad_norm": 2.5398435592651367, "learning_rate": 1.856103323712425e-05, "loss": 0.0534, "step": 5145 }, { "epoch": 4.948076923076923, "grad_norm": 3.7224252223968506, "learning_rate": 1.8560389399946544e-05, "loss": 0.1112, "step": 5146 }, { "epoch": 4.9490384615384615, "grad_norm": 2.068758964538574, "learning_rate": 1.8559745429936723e-05, "loss": 0.0256, "step": 5147 }, { "epoch": 4.95, "grad_norm": 1.979899525642395, "learning_rate": 1.8559101327104774e-05, "loss": 0.0255, "step": 5148 }, { "epoch": 4.950961538461538, "grad_norm": 4.697188854217529, "learning_rate": 1.8558457091460694e-05, "loss": 0.1782, "step": 5149 }, { "epoch": 4.951923076923077, "grad_norm": 2.530045509338379, "learning_rate": 1.8557812723014476e-05, "loss": 0.0454, "step": 5150 }, { "epoch": 4.952884615384615, "grad_norm": 3.7716286182403564, "learning_rate": 1.8557168221776124e-05, "loss": 0.0483, "step": 5151 }, { "epoch": 4.953846153846154, "grad_norm": 2.5970587730407715, "learning_rate": 1.8556523587755637e-05, "loss": 0.0458, "step": 5152 }, { "epoch": 4.954807692307693, "grad_norm": 4.803806781768799, "learning_rate": 1.8555878820963014e-05, "loss": 0.0959, "step": 5153 }, { "epoch": 4.9557692307692305, "grad_norm": 3.5656418800354004, "learning_rate": 1.8555233921408266e-05, "loss": 0.0622, "step": 5154 }, { "epoch": 4.956730769230769, "grad_norm": 5.219088554382324, "learning_rate": 1.8554588889101395e-05, "loss": 0.0892, "step": 5155 }, { "epoch": 4.957692307692308, "grad_norm": 3.5213840007781982, "learning_rate": 1.855394372405241e-05, "loss": 0.0858, "step": 5156 }, { "epoch": 4.9586538461538465, "grad_norm": 4.008993148803711, "learning_rate": 1.8553298426271328e-05, "loss": 0.092, "step": 5157 }, { "epoch": 4.959615384615384, "grad_norm": 2.7579569816589355, "learning_rate": 1.8552652995768153e-05, "loss": 0.0831, "step": 5158 }, { "epoch": 4.960576923076923, "grad_norm": 2.9963440895080566, "learning_rate": 1.855200743255291e-05, "loss": 0.0513, "step": 5159 }, { "epoch": 4.961538461538462, "grad_norm": 2.7718393802642822, "learning_rate": 1.855136173663561e-05, "loss": 0.0323, "step": 5160 }, { "epoch": 4.9625, "grad_norm": 2.499941110610962, "learning_rate": 1.8550715908026275e-05, "loss": 0.0562, "step": 5161 }, { "epoch": 4.963461538461538, "grad_norm": 2.8157851696014404, "learning_rate": 1.855006994673492e-05, "loss": 0.0701, "step": 5162 }, { "epoch": 4.964423076923077, "grad_norm": 4.259493350982666, "learning_rate": 1.8549423852771577e-05, "loss": 0.11, "step": 5163 }, { "epoch": 4.9653846153846155, "grad_norm": 3.0790791511535645, "learning_rate": 1.8548777626146272e-05, "loss": 0.0371, "step": 5164 }, { "epoch": 4.966346153846154, "grad_norm": 7.190714359283447, "learning_rate": 1.8548131266869024e-05, "loss": 0.1825, "step": 5165 }, { "epoch": 4.967307692307692, "grad_norm": 5.1622209548950195, "learning_rate": 1.8547484774949865e-05, "loss": 0.1517, "step": 5166 }, { "epoch": 4.968269230769231, "grad_norm": 2.022552728652954, "learning_rate": 1.8546838150398835e-05, "loss": 0.0171, "step": 5167 }, { "epoch": 4.969230769230769, "grad_norm": 3.987231969833374, "learning_rate": 1.854619139322596e-05, "loss": 0.0753, "step": 5168 }, { "epoch": 4.970192307692308, "grad_norm": 2.8706588745117188, "learning_rate": 1.8545544503441274e-05, "loss": 0.0351, "step": 5169 }, { "epoch": 4.971153846153846, "grad_norm": 3.7107949256896973, "learning_rate": 1.854489748105482e-05, "loss": 0.0595, "step": 5170 }, { "epoch": 4.9721153846153845, "grad_norm": 5.517426013946533, "learning_rate": 1.8544250326076637e-05, "loss": 0.1732, "step": 5171 }, { "epoch": 4.973076923076923, "grad_norm": 3.059478998184204, "learning_rate": 1.8543603038516765e-05, "loss": 0.085, "step": 5172 }, { "epoch": 4.974038461538462, "grad_norm": 4.702993392944336, "learning_rate": 1.8542955618385248e-05, "loss": 0.0863, "step": 5173 }, { "epoch": 4.975, "grad_norm": 5.1050310134887695, "learning_rate": 1.8542308065692137e-05, "loss": 0.1172, "step": 5174 }, { "epoch": 4.975961538461538, "grad_norm": 4.710242748260498, "learning_rate": 1.854166038044747e-05, "loss": 0.0647, "step": 5175 }, { "epoch": 4.976923076923077, "grad_norm": 4.524159908294678, "learning_rate": 1.8541012562661307e-05, "loss": 0.06, "step": 5176 }, { "epoch": 4.977884615384616, "grad_norm": 3.232279062271118, "learning_rate": 1.8540364612343697e-05, "loss": 0.1784, "step": 5177 }, { "epoch": 4.9788461538461535, "grad_norm": 2.9781153202056885, "learning_rate": 1.8539716529504695e-05, "loss": 0.0644, "step": 5178 }, { "epoch": 4.979807692307692, "grad_norm": 3.733879566192627, "learning_rate": 1.8539068314154355e-05, "loss": 0.1481, "step": 5179 }, { "epoch": 4.980769230769231, "grad_norm": 4.232190132141113, "learning_rate": 1.8538419966302736e-05, "loss": 0.0492, "step": 5180 }, { "epoch": 4.9817307692307695, "grad_norm": 4.389691352844238, "learning_rate": 1.85377714859599e-05, "loss": 0.0972, "step": 5181 }, { "epoch": 4.982692307692307, "grad_norm": 3.686957836151123, "learning_rate": 1.8537122873135907e-05, "loss": 0.0986, "step": 5182 }, { "epoch": 4.983653846153846, "grad_norm": 4.172799110412598, "learning_rate": 1.8536474127840823e-05, "loss": 0.1006, "step": 5183 }, { "epoch": 4.984615384615385, "grad_norm": 3.6867146492004395, "learning_rate": 1.8535825250084714e-05, "loss": 0.1291, "step": 5184 }, { "epoch": 4.985576923076923, "grad_norm": 4.272925853729248, "learning_rate": 1.8535176239877653e-05, "loss": 0.119, "step": 5185 }, { "epoch": 4.986538461538462, "grad_norm": 2.3468878269195557, "learning_rate": 1.8534527097229707e-05, "loss": 0.0314, "step": 5186 }, { "epoch": 4.9875, "grad_norm": 4.150573253631592, "learning_rate": 1.8533877822150944e-05, "loss": 0.0987, "step": 5187 }, { "epoch": 4.9884615384615385, "grad_norm": 5.477178573608398, "learning_rate": 1.8533228414651448e-05, "loss": 0.1782, "step": 5188 }, { "epoch": 4.989423076923077, "grad_norm": 5.530093193054199, "learning_rate": 1.853257887474129e-05, "loss": 0.1728, "step": 5189 }, { "epoch": 4.990384615384615, "grad_norm": 5.078675270080566, "learning_rate": 1.853192920243055e-05, "loss": 0.1604, "step": 5190 }, { "epoch": 4.991346153846154, "grad_norm": 3.1724257469177246, "learning_rate": 1.8531279397729314e-05, "loss": 0.039, "step": 5191 }, { "epoch": 4.992307692307692, "grad_norm": 5.956324100494385, "learning_rate": 1.8530629460647658e-05, "loss": 0.0969, "step": 5192 }, { "epoch": 4.993269230769231, "grad_norm": 3.4034581184387207, "learning_rate": 1.8529979391195668e-05, "loss": 0.0985, "step": 5193 }, { "epoch": 4.99423076923077, "grad_norm": 3.018315315246582, "learning_rate": 1.852932918938344e-05, "loss": 0.0582, "step": 5194 }, { "epoch": 4.9951923076923075, "grad_norm": 1.8436530828475952, "learning_rate": 1.852867885522105e-05, "loss": 0.0259, "step": 5195 }, { "epoch": 4.996153846153846, "grad_norm": 6.667575359344482, "learning_rate": 1.8528028388718598e-05, "loss": 0.1205, "step": 5196 }, { "epoch": 4.997115384615385, "grad_norm": 4.422524929046631, "learning_rate": 1.852737778988617e-05, "loss": 0.1243, "step": 5197 }, { "epoch": 4.998076923076923, "grad_norm": 4.364805221557617, "learning_rate": 1.852672705873387e-05, "loss": 0.0529, "step": 5198 }, { "epoch": 4.999038461538461, "grad_norm": 3.4052393436431885, "learning_rate": 1.8526076195271794e-05, "loss": 0.0939, "step": 5199 }, { "epoch": 5.0, "grad_norm": 5.651750564575195, "learning_rate": 1.8525425199510038e-05, "loss": 0.0654, "step": 5200 }, { "epoch": 5.000961538461539, "grad_norm": 3.3657801151275635, "learning_rate": 1.8524774071458703e-05, "loss": 0.0533, "step": 5201 }, { "epoch": 5.001923076923077, "grad_norm": 6.86982536315918, "learning_rate": 1.8524122811127893e-05, "loss": 0.0952, "step": 5202 }, { "epoch": 5.002884615384615, "grad_norm": 1.5545696020126343, "learning_rate": 1.8523471418527718e-05, "loss": 0.0197, "step": 5203 }, { "epoch": 5.003846153846154, "grad_norm": 5.322366237640381, "learning_rate": 1.852281989366828e-05, "loss": 0.1139, "step": 5204 }, { "epoch": 5.0048076923076925, "grad_norm": 4.228423595428467, "learning_rate": 1.8522168236559693e-05, "loss": 0.1222, "step": 5205 }, { "epoch": 5.005769230769231, "grad_norm": 3.449000120162964, "learning_rate": 1.852151644721207e-05, "loss": 0.0441, "step": 5206 }, { "epoch": 5.006730769230769, "grad_norm": 2.5760536193847656, "learning_rate": 1.8520864525635517e-05, "loss": 0.0316, "step": 5207 }, { "epoch": 5.007692307692308, "grad_norm": 4.445796966552734, "learning_rate": 1.8520212471840156e-05, "loss": 0.0713, "step": 5208 }, { "epoch": 5.008653846153846, "grad_norm": 3.4814584255218506, "learning_rate": 1.8519560285836108e-05, "loss": 0.061, "step": 5209 }, { "epoch": 5.009615384615385, "grad_norm": 4.322628498077393, "learning_rate": 1.8518907967633487e-05, "loss": 0.0818, "step": 5210 }, { "epoch": 5.010576923076923, "grad_norm": 3.900250196456909, "learning_rate": 1.8518255517242413e-05, "loss": 0.065, "step": 5211 }, { "epoch": 5.0115384615384615, "grad_norm": 2.6810457706451416, "learning_rate": 1.851760293467302e-05, "loss": 0.0487, "step": 5212 }, { "epoch": 5.0125, "grad_norm": 3.4668526649475098, "learning_rate": 1.8516950219935424e-05, "loss": 0.0683, "step": 5213 }, { "epoch": 5.013461538461539, "grad_norm": 3.1073362827301025, "learning_rate": 1.8516297373039764e-05, "loss": 0.0425, "step": 5214 }, { "epoch": 5.014423076923077, "grad_norm": 4.062751293182373, "learning_rate": 1.8515644393996155e-05, "loss": 0.0854, "step": 5215 }, { "epoch": 5.015384615384615, "grad_norm": 1.9014655351638794, "learning_rate": 1.8514991282814743e-05, "loss": 0.0294, "step": 5216 }, { "epoch": 5.016346153846154, "grad_norm": 2.591399908065796, "learning_rate": 1.8514338039505654e-05, "loss": 0.0366, "step": 5217 }, { "epoch": 5.017307692307693, "grad_norm": 0.692717432975769, "learning_rate": 1.8513684664079033e-05, "loss": 0.0114, "step": 5218 }, { "epoch": 5.0182692307692305, "grad_norm": 7.452322006225586, "learning_rate": 1.851303115654501e-05, "loss": 0.1182, "step": 5219 }, { "epoch": 5.019230769230769, "grad_norm": 2.001591205596924, "learning_rate": 1.851237751691373e-05, "loss": 0.0319, "step": 5220 }, { "epoch": 5.020192307692308, "grad_norm": 2.197873115539551, "learning_rate": 1.8511723745195332e-05, "loss": 0.0331, "step": 5221 }, { "epoch": 5.0211538461538465, "grad_norm": 2.3138580322265625, "learning_rate": 1.851106984139997e-05, "loss": 0.0552, "step": 5222 }, { "epoch": 5.022115384615384, "grad_norm": 1.8329148292541504, "learning_rate": 1.8510415805537775e-05, "loss": 0.038, "step": 5223 }, { "epoch": 5.023076923076923, "grad_norm": 3.95051646232605, "learning_rate": 1.850976163761891e-05, "loss": 0.026, "step": 5224 }, { "epoch": 5.024038461538462, "grad_norm": 1.8972331285476685, "learning_rate": 1.850910733765352e-05, "loss": 0.0266, "step": 5225 }, { "epoch": 5.025, "grad_norm": 2.1867785453796387, "learning_rate": 1.8508452905651757e-05, "loss": 0.0377, "step": 5226 }, { "epoch": 5.025961538461538, "grad_norm": 4.357299327850342, "learning_rate": 1.8507798341623777e-05, "loss": 0.0908, "step": 5227 }, { "epoch": 5.026923076923077, "grad_norm": 1.708036184310913, "learning_rate": 1.8507143645579736e-05, "loss": 0.0136, "step": 5228 }, { "epoch": 5.0278846153846155, "grad_norm": 4.329788684844971, "learning_rate": 1.8506488817529794e-05, "loss": 0.0516, "step": 5229 }, { "epoch": 5.028846153846154, "grad_norm": 3.812913656234741, "learning_rate": 1.8505833857484116e-05, "loss": 0.1183, "step": 5230 }, { "epoch": 5.029807692307692, "grad_norm": 3.2281875610351562, "learning_rate": 1.8505178765452853e-05, "loss": 0.054, "step": 5231 }, { "epoch": 5.030769230769231, "grad_norm": 2.816560745239258, "learning_rate": 1.8504523541446186e-05, "loss": 0.0708, "step": 5232 }, { "epoch": 5.031730769230769, "grad_norm": 0.9428911209106445, "learning_rate": 1.850386818547427e-05, "loss": 0.0113, "step": 5233 }, { "epoch": 5.032692307692308, "grad_norm": 4.2535271644592285, "learning_rate": 1.8503212697547277e-05, "loss": 0.1886, "step": 5234 }, { "epoch": 5.033653846153846, "grad_norm": 1.5618268251419067, "learning_rate": 1.8502557077675378e-05, "loss": 0.0132, "step": 5235 }, { "epoch": 5.0346153846153845, "grad_norm": 1.7430983781814575, "learning_rate": 1.8501901325868753e-05, "loss": 0.0355, "step": 5236 }, { "epoch": 5.035576923076923, "grad_norm": 2.1228561401367188, "learning_rate": 1.850124544213757e-05, "loss": 0.0155, "step": 5237 }, { "epoch": 5.036538461538462, "grad_norm": 4.60170841217041, "learning_rate": 1.850058942649201e-05, "loss": 0.0807, "step": 5238 }, { "epoch": 5.0375, "grad_norm": 3.7204251289367676, "learning_rate": 1.8499933278942247e-05, "loss": 0.0422, "step": 5239 }, { "epoch": 5.038461538461538, "grad_norm": 4.1546406745910645, "learning_rate": 1.8499276999498473e-05, "loss": 0.0806, "step": 5240 }, { "epoch": 5.039423076923077, "grad_norm": 1.4337483644485474, "learning_rate": 1.849862058817086e-05, "loss": 0.0145, "step": 5241 }, { "epoch": 5.040384615384616, "grad_norm": 1.6212257146835327, "learning_rate": 1.8497964044969602e-05, "loss": 0.0171, "step": 5242 }, { "epoch": 5.0413461538461535, "grad_norm": 2.9624345302581787, "learning_rate": 1.849730736990488e-05, "loss": 0.0659, "step": 5243 }, { "epoch": 5.042307692307692, "grad_norm": 1.7823128700256348, "learning_rate": 1.8496650562986888e-05, "loss": 0.021, "step": 5244 }, { "epoch": 5.043269230769231, "grad_norm": 1.235100507736206, "learning_rate": 1.8495993624225814e-05, "loss": 0.0094, "step": 5245 }, { "epoch": 5.0442307692307695, "grad_norm": 1.8881675004959106, "learning_rate": 1.849533655363186e-05, "loss": 0.023, "step": 5246 }, { "epoch": 5.045192307692307, "grad_norm": 3.5201354026794434, "learning_rate": 1.8494679351215212e-05, "loss": 0.0317, "step": 5247 }, { "epoch": 5.046153846153846, "grad_norm": 3.443559408187866, "learning_rate": 1.8494022016986073e-05, "loss": 0.0467, "step": 5248 }, { "epoch": 5.047115384615385, "grad_norm": 4.739335536956787, "learning_rate": 1.8493364550954644e-05, "loss": 0.0672, "step": 5249 }, { "epoch": 5.048076923076923, "grad_norm": 0.48213130235671997, "learning_rate": 1.849270695313112e-05, "loss": 0.0041, "step": 5250 }, { "epoch": 5.049038461538461, "grad_norm": 1.8467655181884766, "learning_rate": 1.8492049223525716e-05, "loss": 0.0153, "step": 5251 }, { "epoch": 5.05, "grad_norm": 0.697226345539093, "learning_rate": 1.8491391362148627e-05, "loss": 0.0045, "step": 5252 }, { "epoch": 5.0509615384615385, "grad_norm": 1.798052430152893, "learning_rate": 1.849073336901007e-05, "loss": 0.0142, "step": 5253 }, { "epoch": 5.051923076923077, "grad_norm": 3.661832094192505, "learning_rate": 1.8490075244120248e-05, "loss": 0.0636, "step": 5254 }, { "epoch": 5.052884615384615, "grad_norm": 5.055478572845459, "learning_rate": 1.8489416987489377e-05, "loss": 0.0718, "step": 5255 }, { "epoch": 5.053846153846154, "grad_norm": 0.467372328042984, "learning_rate": 1.848875859912767e-05, "loss": 0.005, "step": 5256 }, { "epoch": 5.054807692307692, "grad_norm": 6.736345291137695, "learning_rate": 1.8488100079045345e-05, "loss": 0.0371, "step": 5257 }, { "epoch": 5.055769230769231, "grad_norm": 2.7443244457244873, "learning_rate": 1.8487441427252618e-05, "loss": 0.0274, "step": 5258 }, { "epoch": 5.056730769230769, "grad_norm": 3.4854323863983154, "learning_rate": 1.8486782643759712e-05, "loss": 0.1643, "step": 5259 }, { "epoch": 5.0576923076923075, "grad_norm": 1.2121565341949463, "learning_rate": 1.848612372857685e-05, "loss": 0.0109, "step": 5260 }, { "epoch": 5.058653846153846, "grad_norm": 2.5604236125946045, "learning_rate": 1.848546468171425e-05, "loss": 0.0228, "step": 5261 }, { "epoch": 5.059615384615385, "grad_norm": 1.5125670433044434, "learning_rate": 1.848480550318214e-05, "loss": 0.016, "step": 5262 }, { "epoch": 5.060576923076923, "grad_norm": 2.394535541534424, "learning_rate": 1.848414619299076e-05, "loss": 0.0336, "step": 5263 }, { "epoch": 5.061538461538461, "grad_norm": 3.095817804336548, "learning_rate": 1.8483486751150327e-05, "loss": 0.0226, "step": 5264 }, { "epoch": 5.0625, "grad_norm": 2.190406560897827, "learning_rate": 1.8482827177671076e-05, "loss": 0.0358, "step": 5265 }, { "epoch": 5.063461538461539, "grad_norm": 3.3550631999969482, "learning_rate": 1.8482167472563243e-05, "loss": 0.0328, "step": 5266 }, { "epoch": 5.064423076923077, "grad_norm": 5.314932823181152, "learning_rate": 1.848150763583707e-05, "loss": 0.1637, "step": 5267 }, { "epoch": 5.065384615384615, "grad_norm": 2.619483709335327, "learning_rate": 1.848084766750279e-05, "loss": 0.02, "step": 5268 }, { "epoch": 5.066346153846154, "grad_norm": 4.449071884155273, "learning_rate": 1.848018756757065e-05, "loss": 0.0895, "step": 5269 }, { "epoch": 5.0673076923076925, "grad_norm": 4.107062816619873, "learning_rate": 1.847952733605088e-05, "loss": 0.0566, "step": 5270 }, { "epoch": 5.068269230769231, "grad_norm": 3.5685877799987793, "learning_rate": 1.8478866972953736e-05, "loss": 0.0328, "step": 5271 }, { "epoch": 5.069230769230769, "grad_norm": 0.9327741265296936, "learning_rate": 1.847820647828946e-05, "loss": 0.0061, "step": 5272 }, { "epoch": 5.070192307692308, "grad_norm": 1.8196581602096558, "learning_rate": 1.8477545852068308e-05, "loss": 0.018, "step": 5273 }, { "epoch": 5.071153846153846, "grad_norm": 2.5254809856414795, "learning_rate": 1.8476885094300522e-05, "loss": 0.0291, "step": 5274 }, { "epoch": 5.072115384615385, "grad_norm": 1.7684638500213623, "learning_rate": 1.847622420499636e-05, "loss": 0.0143, "step": 5275 }, { "epoch": 5.073076923076923, "grad_norm": 2.2834386825561523, "learning_rate": 1.8475563184166073e-05, "loss": 0.0268, "step": 5276 }, { "epoch": 5.0740384615384615, "grad_norm": 2.273728609085083, "learning_rate": 1.8474902031819925e-05, "loss": 0.0439, "step": 5277 }, { "epoch": 5.075, "grad_norm": 5.953100204467773, "learning_rate": 1.8474240747968168e-05, "loss": 0.0511, "step": 5278 }, { "epoch": 5.075961538461539, "grad_norm": 1.1915911436080933, "learning_rate": 1.8473579332621064e-05, "loss": 0.0125, "step": 5279 }, { "epoch": 5.076923076923077, "grad_norm": 1.6715247631072998, "learning_rate": 1.8472917785788884e-05, "loss": 0.0128, "step": 5280 }, { "epoch": 5.077884615384615, "grad_norm": 2.9671804904937744, "learning_rate": 1.8472256107481887e-05, "loss": 0.036, "step": 5281 }, { "epoch": 5.078846153846154, "grad_norm": 4.118710041046143, "learning_rate": 1.8471594297710337e-05, "loss": 0.071, "step": 5282 }, { "epoch": 5.079807692307693, "grad_norm": 2.4784669876098633, "learning_rate": 1.847093235648451e-05, "loss": 0.037, "step": 5283 }, { "epoch": 5.0807692307692305, "grad_norm": 1.8389440774917603, "learning_rate": 1.8470270283814672e-05, "loss": 0.0119, "step": 5284 }, { "epoch": 5.081730769230769, "grad_norm": 1.166521430015564, "learning_rate": 1.8469608079711105e-05, "loss": 0.0386, "step": 5285 }, { "epoch": 5.082692307692308, "grad_norm": 3.088430643081665, "learning_rate": 1.846894574418407e-05, "loss": 0.0418, "step": 5286 }, { "epoch": 5.0836538461538465, "grad_norm": 2.289336919784546, "learning_rate": 1.846828327724386e-05, "loss": 0.0222, "step": 5287 }, { "epoch": 5.084615384615384, "grad_norm": 2.283966541290283, "learning_rate": 1.8467620678900747e-05, "loss": 0.0266, "step": 5288 }, { "epoch": 5.085576923076923, "grad_norm": 3.2787911891937256, "learning_rate": 1.846695794916501e-05, "loss": 0.0929, "step": 5289 }, { "epoch": 5.086538461538462, "grad_norm": 4.3770833015441895, "learning_rate": 1.8466295088046936e-05, "loss": 0.0972, "step": 5290 }, { "epoch": 5.0875, "grad_norm": 3.3696765899658203, "learning_rate": 1.8465632095556815e-05, "loss": 0.0379, "step": 5291 }, { "epoch": 5.088461538461538, "grad_norm": 2.3541228771209717, "learning_rate": 1.8464968971704924e-05, "loss": 0.0313, "step": 5292 }, { "epoch": 5.089423076923077, "grad_norm": 2.945946455001831, "learning_rate": 1.8464305716501564e-05, "loss": 0.0391, "step": 5293 }, { "epoch": 5.0903846153846155, "grad_norm": 2.3673453330993652, "learning_rate": 1.8463642329957018e-05, "loss": 0.0183, "step": 5294 }, { "epoch": 5.091346153846154, "grad_norm": 2.1668763160705566, "learning_rate": 1.8462978812081585e-05, "loss": 0.0417, "step": 5295 }, { "epoch": 5.092307692307692, "grad_norm": 0.8842533230781555, "learning_rate": 1.8462315162885563e-05, "loss": 0.0053, "step": 5296 }, { "epoch": 5.093269230769231, "grad_norm": 1.2004241943359375, "learning_rate": 1.8461651382379244e-05, "loss": 0.0076, "step": 5297 }, { "epoch": 5.094230769230769, "grad_norm": 5.038941860198975, "learning_rate": 1.8460987470572928e-05, "loss": 0.1112, "step": 5298 }, { "epoch": 5.095192307692308, "grad_norm": 4.152460098266602, "learning_rate": 1.846032342747692e-05, "loss": 0.0912, "step": 5299 }, { "epoch": 5.096153846153846, "grad_norm": 3.4707133769989014, "learning_rate": 1.8459659253101522e-05, "loss": 0.0628, "step": 5300 }, { "epoch": 5.0971153846153845, "grad_norm": 2.2318825721740723, "learning_rate": 1.8458994947457046e-05, "loss": 0.0185, "step": 5301 }, { "epoch": 5.098076923076923, "grad_norm": 0.887292742729187, "learning_rate": 1.845833051055379e-05, "loss": 0.0075, "step": 5302 }, { "epoch": 5.099038461538462, "grad_norm": 0.6486356258392334, "learning_rate": 1.8457665942402073e-05, "loss": 0.0032, "step": 5303 }, { "epoch": 5.1, "grad_norm": 3.2736170291900635, "learning_rate": 1.84570012430122e-05, "loss": 0.0479, "step": 5304 }, { "epoch": 5.100961538461538, "grad_norm": 1.6611040830612183, "learning_rate": 1.845633641239449e-05, "loss": 0.0195, "step": 5305 }, { "epoch": 5.101923076923077, "grad_norm": 2.6382977962493896, "learning_rate": 1.8455671450559262e-05, "loss": 0.0436, "step": 5306 }, { "epoch": 5.102884615384616, "grad_norm": 4.763827800750732, "learning_rate": 1.8455006357516824e-05, "loss": 0.1006, "step": 5307 }, { "epoch": 5.1038461538461535, "grad_norm": 2.910223960876465, "learning_rate": 1.8454341133277506e-05, "loss": 0.0164, "step": 5308 }, { "epoch": 5.104807692307692, "grad_norm": 2.894165515899658, "learning_rate": 1.8453675777851627e-05, "loss": 0.0273, "step": 5309 }, { "epoch": 5.105769230769231, "grad_norm": 2.1596262454986572, "learning_rate": 1.8453010291249513e-05, "loss": 0.0207, "step": 5310 }, { "epoch": 5.1067307692307695, "grad_norm": 3.4781758785247803, "learning_rate": 1.8452344673481483e-05, "loss": 0.0528, "step": 5311 }, { "epoch": 5.107692307692307, "grad_norm": 2.7361607551574707, "learning_rate": 1.8451678924557873e-05, "loss": 0.0223, "step": 5312 }, { "epoch": 5.108653846153846, "grad_norm": 4.008139133453369, "learning_rate": 1.8451013044489015e-05, "loss": 0.0646, "step": 5313 }, { "epoch": 5.109615384615385, "grad_norm": 2.3016130924224854, "learning_rate": 1.8450347033285234e-05, "loss": 0.0182, "step": 5314 }, { "epoch": 5.110576923076923, "grad_norm": 2.2336080074310303, "learning_rate": 1.844968089095687e-05, "loss": 0.0234, "step": 5315 }, { "epoch": 5.111538461538461, "grad_norm": 2.6807684898376465, "learning_rate": 1.844901461751426e-05, "loss": 0.0295, "step": 5316 }, { "epoch": 5.1125, "grad_norm": 0.7770818471908569, "learning_rate": 1.8448348212967734e-05, "loss": 0.0058, "step": 5317 }, { "epoch": 5.1134615384615385, "grad_norm": 2.2485733032226562, "learning_rate": 1.8447681677327648e-05, "loss": 0.0299, "step": 5318 }, { "epoch": 5.114423076923077, "grad_norm": 2.685307264328003, "learning_rate": 1.844701501060433e-05, "loss": 0.0281, "step": 5319 }, { "epoch": 5.115384615384615, "grad_norm": 6.75233793258667, "learning_rate": 1.8446348212808135e-05, "loss": 0.1601, "step": 5320 }, { "epoch": 5.116346153846154, "grad_norm": 3.828878879547119, "learning_rate": 1.84456812839494e-05, "loss": 0.0656, "step": 5321 }, { "epoch": 5.117307692307692, "grad_norm": 1.8694729804992676, "learning_rate": 1.8445014224038485e-05, "loss": 0.0179, "step": 5322 }, { "epoch": 5.118269230769231, "grad_norm": 2.8119091987609863, "learning_rate": 1.844434703308573e-05, "loss": 0.0643, "step": 5323 }, { "epoch": 5.119230769230769, "grad_norm": 0.11600539833307266, "learning_rate": 1.84436797111015e-05, "loss": 0.0012, "step": 5324 }, { "epoch": 5.1201923076923075, "grad_norm": 3.1086175441741943, "learning_rate": 1.8443012258096135e-05, "loss": 0.0589, "step": 5325 }, { "epoch": 5.121153846153846, "grad_norm": 2.0066933631896973, "learning_rate": 1.8442344674080002e-05, "loss": 0.0229, "step": 5326 }, { "epoch": 5.122115384615385, "grad_norm": 1.4005687236785889, "learning_rate": 1.844167695906346e-05, "loss": 0.0247, "step": 5327 }, { "epoch": 5.123076923076923, "grad_norm": 5.31471061706543, "learning_rate": 1.844100911305687e-05, "loss": 0.088, "step": 5328 }, { "epoch": 5.124038461538461, "grad_norm": 4.300619602203369, "learning_rate": 1.8440341136070584e-05, "loss": 0.0548, "step": 5329 }, { "epoch": 5.125, "grad_norm": 3.5182836055755615, "learning_rate": 1.843967302811498e-05, "loss": 0.0408, "step": 5330 }, { "epoch": 5.125961538461539, "grad_norm": 4.356247425079346, "learning_rate": 1.843900478920042e-05, "loss": 0.1203, "step": 5331 }, { "epoch": 5.126923076923077, "grad_norm": 2.941772699356079, "learning_rate": 1.843833641933728e-05, "loss": 0.0363, "step": 5332 }, { "epoch": 5.127884615384615, "grad_norm": 4.402605056762695, "learning_rate": 1.843766791853592e-05, "loss": 0.0813, "step": 5333 }, { "epoch": 5.128846153846154, "grad_norm": 2.604210376739502, "learning_rate": 1.8436999286806715e-05, "loss": 0.0243, "step": 5334 }, { "epoch": 5.1298076923076925, "grad_norm": 2.887869119644165, "learning_rate": 1.8436330524160048e-05, "loss": 0.0684, "step": 5335 }, { "epoch": 5.130769230769231, "grad_norm": 0.8511750102043152, "learning_rate": 1.843566163060629e-05, "loss": 0.0058, "step": 5336 }, { "epoch": 5.131730769230769, "grad_norm": 3.44521427154541, "learning_rate": 1.8434992606155822e-05, "loss": 0.0387, "step": 5337 }, { "epoch": 5.132692307692308, "grad_norm": 1.6089224815368652, "learning_rate": 1.8434323450819026e-05, "loss": 0.0183, "step": 5338 }, { "epoch": 5.133653846153846, "grad_norm": 2.769576072692871, "learning_rate": 1.843365416460628e-05, "loss": 0.0508, "step": 5339 }, { "epoch": 5.134615384615385, "grad_norm": 2.681715726852417, "learning_rate": 1.843298474752798e-05, "loss": 0.0124, "step": 5340 }, { "epoch": 5.135576923076923, "grad_norm": 2.8364601135253906, "learning_rate": 1.8432315199594506e-05, "loss": 0.0223, "step": 5341 }, { "epoch": 5.1365384615384615, "grad_norm": 2.861401319503784, "learning_rate": 1.8431645520816247e-05, "loss": 0.0479, "step": 5342 }, { "epoch": 5.1375, "grad_norm": 1.7978689670562744, "learning_rate": 1.8430975711203596e-05, "loss": 0.0135, "step": 5343 }, { "epoch": 5.138461538461539, "grad_norm": 0.5301749110221863, "learning_rate": 1.8430305770766947e-05, "loss": 0.0031, "step": 5344 }, { "epoch": 5.139423076923077, "grad_norm": 2.646634340286255, "learning_rate": 1.8429635699516694e-05, "loss": 0.0257, "step": 5345 }, { "epoch": 5.140384615384615, "grad_norm": 1.7283148765563965, "learning_rate": 1.842896549746324e-05, "loss": 0.0116, "step": 5346 }, { "epoch": 5.141346153846154, "grad_norm": 5.247320175170898, "learning_rate": 1.8428295164616975e-05, "loss": 0.0812, "step": 5347 }, { "epoch": 5.142307692307693, "grad_norm": 3.9597878456115723, "learning_rate": 1.8427624700988308e-05, "loss": 0.0283, "step": 5348 }, { "epoch": 5.1432692307692305, "grad_norm": 5.021088600158691, "learning_rate": 1.842695410658764e-05, "loss": 0.1436, "step": 5349 }, { "epoch": 5.144230769230769, "grad_norm": 4.627431392669678, "learning_rate": 1.8426283381425377e-05, "loss": 0.0379, "step": 5350 }, { "epoch": 5.145192307692308, "grad_norm": 2.759366035461426, "learning_rate": 1.842561252551193e-05, "loss": 0.0291, "step": 5351 }, { "epoch": 5.1461538461538465, "grad_norm": 0.692696750164032, "learning_rate": 1.8424941538857698e-05, "loss": 0.0042, "step": 5352 }, { "epoch": 5.147115384615384, "grad_norm": 3.6333444118499756, "learning_rate": 1.842427042147311e-05, "loss": 0.0581, "step": 5353 }, { "epoch": 5.148076923076923, "grad_norm": 2.59792423248291, "learning_rate": 1.842359917336856e-05, "loss": 0.0566, "step": 5354 }, { "epoch": 5.149038461538462, "grad_norm": 2.9841930866241455, "learning_rate": 1.842292779455448e-05, "loss": 0.0361, "step": 5355 }, { "epoch": 5.15, "grad_norm": 5.871306419372559, "learning_rate": 1.842225628504128e-05, "loss": 0.1727, "step": 5356 }, { "epoch": 5.150961538461538, "grad_norm": 2.6639466285705566, "learning_rate": 1.8421584644839382e-05, "loss": 0.0262, "step": 5357 }, { "epoch": 5.151923076923077, "grad_norm": 4.590816020965576, "learning_rate": 1.8420912873959208e-05, "loss": 0.0619, "step": 5358 }, { "epoch": 5.1528846153846155, "grad_norm": 0.2736847996711731, "learning_rate": 1.8420240972411177e-05, "loss": 0.0023, "step": 5359 }, { "epoch": 5.153846153846154, "grad_norm": 2.3052501678466797, "learning_rate": 1.8419568940205726e-05, "loss": 0.0402, "step": 5360 }, { "epoch": 5.154807692307692, "grad_norm": 2.5421152114868164, "learning_rate": 1.8418896777353272e-05, "loss": 0.0435, "step": 5361 }, { "epoch": 5.155769230769231, "grad_norm": 0.5267413854598999, "learning_rate": 1.8418224483864252e-05, "loss": 0.0046, "step": 5362 }, { "epoch": 5.156730769230769, "grad_norm": 4.92742919921875, "learning_rate": 1.8417552059749087e-05, "loss": 0.0836, "step": 5363 }, { "epoch": 5.157692307692308, "grad_norm": 3.097529888153076, "learning_rate": 1.841687950501823e-05, "loss": 0.0521, "step": 5364 }, { "epoch": 5.158653846153846, "grad_norm": 4.320487022399902, "learning_rate": 1.8416206819682102e-05, "loss": 0.0625, "step": 5365 }, { "epoch": 5.1596153846153845, "grad_norm": 1.7748427391052246, "learning_rate": 1.8415534003751143e-05, "loss": 0.0428, "step": 5366 }, { "epoch": 5.160576923076923, "grad_norm": 3.172654628753662, "learning_rate": 1.8414861057235797e-05, "loss": 0.0829, "step": 5367 }, { "epoch": 5.161538461538462, "grad_norm": 4.369329452514648, "learning_rate": 1.8414187980146504e-05, "loss": 0.1028, "step": 5368 }, { "epoch": 5.1625, "grad_norm": 1.6739522218704224, "learning_rate": 1.8413514772493713e-05, "loss": 0.0438, "step": 5369 }, { "epoch": 5.163461538461538, "grad_norm": 2.0501132011413574, "learning_rate": 1.8412841434287863e-05, "loss": 0.0246, "step": 5370 }, { "epoch": 5.164423076923077, "grad_norm": 2.399850606918335, "learning_rate": 1.8412167965539405e-05, "loss": 0.0504, "step": 5371 }, { "epoch": 5.165384615384616, "grad_norm": 4.099301338195801, "learning_rate": 1.841149436625879e-05, "loss": 0.0681, "step": 5372 }, { "epoch": 5.1663461538461535, "grad_norm": 3.818135976791382, "learning_rate": 1.8410820636456472e-05, "loss": 0.0607, "step": 5373 }, { "epoch": 5.167307692307692, "grad_norm": 0.3747449517250061, "learning_rate": 1.84101467761429e-05, "loss": 0.0029, "step": 5374 }, { "epoch": 5.168269230769231, "grad_norm": 2.593590497970581, "learning_rate": 1.8409472785328536e-05, "loss": 0.0359, "step": 5375 }, { "epoch": 5.1692307692307695, "grad_norm": 1.8965657949447632, "learning_rate": 1.8408798664023836e-05, "loss": 0.034, "step": 5376 }, { "epoch": 5.170192307692307, "grad_norm": 3.4592621326446533, "learning_rate": 1.8408124412239256e-05, "loss": 0.0562, "step": 5377 }, { "epoch": 5.171153846153846, "grad_norm": 3.397754430770874, "learning_rate": 1.8407450029985267e-05, "loss": 0.0252, "step": 5378 }, { "epoch": 5.172115384615385, "grad_norm": 0.1566896140575409, "learning_rate": 1.8406775517272328e-05, "loss": 0.0013, "step": 5379 }, { "epoch": 5.173076923076923, "grad_norm": 2.5877740383148193, "learning_rate": 1.8406100874110907e-05, "loss": 0.0616, "step": 5380 }, { "epoch": 5.174038461538461, "grad_norm": 3.9540741443634033, "learning_rate": 1.8405426100511473e-05, "loss": 0.0807, "step": 5381 }, { "epoch": 5.175, "grad_norm": 3.6064369678497314, "learning_rate": 1.840475119648449e-05, "loss": 0.0349, "step": 5382 }, { "epoch": 5.1759615384615385, "grad_norm": 0.4867446720600128, "learning_rate": 1.8404076162040442e-05, "loss": 0.0029, "step": 5383 }, { "epoch": 5.176923076923077, "grad_norm": 4.010498046875, "learning_rate": 1.8403400997189795e-05, "loss": 0.0653, "step": 5384 }, { "epoch": 5.177884615384615, "grad_norm": 4.032658100128174, "learning_rate": 1.8402725701943028e-05, "loss": 0.0842, "step": 5385 }, { "epoch": 5.178846153846154, "grad_norm": 5.18664026260376, "learning_rate": 1.8402050276310622e-05, "loss": 0.1339, "step": 5386 }, { "epoch": 5.179807692307692, "grad_norm": 1.2264653444290161, "learning_rate": 1.8401374720303054e-05, "loss": 0.0113, "step": 5387 }, { "epoch": 5.180769230769231, "grad_norm": 3.1049275398254395, "learning_rate": 1.8400699033930807e-05, "loss": 0.0474, "step": 5388 }, { "epoch": 5.181730769230769, "grad_norm": 1.546707272529602, "learning_rate": 1.8400023217204367e-05, "loss": 0.0122, "step": 5389 }, { "epoch": 5.1826923076923075, "grad_norm": 3.711153268814087, "learning_rate": 1.8399347270134223e-05, "loss": 0.0212, "step": 5390 }, { "epoch": 5.183653846153846, "grad_norm": 4.514772415161133, "learning_rate": 1.8398671192730863e-05, "loss": 0.0924, "step": 5391 }, { "epoch": 5.184615384615385, "grad_norm": 4.179081439971924, "learning_rate": 1.839799498500477e-05, "loss": 0.0639, "step": 5392 }, { "epoch": 5.185576923076923, "grad_norm": 3.4376914501190186, "learning_rate": 1.8397318646966445e-05, "loss": 0.0641, "step": 5393 }, { "epoch": 5.186538461538461, "grad_norm": 2.416686773300171, "learning_rate": 1.839664217862638e-05, "loss": 0.0354, "step": 5394 }, { "epoch": 5.1875, "grad_norm": 4.217051029205322, "learning_rate": 1.8395965579995073e-05, "loss": 0.1635, "step": 5395 }, { "epoch": 5.188461538461539, "grad_norm": 2.802943706512451, "learning_rate": 1.8395288851083022e-05, "loss": 0.0251, "step": 5396 }, { "epoch": 5.189423076923077, "grad_norm": 0.6424216628074646, "learning_rate": 1.8394611991900726e-05, "loss": 0.006, "step": 5397 }, { "epoch": 5.190384615384615, "grad_norm": 1.9695144891738892, "learning_rate": 1.8393935002458695e-05, "loss": 0.0268, "step": 5398 }, { "epoch": 5.191346153846154, "grad_norm": 2.742408275604248, "learning_rate": 1.8393257882767422e-05, "loss": 0.0205, "step": 5399 }, { "epoch": 5.1923076923076925, "grad_norm": 4.49808931350708, "learning_rate": 1.8392580632837423e-05, "loss": 0.0694, "step": 5400 }, { "epoch": 5.193269230769231, "grad_norm": 2.822599411010742, "learning_rate": 1.8391903252679206e-05, "loss": 0.0253, "step": 5401 }, { "epoch": 5.194230769230769, "grad_norm": 5.027798175811768, "learning_rate": 1.839122574230328e-05, "loss": 0.1067, "step": 5402 }, { "epoch": 5.195192307692308, "grad_norm": 3.324794292449951, "learning_rate": 1.8390548101720157e-05, "loss": 0.025, "step": 5403 }, { "epoch": 5.196153846153846, "grad_norm": 6.49741792678833, "learning_rate": 1.8389870330940356e-05, "loss": 0.0463, "step": 5404 }, { "epoch": 5.197115384615385, "grad_norm": 4.864259243011475, "learning_rate": 1.838919242997439e-05, "loss": 0.0627, "step": 5405 }, { "epoch": 5.198076923076923, "grad_norm": 1.5388870239257812, "learning_rate": 1.838851439883278e-05, "loss": 0.0168, "step": 5406 }, { "epoch": 5.1990384615384615, "grad_norm": 5.565110206604004, "learning_rate": 1.8387836237526045e-05, "loss": 0.0833, "step": 5407 }, { "epoch": 5.2, "grad_norm": 2.7213943004608154, "learning_rate": 1.838715794606471e-05, "loss": 0.0587, "step": 5408 }, { "epoch": 5.200961538461539, "grad_norm": 0.9382878541946411, "learning_rate": 1.83864795244593e-05, "loss": 0.0083, "step": 5409 }, { "epoch": 5.201923076923077, "grad_norm": 1.2064803838729858, "learning_rate": 1.8385800972720344e-05, "loss": 0.0095, "step": 5410 }, { "epoch": 5.202884615384615, "grad_norm": 1.731704831123352, "learning_rate": 1.838512229085837e-05, "loss": 0.0243, "step": 5411 }, { "epoch": 5.203846153846154, "grad_norm": 2.857724189758301, "learning_rate": 1.83844434788839e-05, "loss": 0.0708, "step": 5412 }, { "epoch": 5.204807692307693, "grad_norm": 2.472834587097168, "learning_rate": 1.8383764536807486e-05, "loss": 0.0361, "step": 5413 }, { "epoch": 5.2057692307692305, "grad_norm": 5.098223686218262, "learning_rate": 1.8383085464639645e-05, "loss": 0.0685, "step": 5414 }, { "epoch": 5.206730769230769, "grad_norm": 5.220248699188232, "learning_rate": 1.838240626239092e-05, "loss": 0.0758, "step": 5415 }, { "epoch": 5.207692307692308, "grad_norm": 4.302567481994629, "learning_rate": 1.8381726930071858e-05, "loss": 0.0758, "step": 5416 }, { "epoch": 5.2086538461538465, "grad_norm": 2.7789878845214844, "learning_rate": 1.838104746769299e-05, "loss": 0.0717, "step": 5417 }, { "epoch": 5.209615384615384, "grad_norm": 6.130456924438477, "learning_rate": 1.8380367875264868e-05, "loss": 0.0589, "step": 5418 }, { "epoch": 5.210576923076923, "grad_norm": 4.026650428771973, "learning_rate": 1.837968815279803e-05, "loss": 0.098, "step": 5419 }, { "epoch": 5.211538461538462, "grad_norm": 3.932168960571289, "learning_rate": 1.8379008300303022e-05, "loss": 0.0944, "step": 5420 }, { "epoch": 5.2125, "grad_norm": 3.096921682357788, "learning_rate": 1.83783283177904e-05, "loss": 0.0342, "step": 5421 }, { "epoch": 5.213461538461538, "grad_norm": 2.311530828475952, "learning_rate": 1.8377648205270716e-05, "loss": 0.0284, "step": 5422 }, { "epoch": 5.214423076923077, "grad_norm": 2.9423365592956543, "learning_rate": 1.8376967962754517e-05, "loss": 0.0426, "step": 5423 }, { "epoch": 5.2153846153846155, "grad_norm": 3.228254556655884, "learning_rate": 1.837628759025236e-05, "loss": 0.0953, "step": 5424 }, { "epoch": 5.216346153846154, "grad_norm": 2.5015790462493896, "learning_rate": 1.837560708777481e-05, "loss": 0.0598, "step": 5425 }, { "epoch": 5.217307692307692, "grad_norm": 2.277052879333496, "learning_rate": 1.837492645533241e-05, "loss": 0.0209, "step": 5426 }, { "epoch": 5.218269230769231, "grad_norm": 1.3121998310089111, "learning_rate": 1.8374245692935737e-05, "loss": 0.0157, "step": 5427 }, { "epoch": 5.219230769230769, "grad_norm": 3.5676345825195312, "learning_rate": 1.8373564800595353e-05, "loss": 0.04, "step": 5428 }, { "epoch": 5.220192307692308, "grad_norm": 6.466397285461426, "learning_rate": 1.8372883778321813e-05, "loss": 0.267, "step": 5429 }, { "epoch": 5.221153846153846, "grad_norm": 1.2296162843704224, "learning_rate": 1.8372202626125696e-05, "loss": 0.0197, "step": 5430 }, { "epoch": 5.2221153846153845, "grad_norm": 13.477916717529297, "learning_rate": 1.8371521344017562e-05, "loss": 0.0458, "step": 5431 }, { "epoch": 5.223076923076923, "grad_norm": 4.351600170135498, "learning_rate": 1.8370839932007987e-05, "loss": 0.0695, "step": 5432 }, { "epoch": 5.224038461538462, "grad_norm": 2.801492929458618, "learning_rate": 1.8370158390107547e-05, "loss": 0.0464, "step": 5433 }, { "epoch": 5.225, "grad_norm": 1.8154922723770142, "learning_rate": 1.8369476718326815e-05, "loss": 0.0202, "step": 5434 }, { "epoch": 5.225961538461538, "grad_norm": 1.6789120435714722, "learning_rate": 1.8368794916676365e-05, "loss": 0.0126, "step": 5435 }, { "epoch": 5.226923076923077, "grad_norm": 1.656677007675171, "learning_rate": 1.8368112985166783e-05, "loss": 0.0126, "step": 5436 }, { "epoch": 5.227884615384616, "grad_norm": 4.42171573638916, "learning_rate": 1.8367430923808646e-05, "loss": 0.0705, "step": 5437 }, { "epoch": 5.2288461538461535, "grad_norm": 7.048471927642822, "learning_rate": 1.836674873261254e-05, "loss": 0.0965, "step": 5438 }, { "epoch": 5.229807692307692, "grad_norm": 2.067821502685547, "learning_rate": 1.836606641158905e-05, "loss": 0.021, "step": 5439 }, { "epoch": 5.230769230769231, "grad_norm": 3.6786885261535645, "learning_rate": 1.8365383960748763e-05, "loss": 0.0661, "step": 5440 }, { "epoch": 5.2317307692307695, "grad_norm": 2.675013542175293, "learning_rate": 1.8364701380102267e-05, "loss": 0.0587, "step": 5441 }, { "epoch": 5.232692307692307, "grad_norm": 1.8743199110031128, "learning_rate": 1.836401866966016e-05, "loss": 0.0341, "step": 5442 }, { "epoch": 5.233653846153846, "grad_norm": 2.3393826484680176, "learning_rate": 1.836333582943303e-05, "loss": 0.0795, "step": 5443 }, { "epoch": 5.234615384615385, "grad_norm": 1.0783475637435913, "learning_rate": 1.836265285943147e-05, "loss": 0.0104, "step": 5444 }, { "epoch": 5.235576923076923, "grad_norm": 5.155951499938965, "learning_rate": 1.8361969759666086e-05, "loss": 0.1184, "step": 5445 }, { "epoch": 5.236538461538461, "grad_norm": 3.143336534500122, "learning_rate": 1.8361286530147474e-05, "loss": 0.1041, "step": 5446 }, { "epoch": 5.2375, "grad_norm": 3.508087158203125, "learning_rate": 1.8360603170886234e-05, "loss": 0.066, "step": 5447 }, { "epoch": 5.2384615384615385, "grad_norm": 3.0807881355285645, "learning_rate": 1.8359919681892967e-05, "loss": 0.0462, "step": 5448 }, { "epoch": 5.239423076923077, "grad_norm": 2.735490083694458, "learning_rate": 1.8359236063178287e-05, "loss": 0.0337, "step": 5449 }, { "epoch": 5.240384615384615, "grad_norm": 2.4849538803100586, "learning_rate": 1.8358552314752797e-05, "loss": 0.041, "step": 5450 }, { "epoch": 5.241346153846154, "grad_norm": 1.704569697380066, "learning_rate": 1.8357868436627105e-05, "loss": 0.0173, "step": 5451 }, { "epoch": 5.242307692307692, "grad_norm": 1.5833357572555542, "learning_rate": 1.835718442881183e-05, "loss": 0.015, "step": 5452 }, { "epoch": 5.243269230769231, "grad_norm": 1.492666244506836, "learning_rate": 1.8356500291317574e-05, "loss": 0.0411, "step": 5453 }, { "epoch": 5.244230769230769, "grad_norm": 3.1450612545013428, "learning_rate": 1.8355816024154966e-05, "loss": 0.0808, "step": 5454 }, { "epoch": 5.2451923076923075, "grad_norm": 1.5027390718460083, "learning_rate": 1.835513162733461e-05, "loss": 0.016, "step": 5455 }, { "epoch": 5.246153846153846, "grad_norm": 6.496498107910156, "learning_rate": 1.835444710086714e-05, "loss": 0.1063, "step": 5456 }, { "epoch": 5.247115384615385, "grad_norm": 0.7890937924385071, "learning_rate": 1.8353762444763167e-05, "loss": 0.0082, "step": 5457 }, { "epoch": 5.248076923076923, "grad_norm": 4.116024494171143, "learning_rate": 1.835307765903332e-05, "loss": 0.079, "step": 5458 }, { "epoch": 5.249038461538461, "grad_norm": 3.2328803539276123, "learning_rate": 1.8352392743688225e-05, "loss": 0.0234, "step": 5459 }, { "epoch": 5.25, "grad_norm": 2.856557607650757, "learning_rate": 1.8351707698738507e-05, "loss": 0.0486, "step": 5460 }, { "epoch": 5.250961538461539, "grad_norm": 0.8912555575370789, "learning_rate": 1.8351022524194797e-05, "loss": 0.006, "step": 5461 }, { "epoch": 5.251923076923077, "grad_norm": 1.279866337776184, "learning_rate": 1.835033722006773e-05, "loss": 0.0131, "step": 5462 }, { "epoch": 5.252884615384615, "grad_norm": 2.9229092597961426, "learning_rate": 1.8349651786367933e-05, "loss": 0.0498, "step": 5463 }, { "epoch": 5.253846153846154, "grad_norm": 9.630088806152344, "learning_rate": 1.834896622310605e-05, "loss": 0.0558, "step": 5464 }, { "epoch": 5.2548076923076925, "grad_norm": 2.666834592819214, "learning_rate": 1.8348280530292712e-05, "loss": 0.025, "step": 5465 }, { "epoch": 5.25576923076923, "grad_norm": 2.347626209259033, "learning_rate": 1.8347594707938564e-05, "loss": 0.0799, "step": 5466 }, { "epoch": 5.256730769230769, "grad_norm": 4.831603050231934, "learning_rate": 1.8346908756054247e-05, "loss": 0.0308, "step": 5467 }, { "epoch": 5.257692307692308, "grad_norm": 3.2904105186462402, "learning_rate": 1.8346222674650403e-05, "loss": 0.0349, "step": 5468 }, { "epoch": 5.258653846153846, "grad_norm": 3.3145127296447754, "learning_rate": 1.8345536463737678e-05, "loss": 0.0293, "step": 5469 }, { "epoch": 5.259615384615385, "grad_norm": 2.419922351837158, "learning_rate": 1.8344850123326724e-05, "loss": 0.0405, "step": 5470 }, { "epoch": 5.260576923076923, "grad_norm": 4.592419147491455, "learning_rate": 1.8344163653428183e-05, "loss": 0.062, "step": 5471 }, { "epoch": 5.2615384615384615, "grad_norm": 1.4747241735458374, "learning_rate": 1.8343477054052715e-05, "loss": 0.015, "step": 5472 }, { "epoch": 5.2625, "grad_norm": 1.4899195432662964, "learning_rate": 1.834279032521097e-05, "loss": 0.0268, "step": 5473 }, { "epoch": 5.263461538461539, "grad_norm": 2.9976370334625244, "learning_rate": 1.8342103466913607e-05, "loss": 0.0476, "step": 5474 }, { "epoch": 5.264423076923077, "grad_norm": 3.969374179840088, "learning_rate": 1.834141647917128e-05, "loss": 0.1114, "step": 5475 }, { "epoch": 5.265384615384615, "grad_norm": 6.47373628616333, "learning_rate": 1.8340729361994653e-05, "loss": 0.2433, "step": 5476 }, { "epoch": 5.266346153846154, "grad_norm": 1.6229021549224854, "learning_rate": 1.8340042115394384e-05, "loss": 0.0231, "step": 5477 }, { "epoch": 5.267307692307693, "grad_norm": 5.870424270629883, "learning_rate": 1.8339354739381138e-05, "loss": 0.1027, "step": 5478 }, { "epoch": 5.2682692307692305, "grad_norm": 2.1159229278564453, "learning_rate": 1.8338667233965585e-05, "loss": 0.025, "step": 5479 }, { "epoch": 5.269230769230769, "grad_norm": 3.6534085273742676, "learning_rate": 1.833797959915839e-05, "loss": 0.0652, "step": 5480 }, { "epoch": 5.270192307692308, "grad_norm": 1.548557996749878, "learning_rate": 1.8337291834970222e-05, "loss": 0.0212, "step": 5481 }, { "epoch": 5.2711538461538465, "grad_norm": 4.002499580383301, "learning_rate": 1.8336603941411755e-05, "loss": 0.0398, "step": 5482 }, { "epoch": 5.272115384615384, "grad_norm": 1.8496710062026978, "learning_rate": 1.833591591849366e-05, "loss": 0.0222, "step": 5483 }, { "epoch": 5.273076923076923, "grad_norm": 6.093774795532227, "learning_rate": 1.8335227766226618e-05, "loss": 0.1841, "step": 5484 }, { "epoch": 5.274038461538462, "grad_norm": 3.4244189262390137, "learning_rate": 1.8334539484621304e-05, "loss": 0.0408, "step": 5485 }, { "epoch": 5.275, "grad_norm": 1.9140831232070923, "learning_rate": 1.83338510736884e-05, "loss": 0.047, "step": 5486 }, { "epoch": 5.275961538461538, "grad_norm": 2.175663948059082, "learning_rate": 1.8333162533438587e-05, "loss": 0.0259, "step": 5487 }, { "epoch": 5.276923076923077, "grad_norm": 3.2563769817352295, "learning_rate": 1.833247386388255e-05, "loss": 0.0624, "step": 5488 }, { "epoch": 5.2778846153846155, "grad_norm": 5.599069118499756, "learning_rate": 1.833178506503097e-05, "loss": 0.1103, "step": 5489 }, { "epoch": 5.278846153846154, "grad_norm": 3.290786027908325, "learning_rate": 1.833109613689454e-05, "loss": 0.029, "step": 5490 }, { "epoch": 5.279807692307692, "grad_norm": 2.4108712673187256, "learning_rate": 1.833040707948395e-05, "loss": 0.0151, "step": 5491 }, { "epoch": 5.280769230769231, "grad_norm": 1.8017185926437378, "learning_rate": 1.8329717892809896e-05, "loss": 0.0379, "step": 5492 }, { "epoch": 5.281730769230769, "grad_norm": 1.935131311416626, "learning_rate": 1.8329028576883065e-05, "loss": 0.014, "step": 5493 }, { "epoch": 5.282692307692308, "grad_norm": 2.0474941730499268, "learning_rate": 1.8328339131714154e-05, "loss": 0.0179, "step": 5494 }, { "epoch": 5.283653846153846, "grad_norm": 4.493368625640869, "learning_rate": 1.8327649557313864e-05, "loss": 0.0933, "step": 5495 }, { "epoch": 5.2846153846153845, "grad_norm": 0.6156935691833496, "learning_rate": 1.8326959853692893e-05, "loss": 0.0064, "step": 5496 }, { "epoch": 5.285576923076923, "grad_norm": 2.174454689025879, "learning_rate": 1.8326270020861946e-05, "loss": 0.0237, "step": 5497 }, { "epoch": 5.286538461538462, "grad_norm": 4.439608573913574, "learning_rate": 1.8325580058831725e-05, "loss": 0.0937, "step": 5498 }, { "epoch": 5.2875, "grad_norm": 2.738542318344116, "learning_rate": 1.8324889967612937e-05, "loss": 0.0401, "step": 5499 }, { "epoch": 5.288461538461538, "grad_norm": 1.57672119140625, "learning_rate": 1.832419974721629e-05, "loss": 0.0166, "step": 5500 }, { "epoch": 5.289423076923077, "grad_norm": 3.4221670627593994, "learning_rate": 1.83235093976525e-05, "loss": 0.0482, "step": 5501 }, { "epoch": 5.290384615384616, "grad_norm": 2.334329605102539, "learning_rate": 1.8322818918932264e-05, "loss": 0.0349, "step": 5502 }, { "epoch": 5.2913461538461535, "grad_norm": 3.8097422122955322, "learning_rate": 1.832212831106631e-05, "loss": 0.0486, "step": 5503 }, { "epoch": 5.292307692307692, "grad_norm": 3.1521875858306885, "learning_rate": 1.8321437574065347e-05, "loss": 0.0828, "step": 5504 }, { "epoch": 5.293269230769231, "grad_norm": 3.2833991050720215, "learning_rate": 1.8320746707940096e-05, "loss": 0.0353, "step": 5505 }, { "epoch": 5.2942307692307695, "grad_norm": 1.5085994005203247, "learning_rate": 1.8320055712701282e-05, "loss": 0.01, "step": 5506 }, { "epoch": 5.295192307692307, "grad_norm": 2.794065475463867, "learning_rate": 1.831936458835962e-05, "loss": 0.0348, "step": 5507 }, { "epoch": 5.296153846153846, "grad_norm": 2.781902313232422, "learning_rate": 1.8318673334925837e-05, "loss": 0.0506, "step": 5508 }, { "epoch": 5.297115384615385, "grad_norm": 4.726905822753906, "learning_rate": 1.8317981952410657e-05, "loss": 0.073, "step": 5509 }, { "epoch": 5.298076923076923, "grad_norm": 2.0996885299682617, "learning_rate": 1.831729044082481e-05, "loss": 0.0222, "step": 5510 }, { "epoch": 5.299038461538461, "grad_norm": 3.143202304840088, "learning_rate": 1.831659880017903e-05, "loss": 0.0544, "step": 5511 }, { "epoch": 5.3, "grad_norm": 2.5410475730895996, "learning_rate": 1.8315907030484043e-05, "loss": 0.0315, "step": 5512 }, { "epoch": 5.3009615384615385, "grad_norm": 1.5234010219573975, "learning_rate": 1.8315215131750586e-05, "loss": 0.0152, "step": 5513 }, { "epoch": 5.301923076923077, "grad_norm": 3.4445104598999023, "learning_rate": 1.8314523103989393e-05, "loss": 0.0776, "step": 5514 }, { "epoch": 5.302884615384615, "grad_norm": 2.50557279586792, "learning_rate": 1.8313830947211205e-05, "loss": 0.0146, "step": 5515 }, { "epoch": 5.303846153846154, "grad_norm": 4.572528839111328, "learning_rate": 1.8313138661426764e-05, "loss": 0.0284, "step": 5516 }, { "epoch": 5.304807692307692, "grad_norm": 3.639950752258301, "learning_rate": 1.831244624664681e-05, "loss": 0.0951, "step": 5517 }, { "epoch": 5.305769230769231, "grad_norm": 4.200791835784912, "learning_rate": 1.8311753702882083e-05, "loss": 0.0943, "step": 5518 }, { "epoch": 5.30673076923077, "grad_norm": 3.193087339401245, "learning_rate": 1.831106103014334e-05, "loss": 0.0402, "step": 5519 }, { "epoch": 5.3076923076923075, "grad_norm": 2.138827323913574, "learning_rate": 1.8310368228441312e-05, "loss": 0.0191, "step": 5520 }, { "epoch": 5.308653846153846, "grad_norm": 2.4880850315093994, "learning_rate": 1.830967529778677e-05, "loss": 0.0345, "step": 5521 }, { "epoch": 5.309615384615385, "grad_norm": 2.286654233932495, "learning_rate": 1.830898223819045e-05, "loss": 0.0235, "step": 5522 }, { "epoch": 5.310576923076923, "grad_norm": 2.6935338973999023, "learning_rate": 1.8308289049663113e-05, "loss": 0.0434, "step": 5523 }, { "epoch": 5.311538461538461, "grad_norm": 4.210364818572998, "learning_rate": 1.8307595732215517e-05, "loss": 0.1063, "step": 5524 }, { "epoch": 5.3125, "grad_norm": 7.534151077270508, "learning_rate": 1.8306902285858415e-05, "loss": 0.2088, "step": 5525 }, { "epoch": 5.313461538461539, "grad_norm": 1.1152983903884888, "learning_rate": 1.830620871060257e-05, "loss": 0.0062, "step": 5526 }, { "epoch": 5.314423076923077, "grad_norm": 2.2302794456481934, "learning_rate": 1.830551500645875e-05, "loss": 0.0271, "step": 5527 }, { "epoch": 5.315384615384615, "grad_norm": 2.297879934310913, "learning_rate": 1.8304821173437706e-05, "loss": 0.0271, "step": 5528 }, { "epoch": 5.316346153846154, "grad_norm": 2.474989891052246, "learning_rate": 1.8304127211550216e-05, "loss": 0.0563, "step": 5529 }, { "epoch": 5.3173076923076925, "grad_norm": 3.6123228073120117, "learning_rate": 1.8303433120807043e-05, "loss": 0.0377, "step": 5530 }, { "epoch": 5.31826923076923, "grad_norm": 1.7345744371414185, "learning_rate": 1.8302738901218953e-05, "loss": 0.0122, "step": 5531 }, { "epoch": 5.319230769230769, "grad_norm": 1.5210660696029663, "learning_rate": 1.8302044552796733e-05, "loss": 0.0207, "step": 5532 }, { "epoch": 5.320192307692308, "grad_norm": 3.508016586303711, "learning_rate": 1.830135007555114e-05, "loss": 0.0471, "step": 5533 }, { "epoch": 5.321153846153846, "grad_norm": 4.691296577453613, "learning_rate": 1.8300655469492964e-05, "loss": 0.13, "step": 5534 }, { "epoch": 5.322115384615385, "grad_norm": 2.2868850231170654, "learning_rate": 1.8299960734632974e-05, "loss": 0.0339, "step": 5535 }, { "epoch": 5.323076923076923, "grad_norm": 3.2797858715057373, "learning_rate": 1.8299265870981954e-05, "loss": 0.0716, "step": 5536 }, { "epoch": 5.3240384615384615, "grad_norm": 3.057770252227783, "learning_rate": 1.8298570878550687e-05, "loss": 0.0713, "step": 5537 }, { "epoch": 5.325, "grad_norm": 1.656226634979248, "learning_rate": 1.829787575734995e-05, "loss": 0.0253, "step": 5538 }, { "epoch": 5.325961538461539, "grad_norm": 5.067387580871582, "learning_rate": 1.829718050739054e-05, "loss": 0.086, "step": 5539 }, { "epoch": 5.326923076923077, "grad_norm": 4.837586402893066, "learning_rate": 1.829648512868324e-05, "loss": 0.0833, "step": 5540 }, { "epoch": 5.327884615384615, "grad_norm": 1.6133779287338257, "learning_rate": 1.8295789621238844e-05, "loss": 0.0159, "step": 5541 }, { "epoch": 5.328846153846154, "grad_norm": 1.2903220653533936, "learning_rate": 1.8295093985068138e-05, "loss": 0.0147, "step": 5542 }, { "epoch": 5.329807692307693, "grad_norm": 1.7626197338104248, "learning_rate": 1.829439822018192e-05, "loss": 0.0173, "step": 5543 }, { "epoch": 5.3307692307692305, "grad_norm": 1.7935391664505005, "learning_rate": 1.8293702326590984e-05, "loss": 0.016, "step": 5544 }, { "epoch": 5.331730769230769, "grad_norm": 5.094060897827148, "learning_rate": 1.829300630430613e-05, "loss": 0.0669, "step": 5545 }, { "epoch": 5.332692307692308, "grad_norm": 2.7840707302093506, "learning_rate": 1.8292310153338156e-05, "loss": 0.0118, "step": 5546 }, { "epoch": 5.3336538461538465, "grad_norm": 2.2308456897735596, "learning_rate": 1.8291613873697868e-05, "loss": 0.0183, "step": 5547 }, { "epoch": 5.334615384615384, "grad_norm": 3.592620611190796, "learning_rate": 1.829091746539607e-05, "loss": 0.0539, "step": 5548 }, { "epoch": 5.335576923076923, "grad_norm": 2.9223990440368652, "learning_rate": 1.8290220928443565e-05, "loss": 0.0324, "step": 5549 }, { "epoch": 5.336538461538462, "grad_norm": 0.9418154358863831, "learning_rate": 1.8289524262851163e-05, "loss": 0.0084, "step": 5550 }, { "epoch": 5.3375, "grad_norm": 2.882416248321533, "learning_rate": 1.8288827468629673e-05, "loss": 0.0366, "step": 5551 }, { "epoch": 5.338461538461538, "grad_norm": 3.0735485553741455, "learning_rate": 1.8288130545789905e-05, "loss": 0.0258, "step": 5552 }, { "epoch": 5.339423076923077, "grad_norm": 1.345638394355774, "learning_rate": 1.8287433494342683e-05, "loss": 0.0266, "step": 5553 }, { "epoch": 5.3403846153846155, "grad_norm": 2.8836829662323, "learning_rate": 1.8286736314298813e-05, "loss": 0.0359, "step": 5554 }, { "epoch": 5.341346153846154, "grad_norm": 1.6797834634780884, "learning_rate": 1.8286039005669118e-05, "loss": 0.0269, "step": 5555 }, { "epoch": 5.342307692307692, "grad_norm": 0.271123468875885, "learning_rate": 1.8285341568464416e-05, "loss": 0.0024, "step": 5556 }, { "epoch": 5.343269230769231, "grad_norm": 5.23530912399292, "learning_rate": 1.8284644002695528e-05, "loss": 0.1273, "step": 5557 }, { "epoch": 5.344230769230769, "grad_norm": 3.871812105178833, "learning_rate": 1.828394630837328e-05, "loss": 0.1037, "step": 5558 }, { "epoch": 5.345192307692308, "grad_norm": 2.4539778232574463, "learning_rate": 1.8283248485508504e-05, "loss": 0.0217, "step": 5559 }, { "epoch": 5.346153846153846, "grad_norm": 3.8676064014434814, "learning_rate": 1.8282550534112017e-05, "loss": 0.0868, "step": 5560 }, { "epoch": 5.3471153846153845, "grad_norm": 1.0331376791000366, "learning_rate": 1.8281852454194656e-05, "loss": 0.0055, "step": 5561 }, { "epoch": 5.348076923076923, "grad_norm": 4.104068756103516, "learning_rate": 1.8281154245767255e-05, "loss": 0.0824, "step": 5562 }, { "epoch": 5.349038461538462, "grad_norm": 3.916776657104492, "learning_rate": 1.828045590884064e-05, "loss": 0.0642, "step": 5563 }, { "epoch": 5.35, "grad_norm": 3.5664196014404297, "learning_rate": 1.8279757443425658e-05, "loss": 0.0496, "step": 5564 }, { "epoch": 5.350961538461538, "grad_norm": 5.083926200866699, "learning_rate": 1.8279058849533135e-05, "loss": 0.0987, "step": 5565 }, { "epoch": 5.351923076923077, "grad_norm": 3.3957901000976562, "learning_rate": 1.827836012717392e-05, "loss": 0.1253, "step": 5566 }, { "epoch": 5.352884615384616, "grad_norm": 2.8375697135925293, "learning_rate": 1.8277661276358854e-05, "loss": 0.0568, "step": 5567 }, { "epoch": 5.3538461538461535, "grad_norm": 0.7251665592193604, "learning_rate": 1.827696229709878e-05, "loss": 0.0081, "step": 5568 }, { "epoch": 5.354807692307692, "grad_norm": 1.226560354232788, "learning_rate": 1.827626318940454e-05, "loss": 0.0118, "step": 5569 }, { "epoch": 5.355769230769231, "grad_norm": 4.1542840003967285, "learning_rate": 1.827556395328699e-05, "loss": 0.0836, "step": 5570 }, { "epoch": 5.3567307692307695, "grad_norm": 2.686861991882324, "learning_rate": 1.8274864588756974e-05, "loss": 0.0162, "step": 5571 }, { "epoch": 5.357692307692307, "grad_norm": 3.3339033126831055, "learning_rate": 1.8274165095825345e-05, "loss": 0.0292, "step": 5572 }, { "epoch": 5.358653846153846, "grad_norm": 2.7968356609344482, "learning_rate": 1.827346547450296e-05, "loss": 0.041, "step": 5573 }, { "epoch": 5.359615384615385, "grad_norm": 1.7097407579421997, "learning_rate": 1.8272765724800672e-05, "loss": 0.0208, "step": 5574 }, { "epoch": 5.360576923076923, "grad_norm": 4.152385234832764, "learning_rate": 1.8272065846729342e-05, "loss": 0.0746, "step": 5575 }, { "epoch": 5.361538461538461, "grad_norm": 3.30625319480896, "learning_rate": 1.8271365840299825e-05, "loss": 0.072, "step": 5576 }, { "epoch": 5.3625, "grad_norm": 2.3614420890808105, "learning_rate": 1.8270665705522987e-05, "loss": 0.0317, "step": 5577 }, { "epoch": 5.3634615384615385, "grad_norm": 1.8440102338790894, "learning_rate": 1.8269965442409693e-05, "loss": 0.011, "step": 5578 }, { "epoch": 5.364423076923077, "grad_norm": 1.8808293342590332, "learning_rate": 1.826926505097081e-05, "loss": 0.0659, "step": 5579 }, { "epoch": 5.365384615384615, "grad_norm": 3.6601853370666504, "learning_rate": 1.8268564531217195e-05, "loss": 0.0717, "step": 5580 }, { "epoch": 5.366346153846154, "grad_norm": 2.7413277626037598, "learning_rate": 1.8267863883159736e-05, "loss": 0.0234, "step": 5581 }, { "epoch": 5.367307692307692, "grad_norm": 2.190727949142456, "learning_rate": 1.8267163106809288e-05, "loss": 0.0197, "step": 5582 }, { "epoch": 5.368269230769231, "grad_norm": 3.409154176712036, "learning_rate": 1.8266462202176737e-05, "loss": 0.0395, "step": 5583 }, { "epoch": 5.36923076923077, "grad_norm": 2.758908271789551, "learning_rate": 1.8265761169272952e-05, "loss": 0.1055, "step": 5584 }, { "epoch": 5.3701923076923075, "grad_norm": 3.697441816329956, "learning_rate": 1.8265060008108815e-05, "loss": 0.0562, "step": 5585 }, { "epoch": 5.371153846153846, "grad_norm": 2.481762170791626, "learning_rate": 1.8264358718695204e-05, "loss": 0.1064, "step": 5586 }, { "epoch": 5.372115384615385, "grad_norm": 2.891486406326294, "learning_rate": 1.8263657301043002e-05, "loss": 0.0411, "step": 5587 }, { "epoch": 5.373076923076923, "grad_norm": 3.37719464302063, "learning_rate": 1.826295575516309e-05, "loss": 0.0559, "step": 5588 }, { "epoch": 5.374038461538461, "grad_norm": 3.7669591903686523, "learning_rate": 1.8262254081066356e-05, "loss": 0.074, "step": 5589 }, { "epoch": 5.375, "grad_norm": 1.4377212524414062, "learning_rate": 1.8261552278763693e-05, "loss": 0.0151, "step": 5590 }, { "epoch": 5.375961538461539, "grad_norm": 2.521989345550537, "learning_rate": 1.8260850348265982e-05, "loss": 0.0587, "step": 5591 }, { "epoch": 5.376923076923077, "grad_norm": 2.136896848678589, "learning_rate": 1.8260148289584118e-05, "loss": 0.0263, "step": 5592 }, { "epoch": 5.377884615384615, "grad_norm": 4.39532470703125, "learning_rate": 1.8259446102729e-05, "loss": 0.084, "step": 5593 }, { "epoch": 5.378846153846154, "grad_norm": 4.675159931182861, "learning_rate": 1.8258743787711513e-05, "loss": 0.0899, "step": 5594 }, { "epoch": 5.3798076923076925, "grad_norm": 4.192607879638672, "learning_rate": 1.8258041344542567e-05, "loss": 0.0642, "step": 5595 }, { "epoch": 5.38076923076923, "grad_norm": 6.51711368560791, "learning_rate": 1.8257338773233053e-05, "loss": 0.1414, "step": 5596 }, { "epoch": 5.381730769230769, "grad_norm": 6.446413993835449, "learning_rate": 1.825663607379388e-05, "loss": 0.1392, "step": 5597 }, { "epoch": 5.382692307692308, "grad_norm": 4.218593120574951, "learning_rate": 1.8255933246235943e-05, "loss": 0.054, "step": 5598 }, { "epoch": 5.383653846153846, "grad_norm": 22.711959838867188, "learning_rate": 1.825523029057016e-05, "loss": 0.0657, "step": 5599 }, { "epoch": 5.384615384615385, "grad_norm": 3.6676173210144043, "learning_rate": 1.8254527206807427e-05, "loss": 0.0388, "step": 5600 }, { "epoch": 5.385576923076923, "grad_norm": 2.973461866378784, "learning_rate": 1.8253823994958657e-05, "loss": 0.0218, "step": 5601 }, { "epoch": 5.3865384615384615, "grad_norm": 4.291010856628418, "learning_rate": 1.8253120655034767e-05, "loss": 0.0922, "step": 5602 }, { "epoch": 5.3875, "grad_norm": 2.129281759262085, "learning_rate": 1.8252417187046665e-05, "loss": 0.0268, "step": 5603 }, { "epoch": 5.388461538461539, "grad_norm": 3.3108181953430176, "learning_rate": 1.8251713591005268e-05, "loss": 0.0698, "step": 5604 }, { "epoch": 5.389423076923077, "grad_norm": 2.567808151245117, "learning_rate": 1.8251009866921498e-05, "loss": 0.0323, "step": 5605 }, { "epoch": 5.390384615384615, "grad_norm": 2.1455020904541016, "learning_rate": 1.8250306014806268e-05, "loss": 0.0231, "step": 5606 }, { "epoch": 5.391346153846154, "grad_norm": 2.208869457244873, "learning_rate": 1.8249602034670503e-05, "loss": 0.0185, "step": 5607 }, { "epoch": 5.392307692307693, "grad_norm": 2.6098792552948, "learning_rate": 1.824889792652513e-05, "loss": 0.0329, "step": 5608 }, { "epoch": 5.3932692307692305, "grad_norm": 0.7930631637573242, "learning_rate": 1.8248193690381068e-05, "loss": 0.0052, "step": 5609 }, { "epoch": 5.394230769230769, "grad_norm": 2.994732141494751, "learning_rate": 1.8247489326249252e-05, "loss": 0.0622, "step": 5610 }, { "epoch": 5.395192307692308, "grad_norm": 3.633554697036743, "learning_rate": 1.8246784834140603e-05, "loss": 0.0256, "step": 5611 }, { "epoch": 5.3961538461538465, "grad_norm": 2.255425453186035, "learning_rate": 1.824608021406606e-05, "loss": 0.0387, "step": 5612 }, { "epoch": 5.397115384615384, "grad_norm": 1.4793357849121094, "learning_rate": 1.8245375466036553e-05, "loss": 0.0114, "step": 5613 }, { "epoch": 5.398076923076923, "grad_norm": 2.8076367378234863, "learning_rate": 1.8244670590063022e-05, "loss": 0.0628, "step": 5614 }, { "epoch": 5.399038461538462, "grad_norm": 2.6455047130584717, "learning_rate": 1.8243965586156396e-05, "loss": 0.0339, "step": 5615 }, { "epoch": 5.4, "grad_norm": 3.2677152156829834, "learning_rate": 1.824326045432762e-05, "loss": 0.0421, "step": 5616 }, { "epoch": 5.400961538461538, "grad_norm": 4.215162754058838, "learning_rate": 1.824255519458764e-05, "loss": 0.0789, "step": 5617 }, { "epoch": 5.401923076923077, "grad_norm": 3.4252800941467285, "learning_rate": 1.824184980694739e-05, "loss": 0.0376, "step": 5618 }, { "epoch": 5.4028846153846155, "grad_norm": 6.6804022789001465, "learning_rate": 1.8241144291417824e-05, "loss": 0.0844, "step": 5619 }, { "epoch": 5.403846153846154, "grad_norm": 3.737424373626709, "learning_rate": 1.8240438648009888e-05, "loss": 0.0645, "step": 5620 }, { "epoch": 5.404807692307692, "grad_norm": 3.6513779163360596, "learning_rate": 1.8239732876734525e-05, "loss": 0.0772, "step": 5621 }, { "epoch": 5.405769230769231, "grad_norm": 3.0757851600646973, "learning_rate": 1.823902697760269e-05, "loss": 0.0458, "step": 5622 }, { "epoch": 5.406730769230769, "grad_norm": 3.542131185531616, "learning_rate": 1.8238320950625344e-05, "loss": 0.0967, "step": 5623 }, { "epoch": 5.407692307692308, "grad_norm": 1.9994920492172241, "learning_rate": 1.8237614795813435e-05, "loss": 0.0219, "step": 5624 }, { "epoch": 5.408653846153846, "grad_norm": 3.666123390197754, "learning_rate": 1.8236908513177916e-05, "loss": 0.1394, "step": 5625 }, { "epoch": 5.4096153846153845, "grad_norm": 1.5578577518463135, "learning_rate": 1.8236202102729757e-05, "loss": 0.0178, "step": 5626 }, { "epoch": 5.410576923076923, "grad_norm": 2.3961455821990967, "learning_rate": 1.823549556447991e-05, "loss": 0.0223, "step": 5627 }, { "epoch": 5.411538461538462, "grad_norm": 3.346667528152466, "learning_rate": 1.8234788898439347e-05, "loss": 0.1109, "step": 5628 }, { "epoch": 5.4125, "grad_norm": 5.250056743621826, "learning_rate": 1.8234082104619024e-05, "loss": 0.0394, "step": 5629 }, { "epoch": 5.413461538461538, "grad_norm": 1.802804708480835, "learning_rate": 1.8233375183029917e-05, "loss": 0.0185, "step": 5630 }, { "epoch": 5.414423076923077, "grad_norm": 2.9764089584350586, "learning_rate": 1.8232668133682993e-05, "loss": 0.0499, "step": 5631 }, { "epoch": 5.415384615384616, "grad_norm": 3.4286961555480957, "learning_rate": 1.823196095658922e-05, "loss": 0.0478, "step": 5632 }, { "epoch": 5.4163461538461535, "grad_norm": 5.262566566467285, "learning_rate": 1.8231253651759575e-05, "loss": 0.0907, "step": 5633 }, { "epoch": 5.417307692307692, "grad_norm": 2.5880560874938965, "learning_rate": 1.8230546219205032e-05, "loss": 0.0357, "step": 5634 }, { "epoch": 5.418269230769231, "grad_norm": 4.0803022384643555, "learning_rate": 1.8229838658936566e-05, "loss": 0.0557, "step": 5635 }, { "epoch": 5.4192307692307695, "grad_norm": 2.3232245445251465, "learning_rate": 1.822913097096516e-05, "loss": 0.0267, "step": 5636 }, { "epoch": 5.420192307692307, "grad_norm": 2.844949960708618, "learning_rate": 1.822842315530179e-05, "loss": 0.0297, "step": 5637 }, { "epoch": 5.421153846153846, "grad_norm": 4.659126281738281, "learning_rate": 1.8227715211957445e-05, "loss": 0.12, "step": 5638 }, { "epoch": 5.422115384615385, "grad_norm": 5.054737091064453, "learning_rate": 1.822700714094311e-05, "loss": 0.1091, "step": 5639 }, { "epoch": 5.423076923076923, "grad_norm": 3.0445261001586914, "learning_rate": 1.8226298942269767e-05, "loss": 0.0249, "step": 5640 }, { "epoch": 5.424038461538461, "grad_norm": 5.470250129699707, "learning_rate": 1.8225590615948413e-05, "loss": 0.1254, "step": 5641 }, { "epoch": 5.425, "grad_norm": 3.089627504348755, "learning_rate": 1.822488216199003e-05, "loss": 0.1, "step": 5642 }, { "epoch": 5.4259615384615385, "grad_norm": 2.4980545043945312, "learning_rate": 1.8224173580405617e-05, "loss": 0.0327, "step": 5643 }, { "epoch": 5.426923076923077, "grad_norm": 2.531497001647949, "learning_rate": 1.822346487120617e-05, "loss": 0.0196, "step": 5644 }, { "epoch": 5.427884615384615, "grad_norm": 1.1144403219223022, "learning_rate": 1.822275603440268e-05, "loss": 0.0148, "step": 5645 }, { "epoch": 5.428846153846154, "grad_norm": 5.563899993896484, "learning_rate": 1.822204707000615e-05, "loss": 0.1322, "step": 5646 }, { "epoch": 5.429807692307692, "grad_norm": 2.311826467514038, "learning_rate": 1.822133797802758e-05, "loss": 0.0286, "step": 5647 }, { "epoch": 5.430769230769231, "grad_norm": 3.504155397415161, "learning_rate": 1.822062875847798e-05, "loss": 0.0523, "step": 5648 }, { "epoch": 5.43173076923077, "grad_norm": 1.508920431137085, "learning_rate": 1.8219919411368344e-05, "loss": 0.0143, "step": 5649 }, { "epoch": 5.4326923076923075, "grad_norm": 1.348963737487793, "learning_rate": 1.8219209936709682e-05, "loss": 0.0068, "step": 5650 }, { "epoch": 5.433653846153846, "grad_norm": 3.3132126331329346, "learning_rate": 1.8218500334513008e-05, "loss": 0.0432, "step": 5651 }, { "epoch": 5.434615384615385, "grad_norm": 1.8374710083007812, "learning_rate": 1.821779060478933e-05, "loss": 0.0162, "step": 5652 }, { "epoch": 5.435576923076923, "grad_norm": 2.476050853729248, "learning_rate": 1.821708074754966e-05, "loss": 0.0297, "step": 5653 }, { "epoch": 5.436538461538461, "grad_norm": 2.5280895233154297, "learning_rate": 1.8216370762805016e-05, "loss": 0.0253, "step": 5654 }, { "epoch": 5.4375, "grad_norm": 4.0644917488098145, "learning_rate": 1.821566065056641e-05, "loss": 0.0432, "step": 5655 }, { "epoch": 5.438461538461539, "grad_norm": 2.544346570968628, "learning_rate": 1.8214950410844865e-05, "loss": 0.0339, "step": 5656 }, { "epoch": 5.439423076923077, "grad_norm": 1.9812071323394775, "learning_rate": 1.8214240043651398e-05, "loss": 0.0521, "step": 5657 }, { "epoch": 5.440384615384615, "grad_norm": 0.7675870656967163, "learning_rate": 1.8213529548997036e-05, "loss": 0.0056, "step": 5658 }, { "epoch": 5.441346153846154, "grad_norm": 2.780219793319702, "learning_rate": 1.8212818926892798e-05, "loss": 0.0653, "step": 5659 }, { "epoch": 5.4423076923076925, "grad_norm": 5.117752552032471, "learning_rate": 1.8212108177349722e-05, "loss": 0.0789, "step": 5660 }, { "epoch": 5.44326923076923, "grad_norm": 3.434037208557129, "learning_rate": 1.8211397300378823e-05, "loss": 0.0663, "step": 5661 }, { "epoch": 5.444230769230769, "grad_norm": 3.419642210006714, "learning_rate": 1.8210686295991137e-05, "loss": 0.0808, "step": 5662 }, { "epoch": 5.445192307692308, "grad_norm": 3.891451597213745, "learning_rate": 1.8209975164197705e-05, "loss": 0.0898, "step": 5663 }, { "epoch": 5.446153846153846, "grad_norm": 2.6151859760284424, "learning_rate": 1.8209263905009548e-05, "loss": 0.0574, "step": 5664 }, { "epoch": 5.447115384615385, "grad_norm": 2.0887043476104736, "learning_rate": 1.8208552518437712e-05, "loss": 0.0162, "step": 5665 }, { "epoch": 5.448076923076923, "grad_norm": 3.3555335998535156, "learning_rate": 1.8207841004493233e-05, "loss": 0.0467, "step": 5666 }, { "epoch": 5.4490384615384615, "grad_norm": 3.3145556449890137, "learning_rate": 1.8207129363187148e-05, "loss": 0.0318, "step": 5667 }, { "epoch": 5.45, "grad_norm": 1.9046061038970947, "learning_rate": 1.8206417594530507e-05, "loss": 0.0301, "step": 5668 }, { "epoch": 5.450961538461539, "grad_norm": 2.0294034481048584, "learning_rate": 1.8205705698534348e-05, "loss": 0.0222, "step": 5669 }, { "epoch": 5.451923076923077, "grad_norm": 3.834662675857544, "learning_rate": 1.820499367520972e-05, "loss": 0.0275, "step": 5670 }, { "epoch": 5.452884615384615, "grad_norm": 4.1186203956604, "learning_rate": 1.8204281524567673e-05, "loss": 0.0567, "step": 5671 }, { "epoch": 5.453846153846154, "grad_norm": 0.782604455947876, "learning_rate": 1.8203569246619255e-05, "loss": 0.0056, "step": 5672 }, { "epoch": 5.454807692307693, "grad_norm": 1.2254189252853394, "learning_rate": 1.8202856841375517e-05, "loss": 0.0097, "step": 5673 }, { "epoch": 5.4557692307692305, "grad_norm": 4.498067378997803, "learning_rate": 1.820214430884752e-05, "loss": 0.0551, "step": 5674 }, { "epoch": 5.456730769230769, "grad_norm": 4.452544212341309, "learning_rate": 1.8201431649046317e-05, "loss": 0.1674, "step": 5675 }, { "epoch": 5.457692307692308, "grad_norm": 5.651585102081299, "learning_rate": 1.8200718861982963e-05, "loss": 0.1305, "step": 5676 }, { "epoch": 5.4586538461538465, "grad_norm": 3.620739459991455, "learning_rate": 1.820000594766852e-05, "loss": 0.0231, "step": 5677 }, { "epoch": 5.459615384615384, "grad_norm": 3.643033742904663, "learning_rate": 1.8199292906114053e-05, "loss": 0.0719, "step": 5678 }, { "epoch": 5.460576923076923, "grad_norm": 3.056882619857788, "learning_rate": 1.8198579737330625e-05, "loss": 0.073, "step": 5679 }, { "epoch": 5.461538461538462, "grad_norm": 3.6545250415802, "learning_rate": 1.8197866441329303e-05, "loss": 0.0968, "step": 5680 }, { "epoch": 5.4625, "grad_norm": 3.542538642883301, "learning_rate": 1.8197153018121156e-05, "loss": 0.0583, "step": 5681 }, { "epoch": 5.463461538461538, "grad_norm": 3.133150577545166, "learning_rate": 1.819643946771725e-05, "loss": 0.0337, "step": 5682 }, { "epoch": 5.464423076923077, "grad_norm": 1.7721911668777466, "learning_rate": 1.8195725790128658e-05, "loss": 0.017, "step": 5683 }, { "epoch": 5.4653846153846155, "grad_norm": 1.7123006582260132, "learning_rate": 1.8195011985366456e-05, "loss": 0.0146, "step": 5684 }, { "epoch": 5.466346153846154, "grad_norm": 1.5148676633834839, "learning_rate": 1.8194298053441725e-05, "loss": 0.0387, "step": 5685 }, { "epoch": 5.467307692307692, "grad_norm": 5.764978408813477, "learning_rate": 1.819358399436553e-05, "loss": 0.1215, "step": 5686 }, { "epoch": 5.468269230769231, "grad_norm": 3.7027201652526855, "learning_rate": 1.8192869808148966e-05, "loss": 0.0717, "step": 5687 }, { "epoch": 5.469230769230769, "grad_norm": 3.4941437244415283, "learning_rate": 1.8192155494803104e-05, "loss": 0.0718, "step": 5688 }, { "epoch": 5.470192307692308, "grad_norm": 3.569899559020996, "learning_rate": 1.8191441054339036e-05, "loss": 0.0387, "step": 5689 }, { "epoch": 5.471153846153846, "grad_norm": 3.19132924079895, "learning_rate": 1.8190726486767842e-05, "loss": 0.0401, "step": 5690 }, { "epoch": 5.4721153846153845, "grad_norm": 2.361860752105713, "learning_rate": 1.8190011792100615e-05, "loss": 0.0546, "step": 5691 }, { "epoch": 5.473076923076923, "grad_norm": 1.9229800701141357, "learning_rate": 1.818929697034844e-05, "loss": 0.0164, "step": 5692 }, { "epoch": 5.474038461538462, "grad_norm": 2.569139003753662, "learning_rate": 1.8188582021522414e-05, "loss": 0.0228, "step": 5693 }, { "epoch": 5.475, "grad_norm": 6.106594085693359, "learning_rate": 1.8187866945633626e-05, "loss": 0.1063, "step": 5694 }, { "epoch": 5.475961538461538, "grad_norm": 3.183598518371582, "learning_rate": 1.8187151742693176e-05, "loss": 0.0331, "step": 5695 }, { "epoch": 5.476923076923077, "grad_norm": 2.527000904083252, "learning_rate": 1.8186436412712156e-05, "loss": 0.0141, "step": 5696 }, { "epoch": 5.477884615384616, "grad_norm": 2.29585599899292, "learning_rate": 1.818572095570167e-05, "loss": 0.0378, "step": 5697 }, { "epoch": 5.4788461538461535, "grad_norm": 1.9625523090362549, "learning_rate": 1.8185005371672826e-05, "loss": 0.0179, "step": 5698 }, { "epoch": 5.479807692307692, "grad_norm": 2.4917314052581787, "learning_rate": 1.8184289660636715e-05, "loss": 0.0353, "step": 5699 }, { "epoch": 5.480769230769231, "grad_norm": 3.1504898071289062, "learning_rate": 1.8183573822604452e-05, "loss": 0.0312, "step": 5700 }, { "epoch": 5.4817307692307695, "grad_norm": 3.2406365871429443, "learning_rate": 1.818285785758714e-05, "loss": 0.062, "step": 5701 }, { "epoch": 5.482692307692307, "grad_norm": 1.7582989931106567, "learning_rate": 1.818214176559589e-05, "loss": 0.02, "step": 5702 }, { "epoch": 5.483653846153846, "grad_norm": 0.6797812581062317, "learning_rate": 1.8181425546641817e-05, "loss": 0.0057, "step": 5703 }, { "epoch": 5.484615384615385, "grad_norm": 3.2493820190429688, "learning_rate": 1.8180709200736032e-05, "loss": 0.0348, "step": 5704 }, { "epoch": 5.485576923076923, "grad_norm": 4.649667263031006, "learning_rate": 1.817999272788965e-05, "loss": 0.0452, "step": 5705 }, { "epoch": 5.486538461538461, "grad_norm": 4.042712211608887, "learning_rate": 1.8179276128113786e-05, "loss": 0.0472, "step": 5706 }, { "epoch": 5.4875, "grad_norm": 1.5945141315460205, "learning_rate": 1.8178559401419563e-05, "loss": 0.0101, "step": 5707 }, { "epoch": 5.4884615384615385, "grad_norm": 1.7362565994262695, "learning_rate": 1.8177842547818104e-05, "loss": 0.0209, "step": 5708 }, { "epoch": 5.489423076923077, "grad_norm": 4.360647678375244, "learning_rate": 1.8177125567320528e-05, "loss": 0.1043, "step": 5709 }, { "epoch": 5.490384615384615, "grad_norm": 2.9559667110443115, "learning_rate": 1.8176408459937965e-05, "loss": 0.0644, "step": 5710 }, { "epoch": 5.491346153846154, "grad_norm": 3.949723243713379, "learning_rate": 1.817569122568154e-05, "loss": 0.1093, "step": 5711 }, { "epoch": 5.492307692307692, "grad_norm": 2.4899775981903076, "learning_rate": 1.817497386456238e-05, "loss": 0.0288, "step": 5712 }, { "epoch": 5.493269230769231, "grad_norm": 5.037588596343994, "learning_rate": 1.817425637659162e-05, "loss": 0.1183, "step": 5713 }, { "epoch": 5.49423076923077, "grad_norm": 2.8139984607696533, "learning_rate": 1.8173538761780394e-05, "loss": 0.0609, "step": 5714 }, { "epoch": 5.4951923076923075, "grad_norm": 3.1897099018096924, "learning_rate": 1.8172821020139832e-05, "loss": 0.055, "step": 5715 }, { "epoch": 5.496153846153846, "grad_norm": 1.3713536262512207, "learning_rate": 1.8172103151681076e-05, "loss": 0.0105, "step": 5716 }, { "epoch": 5.497115384615385, "grad_norm": 4.981205463409424, "learning_rate": 1.8171385156415265e-05, "loss": 0.0994, "step": 5717 }, { "epoch": 5.498076923076923, "grad_norm": 3.494675397872925, "learning_rate": 1.817066703435354e-05, "loss": 0.0573, "step": 5718 }, { "epoch": 5.499038461538461, "grad_norm": 2.0178353786468506, "learning_rate": 1.816994878550704e-05, "loss": 0.0774, "step": 5719 }, { "epoch": 5.5, "grad_norm": 2.4723806381225586, "learning_rate": 1.8169230409886918e-05, "loss": 0.0235, "step": 5720 }, { "epoch": 5.500961538461539, "grad_norm": 1.3846137523651123, "learning_rate": 1.8168511907504314e-05, "loss": 0.0174, "step": 5721 }, { "epoch": 5.501923076923077, "grad_norm": 4.43712854385376, "learning_rate": 1.8167793278370378e-05, "loss": 0.0618, "step": 5722 }, { "epoch": 5.502884615384615, "grad_norm": 4.829501628875732, "learning_rate": 1.8167074522496265e-05, "loss": 0.081, "step": 5723 }, { "epoch": 5.503846153846154, "grad_norm": 3.4821503162384033, "learning_rate": 1.8166355639893123e-05, "loss": 0.0509, "step": 5724 }, { "epoch": 5.5048076923076925, "grad_norm": 2.571925163269043, "learning_rate": 1.816563663057211e-05, "loss": 0.0137, "step": 5725 }, { "epoch": 5.50576923076923, "grad_norm": 2.0539751052856445, "learning_rate": 1.8164917494544383e-05, "loss": 0.0197, "step": 5726 }, { "epoch": 5.506730769230769, "grad_norm": 1.6190354824066162, "learning_rate": 1.8164198231821102e-05, "loss": 0.0188, "step": 5727 }, { "epoch": 5.507692307692308, "grad_norm": 4.342814922332764, "learning_rate": 1.8163478842413426e-05, "loss": 0.0815, "step": 5728 }, { "epoch": 5.508653846153846, "grad_norm": 3.5719377994537354, "learning_rate": 1.8162759326332514e-05, "loss": 0.0804, "step": 5729 }, { "epoch": 5.509615384615385, "grad_norm": 3.931427240371704, "learning_rate": 1.8162039683589537e-05, "loss": 0.0272, "step": 5730 }, { "epoch": 5.510576923076923, "grad_norm": 2.920375108718872, "learning_rate": 1.8161319914195657e-05, "loss": 0.0554, "step": 5731 }, { "epoch": 5.5115384615384615, "grad_norm": 7.2537689208984375, "learning_rate": 1.816060001816205e-05, "loss": 0.1309, "step": 5732 }, { "epoch": 5.5125, "grad_norm": 4.614797592163086, "learning_rate": 1.815987999549988e-05, "loss": 0.0563, "step": 5733 }, { "epoch": 5.513461538461538, "grad_norm": 4.221951484680176, "learning_rate": 1.815915984622032e-05, "loss": 0.0896, "step": 5734 }, { "epoch": 5.514423076923077, "grad_norm": 3.1257882118225098, "learning_rate": 1.8158439570334547e-05, "loss": 0.0322, "step": 5735 }, { "epoch": 5.515384615384615, "grad_norm": 3.887855052947998, "learning_rate": 1.8157719167853737e-05, "loss": 0.0615, "step": 5736 }, { "epoch": 5.516346153846154, "grad_norm": 2.3873445987701416, "learning_rate": 1.8156998638789067e-05, "loss": 0.0201, "step": 5737 }, { "epoch": 5.517307692307693, "grad_norm": 2.8684465885162354, "learning_rate": 1.815627798315172e-05, "loss": 0.0511, "step": 5738 }, { "epoch": 5.5182692307692305, "grad_norm": 0.3243951201438904, "learning_rate": 1.8155557200952873e-05, "loss": 0.0023, "step": 5739 }, { "epoch": 5.519230769230769, "grad_norm": 2.6819663047790527, "learning_rate": 1.815483629220372e-05, "loss": 0.0243, "step": 5740 }, { "epoch": 5.520192307692308, "grad_norm": 2.329780101776123, "learning_rate": 1.8154115256915437e-05, "loss": 0.026, "step": 5741 }, { "epoch": 5.5211538461538465, "grad_norm": 2.795321226119995, "learning_rate": 1.8153394095099223e-05, "loss": 0.0511, "step": 5742 }, { "epoch": 5.522115384615384, "grad_norm": 2.1105527877807617, "learning_rate": 1.8152672806766258e-05, "loss": 0.0402, "step": 5743 }, { "epoch": 5.523076923076923, "grad_norm": 0.8499346375465393, "learning_rate": 1.815195139192774e-05, "loss": 0.0059, "step": 5744 }, { "epoch": 5.524038461538462, "grad_norm": 5.230781078338623, "learning_rate": 1.8151229850594862e-05, "loss": 0.0982, "step": 5745 }, { "epoch": 5.525, "grad_norm": 4.886106967926025, "learning_rate": 1.8150508182778824e-05, "loss": 0.0743, "step": 5746 }, { "epoch": 5.525961538461538, "grad_norm": 4.8300065994262695, "learning_rate": 1.8149786388490816e-05, "loss": 0.1636, "step": 5747 }, { "epoch": 5.526923076923077, "grad_norm": 3.7170517444610596, "learning_rate": 1.8149064467742043e-05, "loss": 0.0948, "step": 5748 }, { "epoch": 5.5278846153846155, "grad_norm": 1.2155362367630005, "learning_rate": 1.814834242054371e-05, "loss": 0.0096, "step": 5749 }, { "epoch": 5.528846153846154, "grad_norm": 3.7867510318756104, "learning_rate": 1.814762024690701e-05, "loss": 0.0744, "step": 5750 }, { "epoch": 5.529807692307692, "grad_norm": 6.07834529876709, "learning_rate": 1.8146897946843162e-05, "loss": 0.0975, "step": 5751 }, { "epoch": 5.530769230769231, "grad_norm": 4.0582122802734375, "learning_rate": 1.814617552036337e-05, "loss": 0.0516, "step": 5752 }, { "epoch": 5.531730769230769, "grad_norm": 4.726490020751953, "learning_rate": 1.814545296747884e-05, "loss": 0.0399, "step": 5753 }, { "epoch": 5.532692307692308, "grad_norm": 6.377073764801025, "learning_rate": 1.814473028820079e-05, "loss": 0.1514, "step": 5754 }, { "epoch": 5.533653846153846, "grad_norm": 3.910464286804199, "learning_rate": 1.814400748254043e-05, "loss": 0.0646, "step": 5755 }, { "epoch": 5.5346153846153845, "grad_norm": 2.6138641834259033, "learning_rate": 1.814328455050897e-05, "loss": 0.0441, "step": 5756 }, { "epoch": 5.535576923076923, "grad_norm": 3.1519036293029785, "learning_rate": 1.814256149211764e-05, "loss": 0.0509, "step": 5757 }, { "epoch": 5.536538461538462, "grad_norm": 4.066286563873291, "learning_rate": 1.8141838307377652e-05, "loss": 0.0436, "step": 5758 }, { "epoch": 5.5375, "grad_norm": 2.4566972255706787, "learning_rate": 1.814111499630023e-05, "loss": 0.0379, "step": 5759 }, { "epoch": 5.538461538461538, "grad_norm": 2.100409507751465, "learning_rate": 1.8140391558896596e-05, "loss": 0.0283, "step": 5760 }, { "epoch": 5.539423076923077, "grad_norm": 5.393190383911133, "learning_rate": 1.813966799517798e-05, "loss": 0.1459, "step": 5761 }, { "epoch": 5.540384615384616, "grad_norm": 3.6330485343933105, "learning_rate": 1.8138944305155604e-05, "loss": 0.0872, "step": 5762 }, { "epoch": 5.5413461538461535, "grad_norm": 2.358051300048828, "learning_rate": 1.8138220488840698e-05, "loss": 0.0345, "step": 5763 }, { "epoch": 5.542307692307692, "grad_norm": 4.488400459289551, "learning_rate": 1.81374965462445e-05, "loss": 0.0776, "step": 5764 }, { "epoch": 5.543269230769231, "grad_norm": 2.9815425872802734, "learning_rate": 1.8136772477378234e-05, "loss": 0.0457, "step": 5765 }, { "epoch": 5.5442307692307695, "grad_norm": 3.364492893218994, "learning_rate": 1.8136048282253145e-05, "loss": 0.0573, "step": 5766 }, { "epoch": 5.545192307692307, "grad_norm": 3.9536495208740234, "learning_rate": 1.8135323960880462e-05, "loss": 0.1105, "step": 5767 }, { "epoch": 5.546153846153846, "grad_norm": 2.2458198070526123, "learning_rate": 1.813459951327143e-05, "loss": 0.0261, "step": 5768 }, { "epoch": 5.547115384615385, "grad_norm": 2.218021869659424, "learning_rate": 1.8133874939437288e-05, "loss": 0.0585, "step": 5769 }, { "epoch": 5.548076923076923, "grad_norm": 3.0700225830078125, "learning_rate": 1.8133150239389276e-05, "loss": 0.0423, "step": 5770 }, { "epoch": 5.549038461538462, "grad_norm": 3.568930149078369, "learning_rate": 1.8132425413138647e-05, "loss": 0.052, "step": 5771 }, { "epoch": 5.55, "grad_norm": 3.0576469898223877, "learning_rate": 1.8131700460696644e-05, "loss": 0.0514, "step": 5772 }, { "epoch": 5.5509615384615385, "grad_norm": 4.5765700340271, "learning_rate": 1.8130975382074514e-05, "loss": 0.1254, "step": 5773 }, { "epoch": 5.551923076923077, "grad_norm": 3.16330623626709, "learning_rate": 1.813025017728351e-05, "loss": 0.0402, "step": 5774 }, { "epoch": 5.552884615384615, "grad_norm": 5.063016891479492, "learning_rate": 1.8129524846334886e-05, "loss": 0.1009, "step": 5775 }, { "epoch": 5.553846153846154, "grad_norm": 1.1781364679336548, "learning_rate": 1.81287993892399e-05, "loss": 0.0085, "step": 5776 }, { "epoch": 5.554807692307692, "grad_norm": 3.1330816745758057, "learning_rate": 1.81280738060098e-05, "loss": 0.0775, "step": 5777 }, { "epoch": 5.555769230769231, "grad_norm": 2.853607416152954, "learning_rate": 1.812734809665585e-05, "loss": 0.0536, "step": 5778 }, { "epoch": 5.55673076923077, "grad_norm": 3.1866841316223145, "learning_rate": 1.8126622261189318e-05, "loss": 0.0633, "step": 5779 }, { "epoch": 5.5576923076923075, "grad_norm": 4.125570297241211, "learning_rate": 1.8125896299621453e-05, "loss": 0.1189, "step": 5780 }, { "epoch": 5.558653846153846, "grad_norm": 3.8398029804229736, "learning_rate": 1.812517021196353e-05, "loss": 0.0662, "step": 5781 }, { "epoch": 5.559615384615385, "grad_norm": 2.9654316902160645, "learning_rate": 1.8124443998226807e-05, "loss": 0.0354, "step": 5782 }, { "epoch": 5.560576923076923, "grad_norm": 2.6680076122283936, "learning_rate": 1.8123717658422563e-05, "loss": 0.0697, "step": 5783 }, { "epoch": 5.561538461538461, "grad_norm": 3.1788692474365234, "learning_rate": 1.8122991192562062e-05, "loss": 0.0385, "step": 5784 }, { "epoch": 5.5625, "grad_norm": 1.4269123077392578, "learning_rate": 1.812226460065658e-05, "loss": 0.0136, "step": 5785 }, { "epoch": 5.563461538461539, "grad_norm": 3.5426840782165527, "learning_rate": 1.8121537882717392e-05, "loss": 0.0418, "step": 5786 }, { "epoch": 5.564423076923077, "grad_norm": 3.0939342975616455, "learning_rate": 1.8120811038755767e-05, "loss": 0.033, "step": 5787 }, { "epoch": 5.565384615384615, "grad_norm": 2.407285451889038, "learning_rate": 1.812008406878299e-05, "loss": 0.0469, "step": 5788 }, { "epoch": 5.566346153846154, "grad_norm": 5.015601634979248, "learning_rate": 1.8119356972810344e-05, "loss": 0.1419, "step": 5789 }, { "epoch": 5.5673076923076925, "grad_norm": 1.1575500965118408, "learning_rate": 1.8118629750849106e-05, "loss": 0.0097, "step": 5790 }, { "epoch": 5.56826923076923, "grad_norm": 2.6858270168304443, "learning_rate": 1.8117902402910564e-05, "loss": 0.0359, "step": 5791 }, { "epoch": 5.569230769230769, "grad_norm": 2.9800240993499756, "learning_rate": 1.8117174929006e-05, "loss": 0.0467, "step": 5792 }, { "epoch": 5.570192307692308, "grad_norm": 4.453067779541016, "learning_rate": 1.8116447329146704e-05, "loss": 0.0692, "step": 5793 }, { "epoch": 5.571153846153846, "grad_norm": 0.3718898892402649, "learning_rate": 1.811571960334397e-05, "loss": 0.0038, "step": 5794 }, { "epoch": 5.572115384615385, "grad_norm": 4.583909511566162, "learning_rate": 1.8114991751609082e-05, "loss": 0.124, "step": 5795 }, { "epoch": 5.573076923076923, "grad_norm": 3.381664514541626, "learning_rate": 1.8114263773953346e-05, "loss": 0.0872, "step": 5796 }, { "epoch": 5.5740384615384615, "grad_norm": 3.6771063804626465, "learning_rate": 1.8113535670388046e-05, "loss": 0.0682, "step": 5797 }, { "epoch": 5.575, "grad_norm": 4.004786491394043, "learning_rate": 1.8112807440924488e-05, "loss": 0.0913, "step": 5798 }, { "epoch": 5.575961538461538, "grad_norm": 5.74469518661499, "learning_rate": 1.8112079085573965e-05, "loss": 0.0299, "step": 5799 }, { "epoch": 5.576923076923077, "grad_norm": 3.0648720264434814, "learning_rate": 1.811135060434779e-05, "loss": 0.0325, "step": 5800 }, { "epoch": 5.577884615384615, "grad_norm": 1.372834324836731, "learning_rate": 1.8110621997257256e-05, "loss": 0.0137, "step": 5801 }, { "epoch": 5.578846153846154, "grad_norm": 2.6684274673461914, "learning_rate": 1.8109893264313675e-05, "loss": 0.0215, "step": 5802 }, { "epoch": 5.579807692307693, "grad_norm": 0.6123650670051575, "learning_rate": 1.810916440552835e-05, "loss": 0.0058, "step": 5803 }, { "epoch": 5.5807692307692305, "grad_norm": 2.5623891353607178, "learning_rate": 1.8108435420912596e-05, "loss": 0.03, "step": 5804 }, { "epoch": 5.581730769230769, "grad_norm": 3.7348897457122803, "learning_rate": 1.810770631047772e-05, "loss": 0.0837, "step": 5805 }, { "epoch": 5.582692307692308, "grad_norm": 3.1105666160583496, "learning_rate": 1.810697707423504e-05, "loss": 0.0313, "step": 5806 }, { "epoch": 5.5836538461538465, "grad_norm": 2.412026882171631, "learning_rate": 1.8106247712195868e-05, "loss": 0.0314, "step": 5807 }, { "epoch": 5.584615384615384, "grad_norm": 5.144499778747559, "learning_rate": 1.8105518224371527e-05, "loss": 0.132, "step": 5808 }, { "epoch": 5.585576923076923, "grad_norm": 3.371603012084961, "learning_rate": 1.8104788610773327e-05, "loss": 0.0278, "step": 5809 }, { "epoch": 5.586538461538462, "grad_norm": 3.077935218811035, "learning_rate": 1.8104058871412595e-05, "loss": 0.0542, "step": 5810 }, { "epoch": 5.5875, "grad_norm": 3.077935218811035, "learning_rate": 1.810332900630066e-05, "loss": 0.1182, "step": 5811 }, { "epoch": 5.588461538461538, "grad_norm": 5.3221893310546875, "learning_rate": 1.810332900630066e-05, "loss": 0.0656, "step": 5812 }, { "epoch": 5.589423076923077, "grad_norm": 2.985856294631958, "learning_rate": 1.8102599015448836e-05, "loss": 0.0745, "step": 5813 }, { "epoch": 5.5903846153846155, "grad_norm": 0.8206223249435425, "learning_rate": 1.810186889886846e-05, "loss": 0.0071, "step": 5814 }, { "epoch": 5.591346153846154, "grad_norm": 3.7621655464172363, "learning_rate": 1.8101138656570856e-05, "loss": 0.0979, "step": 5815 }, { "epoch": 5.592307692307692, "grad_norm": 2.2055959701538086, "learning_rate": 1.8100408288567356e-05, "loss": 0.0463, "step": 5816 }, { "epoch": 5.593269230769231, "grad_norm": 4.245020389556885, "learning_rate": 1.8099677794869297e-05, "loss": 0.0655, "step": 5817 }, { "epoch": 5.594230769230769, "grad_norm": 4.35905647277832, "learning_rate": 1.8098947175488007e-05, "loss": 0.1029, "step": 5818 }, { "epoch": 5.595192307692308, "grad_norm": 3.0804483890533447, "learning_rate": 1.8098216430434828e-05, "loss": 0.077, "step": 5819 }, { "epoch": 5.596153846153846, "grad_norm": 3.8122055530548096, "learning_rate": 1.80974855597211e-05, "loss": 0.0813, "step": 5820 }, { "epoch": 5.5971153846153845, "grad_norm": 2.8739986419677734, "learning_rate": 1.8096754563358157e-05, "loss": 0.0702, "step": 5821 }, { "epoch": 5.598076923076923, "grad_norm": 0.5359196066856384, "learning_rate": 1.8096023441357353e-05, "loss": 0.0064, "step": 5822 }, { "epoch": 5.599038461538462, "grad_norm": 3.157817840576172, "learning_rate": 1.8095292193730026e-05, "loss": 0.0512, "step": 5823 }, { "epoch": 5.6, "grad_norm": 2.7603282928466797, "learning_rate": 1.809456082048752e-05, "loss": 0.0903, "step": 5824 }, { "epoch": 5.600961538461538, "grad_norm": 5.991824626922607, "learning_rate": 1.8093829321641187e-05, "loss": 0.0906, "step": 5825 }, { "epoch": 5.601923076923077, "grad_norm": 2.518259048461914, "learning_rate": 1.8093097697202383e-05, "loss": 0.0246, "step": 5826 }, { "epoch": 5.602884615384616, "grad_norm": 2.5108754634857178, "learning_rate": 1.809236594718245e-05, "loss": 0.0368, "step": 5827 }, { "epoch": 5.6038461538461535, "grad_norm": 2.969957113265991, "learning_rate": 1.809163407159275e-05, "loss": 0.0227, "step": 5828 }, { "epoch": 5.604807692307692, "grad_norm": 2.727419137954712, "learning_rate": 1.809090207044464e-05, "loss": 0.0665, "step": 5829 }, { "epoch": 5.605769230769231, "grad_norm": 4.27468729019165, "learning_rate": 1.8090169943749477e-05, "loss": 0.1564, "step": 5830 }, { "epoch": 5.6067307692307695, "grad_norm": 4.505585193634033, "learning_rate": 1.8089437691518618e-05, "loss": 0.0943, "step": 5831 }, { "epoch": 5.607692307692307, "grad_norm": 4.6466145515441895, "learning_rate": 1.808870531376343e-05, "loss": 0.0541, "step": 5832 }, { "epoch": 5.608653846153846, "grad_norm": 4.2483906745910645, "learning_rate": 1.8087972810495273e-05, "loss": 0.0924, "step": 5833 }, { "epoch": 5.609615384615385, "grad_norm": 3.879483699798584, "learning_rate": 1.808724018172552e-05, "loss": 0.0459, "step": 5834 }, { "epoch": 5.610576923076923, "grad_norm": 5.21394681930542, "learning_rate": 1.808650742746553e-05, "loss": 0.3923, "step": 5835 }, { "epoch": 5.611538461538462, "grad_norm": 2.696056365966797, "learning_rate": 1.8085774547726684e-05, "loss": 0.0323, "step": 5836 }, { "epoch": 5.6125, "grad_norm": 3.8577163219451904, "learning_rate": 1.8085041542520347e-05, "loss": 0.1113, "step": 5837 }, { "epoch": 5.6134615384615385, "grad_norm": 2.1239192485809326, "learning_rate": 1.8084308411857893e-05, "loss": 0.0292, "step": 5838 }, { "epoch": 5.614423076923077, "grad_norm": 4.00819206237793, "learning_rate": 1.80835751557507e-05, "loss": 0.0795, "step": 5839 }, { "epoch": 5.615384615384615, "grad_norm": 9.508289337158203, "learning_rate": 1.808284177421014e-05, "loss": 0.0686, "step": 5840 }, { "epoch": 5.616346153846154, "grad_norm": 5.854674339294434, "learning_rate": 1.8082108267247605e-05, "loss": 0.0983, "step": 5841 }, { "epoch": 5.617307692307692, "grad_norm": 4.390746116638184, "learning_rate": 1.8081374634874472e-05, "loss": 0.0246, "step": 5842 }, { "epoch": 5.618269230769231, "grad_norm": 3.0363049507141113, "learning_rate": 1.808064087710212e-05, "loss": 0.1036, "step": 5843 }, { "epoch": 5.61923076923077, "grad_norm": 1.7550675868988037, "learning_rate": 1.8079906993941938e-05, "loss": 0.0214, "step": 5844 }, { "epoch": 5.6201923076923075, "grad_norm": 2.428579092025757, "learning_rate": 1.8079172985405315e-05, "loss": 0.021, "step": 5845 }, { "epoch": 5.621153846153846, "grad_norm": 4.7719831466674805, "learning_rate": 1.8078438851503638e-05, "loss": 0.0639, "step": 5846 }, { "epoch": 5.622115384615385, "grad_norm": 1.8563413619995117, "learning_rate": 1.80777045922483e-05, "loss": 0.019, "step": 5847 }, { "epoch": 5.623076923076923, "grad_norm": 4.31284236907959, "learning_rate": 1.8076970207650694e-05, "loss": 0.1277, "step": 5848 }, { "epoch": 5.624038461538461, "grad_norm": 5.399333953857422, "learning_rate": 1.807623569772222e-05, "loss": 0.1498, "step": 5849 }, { "epoch": 5.625, "grad_norm": 2.8720693588256836, "learning_rate": 1.8075501062474266e-05, "loss": 0.0258, "step": 5850 }, { "epoch": 5.625961538461539, "grad_norm": 1.5989071130752563, "learning_rate": 1.8074766301918242e-05, "loss": 0.0252, "step": 5851 }, { "epoch": 5.626923076923077, "grad_norm": 3.7100026607513428, "learning_rate": 1.807403141606554e-05, "loss": 0.0645, "step": 5852 }, { "epoch": 5.627884615384615, "grad_norm": 4.258780002593994, "learning_rate": 1.807329640492757e-05, "loss": 0.0545, "step": 5853 }, { "epoch": 5.628846153846154, "grad_norm": 3.3726887702941895, "learning_rate": 1.8072561268515733e-05, "loss": 0.0565, "step": 5854 }, { "epoch": 5.6298076923076925, "grad_norm": 2.5961740016937256, "learning_rate": 1.8071826006841437e-05, "loss": 0.0542, "step": 5855 }, { "epoch": 5.63076923076923, "grad_norm": 5.955567836761475, "learning_rate": 1.8071090619916095e-05, "loss": 0.0583, "step": 5856 }, { "epoch": 5.631730769230769, "grad_norm": 3.1652936935424805, "learning_rate": 1.807035510775111e-05, "loss": 0.0295, "step": 5857 }, { "epoch": 5.632692307692308, "grad_norm": 2.6300013065338135, "learning_rate": 1.8069619470357906e-05, "loss": 0.0607, "step": 5858 }, { "epoch": 5.633653846153846, "grad_norm": 4.573586940765381, "learning_rate": 1.806888370774789e-05, "loss": 0.0412, "step": 5859 }, { "epoch": 5.634615384615385, "grad_norm": 4.480947494506836, "learning_rate": 1.8068147819932476e-05, "loss": 0.0461, "step": 5860 }, { "epoch": 5.635576923076923, "grad_norm": 3.3396894931793213, "learning_rate": 1.8067411806923094e-05, "loss": 0.0436, "step": 5861 }, { "epoch": 5.6365384615384615, "grad_norm": 4.3808417320251465, "learning_rate": 1.8066675668731156e-05, "loss": 0.0694, "step": 5862 }, { "epoch": 5.6375, "grad_norm": 5.789760112762451, "learning_rate": 1.8065939405368084e-05, "loss": 0.125, "step": 5863 }, { "epoch": 5.638461538461538, "grad_norm": 1.172364592552185, "learning_rate": 1.8065203016845313e-05, "loss": 0.0115, "step": 5864 }, { "epoch": 5.639423076923077, "grad_norm": 4.241501808166504, "learning_rate": 1.8064466503174257e-05, "loss": 0.0565, "step": 5865 }, { "epoch": 5.640384615384615, "grad_norm": 3.4899067878723145, "learning_rate": 1.806372986436635e-05, "loss": 0.051, "step": 5866 }, { "epoch": 5.641346153846154, "grad_norm": 4.813353061676025, "learning_rate": 1.806299310043302e-05, "loss": 0.1049, "step": 5867 }, { "epoch": 5.642307692307693, "grad_norm": 3.338641881942749, "learning_rate": 1.8062256211385706e-05, "loss": 0.0851, "step": 5868 }, { "epoch": 5.6432692307692305, "grad_norm": 1.3329018354415894, "learning_rate": 1.8061519197235835e-05, "loss": 0.0113, "step": 5869 }, { "epoch": 5.644230769230769, "grad_norm": 3.6986072063446045, "learning_rate": 1.806078205799485e-05, "loss": 0.0546, "step": 5870 }, { "epoch": 5.645192307692308, "grad_norm": 2.4383041858673096, "learning_rate": 1.806004479367418e-05, "loss": 0.0301, "step": 5871 }, { "epoch": 5.6461538461538465, "grad_norm": 3.799682140350342, "learning_rate": 1.8059307404285276e-05, "loss": 0.0424, "step": 5872 }, { "epoch": 5.647115384615384, "grad_norm": 1.5749356746673584, "learning_rate": 1.8058569889839572e-05, "loss": 0.0227, "step": 5873 }, { "epoch": 5.648076923076923, "grad_norm": 3.9248239994049072, "learning_rate": 1.805783225034852e-05, "loss": 0.0674, "step": 5874 }, { "epoch": 5.649038461538462, "grad_norm": 1.8081797361373901, "learning_rate": 1.805709448582355e-05, "loss": 0.0266, "step": 5875 }, { "epoch": 5.65, "grad_norm": 2.0034890174865723, "learning_rate": 1.8056356596276133e-05, "loss": 0.0283, "step": 5876 }, { "epoch": 5.650961538461538, "grad_norm": 4.059054851531982, "learning_rate": 1.80556185817177e-05, "loss": 0.0702, "step": 5877 }, { "epoch": 5.651923076923077, "grad_norm": 4.163649559020996, "learning_rate": 1.805488044215971e-05, "loss": 0.1021, "step": 5878 }, { "epoch": 5.6528846153846155, "grad_norm": 2.5416789054870605, "learning_rate": 1.805414217761362e-05, "loss": 0.0244, "step": 5879 }, { "epoch": 5.653846153846154, "grad_norm": 5.36486291885376, "learning_rate": 1.8053403788090876e-05, "loss": 0.0649, "step": 5880 }, { "epoch": 5.654807692307692, "grad_norm": 1.2292866706848145, "learning_rate": 1.8052665273602947e-05, "loss": 0.0102, "step": 5881 }, { "epoch": 5.655769230769231, "grad_norm": 4.239928722381592, "learning_rate": 1.8051926634161282e-05, "loss": 0.0888, "step": 5882 }, { "epoch": 5.656730769230769, "grad_norm": 4.667212963104248, "learning_rate": 1.8051187869777353e-05, "loss": 0.0747, "step": 5883 }, { "epoch": 5.657692307692308, "grad_norm": 2.1435089111328125, "learning_rate": 1.8050448980462616e-05, "loss": 0.015, "step": 5884 }, { "epoch": 5.658653846153846, "grad_norm": 3.931584358215332, "learning_rate": 1.804970996622854e-05, "loss": 0.132, "step": 5885 }, { "epoch": 5.6596153846153845, "grad_norm": 3.057400941848755, "learning_rate": 1.804897082708659e-05, "loss": 0.0565, "step": 5886 }, { "epoch": 5.660576923076923, "grad_norm": 2.7167928218841553, "learning_rate": 1.8048231563048234e-05, "loss": 0.0419, "step": 5887 }, { "epoch": 5.661538461538462, "grad_norm": 2.072380304336548, "learning_rate": 1.804749217412495e-05, "loss": 0.0214, "step": 5888 }, { "epoch": 5.6625, "grad_norm": 5.129115104675293, "learning_rate": 1.80467526603282e-05, "loss": 0.0859, "step": 5889 }, { "epoch": 5.663461538461538, "grad_norm": 4.430208206176758, "learning_rate": 1.804601302166947e-05, "loss": 0.0325, "step": 5890 }, { "epoch": 5.664423076923077, "grad_norm": 2.192012310028076, "learning_rate": 1.8045273258160234e-05, "loss": 0.0217, "step": 5891 }, { "epoch": 5.665384615384616, "grad_norm": 2.8958306312561035, "learning_rate": 1.804453336981197e-05, "loss": 0.0361, "step": 5892 }, { "epoch": 5.6663461538461535, "grad_norm": 2.063307046890259, "learning_rate": 1.8043793356636154e-05, "loss": 0.0203, "step": 5893 }, { "epoch": 5.667307692307692, "grad_norm": 4.848748207092285, "learning_rate": 1.8043053218644274e-05, "loss": 0.0583, "step": 5894 }, { "epoch": 5.668269230769231, "grad_norm": 1.3974913358688354, "learning_rate": 1.804231295584782e-05, "loss": 0.0254, "step": 5895 }, { "epoch": 5.6692307692307695, "grad_norm": 4.176110744476318, "learning_rate": 1.8041572568258267e-05, "loss": 0.1126, "step": 5896 }, { "epoch": 5.670192307692307, "grad_norm": 0.581260621547699, "learning_rate": 1.804083205588711e-05, "loss": 0.005, "step": 5897 }, { "epoch": 5.671153846153846, "grad_norm": 3.5710396766662598, "learning_rate": 1.804009141874584e-05, "loss": 0.05, "step": 5898 }, { "epoch": 5.672115384615385, "grad_norm": 3.5053162574768066, "learning_rate": 1.8039350656845948e-05, "loss": 0.0729, "step": 5899 }, { "epoch": 5.673076923076923, "grad_norm": 3.2721340656280518, "learning_rate": 1.8038609770198933e-05, "loss": 0.0862, "step": 5900 }, { "epoch": 5.674038461538462, "grad_norm": 1.9221910238265991, "learning_rate": 1.8037868758816283e-05, "loss": 0.0221, "step": 5901 }, { "epoch": 5.675, "grad_norm": 1.8208681344985962, "learning_rate": 1.80371276227095e-05, "loss": 0.0434, "step": 5902 }, { "epoch": 5.6759615384615385, "grad_norm": 1.9276585578918457, "learning_rate": 1.803638636189009e-05, "loss": 0.0251, "step": 5903 }, { "epoch": 5.676923076923077, "grad_norm": 2.0447838306427, "learning_rate": 1.8035644976369546e-05, "loss": 0.0276, "step": 5904 }, { "epoch": 5.677884615384615, "grad_norm": 2.9689395427703857, "learning_rate": 1.8034903466159376e-05, "loss": 0.0779, "step": 5905 }, { "epoch": 5.678846153846154, "grad_norm": 2.464698076248169, "learning_rate": 1.8034161831271088e-05, "loss": 0.0323, "step": 5906 }, { "epoch": 5.679807692307692, "grad_norm": 4.901663780212402, "learning_rate": 1.8033420071716186e-05, "loss": 0.0795, "step": 5907 }, { "epoch": 5.680769230769231, "grad_norm": 1.357141137123108, "learning_rate": 1.8032678187506187e-05, "loss": 0.0114, "step": 5908 }, { "epoch": 5.68173076923077, "grad_norm": 4.1528000831604, "learning_rate": 1.8031936178652596e-05, "loss": 0.0367, "step": 5909 }, { "epoch": 5.6826923076923075, "grad_norm": 2.073016881942749, "learning_rate": 1.803119404516693e-05, "loss": 0.0273, "step": 5910 }, { "epoch": 5.683653846153846, "grad_norm": 5.557704448699951, "learning_rate": 1.80304517870607e-05, "loss": 0.1323, "step": 5911 }, { "epoch": 5.684615384615385, "grad_norm": 2.622659921646118, "learning_rate": 1.802970940434543e-05, "loss": 0.0453, "step": 5912 }, { "epoch": 5.685576923076923, "grad_norm": 0.6926822662353516, "learning_rate": 1.8028966897032635e-05, "loss": 0.0058, "step": 5913 }, { "epoch": 5.686538461538461, "grad_norm": 1.8692914247512817, "learning_rate": 1.8028224265133842e-05, "loss": 0.0123, "step": 5914 }, { "epoch": 5.6875, "grad_norm": 2.492053747177124, "learning_rate": 1.802748150866057e-05, "loss": 0.0218, "step": 5915 }, { "epoch": 5.688461538461539, "grad_norm": 5.051177978515625, "learning_rate": 1.802673862762434e-05, "loss": 0.0889, "step": 5916 }, { "epoch": 5.689423076923077, "grad_norm": 1.834667444229126, "learning_rate": 1.8025995622036693e-05, "loss": 0.0384, "step": 5917 }, { "epoch": 5.690384615384615, "grad_norm": 3.429278612136841, "learning_rate": 1.8025252491909143e-05, "loss": 0.0759, "step": 5918 }, { "epoch": 5.691346153846154, "grad_norm": 4.176840305328369, "learning_rate": 1.8024509237253235e-05, "loss": 0.0371, "step": 5919 }, { "epoch": 5.6923076923076925, "grad_norm": 4.403529644012451, "learning_rate": 1.8023765858080492e-05, "loss": 0.1102, "step": 5920 }, { "epoch": 5.69326923076923, "grad_norm": 2.227341413497925, "learning_rate": 1.802302235440245e-05, "loss": 0.03, "step": 5921 }, { "epoch": 5.694230769230769, "grad_norm": 2.2511589527130127, "learning_rate": 1.8022278726230648e-05, "loss": 0.0203, "step": 5922 }, { "epoch": 5.695192307692308, "grad_norm": 4.1400065422058105, "learning_rate": 1.802153497357663e-05, "loss": 0.0571, "step": 5923 }, { "epoch": 5.696153846153846, "grad_norm": 3.437307119369507, "learning_rate": 1.802079109645193e-05, "loss": 0.0948, "step": 5924 }, { "epoch": 5.697115384615385, "grad_norm": 3.1346378326416016, "learning_rate": 1.8020047094868092e-05, "loss": 0.069, "step": 5925 }, { "epoch": 5.698076923076923, "grad_norm": 3.7788991928100586, "learning_rate": 1.8019302968836662e-05, "loss": 0.0691, "step": 5926 }, { "epoch": 5.6990384615384615, "grad_norm": 3.214423418045044, "learning_rate": 1.8018558718369187e-05, "loss": 0.044, "step": 5927 }, { "epoch": 5.7, "grad_norm": 5.100910186767578, "learning_rate": 1.8017814343477214e-05, "loss": 0.146, "step": 5928 }, { "epoch": 5.700961538461538, "grad_norm": 3.0408921241760254, "learning_rate": 1.8017069844172296e-05, "loss": 0.0358, "step": 5929 }, { "epoch": 5.701923076923077, "grad_norm": 4.8158979415893555, "learning_rate": 1.8016325220465984e-05, "loss": 0.0758, "step": 5930 }, { "epoch": 5.702884615384615, "grad_norm": 2.518409252166748, "learning_rate": 1.801558047236983e-05, "loss": 0.0341, "step": 5931 }, { "epoch": 5.703846153846154, "grad_norm": 4.411163806915283, "learning_rate": 1.801483559989539e-05, "loss": 0.0869, "step": 5932 }, { "epoch": 5.704807692307693, "grad_norm": 4.773325443267822, "learning_rate": 1.801409060305423e-05, "loss": 0.1612, "step": 5933 }, { "epoch": 5.7057692307692305, "grad_norm": 3.4660701751708984, "learning_rate": 1.8013345481857903e-05, "loss": 0.0523, "step": 5934 }, { "epoch": 5.706730769230769, "grad_norm": 2.509589433670044, "learning_rate": 1.801260023631797e-05, "loss": 0.0489, "step": 5935 }, { "epoch": 5.707692307692308, "grad_norm": 2.546891450881958, "learning_rate": 1.8011854866446e-05, "loss": 0.0227, "step": 5936 }, { "epoch": 5.7086538461538465, "grad_norm": 3.793225049972534, "learning_rate": 1.801110937225356e-05, "loss": 0.103, "step": 5937 }, { "epoch": 5.709615384615384, "grad_norm": 2.3620505332946777, "learning_rate": 1.8010363753752212e-05, "loss": 0.0245, "step": 5938 }, { "epoch": 5.710576923076923, "grad_norm": 3.9044198989868164, "learning_rate": 1.8009618010953532e-05, "loss": 0.0407, "step": 5939 }, { "epoch": 5.711538461538462, "grad_norm": 3.606743335723877, "learning_rate": 1.8008872143869085e-05, "loss": 0.0547, "step": 5940 }, { "epoch": 5.7125, "grad_norm": 4.420180797576904, "learning_rate": 1.800812615251045e-05, "loss": 0.0715, "step": 5941 }, { "epoch": 5.713461538461538, "grad_norm": 2.5560688972473145, "learning_rate": 1.8007380036889198e-05, "loss": 0.0414, "step": 5942 }, { "epoch": 5.714423076923077, "grad_norm": 2.8595821857452393, "learning_rate": 1.8006633797016914e-05, "loss": 0.108, "step": 5943 }, { "epoch": 5.7153846153846155, "grad_norm": 3.3292107582092285, "learning_rate": 1.8005887432905167e-05, "loss": 0.0867, "step": 5944 }, { "epoch": 5.716346153846154, "grad_norm": 3.5590696334838867, "learning_rate": 1.8005140944565546e-05, "loss": 0.0601, "step": 5945 }, { "epoch": 5.717307692307692, "grad_norm": 2.0970382690429688, "learning_rate": 1.8004394332009634e-05, "loss": 0.0229, "step": 5946 }, { "epoch": 5.718269230769231, "grad_norm": 4.264588832855225, "learning_rate": 1.8003647595249016e-05, "loss": 0.0503, "step": 5947 }, { "epoch": 5.719230769230769, "grad_norm": 2.782390594482422, "learning_rate": 1.8002900734295274e-05, "loss": 0.0444, "step": 5948 }, { "epoch": 5.720192307692308, "grad_norm": 3.5441768169403076, "learning_rate": 1.800215374916e-05, "loss": 0.0405, "step": 5949 }, { "epoch": 5.721153846153846, "grad_norm": 3.2857887744903564, "learning_rate": 1.800140663985479e-05, "loss": 0.0742, "step": 5950 }, { "epoch": 5.7221153846153845, "grad_norm": 1.9548784494400024, "learning_rate": 1.800065940639123e-05, "loss": 0.0426, "step": 5951 }, { "epoch": 5.723076923076923, "grad_norm": 3.672727346420288, "learning_rate": 1.799991204878092e-05, "loss": 0.1126, "step": 5952 }, { "epoch": 5.724038461538462, "grad_norm": 4.0785651206970215, "learning_rate": 1.799916456703545e-05, "loss": 0.0802, "step": 5953 }, { "epoch": 5.725, "grad_norm": 4.143614292144775, "learning_rate": 1.7998416961166426e-05, "loss": 0.083, "step": 5954 }, { "epoch": 5.725961538461538, "grad_norm": 1.9627259969711304, "learning_rate": 1.799766923118545e-05, "loss": 0.029, "step": 5955 }, { "epoch": 5.726923076923077, "grad_norm": 2.9484035968780518, "learning_rate": 1.7996921377104114e-05, "loss": 0.0238, "step": 5956 }, { "epoch": 5.727884615384616, "grad_norm": 2.2083168029785156, "learning_rate": 1.799617339893403e-05, "loss": 0.0351, "step": 5957 }, { "epoch": 5.7288461538461535, "grad_norm": 4.9102654457092285, "learning_rate": 1.7995425296686804e-05, "loss": 0.0926, "step": 5958 }, { "epoch": 5.729807692307692, "grad_norm": 2.6158409118652344, "learning_rate": 1.7994677070374048e-05, "loss": 0.0363, "step": 5959 }, { "epoch": 5.730769230769231, "grad_norm": 3.0150692462921143, "learning_rate": 1.799392872000736e-05, "loss": 0.1422, "step": 5960 }, { "epoch": 5.7317307692307695, "grad_norm": 1.9615241289138794, "learning_rate": 1.7993180245598366e-05, "loss": 0.0174, "step": 5961 }, { "epoch": 5.732692307692307, "grad_norm": 1.432411789894104, "learning_rate": 1.7992431647158673e-05, "loss": 0.0128, "step": 5962 }, { "epoch": 5.733653846153846, "grad_norm": 4.122586727142334, "learning_rate": 1.79916829246999e-05, "loss": 0.0456, "step": 5963 }, { "epoch": 5.734615384615385, "grad_norm": 3.3686108589172363, "learning_rate": 1.799093407823366e-05, "loss": 0.0498, "step": 5964 }, { "epoch": 5.735576923076923, "grad_norm": 1.7317819595336914, "learning_rate": 1.7990185107771575e-05, "loss": 0.0545, "step": 5965 }, { "epoch": 5.736538461538462, "grad_norm": 6.634426116943359, "learning_rate": 1.798943601332527e-05, "loss": 0.0901, "step": 5966 }, { "epoch": 5.7375, "grad_norm": 1.593829870223999, "learning_rate": 1.7988686794906368e-05, "loss": 0.0163, "step": 5967 }, { "epoch": 5.7384615384615385, "grad_norm": 2.934176445007324, "learning_rate": 1.7987937452526494e-05, "loss": 0.0356, "step": 5968 }, { "epoch": 5.739423076923077, "grad_norm": 2.8452770709991455, "learning_rate": 1.7987187986197273e-05, "loss": 0.0336, "step": 5969 }, { "epoch": 5.740384615384615, "grad_norm": 2.3095004558563232, "learning_rate": 1.798643839593034e-05, "loss": 0.0216, "step": 5970 }, { "epoch": 5.741346153846154, "grad_norm": 3.4982643127441406, "learning_rate": 1.798568868173732e-05, "loss": 0.0602, "step": 5971 }, { "epoch": 5.742307692307692, "grad_norm": 3.227546215057373, "learning_rate": 1.798493884362985e-05, "loss": 0.1091, "step": 5972 }, { "epoch": 5.743269230769231, "grad_norm": 2.368565320968628, "learning_rate": 1.7984188881619563e-05, "loss": 0.0259, "step": 5973 }, { "epoch": 5.74423076923077, "grad_norm": 2.2548866271972656, "learning_rate": 1.79834387957181e-05, "loss": 0.0288, "step": 5974 }, { "epoch": 5.7451923076923075, "grad_norm": 5.383958339691162, "learning_rate": 1.7982688585937098e-05, "loss": 0.1391, "step": 5975 }, { "epoch": 5.746153846153846, "grad_norm": 2.4026050567626953, "learning_rate": 1.7981938252288196e-05, "loss": 0.0339, "step": 5976 }, { "epoch": 5.747115384615385, "grad_norm": 3.783297300338745, "learning_rate": 1.798118779478304e-05, "loss": 0.062, "step": 5977 }, { "epoch": 5.748076923076923, "grad_norm": 4.347620964050293, "learning_rate": 1.7980437213433274e-05, "loss": 0.0715, "step": 5978 }, { "epoch": 5.749038461538461, "grad_norm": 3.1272740364074707, "learning_rate": 1.7979686508250548e-05, "loss": 0.0295, "step": 5979 }, { "epoch": 5.75, "grad_norm": 2.914482593536377, "learning_rate": 1.7978935679246502e-05, "loss": 0.0373, "step": 5980 }, { "epoch": 5.750961538461539, "grad_norm": 3.483715772628784, "learning_rate": 1.7978184726432796e-05, "loss": 0.0764, "step": 5981 }, { "epoch": 5.751923076923077, "grad_norm": 3.1799428462982178, "learning_rate": 1.797743364982108e-05, "loss": 0.0312, "step": 5982 }, { "epoch": 5.752884615384615, "grad_norm": 3.092099905014038, "learning_rate": 1.7976682449423e-05, "loss": 0.061, "step": 5983 }, { "epoch": 5.753846153846154, "grad_norm": 2.9966111183166504, "learning_rate": 1.7975931125250227e-05, "loss": 0.0245, "step": 5984 }, { "epoch": 5.7548076923076925, "grad_norm": 2.1913959980010986, "learning_rate": 1.797517967731441e-05, "loss": 0.024, "step": 5985 }, { "epoch": 5.75576923076923, "grad_norm": 6.678224563598633, "learning_rate": 1.797442810562721e-05, "loss": 0.0547, "step": 5986 }, { "epoch": 5.756730769230769, "grad_norm": 3.5030574798583984, "learning_rate": 1.797367641020029e-05, "loss": 0.0541, "step": 5987 }, { "epoch": 5.757692307692308, "grad_norm": 4.639074802398682, "learning_rate": 1.7972924591045314e-05, "loss": 0.0548, "step": 5988 }, { "epoch": 5.758653846153846, "grad_norm": 4.044325351715088, "learning_rate": 1.7972172648173952e-05, "loss": 0.0485, "step": 5989 }, { "epoch": 5.759615384615385, "grad_norm": 2.335986375808716, "learning_rate": 1.797142058159787e-05, "loss": 0.0676, "step": 5990 }, { "epoch": 5.760576923076923, "grad_norm": 1.3721909523010254, "learning_rate": 1.797066839132873e-05, "loss": 0.011, "step": 5991 }, { "epoch": 5.7615384615384615, "grad_norm": 2.378204584121704, "learning_rate": 1.7969916077378214e-05, "loss": 0.0305, "step": 5992 }, { "epoch": 5.7625, "grad_norm": 4.459324836730957, "learning_rate": 1.7969163639757994e-05, "loss": 0.0442, "step": 5993 }, { "epoch": 5.763461538461538, "grad_norm": 3.1363680362701416, "learning_rate": 1.796841107847974e-05, "loss": 0.0412, "step": 5994 }, { "epoch": 5.764423076923077, "grad_norm": 3.5064592361450195, "learning_rate": 1.7967658393555135e-05, "loss": 0.0366, "step": 5995 }, { "epoch": 5.765384615384615, "grad_norm": 4.895812511444092, "learning_rate": 1.7966905584995854e-05, "loss": 0.1279, "step": 5996 }, { "epoch": 5.766346153846154, "grad_norm": 6.335011959075928, "learning_rate": 1.796615265281358e-05, "loss": 0.2019, "step": 5997 }, { "epoch": 5.767307692307693, "grad_norm": 5.194746971130371, "learning_rate": 1.7965399597020002e-05, "loss": 0.0347, "step": 5998 }, { "epoch": 5.7682692307692305, "grad_norm": 3.3243184089660645, "learning_rate": 1.79646464176268e-05, "loss": 0.0413, "step": 5999 }, { "epoch": 5.769230769230769, "grad_norm": 5.626204490661621, "learning_rate": 1.796389311464566e-05, "loss": 0.1358, "step": 6000 }, { "epoch": 5.770192307692308, "grad_norm": 3.13252854347229, "learning_rate": 1.7963139688088273e-05, "loss": 0.0296, "step": 6001 }, { "epoch": 5.7711538461538465, "grad_norm": 3.916825294494629, "learning_rate": 1.7962386137966327e-05, "loss": 0.0625, "step": 6002 }, { "epoch": 5.772115384615384, "grad_norm": 5.139433860778809, "learning_rate": 1.796163246429152e-05, "loss": 0.0377, "step": 6003 }, { "epoch": 5.773076923076923, "grad_norm": 3.941309690475464, "learning_rate": 1.7960878667075542e-05, "loss": 0.0587, "step": 6004 }, { "epoch": 5.774038461538462, "grad_norm": 3.8067679405212402, "learning_rate": 1.796012474633009e-05, "loss": 0.0851, "step": 6005 }, { "epoch": 5.775, "grad_norm": 2.5834367275238037, "learning_rate": 1.795937070206687e-05, "loss": 0.0376, "step": 6006 }, { "epoch": 5.775961538461538, "grad_norm": 3.693025588989258, "learning_rate": 1.7958616534297577e-05, "loss": 0.0913, "step": 6007 }, { "epoch": 5.776923076923077, "grad_norm": 3.5059990882873535, "learning_rate": 1.795786224303391e-05, "loss": 0.0507, "step": 6008 }, { "epoch": 5.7778846153846155, "grad_norm": 4.036098003387451, "learning_rate": 1.795710782828758e-05, "loss": 0.0944, "step": 6009 }, { "epoch": 5.778846153846154, "grad_norm": 2.710350275039673, "learning_rate": 1.795635329007029e-05, "loss": 0.0218, "step": 6010 }, { "epoch": 5.779807692307692, "grad_norm": 4.12847900390625, "learning_rate": 1.7955598628393746e-05, "loss": 0.0328, "step": 6011 }, { "epoch": 5.780769230769231, "grad_norm": 4.554603099822998, "learning_rate": 1.7954843843269665e-05, "loss": 0.1106, "step": 6012 }, { "epoch": 5.781730769230769, "grad_norm": 3.632373332977295, "learning_rate": 1.7954088934709753e-05, "loss": 0.076, "step": 6013 }, { "epoch": 5.782692307692308, "grad_norm": 1.649719476699829, "learning_rate": 1.7953333902725726e-05, "loss": 0.0132, "step": 6014 }, { "epoch": 5.783653846153846, "grad_norm": 3.101991653442383, "learning_rate": 1.7952578747329297e-05, "loss": 0.0693, "step": 6015 }, { "epoch": 5.7846153846153845, "grad_norm": 7.698202610015869, "learning_rate": 1.7951823468532188e-05, "loss": 0.2768, "step": 6016 }, { "epoch": 5.785576923076923, "grad_norm": 2.914172887802124, "learning_rate": 1.7951068066346116e-05, "loss": 0.0578, "step": 6017 }, { "epoch": 5.786538461538462, "grad_norm": 4.440522193908691, "learning_rate": 1.7950312540782808e-05, "loss": 0.0736, "step": 6018 }, { "epoch": 5.7875, "grad_norm": 3.039175271987915, "learning_rate": 1.7949556891853975e-05, "loss": 0.0651, "step": 6019 }, { "epoch": 5.788461538461538, "grad_norm": 1.4829909801483154, "learning_rate": 1.7948801119571358e-05, "loss": 0.0105, "step": 6020 }, { "epoch": 5.789423076923077, "grad_norm": 5.736337184906006, "learning_rate": 1.7948045223946676e-05, "loss": 0.1353, "step": 6021 }, { "epoch": 5.790384615384616, "grad_norm": 1.635647177696228, "learning_rate": 1.7947289204991657e-05, "loss": 0.0139, "step": 6022 }, { "epoch": 5.7913461538461535, "grad_norm": 2.2043917179107666, "learning_rate": 1.7946533062718035e-05, "loss": 0.0227, "step": 6023 }, { "epoch": 5.792307692307692, "grad_norm": 3.548450231552124, "learning_rate": 1.7945776797137544e-05, "loss": 0.0745, "step": 6024 }, { "epoch": 5.793269230769231, "grad_norm": 4.342573165893555, "learning_rate": 1.794502040826192e-05, "loss": 0.055, "step": 6025 }, { "epoch": 5.7942307692307695, "grad_norm": 3.169646739959717, "learning_rate": 1.7944263896102893e-05, "loss": 0.0262, "step": 6026 }, { "epoch": 5.795192307692307, "grad_norm": 2.762453317642212, "learning_rate": 1.794350726067221e-05, "loss": 0.0407, "step": 6027 }, { "epoch": 5.796153846153846, "grad_norm": 2.0413827896118164, "learning_rate": 1.79427505019816e-05, "loss": 0.0393, "step": 6028 }, { "epoch": 5.797115384615385, "grad_norm": 3.333641767501831, "learning_rate": 1.7941993620042825e-05, "loss": 0.0327, "step": 6029 }, { "epoch": 5.798076923076923, "grad_norm": 0.4367358982563019, "learning_rate": 1.7941236614867615e-05, "loss": 0.0042, "step": 6030 }, { "epoch": 5.799038461538462, "grad_norm": 1.9723552465438843, "learning_rate": 1.7940479486467713e-05, "loss": 0.0307, "step": 6031 }, { "epoch": 5.8, "grad_norm": 3.9079508781433105, "learning_rate": 1.7939722234854884e-05, "loss": 0.0465, "step": 6032 }, { "epoch": 5.8009615384615385, "grad_norm": 1.9624496698379517, "learning_rate": 1.7938964860040863e-05, "loss": 0.0211, "step": 6033 }, { "epoch": 5.801923076923077, "grad_norm": 3.537116765975952, "learning_rate": 1.7938207362037412e-05, "loss": 0.0421, "step": 6034 }, { "epoch": 5.802884615384615, "grad_norm": 0.6333282589912415, "learning_rate": 1.793744974085628e-05, "loss": 0.0055, "step": 6035 }, { "epoch": 5.803846153846154, "grad_norm": 5.041123390197754, "learning_rate": 1.793669199650922e-05, "loss": 0.1301, "step": 6036 }, { "epoch": 5.804807692307692, "grad_norm": 2.2908005714416504, "learning_rate": 1.7935934129008e-05, "loss": 0.0524, "step": 6037 }, { "epoch": 5.805769230769231, "grad_norm": 4.13927698135376, "learning_rate": 1.793517613836437e-05, "loss": 0.0184, "step": 6038 }, { "epoch": 5.80673076923077, "grad_norm": 2.272753953933716, "learning_rate": 1.7934418024590096e-05, "loss": 0.0379, "step": 6039 }, { "epoch": 5.8076923076923075, "grad_norm": 1.620698094367981, "learning_rate": 1.7933659787696943e-05, "loss": 0.0168, "step": 6040 }, { "epoch": 5.808653846153846, "grad_norm": 1.3289623260498047, "learning_rate": 1.7932901427696674e-05, "loss": 0.0092, "step": 6041 }, { "epoch": 5.809615384615385, "grad_norm": 3.7625315189361572, "learning_rate": 1.7932142944601062e-05, "loss": 0.0361, "step": 6042 }, { "epoch": 5.810576923076923, "grad_norm": 3.396707057952881, "learning_rate": 1.7931384338421865e-05, "loss": 0.0344, "step": 6043 }, { "epoch": 5.811538461538461, "grad_norm": 2.5987279415130615, "learning_rate": 1.7930625609170865e-05, "loss": 0.041, "step": 6044 }, { "epoch": 5.8125, "grad_norm": 4.003989219665527, "learning_rate": 1.7929866756859832e-05, "loss": 0.0709, "step": 6045 }, { "epoch": 5.813461538461539, "grad_norm": 5.626323699951172, "learning_rate": 1.792910778150054e-05, "loss": 0.0859, "step": 6046 }, { "epoch": 5.814423076923077, "grad_norm": 3.7902653217315674, "learning_rate": 1.7928348683104767e-05, "loss": 0.0579, "step": 6047 }, { "epoch": 5.815384615384615, "grad_norm": 2.7547311782836914, "learning_rate": 1.792758946168429e-05, "loss": 0.0188, "step": 6048 }, { "epoch": 5.816346153846154, "grad_norm": 3.228508949279785, "learning_rate": 1.7926830117250896e-05, "loss": 0.0738, "step": 6049 }, { "epoch": 5.8173076923076925, "grad_norm": 2.771773338317871, "learning_rate": 1.792607064981636e-05, "loss": 0.0537, "step": 6050 }, { "epoch": 5.81826923076923, "grad_norm": 3.572282314300537, "learning_rate": 1.7925311059392472e-05, "loss": 0.0485, "step": 6051 }, { "epoch": 5.819230769230769, "grad_norm": 5.427223205566406, "learning_rate": 1.7924551345991018e-05, "loss": 0.0965, "step": 6052 }, { "epoch": 5.820192307692308, "grad_norm": 1.4124301671981812, "learning_rate": 1.792379150962378e-05, "loss": 0.0105, "step": 6053 }, { "epoch": 5.821153846153846, "grad_norm": 1.6569167375564575, "learning_rate": 1.792303155030256e-05, "loss": 0.0154, "step": 6054 }, { "epoch": 5.822115384615385, "grad_norm": 4.615982532501221, "learning_rate": 1.792227146803914e-05, "loss": 0.0498, "step": 6055 }, { "epoch": 5.823076923076923, "grad_norm": 3.369114398956299, "learning_rate": 1.7921511262845323e-05, "loss": 0.051, "step": 6056 }, { "epoch": 5.8240384615384615, "grad_norm": 2.579812526702881, "learning_rate": 1.79207509347329e-05, "loss": 0.0512, "step": 6057 }, { "epoch": 5.825, "grad_norm": 3.2841832637786865, "learning_rate": 1.7919990483713666e-05, "loss": 0.0322, "step": 6058 }, { "epoch": 5.825961538461538, "grad_norm": 1.5765752792358398, "learning_rate": 1.7919229909799426e-05, "loss": 0.0284, "step": 6059 }, { "epoch": 5.826923076923077, "grad_norm": 4.693995475769043, "learning_rate": 1.791846921300198e-05, "loss": 0.1477, "step": 6060 }, { "epoch": 5.827884615384615, "grad_norm": 4.233278751373291, "learning_rate": 1.7917708393333134e-05, "loss": 0.0443, "step": 6061 }, { "epoch": 5.828846153846154, "grad_norm": 5.071722507476807, "learning_rate": 1.7916947450804687e-05, "loss": 0.0897, "step": 6062 }, { "epoch": 5.829807692307693, "grad_norm": 1.179826259613037, "learning_rate": 1.7916186385428456e-05, "loss": 0.0062, "step": 6063 }, { "epoch": 5.8307692307692305, "grad_norm": 4.485159873962402, "learning_rate": 1.7915425197216246e-05, "loss": 0.0523, "step": 6064 }, { "epoch": 5.831730769230769, "grad_norm": 4.173933982849121, "learning_rate": 1.7914663886179868e-05, "loss": 0.0916, "step": 6065 }, { "epoch": 5.832692307692308, "grad_norm": 5.79387092590332, "learning_rate": 1.7913902452331132e-05, "loss": 0.0911, "step": 6066 }, { "epoch": 5.8336538461538465, "grad_norm": 3.5885510444641113, "learning_rate": 1.7913140895681864e-05, "loss": 0.082, "step": 6067 }, { "epoch": 5.834615384615384, "grad_norm": 5.071421146392822, "learning_rate": 1.791237921624387e-05, "loss": 0.127, "step": 6068 }, { "epoch": 5.835576923076923, "grad_norm": 3.829416036605835, "learning_rate": 1.7911617414028972e-05, "loss": 0.0673, "step": 6069 }, { "epoch": 5.836538461538462, "grad_norm": 3.832935333251953, "learning_rate": 1.7910855489048992e-05, "loss": 0.1007, "step": 6070 }, { "epoch": 5.8375, "grad_norm": 1.513426423072815, "learning_rate": 1.7910093441315757e-05, "loss": 0.0178, "step": 6071 }, { "epoch": 5.838461538461538, "grad_norm": 3.8986968994140625, "learning_rate": 1.7909331270841085e-05, "loss": 0.0967, "step": 6072 }, { "epoch": 5.839423076923077, "grad_norm": 5.06958532333374, "learning_rate": 1.7908568977636804e-05, "loss": 0.0931, "step": 6073 }, { "epoch": 5.8403846153846155, "grad_norm": 1.6022089719772339, "learning_rate": 1.7907806561714745e-05, "loss": 0.0308, "step": 6074 }, { "epoch": 5.841346153846154, "grad_norm": 3.2739715576171875, "learning_rate": 1.790704402308674e-05, "loss": 0.1045, "step": 6075 }, { "epoch": 5.842307692307692, "grad_norm": 3.226147413253784, "learning_rate": 1.7906281361764614e-05, "loss": 0.0486, "step": 6076 }, { "epoch": 5.843269230769231, "grad_norm": 2.264284372329712, "learning_rate": 1.7905518577760207e-05, "loss": 0.0454, "step": 6077 }, { "epoch": 5.844230769230769, "grad_norm": 2.258591413497925, "learning_rate": 1.7904755671085358e-05, "loss": 0.0374, "step": 6078 }, { "epoch": 5.845192307692308, "grad_norm": 1.3015414476394653, "learning_rate": 1.79039926417519e-05, "loss": 0.01, "step": 6079 }, { "epoch": 5.846153846153846, "grad_norm": 1.4347690343856812, "learning_rate": 1.790322948977167e-05, "loss": 0.0109, "step": 6080 }, { "epoch": 5.8471153846153845, "grad_norm": 2.680429697036743, "learning_rate": 1.7902466215156517e-05, "loss": 0.0806, "step": 6081 }, { "epoch": 5.848076923076923, "grad_norm": 0.915050745010376, "learning_rate": 1.790170281791828e-05, "loss": 0.0063, "step": 6082 }, { "epoch": 5.849038461538462, "grad_norm": 3.366321086883545, "learning_rate": 1.7900939298068807e-05, "loss": 0.0568, "step": 6083 }, { "epoch": 5.85, "grad_norm": 4.680519104003906, "learning_rate": 1.7900175655619944e-05, "loss": 0.0793, "step": 6084 }, { "epoch": 5.850961538461538, "grad_norm": 3.7420642375946045, "learning_rate": 1.7899411890583546e-05, "loss": 0.0585, "step": 6085 }, { "epoch": 5.851923076923077, "grad_norm": 4.246768474578857, "learning_rate": 1.7898648002971456e-05, "loss": 0.0895, "step": 6086 }, { "epoch": 5.852884615384616, "grad_norm": 3.3374643325805664, "learning_rate": 1.7897883992795534e-05, "loss": 0.08, "step": 6087 }, { "epoch": 5.8538461538461535, "grad_norm": 3.0755820274353027, "learning_rate": 1.789711986006763e-05, "loss": 0.0446, "step": 6088 }, { "epoch": 5.854807692307692, "grad_norm": 3.5580430030822754, "learning_rate": 1.7896355604799604e-05, "loss": 0.0473, "step": 6089 }, { "epoch": 5.855769230769231, "grad_norm": 4.855564594268799, "learning_rate": 1.7895591227003316e-05, "loss": 0.1933, "step": 6090 }, { "epoch": 5.8567307692307695, "grad_norm": 3.9428904056549072, "learning_rate": 1.7894826726690624e-05, "loss": 0.0582, "step": 6091 }, { "epoch": 5.857692307692307, "grad_norm": 4.686680316925049, "learning_rate": 1.7894062103873393e-05, "loss": 0.1194, "step": 6092 }, { "epoch": 5.858653846153846, "grad_norm": 2.5762455463409424, "learning_rate": 1.789329735856349e-05, "loss": 0.0093, "step": 6093 }, { "epoch": 5.859615384615385, "grad_norm": 3.7793922424316406, "learning_rate": 1.7892532490772773e-05, "loss": 0.072, "step": 6094 }, { "epoch": 5.860576923076923, "grad_norm": 3.1414928436279297, "learning_rate": 1.7891767500513117e-05, "loss": 0.0416, "step": 6095 }, { "epoch": 5.861538461538462, "grad_norm": 4.142014980316162, "learning_rate": 1.7891002387796394e-05, "loss": 0.0859, "step": 6096 }, { "epoch": 5.8625, "grad_norm": 2.3383190631866455, "learning_rate": 1.789023715263447e-05, "loss": 0.0318, "step": 6097 }, { "epoch": 5.8634615384615385, "grad_norm": 1.088689923286438, "learning_rate": 1.788947179503923e-05, "loss": 0.0053, "step": 6098 }, { "epoch": 5.864423076923077, "grad_norm": 1.0760455131530762, "learning_rate": 1.7888706315022534e-05, "loss": 0.0086, "step": 6099 }, { "epoch": 5.865384615384615, "grad_norm": 2.84126615524292, "learning_rate": 1.7887940712596277e-05, "loss": 0.0343, "step": 6100 }, { "epoch": 5.866346153846154, "grad_norm": 3.8260700702667236, "learning_rate": 1.7887174987772326e-05, "loss": 0.0523, "step": 6101 }, { "epoch": 5.867307692307692, "grad_norm": 3.0595340728759766, "learning_rate": 1.788640914056257e-05, "loss": 0.0193, "step": 6102 }, { "epoch": 5.868269230769231, "grad_norm": 1.2247439622879028, "learning_rate": 1.788564317097889e-05, "loss": 0.0186, "step": 6103 }, { "epoch": 5.86923076923077, "grad_norm": 2.3602659702301025, "learning_rate": 1.788487707903317e-05, "loss": 0.0246, "step": 6104 }, { "epoch": 5.8701923076923075, "grad_norm": 3.190023422241211, "learning_rate": 1.7884110864737302e-05, "loss": 0.0352, "step": 6105 }, { "epoch": 5.871153846153846, "grad_norm": 4.318516731262207, "learning_rate": 1.788334452810317e-05, "loss": 0.068, "step": 6106 }, { "epoch": 5.872115384615385, "grad_norm": 4.440852165222168, "learning_rate": 1.7882578069142673e-05, "loss": 0.1365, "step": 6107 }, { "epoch": 5.873076923076923, "grad_norm": 3.0424177646636963, "learning_rate": 1.78818114878677e-05, "loss": 0.0277, "step": 6108 }, { "epoch": 5.874038461538461, "grad_norm": 3.07621693611145, "learning_rate": 1.788104478429014e-05, "loss": 0.0507, "step": 6109 }, { "epoch": 5.875, "grad_norm": 4.150256156921387, "learning_rate": 1.78802779584219e-05, "loss": 0.0659, "step": 6110 }, { "epoch": 5.875961538461539, "grad_norm": 1.5112210512161255, "learning_rate": 1.787951101027487e-05, "loss": 0.0156, "step": 6111 }, { "epoch": 5.876923076923077, "grad_norm": 2.8130578994750977, "learning_rate": 1.7878743939860958e-05, "loss": 0.0319, "step": 6112 }, { "epoch": 5.877884615384615, "grad_norm": 1.340497612953186, "learning_rate": 1.7877976747192064e-05, "loss": 0.0174, "step": 6113 }, { "epoch": 5.878846153846154, "grad_norm": 5.172229766845703, "learning_rate": 1.7877209432280094e-05, "loss": 0.1288, "step": 6114 }, { "epoch": 5.8798076923076925, "grad_norm": 4.953789234161377, "learning_rate": 1.7876441995136952e-05, "loss": 0.1363, "step": 6115 }, { "epoch": 5.88076923076923, "grad_norm": 2.634831666946411, "learning_rate": 1.7875674435774546e-05, "loss": 0.0298, "step": 6116 }, { "epoch": 5.881730769230769, "grad_norm": 4.0143818855285645, "learning_rate": 1.7874906754204787e-05, "loss": 0.0763, "step": 6117 }, { "epoch": 5.882692307692308, "grad_norm": 3.5771543979644775, "learning_rate": 1.787413895043959e-05, "loss": 0.0259, "step": 6118 }, { "epoch": 5.883653846153846, "grad_norm": 3.4977805614471436, "learning_rate": 1.7873371024490868e-05, "loss": 0.0351, "step": 6119 }, { "epoch": 5.884615384615385, "grad_norm": 3.7983925342559814, "learning_rate": 1.7872602976370532e-05, "loss": 0.0946, "step": 6120 }, { "epoch": 5.885576923076923, "grad_norm": 4.828663349151611, "learning_rate": 1.7871834806090502e-05, "loss": 0.0615, "step": 6121 }, { "epoch": 5.8865384615384615, "grad_norm": 3.825235366821289, "learning_rate": 1.7871066513662703e-05, "loss": 0.0549, "step": 6122 }, { "epoch": 5.8875, "grad_norm": 3.4061293601989746, "learning_rate": 1.7870298099099055e-05, "loss": 0.0576, "step": 6123 }, { "epoch": 5.888461538461538, "grad_norm": 3.8519678115844727, "learning_rate": 1.7869529562411478e-05, "loss": 0.0579, "step": 6124 }, { "epoch": 5.889423076923077, "grad_norm": 2.6595616340637207, "learning_rate": 1.7868760903611893e-05, "loss": 0.0244, "step": 6125 }, { "epoch": 5.890384615384615, "grad_norm": 1.6039526462554932, "learning_rate": 1.7867992122712242e-05, "loss": 0.0204, "step": 6126 }, { "epoch": 5.891346153846154, "grad_norm": 2.864682674407959, "learning_rate": 1.786722321972444e-05, "loss": 0.0989, "step": 6127 }, { "epoch": 5.892307692307693, "grad_norm": 6.080778121948242, "learning_rate": 1.7866454194660426e-05, "loss": 0.0901, "step": 6128 }, { "epoch": 5.8932692307692305, "grad_norm": 4.384327411651611, "learning_rate": 1.786568504753213e-05, "loss": 0.0592, "step": 6129 }, { "epoch": 5.894230769230769, "grad_norm": 3.2812774181365967, "learning_rate": 1.7864915778351487e-05, "loss": 0.0354, "step": 6130 }, { "epoch": 5.895192307692308, "grad_norm": 1.2915616035461426, "learning_rate": 1.7864146387130435e-05, "loss": 0.0183, "step": 6131 }, { "epoch": 5.8961538461538465, "grad_norm": 2.4608116149902344, "learning_rate": 1.786337687388091e-05, "loss": 0.0427, "step": 6132 }, { "epoch": 5.897115384615384, "grad_norm": 4.037960529327393, "learning_rate": 1.786260723861486e-05, "loss": 0.0797, "step": 6133 }, { "epoch": 5.898076923076923, "grad_norm": 2.7842624187469482, "learning_rate": 1.786183748134422e-05, "loss": 0.0404, "step": 6134 }, { "epoch": 5.899038461538462, "grad_norm": 6.239012718200684, "learning_rate": 1.7861067602080933e-05, "loss": 0.1887, "step": 6135 }, { "epoch": 5.9, "grad_norm": 3.520728349685669, "learning_rate": 1.7860297600836954e-05, "loss": 0.0477, "step": 6136 }, { "epoch": 5.900961538461538, "grad_norm": 4.972382068634033, "learning_rate": 1.785952747762422e-05, "loss": 0.1478, "step": 6137 }, { "epoch": 5.901923076923077, "grad_norm": 3.280000686645508, "learning_rate": 1.7858757232454692e-05, "loss": 0.128, "step": 6138 }, { "epoch": 5.9028846153846155, "grad_norm": 3.0569417476654053, "learning_rate": 1.7857986865340313e-05, "loss": 0.0549, "step": 6139 }, { "epoch": 5.903846153846154, "grad_norm": 4.824345111846924, "learning_rate": 1.785721637629304e-05, "loss": 0.0916, "step": 6140 }, { "epoch": 5.904807692307692, "grad_norm": 1.2130239009857178, "learning_rate": 1.7856445765324836e-05, "loss": 0.0174, "step": 6141 }, { "epoch": 5.905769230769231, "grad_norm": 3.0579020977020264, "learning_rate": 1.7855675032447648e-05, "loss": 0.1097, "step": 6142 }, { "epoch": 5.906730769230769, "grad_norm": 2.977966785430908, "learning_rate": 1.785490417767344e-05, "loss": 0.0651, "step": 6143 }, { "epoch": 5.907692307692308, "grad_norm": 0.9432505965232849, "learning_rate": 1.7854133201014172e-05, "loss": 0.0066, "step": 6144 }, { "epoch": 5.908653846153846, "grad_norm": 2.667809009552002, "learning_rate": 1.7853362102481812e-05, "loss": 0.0439, "step": 6145 }, { "epoch": 5.9096153846153845, "grad_norm": 0.836648166179657, "learning_rate": 1.785259088208832e-05, "loss": 0.007, "step": 6146 }, { "epoch": 5.910576923076923, "grad_norm": 2.2873692512512207, "learning_rate": 1.7851819539845664e-05, "loss": 0.0131, "step": 6147 }, { "epoch": 5.911538461538462, "grad_norm": 2.526949405670166, "learning_rate": 1.785104807576581e-05, "loss": 0.071, "step": 6148 }, { "epoch": 5.9125, "grad_norm": 4.357515335083008, "learning_rate": 1.7850276489860738e-05, "loss": 0.0484, "step": 6149 }, { "epoch": 5.913461538461538, "grad_norm": 3.3490824699401855, "learning_rate": 1.784950478214241e-05, "loss": 0.0614, "step": 6150 }, { "epoch": 5.914423076923077, "grad_norm": 1.4907855987548828, "learning_rate": 1.7848732952622806e-05, "loss": 0.0149, "step": 6151 }, { "epoch": 5.915384615384616, "grad_norm": 3.0801303386688232, "learning_rate": 1.7847961001313907e-05, "loss": 0.049, "step": 6152 }, { "epoch": 5.9163461538461535, "grad_norm": 1.7939934730529785, "learning_rate": 1.7847188928227687e-05, "loss": 0.0259, "step": 6153 }, { "epoch": 5.917307692307692, "grad_norm": 3.5389339923858643, "learning_rate": 1.784641673337612e-05, "loss": 0.0412, "step": 6154 }, { "epoch": 5.918269230769231, "grad_norm": 1.0091086626052856, "learning_rate": 1.78456444167712e-05, "loss": 0.0097, "step": 6155 }, { "epoch": 5.9192307692307695, "grad_norm": 3.673387050628662, "learning_rate": 1.78448719784249e-05, "loss": 0.0901, "step": 6156 }, { "epoch": 5.920192307692307, "grad_norm": 4.113821983337402, "learning_rate": 1.784409941834922e-05, "loss": 0.053, "step": 6157 }, { "epoch": 5.921153846153846, "grad_norm": 4.489761829376221, "learning_rate": 1.784332673655613e-05, "loss": 0.0438, "step": 6158 }, { "epoch": 5.922115384615385, "grad_norm": 3.5823745727539062, "learning_rate": 1.7842553933057633e-05, "loss": 0.0478, "step": 6159 }, { "epoch": 5.923076923076923, "grad_norm": 5.996418476104736, "learning_rate": 1.7841781007865715e-05, "loss": 0.1456, "step": 6160 }, { "epoch": 5.924038461538462, "grad_norm": 3.9636597633361816, "learning_rate": 1.7841007960992374e-05, "loss": 0.0532, "step": 6161 }, { "epoch": 5.925, "grad_norm": 4.129217624664307, "learning_rate": 1.78402347924496e-05, "loss": 0.0772, "step": 6162 }, { "epoch": 5.9259615384615385, "grad_norm": 2.606245517730713, "learning_rate": 1.7839461502249393e-05, "loss": 0.034, "step": 6163 }, { "epoch": 5.926923076923077, "grad_norm": 2.8329226970672607, "learning_rate": 1.783868809040375e-05, "loss": 0.0268, "step": 6164 }, { "epoch": 5.927884615384615, "grad_norm": 1.8233669996261597, "learning_rate": 1.783791455692468e-05, "loss": 0.0166, "step": 6165 }, { "epoch": 5.928846153846154, "grad_norm": 3.321258544921875, "learning_rate": 1.7837140901824177e-05, "loss": 0.0569, "step": 6166 }, { "epoch": 5.929807692307692, "grad_norm": 4.070566654205322, "learning_rate": 1.7836367125114248e-05, "loss": 0.061, "step": 6167 }, { "epoch": 5.930769230769231, "grad_norm": 3.5527734756469727, "learning_rate": 1.7835593226806902e-05, "loss": 0.0434, "step": 6168 }, { "epoch": 5.93173076923077, "grad_norm": 1.3777472972869873, "learning_rate": 1.7834819206914146e-05, "loss": 0.0233, "step": 6169 }, { "epoch": 5.9326923076923075, "grad_norm": 2.0567054748535156, "learning_rate": 1.783404506544799e-05, "loss": 0.0456, "step": 6170 }, { "epoch": 5.933653846153846, "grad_norm": 5.113249778747559, "learning_rate": 1.783327080242045e-05, "loss": 0.0796, "step": 6171 }, { "epoch": 5.934615384615385, "grad_norm": 9.217490196228027, "learning_rate": 1.7832496417843533e-05, "loss": 0.1109, "step": 6172 }, { "epoch": 5.935576923076923, "grad_norm": 4.0122809410095215, "learning_rate": 1.7831721911729264e-05, "loss": 0.059, "step": 6173 }, { "epoch": 5.936538461538461, "grad_norm": 3.6898646354675293, "learning_rate": 1.7830947284089654e-05, "loss": 0.0423, "step": 6174 }, { "epoch": 5.9375, "grad_norm": 2.342479705810547, "learning_rate": 1.7830172534936727e-05, "loss": 0.0454, "step": 6175 }, { "epoch": 5.938461538461539, "grad_norm": 4.153670310974121, "learning_rate": 1.7829397664282503e-05, "loss": 0.1063, "step": 6176 }, { "epoch": 5.939423076923077, "grad_norm": 3.905958414077759, "learning_rate": 1.7828622672139008e-05, "loss": 0.1244, "step": 6177 }, { "epoch": 5.940384615384615, "grad_norm": 2.364030599594116, "learning_rate": 1.782784755851826e-05, "loss": 0.0403, "step": 6178 }, { "epoch": 5.941346153846154, "grad_norm": 2.757387161254883, "learning_rate": 1.7827072323432298e-05, "loss": 0.0627, "step": 6179 }, { "epoch": 5.9423076923076925, "grad_norm": 5.355105400085449, "learning_rate": 1.7826296966893146e-05, "loss": 0.151, "step": 6180 }, { "epoch": 5.94326923076923, "grad_norm": 3.160646915435791, "learning_rate": 1.7825521488912833e-05, "loss": 0.0451, "step": 6181 }, { "epoch": 5.944230769230769, "grad_norm": 1.6234444379806519, "learning_rate": 1.782474588950339e-05, "loss": 0.0248, "step": 6182 }, { "epoch": 5.945192307692308, "grad_norm": 4.048202991485596, "learning_rate": 1.7823970168676862e-05, "loss": 0.0371, "step": 6183 }, { "epoch": 5.946153846153846, "grad_norm": 3.043490409851074, "learning_rate": 1.782319432644528e-05, "loss": 0.0536, "step": 6184 }, { "epoch": 5.947115384615385, "grad_norm": 2.6211671829223633, "learning_rate": 1.782241836282068e-05, "loss": 0.0591, "step": 6185 }, { "epoch": 5.948076923076923, "grad_norm": 3.4155800342559814, "learning_rate": 1.7821642277815108e-05, "loss": 0.0406, "step": 6186 }, { "epoch": 5.9490384615384615, "grad_norm": 4.874108791351318, "learning_rate": 1.7820866071440603e-05, "loss": 0.0894, "step": 6187 }, { "epoch": 5.95, "grad_norm": 5.429821491241455, "learning_rate": 1.782008974370921e-05, "loss": 0.1251, "step": 6188 }, { "epoch": 5.950961538461538, "grad_norm": 1.3591281175613403, "learning_rate": 1.7819313294632977e-05, "loss": 0.0144, "step": 6189 }, { "epoch": 5.951923076923077, "grad_norm": 3.4148902893066406, "learning_rate": 1.781853672422395e-05, "loss": 0.0347, "step": 6190 }, { "epoch": 5.952884615384615, "grad_norm": 4.485235691070557, "learning_rate": 1.781776003249418e-05, "loss": 0.069, "step": 6191 }, { "epoch": 5.953846153846154, "grad_norm": 2.769678831100464, "learning_rate": 1.7816983219455722e-05, "loss": 0.0372, "step": 6192 }, { "epoch": 5.954807692307693, "grad_norm": 4.76674222946167, "learning_rate": 1.781620628512062e-05, "loss": 0.0588, "step": 6193 }, { "epoch": 5.9557692307692305, "grad_norm": 2.8070952892303467, "learning_rate": 1.7815429229500946e-05, "loss": 0.063, "step": 6194 }, { "epoch": 5.956730769230769, "grad_norm": 1.919183373451233, "learning_rate": 1.781465205260874e-05, "loss": 0.0342, "step": 6195 }, { "epoch": 5.957692307692308, "grad_norm": 1.9754505157470703, "learning_rate": 1.7813874754456076e-05, "loss": 0.0349, "step": 6196 }, { "epoch": 5.9586538461538465, "grad_norm": 3.474565029144287, "learning_rate": 1.7813097335055008e-05, "loss": 0.0496, "step": 6197 }, { "epoch": 5.959615384615384, "grad_norm": 4.001271724700928, "learning_rate": 1.78123197944176e-05, "loss": 0.0687, "step": 6198 }, { "epoch": 5.960576923076923, "grad_norm": 3.1516811847686768, "learning_rate": 1.781154213255592e-05, "loss": 0.0243, "step": 6199 }, { "epoch": 5.961538461538462, "grad_norm": 4.8398237228393555, "learning_rate": 1.7810764349482027e-05, "loss": 0.0911, "step": 6200 }, { "epoch": 5.9625, "grad_norm": 2.6912405490875244, "learning_rate": 1.7809986445207997e-05, "loss": 0.0286, "step": 6201 }, { "epoch": 5.963461538461538, "grad_norm": 0.9760701060295105, "learning_rate": 1.7809208419745904e-05, "loss": 0.0161, "step": 6202 }, { "epoch": 5.964423076923077, "grad_norm": 2.8513600826263428, "learning_rate": 1.7808430273107813e-05, "loss": 0.0469, "step": 6203 }, { "epoch": 5.9653846153846155, "grad_norm": 2.2118780612945557, "learning_rate": 1.78076520053058e-05, "loss": 0.026, "step": 6204 }, { "epoch": 5.966346153846154, "grad_norm": 6.255458831787109, "learning_rate": 1.7806873616351944e-05, "loss": 0.1633, "step": 6205 }, { "epoch": 5.967307692307692, "grad_norm": 2.3153789043426514, "learning_rate": 1.7806095106258323e-05, "loss": 0.0196, "step": 6206 }, { "epoch": 5.968269230769231, "grad_norm": 10.260100364685059, "learning_rate": 1.7805316475037016e-05, "loss": 0.2259, "step": 6207 }, { "epoch": 5.969230769230769, "grad_norm": 4.103977680206299, "learning_rate": 1.7804537722700108e-05, "loss": 0.0843, "step": 6208 }, { "epoch": 5.970192307692308, "grad_norm": 2.9032909870147705, "learning_rate": 1.780375884925968e-05, "loss": 0.0196, "step": 6209 }, { "epoch": 5.971153846153846, "grad_norm": 2.6040782928466797, "learning_rate": 1.7802979854727816e-05, "loss": 0.0286, "step": 6210 }, { "epoch": 5.9721153846153845, "grad_norm": 4.774037837982178, "learning_rate": 1.780220073911661e-05, "loss": 0.1194, "step": 6211 }, { "epoch": 5.973076923076923, "grad_norm": 4.420910835266113, "learning_rate": 1.780142150243814e-05, "loss": 0.0806, "step": 6212 }, { "epoch": 5.974038461538462, "grad_norm": 3.7873947620391846, "learning_rate": 1.7800642144704512e-05, "loss": 0.0741, "step": 6213 }, { "epoch": 5.975, "grad_norm": 3.624541997909546, "learning_rate": 1.779986266592781e-05, "loss": 0.0823, "step": 6214 }, { "epoch": 5.975961538461538, "grad_norm": 2.965104818344116, "learning_rate": 1.7799083066120134e-05, "loss": 0.0907, "step": 6215 }, { "epoch": 5.976923076923077, "grad_norm": 3.0213019847869873, "learning_rate": 1.7798303345293577e-05, "loss": 0.0358, "step": 6216 }, { "epoch": 5.977884615384616, "grad_norm": 2.6890997886657715, "learning_rate": 1.7797523503460243e-05, "loss": 0.04, "step": 6217 }, { "epoch": 5.9788461538461535, "grad_norm": 3.8581197261810303, "learning_rate": 1.7796743540632226e-05, "loss": 0.0511, "step": 6218 }, { "epoch": 5.979807692307692, "grad_norm": 2.1879513263702393, "learning_rate": 1.7795963456821632e-05, "loss": 0.023, "step": 6219 }, { "epoch": 5.980769230769231, "grad_norm": 3.504122018814087, "learning_rate": 1.7795183252040568e-05, "loss": 0.085, "step": 6220 }, { "epoch": 5.9817307692307695, "grad_norm": 2.643955707550049, "learning_rate": 1.779440292630114e-05, "loss": 0.0358, "step": 6221 }, { "epoch": 5.982692307692307, "grad_norm": 3.40061354637146, "learning_rate": 1.779362247961545e-05, "loss": 0.0658, "step": 6222 }, { "epoch": 5.983653846153846, "grad_norm": 5.216546058654785, "learning_rate": 1.7792841911995616e-05, "loss": 0.1023, "step": 6223 }, { "epoch": 5.984615384615385, "grad_norm": 3.5290186405181885, "learning_rate": 1.7792061223453748e-05, "loss": 0.0427, "step": 6224 }, { "epoch": 5.985576923076923, "grad_norm": 4.750782012939453, "learning_rate": 1.7791280414001958e-05, "loss": 0.0764, "step": 6225 }, { "epoch": 5.986538461538462, "grad_norm": 2.3814821243286133, "learning_rate": 1.7790499483652363e-05, "loss": 0.0225, "step": 6226 }, { "epoch": 5.9875, "grad_norm": 3.143799304962158, "learning_rate": 1.778971843241708e-05, "loss": 0.046, "step": 6227 }, { "epoch": 5.9884615384615385, "grad_norm": 6.143487453460693, "learning_rate": 1.7788937260308234e-05, "loss": 0.1828, "step": 6228 }, { "epoch": 5.989423076923077, "grad_norm": 5.210989952087402, "learning_rate": 1.778815596733794e-05, "loss": 0.1468, "step": 6229 }, { "epoch": 5.990384615384615, "grad_norm": 3.5090761184692383, "learning_rate": 1.778737455351832e-05, "loss": 0.0739, "step": 6230 }, { "epoch": 5.991346153846154, "grad_norm": 3.200117349624634, "learning_rate": 1.7786593018861507e-05, "loss": 0.058, "step": 6231 }, { "epoch": 5.992307692307692, "grad_norm": 3.185417890548706, "learning_rate": 1.778581136337962e-05, "loss": 0.0413, "step": 6232 }, { "epoch": 5.993269230769231, "grad_norm": 3.910360813140869, "learning_rate": 1.7785029587084793e-05, "loss": 0.0616, "step": 6233 }, { "epoch": 5.99423076923077, "grad_norm": 5.35230827331543, "learning_rate": 1.7784247689989156e-05, "loss": 0.0973, "step": 6234 }, { "epoch": 5.9951923076923075, "grad_norm": 5.7820281982421875, "learning_rate": 1.778346567210484e-05, "loss": 0.23, "step": 6235 }, { "epoch": 5.996153846153846, "grad_norm": 3.387272596359253, "learning_rate": 1.7782683533443984e-05, "loss": 0.0591, "step": 6236 }, { "epoch": 5.997115384615385, "grad_norm": 0.844901978969574, "learning_rate": 1.778190127401872e-05, "loss": 0.007, "step": 6237 }, { "epoch": 5.998076923076923, "grad_norm": 0.37704935669898987, "learning_rate": 1.7781118893841185e-05, "loss": 0.0045, "step": 6238 }, { "epoch": 5.999038461538461, "grad_norm": 4.124421119689941, "learning_rate": 1.7780336392923526e-05, "loss": 0.0545, "step": 6239 }, { "epoch": 6.0, "grad_norm": 6.225756645202637, "learning_rate": 1.777955377127788e-05, "loss": 0.0812, "step": 6240 }, { "epoch": 6.000961538461539, "grad_norm": 3.5711936950683594, "learning_rate": 1.777877102891639e-05, "loss": 0.0565, "step": 6241 }, { "epoch": 6.001923076923077, "grad_norm": 3.368037462234497, "learning_rate": 1.7777988165851208e-05, "loss": 0.0396, "step": 6242 }, { "epoch": 6.002884615384615, "grad_norm": 1.6473246812820435, "learning_rate": 1.7777205182094476e-05, "loss": 0.009, "step": 6243 }, { "epoch": 6.003846153846154, "grad_norm": 0.9420005679130554, "learning_rate": 1.7776422077658344e-05, "loss": 0.0072, "step": 6244 }, { "epoch": 6.0048076923076925, "grad_norm": 0.5495569705963135, "learning_rate": 1.7775638852554965e-05, "loss": 0.0043, "step": 6245 }, { "epoch": 6.005769230769231, "grad_norm": 3.8617095947265625, "learning_rate": 1.7774855506796497e-05, "loss": 0.0769, "step": 6246 }, { "epoch": 6.006730769230769, "grad_norm": 0.32476288080215454, "learning_rate": 1.7774072040395082e-05, "loss": 0.0022, "step": 6247 }, { "epoch": 6.007692307692308, "grad_norm": 7.33122444152832, "learning_rate": 1.7773288453362894e-05, "loss": 0.0808, "step": 6248 }, { "epoch": 6.008653846153846, "grad_norm": 1.9632207155227661, "learning_rate": 1.777250474571208e-05, "loss": 0.0126, "step": 6249 }, { "epoch": 6.009615384615385, "grad_norm": 4.28929328918457, "learning_rate": 1.77717209174548e-05, "loss": 0.1384, "step": 6250 }, { "epoch": 6.010576923076923, "grad_norm": 1.472262978553772, "learning_rate": 1.7770936968603227e-05, "loss": 0.011, "step": 6251 }, { "epoch": 6.0115384615384615, "grad_norm": 2.29638409614563, "learning_rate": 1.777015289916952e-05, "loss": 0.0239, "step": 6252 }, { "epoch": 6.0125, "grad_norm": 1.9149397611618042, "learning_rate": 1.7769368709165842e-05, "loss": 0.0283, "step": 6253 }, { "epoch": 6.013461538461539, "grad_norm": 1.5066395998001099, "learning_rate": 1.7768584398604365e-05, "loss": 0.0121, "step": 6254 }, { "epoch": 6.014423076923077, "grad_norm": 0.5133434534072876, "learning_rate": 1.7767799967497258e-05, "loss": 0.0042, "step": 6255 }, { "epoch": 6.015384615384615, "grad_norm": 2.8553969860076904, "learning_rate": 1.7767015415856696e-05, "loss": 0.049, "step": 6256 }, { "epoch": 6.016346153846154, "grad_norm": 3.1412250995635986, "learning_rate": 1.776623074369485e-05, "loss": 0.0506, "step": 6257 }, { "epoch": 6.017307692307693, "grad_norm": 3.1015512943267822, "learning_rate": 1.7765445951023898e-05, "loss": 0.0754, "step": 6258 }, { "epoch": 6.0182692307692305, "grad_norm": 2.798494577407837, "learning_rate": 1.7764661037856013e-05, "loss": 0.0851, "step": 6259 }, { "epoch": 6.019230769230769, "grad_norm": 2.008556604385376, "learning_rate": 1.7763876004203376e-05, "loss": 0.0311, "step": 6260 }, { "epoch": 6.020192307692308, "grad_norm": 7.050513744354248, "learning_rate": 1.7763090850078175e-05, "loss": 0.1493, "step": 6261 }, { "epoch": 6.0211538461538465, "grad_norm": 2.783642530441284, "learning_rate": 1.7762305575492587e-05, "loss": 0.0464, "step": 6262 }, { "epoch": 6.022115384615384, "grad_norm": 4.440674304962158, "learning_rate": 1.7761520180458798e-05, "loss": 0.0827, "step": 6263 }, { "epoch": 6.023076923076923, "grad_norm": 1.5679105520248413, "learning_rate": 1.7760734664988996e-05, "loss": 0.0156, "step": 6264 }, { "epoch": 6.024038461538462, "grad_norm": 4.5777411460876465, "learning_rate": 1.7759949029095368e-05, "loss": 0.1113, "step": 6265 }, { "epoch": 6.025, "grad_norm": 5.51215124130249, "learning_rate": 1.7759163272790108e-05, "loss": 0.1302, "step": 6266 }, { "epoch": 6.025961538461538, "grad_norm": 0.5384945273399353, "learning_rate": 1.7758377396085403e-05, "loss": 0.0049, "step": 6267 }, { "epoch": 6.026923076923077, "grad_norm": 3.5440890789031982, "learning_rate": 1.7757591398993457e-05, "loss": 0.1111, "step": 6268 }, { "epoch": 6.0278846153846155, "grad_norm": 1.8529248237609863, "learning_rate": 1.775680528152646e-05, "loss": 0.0216, "step": 6269 }, { "epoch": 6.028846153846154, "grad_norm": 2.3518693447113037, "learning_rate": 1.775601904369661e-05, "loss": 0.0381, "step": 6270 }, { "epoch": 6.029807692307692, "grad_norm": 2.391446590423584, "learning_rate": 1.7755232685516107e-05, "loss": 0.0475, "step": 6271 }, { "epoch": 6.030769230769231, "grad_norm": 2.6699066162109375, "learning_rate": 1.7754446206997152e-05, "loss": 0.1113, "step": 6272 }, { "epoch": 6.031730769230769, "grad_norm": 1.0760362148284912, "learning_rate": 1.7753659608151953e-05, "loss": 0.0132, "step": 6273 }, { "epoch": 6.032692307692308, "grad_norm": 2.851029396057129, "learning_rate": 1.7752872888992714e-05, "loss": 0.0734, "step": 6274 }, { "epoch": 6.033653846153846, "grad_norm": 1.7272982597351074, "learning_rate": 1.775208604953164e-05, "loss": 0.0484, "step": 6275 }, { "epoch": 6.0346153846153845, "grad_norm": 2.035914182662964, "learning_rate": 1.775129908978095e-05, "loss": 0.031, "step": 6276 }, { "epoch": 6.035576923076923, "grad_norm": 2.073575973510742, "learning_rate": 1.775051200975284e-05, "loss": 0.0326, "step": 6277 }, { "epoch": 6.036538461538462, "grad_norm": 1.6360803842544556, "learning_rate": 1.7749724809459534e-05, "loss": 0.0162, "step": 6278 }, { "epoch": 6.0375, "grad_norm": 2.37566876411438, "learning_rate": 1.774893748891324e-05, "loss": 0.0364, "step": 6279 }, { "epoch": 6.038461538461538, "grad_norm": 1.934726595878601, "learning_rate": 1.7748150048126187e-05, "loss": 0.0272, "step": 6280 }, { "epoch": 6.039423076923077, "grad_norm": 2.146796464920044, "learning_rate": 1.774736248711058e-05, "loss": 0.046, "step": 6281 }, { "epoch": 6.040384615384616, "grad_norm": 2.627596139907837, "learning_rate": 1.7746574805878648e-05, "loss": 0.0435, "step": 6282 }, { "epoch": 6.0413461538461535, "grad_norm": 3.403978109359741, "learning_rate": 1.774578700444261e-05, "loss": 0.0655, "step": 6283 }, { "epoch": 6.042307692307692, "grad_norm": 1.3209737539291382, "learning_rate": 1.7744999082814693e-05, "loss": 0.0144, "step": 6284 }, { "epoch": 6.043269230769231, "grad_norm": 2.072680950164795, "learning_rate": 1.774421104100712e-05, "loss": 0.0586, "step": 6285 }, { "epoch": 6.0442307692307695, "grad_norm": 4.215863227844238, "learning_rate": 1.774342287903212e-05, "loss": 0.0281, "step": 6286 }, { "epoch": 6.045192307692307, "grad_norm": 2.366135358810425, "learning_rate": 1.7742634596901924e-05, "loss": 0.0536, "step": 6287 }, { "epoch": 6.046153846153846, "grad_norm": 2.0094056129455566, "learning_rate": 1.7741846194628763e-05, "loss": 0.0203, "step": 6288 }, { "epoch": 6.047115384615385, "grad_norm": 1.6629952192306519, "learning_rate": 1.774105767222487e-05, "loss": 0.0249, "step": 6289 }, { "epoch": 6.048076923076923, "grad_norm": 3.7973875999450684, "learning_rate": 1.7740269029702486e-05, "loss": 0.1138, "step": 6290 }, { "epoch": 6.049038461538461, "grad_norm": 1.9104063510894775, "learning_rate": 1.7739480267073842e-05, "loss": 0.016, "step": 6291 }, { "epoch": 6.05, "grad_norm": 0.8626465201377869, "learning_rate": 1.7738691384351177e-05, "loss": 0.0071, "step": 6292 }, { "epoch": 6.0509615384615385, "grad_norm": 0.9332122802734375, "learning_rate": 1.7737902381546737e-05, "loss": 0.0094, "step": 6293 }, { "epoch": 6.051923076923077, "grad_norm": 1.6159695386886597, "learning_rate": 1.773711325867276e-05, "loss": 0.0167, "step": 6294 }, { "epoch": 6.052884615384615, "grad_norm": 1.9542945623397827, "learning_rate": 1.7736324015741498e-05, "loss": 0.02, "step": 6295 }, { "epoch": 6.053846153846154, "grad_norm": 3.130784749984741, "learning_rate": 1.7735534652765188e-05, "loss": 0.0332, "step": 6296 }, { "epoch": 6.054807692307692, "grad_norm": 1.9100345373153687, "learning_rate": 1.7734745169756083e-05, "loss": 0.02, "step": 6297 }, { "epoch": 6.055769230769231, "grad_norm": 4.207269191741943, "learning_rate": 1.7733955566726438e-05, "loss": 0.0894, "step": 6298 }, { "epoch": 6.056730769230769, "grad_norm": 3.4445154666900635, "learning_rate": 1.77331658436885e-05, "loss": 0.0407, "step": 6299 }, { "epoch": 6.0576923076923075, "grad_norm": 2.614482879638672, "learning_rate": 1.773237600065453e-05, "loss": 0.0429, "step": 6300 }, { "epoch": 6.058653846153846, "grad_norm": 2.051560878753662, "learning_rate": 1.7731586037636774e-05, "loss": 0.0163, "step": 6301 }, { "epoch": 6.059615384615385, "grad_norm": 2.7877142429351807, "learning_rate": 1.7730795954647493e-05, "loss": 0.0305, "step": 6302 }, { "epoch": 6.060576923076923, "grad_norm": 0.6161323189735413, "learning_rate": 1.773000575169895e-05, "loss": 0.0037, "step": 6303 }, { "epoch": 6.061538461538461, "grad_norm": 0.2768300771713257, "learning_rate": 1.7729215428803407e-05, "loss": 0.0024, "step": 6304 }, { "epoch": 6.0625, "grad_norm": 1.4300419092178345, "learning_rate": 1.7728424985973124e-05, "loss": 0.021, "step": 6305 }, { "epoch": 6.063461538461539, "grad_norm": 3.169558525085449, "learning_rate": 1.7727634423220367e-05, "loss": 0.111, "step": 6306 }, { "epoch": 6.064423076923077, "grad_norm": 0.8672295212745667, "learning_rate": 1.77268437405574e-05, "loss": 0.005, "step": 6307 }, { "epoch": 6.065384615384615, "grad_norm": 0.7235162258148193, "learning_rate": 1.77260529379965e-05, "loss": 0.0073, "step": 6308 }, { "epoch": 6.066346153846154, "grad_norm": 2.2455861568450928, "learning_rate": 1.7725262015549936e-05, "loss": 0.0223, "step": 6309 }, { "epoch": 6.0673076923076925, "grad_norm": 2.9656858444213867, "learning_rate": 1.7724470973229978e-05, "loss": 0.0518, "step": 6310 }, { "epoch": 6.068269230769231, "grad_norm": 1.7139865159988403, "learning_rate": 1.7723679811048904e-05, "loss": 0.0192, "step": 6311 }, { "epoch": 6.069230769230769, "grad_norm": 1.0146400928497314, "learning_rate": 1.7722888529018984e-05, "loss": 0.0096, "step": 6312 }, { "epoch": 6.070192307692308, "grad_norm": 3.0293655395507812, "learning_rate": 1.7722097127152503e-05, "loss": 0.0299, "step": 6313 }, { "epoch": 6.071153846153846, "grad_norm": 2.8291401863098145, "learning_rate": 1.7721305605461735e-05, "loss": 0.0252, "step": 6314 }, { "epoch": 6.072115384615385, "grad_norm": 1.3292229175567627, "learning_rate": 1.772051396395897e-05, "loss": 0.009, "step": 6315 }, { "epoch": 6.073076923076923, "grad_norm": 0.7257528901100159, "learning_rate": 1.7719722202656484e-05, "loss": 0.0044, "step": 6316 }, { "epoch": 6.0740384615384615, "grad_norm": 4.018776893615723, "learning_rate": 1.771893032156657e-05, "loss": 0.0981, "step": 6317 }, { "epoch": 6.075, "grad_norm": 2.69991135597229, "learning_rate": 1.771813832070151e-05, "loss": 0.0323, "step": 6318 }, { "epoch": 6.075961538461539, "grad_norm": 1.8387528657913208, "learning_rate": 1.7717346200073597e-05, "loss": 0.0586, "step": 6319 }, { "epoch": 6.076923076923077, "grad_norm": 4.117644786834717, "learning_rate": 1.771655395969512e-05, "loss": 0.0949, "step": 6320 }, { "epoch": 6.077884615384615, "grad_norm": 4.147373199462891, "learning_rate": 1.7715761599578374e-05, "loss": 0.0521, "step": 6321 }, { "epoch": 6.078846153846154, "grad_norm": 2.315368175506592, "learning_rate": 1.7714969119735653e-05, "loss": 0.0165, "step": 6322 }, { "epoch": 6.079807692307693, "grad_norm": 3.8029420375823975, "learning_rate": 1.7714176520179255e-05, "loss": 0.0729, "step": 6323 }, { "epoch": 6.0807692307692305, "grad_norm": 1.8458869457244873, "learning_rate": 1.771338380092148e-05, "loss": 0.0206, "step": 6324 }, { "epoch": 6.081730769230769, "grad_norm": 1.2800140380859375, "learning_rate": 1.7712590961974623e-05, "loss": 0.0095, "step": 6325 }, { "epoch": 6.082692307692308, "grad_norm": 1.8241537809371948, "learning_rate": 1.7711798003350994e-05, "loss": 0.0197, "step": 6326 }, { "epoch": 6.0836538461538465, "grad_norm": 3.6978495121002197, "learning_rate": 1.7711004925062887e-05, "loss": 0.0519, "step": 6327 }, { "epoch": 6.084615384615384, "grad_norm": 3.1334948539733887, "learning_rate": 1.7710211727122625e-05, "loss": 0.0379, "step": 6328 }, { "epoch": 6.085576923076923, "grad_norm": 4.235892295837402, "learning_rate": 1.77094184095425e-05, "loss": 0.0808, "step": 6329 }, { "epoch": 6.086538461538462, "grad_norm": 1.47114098072052, "learning_rate": 1.770862497233483e-05, "loss": 0.0171, "step": 6330 }, { "epoch": 6.0875, "grad_norm": 3.2308735847473145, "learning_rate": 1.7707831415511926e-05, "loss": 0.0664, "step": 6331 }, { "epoch": 6.088461538461538, "grad_norm": 1.1488679647445679, "learning_rate": 1.7707037739086096e-05, "loss": 0.0094, "step": 6332 }, { "epoch": 6.089423076923077, "grad_norm": 2.0472090244293213, "learning_rate": 1.7706243943069666e-05, "loss": 0.0188, "step": 6333 }, { "epoch": 6.0903846153846155, "grad_norm": 2.8344192504882812, "learning_rate": 1.7705450027474945e-05, "loss": 0.0389, "step": 6334 }, { "epoch": 6.091346153846154, "grad_norm": 3.9580190181732178, "learning_rate": 1.7704655992314253e-05, "loss": 0.046, "step": 6335 }, { "epoch": 6.092307692307692, "grad_norm": 2.7772674560546875, "learning_rate": 1.7703861837599916e-05, "loss": 0.0274, "step": 6336 }, { "epoch": 6.093269230769231, "grad_norm": 4.8801960945129395, "learning_rate": 1.7703067563344252e-05, "loss": 0.1039, "step": 6337 }, { "epoch": 6.094230769230769, "grad_norm": 1.7454618215560913, "learning_rate": 1.770227316955959e-05, "loss": 0.0149, "step": 6338 }, { "epoch": 6.095192307692308, "grad_norm": 2.8763208389282227, "learning_rate": 1.770147865625825e-05, "loss": 0.0621, "step": 6339 }, { "epoch": 6.096153846153846, "grad_norm": 0.42402639985084534, "learning_rate": 1.7700684023452568e-05, "loss": 0.0033, "step": 6340 }, { "epoch": 6.0971153846153845, "grad_norm": 1.6766939163208008, "learning_rate": 1.769988927115487e-05, "loss": 0.016, "step": 6341 }, { "epoch": 6.098076923076923, "grad_norm": 3.4491965770721436, "learning_rate": 1.769909439937749e-05, "loss": 0.0538, "step": 6342 }, { "epoch": 6.099038461538462, "grad_norm": 0.9956341981887817, "learning_rate": 1.769829940813276e-05, "loss": 0.0077, "step": 6343 }, { "epoch": 6.1, "grad_norm": 2.1701364517211914, "learning_rate": 1.7697504297433023e-05, "loss": 0.0283, "step": 6344 }, { "epoch": 6.100961538461538, "grad_norm": 2.463311195373535, "learning_rate": 1.7696709067290605e-05, "loss": 0.0246, "step": 6345 }, { "epoch": 6.101923076923077, "grad_norm": 1.8285603523254395, "learning_rate": 1.7695913717717854e-05, "loss": 0.017, "step": 6346 }, { "epoch": 6.102884615384616, "grad_norm": 4.2175469398498535, "learning_rate": 1.7695118248727107e-05, "loss": 0.0605, "step": 6347 }, { "epoch": 6.1038461538461535, "grad_norm": 2.011230707168579, "learning_rate": 1.7694322660330715e-05, "loss": 0.0555, "step": 6348 }, { "epoch": 6.104807692307692, "grad_norm": 1.0310049057006836, "learning_rate": 1.769352695254101e-05, "loss": 0.0091, "step": 6349 }, { "epoch": 6.105769230769231, "grad_norm": 3.543084144592285, "learning_rate": 1.7692731125370355e-05, "loss": 0.0317, "step": 6350 }, { "epoch": 6.1067307692307695, "grad_norm": 0.5190975069999695, "learning_rate": 1.7691935178831086e-05, "loss": 0.0055, "step": 6351 }, { "epoch": 6.107692307692307, "grad_norm": 5.050075054168701, "learning_rate": 1.769113911293556e-05, "loss": 0.2276, "step": 6352 }, { "epoch": 6.108653846153846, "grad_norm": 2.2301175594329834, "learning_rate": 1.769034292769613e-05, "loss": 0.0268, "step": 6353 }, { "epoch": 6.109615384615385, "grad_norm": 2.5066871643066406, "learning_rate": 1.7689546623125147e-05, "loss": 0.0293, "step": 6354 }, { "epoch": 6.110576923076923, "grad_norm": 0.7256991863250732, "learning_rate": 1.768875019923497e-05, "loss": 0.006, "step": 6355 }, { "epoch": 6.111538461538461, "grad_norm": 1.9256596565246582, "learning_rate": 1.7687953656037953e-05, "loss": 0.0147, "step": 6356 }, { "epoch": 6.1125, "grad_norm": 2.2759172916412354, "learning_rate": 1.768715699354646e-05, "loss": 0.0476, "step": 6357 }, { "epoch": 6.1134615384615385, "grad_norm": 2.712813138961792, "learning_rate": 1.7686360211772854e-05, "loss": 0.0378, "step": 6358 }, { "epoch": 6.114423076923077, "grad_norm": 2.3679118156433105, "learning_rate": 1.7685563310729494e-05, "loss": 0.0333, "step": 6359 }, { "epoch": 6.115384615384615, "grad_norm": 3.8941712379455566, "learning_rate": 1.7684766290428754e-05, "loss": 0.0658, "step": 6360 }, { "epoch": 6.116346153846154, "grad_norm": 2.7101054191589355, "learning_rate": 1.768396915088299e-05, "loss": 0.0249, "step": 6361 }, { "epoch": 6.117307692307692, "grad_norm": 3.3988420963287354, "learning_rate": 1.7683171892104578e-05, "loss": 0.046, "step": 6362 }, { "epoch": 6.118269230769231, "grad_norm": 0.8757596611976624, "learning_rate": 1.768237451410589e-05, "loss": 0.0073, "step": 6363 }, { "epoch": 6.119230769230769, "grad_norm": 2.894357204437256, "learning_rate": 1.7681577016899294e-05, "loss": 0.0433, "step": 6364 }, { "epoch": 6.1201923076923075, "grad_norm": 4.568248748779297, "learning_rate": 1.7680779400497172e-05, "loss": 0.0996, "step": 6365 }, { "epoch": 6.121153846153846, "grad_norm": 5.150471210479736, "learning_rate": 1.7679981664911893e-05, "loss": 0.2409, "step": 6366 }, { "epoch": 6.122115384615385, "grad_norm": 2.028398275375366, "learning_rate": 1.767918381015584e-05, "loss": 0.0249, "step": 6367 }, { "epoch": 6.123076923076923, "grad_norm": 1.8731142282485962, "learning_rate": 1.767838583624139e-05, "loss": 0.012, "step": 6368 }, { "epoch": 6.124038461538461, "grad_norm": 1.1093707084655762, "learning_rate": 1.7677587743180934e-05, "loss": 0.0042, "step": 6369 }, { "epoch": 6.125, "grad_norm": 0.2975514531135559, "learning_rate": 1.7676789530986845e-05, "loss": 0.0027, "step": 6370 }, { "epoch": 6.125961538461539, "grad_norm": 1.6175806522369385, "learning_rate": 1.7675991199671517e-05, "loss": 0.0177, "step": 6371 }, { "epoch": 6.126923076923077, "grad_norm": 2.0918896198272705, "learning_rate": 1.7675192749247328e-05, "loss": 0.016, "step": 6372 }, { "epoch": 6.127884615384615, "grad_norm": 4.354818344116211, "learning_rate": 1.7674394179726678e-05, "loss": 0.1316, "step": 6373 }, { "epoch": 6.128846153846154, "grad_norm": 1.551432728767395, "learning_rate": 1.7673595491121957e-05, "loss": 0.0185, "step": 6374 }, { "epoch": 6.1298076923076925, "grad_norm": 4.596005439758301, "learning_rate": 1.7672796683445552e-05, "loss": 0.0403, "step": 6375 }, { "epoch": 6.130769230769231, "grad_norm": 2.2742955684661865, "learning_rate": 1.767199775670986e-05, "loss": 0.0385, "step": 6376 }, { "epoch": 6.131730769230769, "grad_norm": 4.108942031860352, "learning_rate": 1.7671198710927284e-05, "loss": 0.0581, "step": 6377 }, { "epoch": 6.132692307692308, "grad_norm": 0.7049075961112976, "learning_rate": 1.767039954611022e-05, "loss": 0.0086, "step": 6378 }, { "epoch": 6.133653846153846, "grad_norm": 1.0430964231491089, "learning_rate": 1.766960026227106e-05, "loss": 0.0074, "step": 6379 }, { "epoch": 6.134615384615385, "grad_norm": 2.0033814907073975, "learning_rate": 1.7668800859422223e-05, "loss": 0.0369, "step": 6380 }, { "epoch": 6.135576923076923, "grad_norm": 1.6032054424285889, "learning_rate": 1.76680013375761e-05, "loss": 0.0109, "step": 6381 }, { "epoch": 6.1365384615384615, "grad_norm": 2.3282549381256104, "learning_rate": 1.76672016967451e-05, "loss": 0.0341, "step": 6382 }, { "epoch": 6.1375, "grad_norm": 2.353760242462158, "learning_rate": 1.7666401936941636e-05, "loss": 0.0251, "step": 6383 }, { "epoch": 6.138461538461539, "grad_norm": 1.2002774477005005, "learning_rate": 1.766560205817811e-05, "loss": 0.0083, "step": 6384 }, { "epoch": 6.139423076923077, "grad_norm": 0.8201936483383179, "learning_rate": 1.7664802060466944e-05, "loss": 0.0078, "step": 6385 }, { "epoch": 6.140384615384615, "grad_norm": 2.9065468311309814, "learning_rate": 1.766400194382054e-05, "loss": 0.057, "step": 6386 }, { "epoch": 6.141346153846154, "grad_norm": 3.119807004928589, "learning_rate": 1.7663201708251322e-05, "loss": 0.0516, "step": 6387 }, { "epoch": 6.142307692307693, "grad_norm": 3.7624478340148926, "learning_rate": 1.7662401353771708e-05, "loss": 0.0797, "step": 6388 }, { "epoch": 6.1432692307692305, "grad_norm": 4.3436126708984375, "learning_rate": 1.7661600880394113e-05, "loss": 0.0614, "step": 6389 }, { "epoch": 6.144230769230769, "grad_norm": 2.259526491165161, "learning_rate": 1.7660800288130954e-05, "loss": 0.0278, "step": 6390 }, { "epoch": 6.145192307692308, "grad_norm": 1.2028822898864746, "learning_rate": 1.7659999576994664e-05, "loss": 0.009, "step": 6391 }, { "epoch": 6.1461538461538465, "grad_norm": 2.3706374168395996, "learning_rate": 1.7659198746997658e-05, "loss": 0.0272, "step": 6392 }, { "epoch": 6.147115384615384, "grad_norm": 1.770700454711914, "learning_rate": 1.765839779815237e-05, "loss": 0.0112, "step": 6393 }, { "epoch": 6.148076923076923, "grad_norm": 3.369527578353882, "learning_rate": 1.7657596730471223e-05, "loss": 0.0576, "step": 6394 }, { "epoch": 6.149038461538462, "grad_norm": 4.364760875701904, "learning_rate": 1.7656795543966652e-05, "loss": 0.0804, "step": 6395 }, { "epoch": 6.15, "grad_norm": 2.543022871017456, "learning_rate": 1.7655994238651084e-05, "loss": 0.0377, "step": 6396 }, { "epoch": 6.150961538461538, "grad_norm": 2.5506036281585693, "learning_rate": 1.765519281453696e-05, "loss": 0.0365, "step": 6397 }, { "epoch": 6.151923076923077, "grad_norm": 0.8609874844551086, "learning_rate": 1.7654391271636707e-05, "loss": 0.0067, "step": 6398 }, { "epoch": 6.1528846153846155, "grad_norm": 1.0059198141098022, "learning_rate": 1.7653589609962767e-05, "loss": 0.0046, "step": 6399 }, { "epoch": 6.153846153846154, "grad_norm": 1.3213849067687988, "learning_rate": 1.765278782952758e-05, "loss": 0.0069, "step": 6400 }, { "epoch": 6.154807692307692, "grad_norm": 3.039951801300049, "learning_rate": 1.7651985930343586e-05, "loss": 0.041, "step": 6401 }, { "epoch": 6.155769230769231, "grad_norm": 2.0321476459503174, "learning_rate": 1.7651183912423228e-05, "loss": 0.0219, "step": 6402 }, { "epoch": 6.156730769230769, "grad_norm": 1.7294756174087524, "learning_rate": 1.7650381775778954e-05, "loss": 0.0165, "step": 6403 }, { "epoch": 6.157692307692308, "grad_norm": 2.151538848876953, "learning_rate": 1.7649579520423206e-05, "loss": 0.041, "step": 6404 }, { "epoch": 6.158653846153846, "grad_norm": 1.1382454633712769, "learning_rate": 1.764877714636844e-05, "loss": 0.0089, "step": 6405 }, { "epoch": 6.1596153846153845, "grad_norm": 8.07247543334961, "learning_rate": 1.7647974653627095e-05, "loss": 0.0934, "step": 6406 }, { "epoch": 6.160576923076923, "grad_norm": 0.30398690700531006, "learning_rate": 1.7647172042211632e-05, "loss": 0.003, "step": 6407 }, { "epoch": 6.161538461538462, "grad_norm": 4.496737480163574, "learning_rate": 1.7646369312134507e-05, "loss": 0.0839, "step": 6408 }, { "epoch": 6.1625, "grad_norm": 0.4214463531970978, "learning_rate": 1.7645566463408167e-05, "loss": 0.0029, "step": 6409 }, { "epoch": 6.163461538461538, "grad_norm": 1.2168784141540527, "learning_rate": 1.764476349604508e-05, "loss": 0.0084, "step": 6410 }, { "epoch": 6.164423076923077, "grad_norm": 3.7463483810424805, "learning_rate": 1.7643960410057696e-05, "loss": 0.1275, "step": 6411 }, { "epoch": 6.165384615384616, "grad_norm": 3.230337142944336, "learning_rate": 1.7643157205458483e-05, "loss": 0.0276, "step": 6412 }, { "epoch": 6.1663461538461535, "grad_norm": 1.0731481313705444, "learning_rate": 1.7642353882259903e-05, "loss": 0.0092, "step": 6413 }, { "epoch": 6.167307692307692, "grad_norm": 1.4838439226150513, "learning_rate": 1.764155044047442e-05, "loss": 0.0468, "step": 6414 }, { "epoch": 6.168269230769231, "grad_norm": 1.8807786703109741, "learning_rate": 1.7640746880114505e-05, "loss": 0.0131, "step": 6415 }, { "epoch": 6.1692307692307695, "grad_norm": 1.4286561012268066, "learning_rate": 1.763994320119262e-05, "loss": 0.0229, "step": 6416 }, { "epoch": 6.170192307692307, "grad_norm": 3.185870885848999, "learning_rate": 1.7639139403721238e-05, "loss": 0.0546, "step": 6417 }, { "epoch": 6.171153846153846, "grad_norm": 2.246882200241089, "learning_rate": 1.7638335487712836e-05, "loss": 0.0335, "step": 6418 }, { "epoch": 6.172115384615385, "grad_norm": 3.296151876449585, "learning_rate": 1.763753145317989e-05, "loss": 0.0456, "step": 6419 }, { "epoch": 6.173076923076923, "grad_norm": 2.390925168991089, "learning_rate": 1.7636727300134863e-05, "loss": 0.021, "step": 6420 }, { "epoch": 6.174038461538461, "grad_norm": 2.820319175720215, "learning_rate": 1.7635923028590247e-05, "loss": 0.0443, "step": 6421 }, { "epoch": 6.175, "grad_norm": 1.0823893547058105, "learning_rate": 1.7635118638558514e-05, "loss": 0.0091, "step": 6422 }, { "epoch": 6.1759615384615385, "grad_norm": 4.779728889465332, "learning_rate": 1.763431413005215e-05, "loss": 0.0802, "step": 6423 }, { "epoch": 6.176923076923077, "grad_norm": 2.9801504611968994, "learning_rate": 1.7633509503083636e-05, "loss": 0.0176, "step": 6424 }, { "epoch": 6.177884615384615, "grad_norm": 2.4532976150512695, "learning_rate": 1.763270475766546e-05, "loss": 0.0346, "step": 6425 }, { "epoch": 6.178846153846154, "grad_norm": 2.5668227672576904, "learning_rate": 1.7631899893810108e-05, "loss": 0.0415, "step": 6426 }, { "epoch": 6.179807692307692, "grad_norm": 3.633133888244629, "learning_rate": 1.763109491153007e-05, "loss": 0.0617, "step": 6427 }, { "epoch": 6.180769230769231, "grad_norm": 3.2659077644348145, "learning_rate": 1.7630289810837836e-05, "loss": 0.0289, "step": 6428 }, { "epoch": 6.181730769230769, "grad_norm": 2.912722110748291, "learning_rate": 1.7629484591745895e-05, "loss": 0.0296, "step": 6429 }, { "epoch": 6.1826923076923075, "grad_norm": 1.3894068002700806, "learning_rate": 1.7628679254266747e-05, "loss": 0.01, "step": 6430 }, { "epoch": 6.183653846153846, "grad_norm": 2.0633199214935303, "learning_rate": 1.7627873798412887e-05, "loss": 0.0368, "step": 6431 }, { "epoch": 6.184615384615385, "grad_norm": 2.879511833190918, "learning_rate": 1.7627068224196816e-05, "loss": 0.0342, "step": 6432 }, { "epoch": 6.185576923076923, "grad_norm": 2.47924542427063, "learning_rate": 1.7626262531631028e-05, "loss": 0.0233, "step": 6433 }, { "epoch": 6.186538461538461, "grad_norm": 3.237776756286621, "learning_rate": 1.7625456720728028e-05, "loss": 0.0525, "step": 6434 }, { "epoch": 6.1875, "grad_norm": 1.8458300828933716, "learning_rate": 1.7624650791500323e-05, "loss": 0.0239, "step": 6435 }, { "epoch": 6.188461538461539, "grad_norm": 1.0025449991226196, "learning_rate": 1.762384474396041e-05, "loss": 0.0049, "step": 6436 }, { "epoch": 6.189423076923077, "grad_norm": 4.891287803649902, "learning_rate": 1.762303857812081e-05, "loss": 0.0883, "step": 6437 }, { "epoch": 6.190384615384615, "grad_norm": 3.549161911010742, "learning_rate": 1.7622232293994023e-05, "loss": 0.0437, "step": 6438 }, { "epoch": 6.191346153846154, "grad_norm": 2.870661497116089, "learning_rate": 1.7621425891592558e-05, "loss": 0.026, "step": 6439 }, { "epoch": 6.1923076923076925, "grad_norm": 1.4356622695922852, "learning_rate": 1.7620619370928932e-05, "loss": 0.0114, "step": 6440 }, { "epoch": 6.193269230769231, "grad_norm": 8.435359954833984, "learning_rate": 1.7619812732015664e-05, "loss": 0.1071, "step": 6441 }, { "epoch": 6.194230769230769, "grad_norm": 3.3687291145324707, "learning_rate": 1.7619005974865265e-05, "loss": 0.0557, "step": 6442 }, { "epoch": 6.195192307692308, "grad_norm": 5.59334135055542, "learning_rate": 1.761819909949025e-05, "loss": 0.0581, "step": 6443 }, { "epoch": 6.196153846153846, "grad_norm": 3.017622947692871, "learning_rate": 1.761739210590315e-05, "loss": 0.0455, "step": 6444 }, { "epoch": 6.197115384615385, "grad_norm": 2.6080849170684814, "learning_rate": 1.7616584994116478e-05, "loss": 0.0277, "step": 6445 }, { "epoch": 6.198076923076923, "grad_norm": 4.739394664764404, "learning_rate": 1.7615777764142763e-05, "loss": 0.0618, "step": 6446 }, { "epoch": 6.1990384615384615, "grad_norm": 1.4948852062225342, "learning_rate": 1.7614970415994527e-05, "loss": 0.0119, "step": 6447 }, { "epoch": 6.2, "grad_norm": 1.1228317022323608, "learning_rate": 1.7614162949684305e-05, "loss": 0.0111, "step": 6448 }, { "epoch": 6.200961538461539, "grad_norm": 3.339374542236328, "learning_rate": 1.7613355365224615e-05, "loss": 0.0735, "step": 6449 }, { "epoch": 6.201923076923077, "grad_norm": 1.8590558767318726, "learning_rate": 1.7612547662627996e-05, "loss": 0.0163, "step": 6450 }, { "epoch": 6.202884615384615, "grad_norm": 0.6444083452224731, "learning_rate": 1.7611739841906985e-05, "loss": 0.0043, "step": 6451 }, { "epoch": 6.203846153846154, "grad_norm": 2.7455103397369385, "learning_rate": 1.7610931903074103e-05, "loss": 0.0243, "step": 6452 }, { "epoch": 6.204807692307693, "grad_norm": 5.993405342102051, "learning_rate": 1.76101238461419e-05, "loss": 0.1419, "step": 6453 }, { "epoch": 6.2057692307692305, "grad_norm": 5.491947650909424, "learning_rate": 1.7609315671122912e-05, "loss": 0.1162, "step": 6454 }, { "epoch": 6.206730769230769, "grad_norm": 0.5089796185493469, "learning_rate": 1.7608507378029678e-05, "loss": 0.0032, "step": 6455 }, { "epoch": 6.207692307692308, "grad_norm": 5.00860071182251, "learning_rate": 1.760769896687474e-05, "loss": 0.1118, "step": 6456 }, { "epoch": 6.2086538461538465, "grad_norm": 4.610771656036377, "learning_rate": 1.760689043767064e-05, "loss": 0.0369, "step": 6457 }, { "epoch": 6.209615384615384, "grad_norm": 1.7772235870361328, "learning_rate": 1.7606081790429925e-05, "loss": 0.0237, "step": 6458 }, { "epoch": 6.210576923076923, "grad_norm": 1.7663583755493164, "learning_rate": 1.7605273025165147e-05, "loss": 0.0404, "step": 6459 }, { "epoch": 6.211538461538462, "grad_norm": 2.7617201805114746, "learning_rate": 1.7604464141888856e-05, "loss": 0.0214, "step": 6460 }, { "epoch": 6.2125, "grad_norm": 1.704252004623413, "learning_rate": 1.7603655140613597e-05, "loss": 0.0595, "step": 6461 }, { "epoch": 6.213461538461538, "grad_norm": 6.73333215713501, "learning_rate": 1.7602846021351925e-05, "loss": 0.093, "step": 6462 }, { "epoch": 6.214423076923077, "grad_norm": 3.3292715549468994, "learning_rate": 1.7602036784116397e-05, "loss": 0.1062, "step": 6463 }, { "epoch": 6.2153846153846155, "grad_norm": 2.4212136268615723, "learning_rate": 1.7601227428919575e-05, "loss": 0.0175, "step": 6464 }, { "epoch": 6.216346153846154, "grad_norm": 3.4111201763153076, "learning_rate": 1.760041795577401e-05, "loss": 0.0306, "step": 6465 }, { "epoch": 6.217307692307692, "grad_norm": 0.383797824382782, "learning_rate": 1.759960836469226e-05, "loss": 0.0037, "step": 6466 }, { "epoch": 6.218269230769231, "grad_norm": 1.7350653409957886, "learning_rate": 1.75987986556869e-05, "loss": 0.0162, "step": 6467 }, { "epoch": 6.219230769230769, "grad_norm": 3.278693675994873, "learning_rate": 1.7597988828770484e-05, "loss": 0.035, "step": 6468 }, { "epoch": 6.220192307692308, "grad_norm": 1.5744655132293701, "learning_rate": 1.759717888395558e-05, "loss": 0.0099, "step": 6469 }, { "epoch": 6.221153846153846, "grad_norm": 2.370934009552002, "learning_rate": 1.759636882125476e-05, "loss": 0.0249, "step": 6470 }, { "epoch": 6.2221153846153845, "grad_norm": 2.2023940086364746, "learning_rate": 1.7595558640680586e-05, "loss": 0.0373, "step": 6471 }, { "epoch": 6.223076923076923, "grad_norm": 2.9846365451812744, "learning_rate": 1.759474834224564e-05, "loss": 0.0577, "step": 6472 }, { "epoch": 6.224038461538462, "grad_norm": 2.5765795707702637, "learning_rate": 1.7593937925962488e-05, "loss": 0.0489, "step": 6473 }, { "epoch": 6.225, "grad_norm": 4.604921340942383, "learning_rate": 1.7593127391843706e-05, "loss": 0.0349, "step": 6474 }, { "epoch": 6.225961538461538, "grad_norm": 3.524115800857544, "learning_rate": 1.7592316739901875e-05, "loss": 0.0925, "step": 6475 }, { "epoch": 6.226923076923077, "grad_norm": 3.1333725452423096, "learning_rate": 1.759150597014957e-05, "loss": 0.0599, "step": 6476 }, { "epoch": 6.227884615384616, "grad_norm": 3.234868049621582, "learning_rate": 1.7590695082599374e-05, "loss": 0.0497, "step": 6477 }, { "epoch": 6.2288461538461535, "grad_norm": 2.154947519302368, "learning_rate": 1.7589884077263867e-05, "loss": 0.0193, "step": 6478 }, { "epoch": 6.229807692307692, "grad_norm": 2.6304867267608643, "learning_rate": 1.7589072954155636e-05, "loss": 0.0379, "step": 6479 }, { "epoch": 6.230769230769231, "grad_norm": 2.9909932613372803, "learning_rate": 1.758826171328727e-05, "loss": 0.034, "step": 6480 }, { "epoch": 6.2317307692307695, "grad_norm": 0.8962897062301636, "learning_rate": 1.758745035467135e-05, "loss": 0.0089, "step": 6481 }, { "epoch": 6.232692307692307, "grad_norm": 3.6487858295440674, "learning_rate": 1.7586638878320467e-05, "loss": 0.084, "step": 6482 }, { "epoch": 6.233653846153846, "grad_norm": 1.8613440990447998, "learning_rate": 1.7585827284247216e-05, "loss": 0.0165, "step": 6483 }, { "epoch": 6.234615384615385, "grad_norm": 2.2776079177856445, "learning_rate": 1.758501557246419e-05, "loss": 0.0238, "step": 6484 }, { "epoch": 6.235576923076923, "grad_norm": 3.10915207862854, "learning_rate": 1.7584203742983988e-05, "loss": 0.0275, "step": 6485 }, { "epoch": 6.236538461538461, "grad_norm": 2.5448925495147705, "learning_rate": 1.75833917958192e-05, "loss": 0.0427, "step": 6486 }, { "epoch": 6.2375, "grad_norm": 3.0535330772399902, "learning_rate": 1.758257973098243e-05, "loss": 0.0466, "step": 6487 }, { "epoch": 6.2384615384615385, "grad_norm": 4.5475263595581055, "learning_rate": 1.7581767548486276e-05, "loss": 0.172, "step": 6488 }, { "epoch": 6.239423076923077, "grad_norm": 3.283008098602295, "learning_rate": 1.758095524834334e-05, "loss": 0.0626, "step": 6489 }, { "epoch": 6.240384615384615, "grad_norm": 2.391576051712036, "learning_rate": 1.7580142830566233e-05, "loss": 0.0661, "step": 6490 }, { "epoch": 6.241346153846154, "grad_norm": 3.678821325302124, "learning_rate": 1.7579330295167555e-05, "loss": 0.103, "step": 6491 }, { "epoch": 6.242307692307692, "grad_norm": 2.5144617557525635, "learning_rate": 1.7578517642159917e-05, "loss": 0.0186, "step": 6492 }, { "epoch": 6.243269230769231, "grad_norm": 3.170435667037964, "learning_rate": 1.7577704871555924e-05, "loss": 0.0488, "step": 6493 }, { "epoch": 6.244230769230769, "grad_norm": 2.406142473220825, "learning_rate": 1.7576891983368192e-05, "loss": 0.0373, "step": 6494 }, { "epoch": 6.2451923076923075, "grad_norm": 3.4133107662200928, "learning_rate": 1.757607897760934e-05, "loss": 0.0687, "step": 6495 }, { "epoch": 6.246153846153846, "grad_norm": 1.4192959070205688, "learning_rate": 1.757526585429197e-05, "loss": 0.0358, "step": 6496 }, { "epoch": 6.247115384615385, "grad_norm": 1.9283828735351562, "learning_rate": 1.7574452613428712e-05, "loss": 0.019, "step": 6497 }, { "epoch": 6.248076923076923, "grad_norm": 1.7691854238510132, "learning_rate": 1.757363925503218e-05, "loss": 0.0278, "step": 6498 }, { "epoch": 6.249038461538461, "grad_norm": 1.294637680053711, "learning_rate": 1.7572825779114993e-05, "loss": 0.0075, "step": 6499 }, { "epoch": 6.25, "grad_norm": 5.286144256591797, "learning_rate": 1.7572012185689778e-05, "loss": 0.0644, "step": 6500 }, { "epoch": 6.250961538461539, "grad_norm": 4.490984916687012, "learning_rate": 1.7571198474769157e-05, "loss": 0.053, "step": 6501 }, { "epoch": 6.251923076923077, "grad_norm": 2.0054330825805664, "learning_rate": 1.7570384646365756e-05, "loss": 0.0304, "step": 6502 }, { "epoch": 6.252884615384615, "grad_norm": 2.051083564758301, "learning_rate": 1.7569570700492202e-05, "loss": 0.0286, "step": 6503 }, { "epoch": 6.253846153846154, "grad_norm": 1.1115106344223022, "learning_rate": 1.7568756637161134e-05, "loss": 0.0074, "step": 6504 }, { "epoch": 6.2548076923076925, "grad_norm": 2.2973790168762207, "learning_rate": 1.756794245638517e-05, "loss": 0.0215, "step": 6505 }, { "epoch": 6.25576923076923, "grad_norm": 2.468877077102661, "learning_rate": 1.7567128158176955e-05, "loss": 0.0187, "step": 6506 }, { "epoch": 6.256730769230769, "grad_norm": 1.2436223030090332, "learning_rate": 1.7566313742549116e-05, "loss": 0.0121, "step": 6507 }, { "epoch": 6.257692307692308, "grad_norm": 0.635606586933136, "learning_rate": 1.75654992095143e-05, "loss": 0.0057, "step": 6508 }, { "epoch": 6.258653846153846, "grad_norm": 2.1464569568634033, "learning_rate": 1.7564684559085138e-05, "loss": 0.022, "step": 6509 }, { "epoch": 6.259615384615385, "grad_norm": 3.0088257789611816, "learning_rate": 1.7563869791274274e-05, "loss": 0.0486, "step": 6510 }, { "epoch": 6.260576923076923, "grad_norm": 2.599534273147583, "learning_rate": 1.7563054906094348e-05, "loss": 0.0454, "step": 6511 }, { "epoch": 6.2615384615384615, "grad_norm": 2.627119302749634, "learning_rate": 1.756223990355801e-05, "loss": 0.0323, "step": 6512 }, { "epoch": 6.2625, "grad_norm": 4.688491344451904, "learning_rate": 1.75614247836779e-05, "loss": 0.0856, "step": 6513 }, { "epoch": 6.263461538461539, "grad_norm": 1.5651400089263916, "learning_rate": 1.7560609546466674e-05, "loss": 0.0251, "step": 6514 }, { "epoch": 6.264423076923077, "grad_norm": 3.4242382049560547, "learning_rate": 1.755979419193698e-05, "loss": 0.0472, "step": 6515 }, { "epoch": 6.265384615384615, "grad_norm": 1.3060015439987183, "learning_rate": 1.7558978720101464e-05, "loss": 0.0145, "step": 6516 }, { "epoch": 6.266346153846154, "grad_norm": 2.023174524307251, "learning_rate": 1.7558163130972784e-05, "loss": 0.0196, "step": 6517 }, { "epoch": 6.267307692307693, "grad_norm": 2.5461673736572266, "learning_rate": 1.7557347424563597e-05, "loss": 0.0464, "step": 6518 }, { "epoch": 6.2682692307692305, "grad_norm": 3.0211472511291504, "learning_rate": 1.7556531600886554e-05, "loss": 0.0321, "step": 6519 }, { "epoch": 6.269230769230769, "grad_norm": 3.852166175842285, "learning_rate": 1.7555715659954323e-05, "loss": 0.0958, "step": 6520 }, { "epoch": 6.270192307692308, "grad_norm": 1.828202724456787, "learning_rate": 1.755489960177956e-05, "loss": 0.0098, "step": 6521 }, { "epoch": 6.2711538461538465, "grad_norm": 2.554992198944092, "learning_rate": 1.7554083426374927e-05, "loss": 0.0646, "step": 6522 }, { "epoch": 6.272115384615384, "grad_norm": 2.1747806072235107, "learning_rate": 1.7553267133753092e-05, "loss": 0.0457, "step": 6523 }, { "epoch": 6.273076923076923, "grad_norm": 4.71502685546875, "learning_rate": 1.755245072392672e-05, "loss": 0.1146, "step": 6524 }, { "epoch": 6.274038461538462, "grad_norm": 3.472557544708252, "learning_rate": 1.7551634196908475e-05, "loss": 0.0432, "step": 6525 }, { "epoch": 6.275, "grad_norm": 1.0148755311965942, "learning_rate": 1.7550817552711036e-05, "loss": 0.0067, "step": 6526 }, { "epoch": 6.275961538461538, "grad_norm": 1.2622681856155396, "learning_rate": 1.7550000791347066e-05, "loss": 0.0153, "step": 6527 }, { "epoch": 6.276923076923077, "grad_norm": 0.48815634846687317, "learning_rate": 1.7549183912829245e-05, "loss": 0.0033, "step": 6528 }, { "epoch": 6.2778846153846155, "grad_norm": 2.8447935581207275, "learning_rate": 1.7548366917170247e-05, "loss": 0.084, "step": 6529 }, { "epoch": 6.278846153846154, "grad_norm": 2.2135188579559326, "learning_rate": 1.7547549804382746e-05, "loss": 0.0412, "step": 6530 }, { "epoch": 6.279807692307692, "grad_norm": 2.6425583362579346, "learning_rate": 1.7546732574479426e-05, "loss": 0.0341, "step": 6531 }, { "epoch": 6.280769230769231, "grad_norm": 3.7872133255004883, "learning_rate": 1.7545915227472967e-05, "loss": 0.098, "step": 6532 }, { "epoch": 6.281730769230769, "grad_norm": 2.4379336833953857, "learning_rate": 1.7545097763376047e-05, "loss": 0.0375, "step": 6533 }, { "epoch": 6.282692307692308, "grad_norm": 0.7944756746292114, "learning_rate": 1.7544280182201353e-05, "loss": 0.0047, "step": 6534 }, { "epoch": 6.283653846153846, "grad_norm": 1.3636517524719238, "learning_rate": 1.7543462483961577e-05, "loss": 0.0127, "step": 6535 }, { "epoch": 6.2846153846153845, "grad_norm": 3.793090343475342, "learning_rate": 1.7542644668669402e-05, "loss": 0.1602, "step": 6536 }, { "epoch": 6.285576923076923, "grad_norm": 3.389411449432373, "learning_rate": 1.7541826736337518e-05, "loss": 0.0514, "step": 6537 }, { "epoch": 6.286538461538462, "grad_norm": 3.156566619873047, "learning_rate": 1.754100868697862e-05, "loss": 0.1259, "step": 6538 }, { "epoch": 6.2875, "grad_norm": 2.796163320541382, "learning_rate": 1.75401905206054e-05, "loss": 0.0356, "step": 6539 }, { "epoch": 6.288461538461538, "grad_norm": 3.7050132751464844, "learning_rate": 1.753937223723055e-05, "loss": 0.0509, "step": 6540 }, { "epoch": 6.289423076923077, "grad_norm": 3.258453607559204, "learning_rate": 1.7538553836866773e-05, "loss": 0.0406, "step": 6541 }, { "epoch": 6.290384615384616, "grad_norm": 1.327507734298706, "learning_rate": 1.7537735319526766e-05, "loss": 0.0134, "step": 6542 }, { "epoch": 6.2913461538461535, "grad_norm": 2.943263530731201, "learning_rate": 1.7536916685223232e-05, "loss": 0.0341, "step": 6543 }, { "epoch": 6.292307692307692, "grad_norm": 2.7568182945251465, "learning_rate": 1.7536097933968866e-05, "loss": 0.028, "step": 6544 }, { "epoch": 6.293269230769231, "grad_norm": 2.2091827392578125, "learning_rate": 1.753527906577638e-05, "loss": 0.0325, "step": 6545 }, { "epoch": 6.2942307692307695, "grad_norm": 2.6109812259674072, "learning_rate": 1.7534460080658486e-05, "loss": 0.0259, "step": 6546 }, { "epoch": 6.295192307692307, "grad_norm": 1.8047823905944824, "learning_rate": 1.7533640978627875e-05, "loss": 0.0382, "step": 6547 }, { "epoch": 6.296153846153846, "grad_norm": 3.006772756576538, "learning_rate": 1.753282175969727e-05, "loss": 0.0651, "step": 6548 }, { "epoch": 6.297115384615385, "grad_norm": 4.261025905609131, "learning_rate": 1.7532002423879383e-05, "loss": 0.0675, "step": 6549 }, { "epoch": 6.298076923076923, "grad_norm": 4.197536945343018, "learning_rate": 1.7531182971186923e-05, "loss": 0.0635, "step": 6550 }, { "epoch": 6.299038461538461, "grad_norm": 1.9737437963485718, "learning_rate": 1.7530363401632604e-05, "loss": 0.0208, "step": 6551 }, { "epoch": 6.3, "grad_norm": 1.9176039695739746, "learning_rate": 1.752954371522915e-05, "loss": 0.0249, "step": 6552 }, { "epoch": 6.3009615384615385, "grad_norm": 3.388059616088867, "learning_rate": 1.7528723911989275e-05, "loss": 0.0257, "step": 6553 }, { "epoch": 6.301923076923077, "grad_norm": 1.1039955615997314, "learning_rate": 1.7527903991925702e-05, "loss": 0.0099, "step": 6554 }, { "epoch": 6.302884615384615, "grad_norm": 1.4658504724502563, "learning_rate": 1.7527083955051153e-05, "loss": 0.0152, "step": 6555 }, { "epoch": 6.303846153846154, "grad_norm": 0.924640953540802, "learning_rate": 1.752626380137835e-05, "loss": 0.0102, "step": 6556 }, { "epoch": 6.304807692307692, "grad_norm": 0.8063534498214722, "learning_rate": 1.752544353092003e-05, "loss": 0.0068, "step": 6557 }, { "epoch": 6.305769230769231, "grad_norm": 2.4029722213745117, "learning_rate": 1.7524623143688905e-05, "loss": 0.0271, "step": 6558 }, { "epoch": 6.30673076923077, "grad_norm": 1.9278868436813354, "learning_rate": 1.7523802639697715e-05, "loss": 0.0282, "step": 6559 }, { "epoch": 6.3076923076923075, "grad_norm": 4.532595157623291, "learning_rate": 1.752298201895919e-05, "loss": 0.1057, "step": 6560 }, { "epoch": 6.308653846153846, "grad_norm": 4.134960174560547, "learning_rate": 1.7522161281486067e-05, "loss": 0.0538, "step": 6561 }, { "epoch": 6.309615384615385, "grad_norm": 1.1558122634887695, "learning_rate": 1.7521340427291077e-05, "loss": 0.011, "step": 6562 }, { "epoch": 6.310576923076923, "grad_norm": 1.309855580329895, "learning_rate": 1.7520519456386957e-05, "loss": 0.01, "step": 6563 }, { "epoch": 6.311538461538461, "grad_norm": 4.295964241027832, "learning_rate": 1.7519698368786448e-05, "loss": 0.1061, "step": 6564 }, { "epoch": 6.3125, "grad_norm": 3.1860711574554443, "learning_rate": 1.751887716450229e-05, "loss": 0.1258, "step": 6565 }, { "epoch": 6.313461538461539, "grad_norm": 3.231491804122925, "learning_rate": 1.7518055843547228e-05, "loss": 0.0603, "step": 6566 }, { "epoch": 6.314423076923077, "grad_norm": 1.363064169883728, "learning_rate": 1.7517234405934003e-05, "loss": 0.0201, "step": 6567 }, { "epoch": 6.315384615384615, "grad_norm": 3.0888257026672363, "learning_rate": 1.7516412851675362e-05, "loss": 0.0671, "step": 6568 }, { "epoch": 6.316346153846154, "grad_norm": 2.441270351409912, "learning_rate": 1.751559118078406e-05, "loss": 0.0271, "step": 6569 }, { "epoch": 6.3173076923076925, "grad_norm": 4.380022048950195, "learning_rate": 1.7514769393272833e-05, "loss": 0.0392, "step": 6570 }, { "epoch": 6.31826923076923, "grad_norm": 2.695613384246826, "learning_rate": 1.7513947489154443e-05, "loss": 0.0507, "step": 6571 }, { "epoch": 6.319230769230769, "grad_norm": 3.3789987564086914, "learning_rate": 1.751312546844164e-05, "loss": 0.0287, "step": 6572 }, { "epoch": 6.320192307692308, "grad_norm": 2.7557785511016846, "learning_rate": 1.751230333114718e-05, "loss": 0.068, "step": 6573 }, { "epoch": 6.321153846153846, "grad_norm": 0.5165614485740662, "learning_rate": 1.7511481077283826e-05, "loss": 0.0048, "step": 6574 }, { "epoch": 6.322115384615385, "grad_norm": 2.8852760791778564, "learning_rate": 1.751065870686433e-05, "loss": 0.0383, "step": 6575 }, { "epoch": 6.323076923076923, "grad_norm": 4.1215009689331055, "learning_rate": 1.750983621990145e-05, "loss": 0.0981, "step": 6576 }, { "epoch": 6.3240384615384615, "grad_norm": 1.4479032754898071, "learning_rate": 1.7509013616407956e-05, "loss": 0.0104, "step": 6577 }, { "epoch": 6.325, "grad_norm": 4.046736240386963, "learning_rate": 1.750819089639661e-05, "loss": 0.0857, "step": 6578 }, { "epoch": 6.325961538461539, "grad_norm": 1.5098373889923096, "learning_rate": 1.7507368059880178e-05, "loss": 0.0185, "step": 6579 }, { "epoch": 6.326923076923077, "grad_norm": 2.0724921226501465, "learning_rate": 1.7506545106871423e-05, "loss": 0.0234, "step": 6580 }, { "epoch": 6.327884615384615, "grad_norm": 5.9865546226501465, "learning_rate": 1.7505722037383126e-05, "loss": 0.1122, "step": 6581 }, { "epoch": 6.328846153846154, "grad_norm": 2.3949027061462402, "learning_rate": 1.7504898851428046e-05, "loss": 0.0253, "step": 6582 }, { "epoch": 6.329807692307693, "grad_norm": 2.868133068084717, "learning_rate": 1.7504075549018966e-05, "loss": 0.043, "step": 6583 }, { "epoch": 6.3307692307692305, "grad_norm": 2.9012255668640137, "learning_rate": 1.7503252130168657e-05, "loss": 0.0485, "step": 6584 }, { "epoch": 6.331730769230769, "grad_norm": 0.8083155751228333, "learning_rate": 1.7502428594889894e-05, "loss": 0.0051, "step": 6585 }, { "epoch": 6.332692307692308, "grad_norm": 1.9965341091156006, "learning_rate": 1.7501604943195466e-05, "loss": 0.0148, "step": 6586 }, { "epoch": 6.3336538461538465, "grad_norm": 2.208313226699829, "learning_rate": 1.7500781175098137e-05, "loss": 0.0416, "step": 6587 }, { "epoch": 6.334615384615384, "grad_norm": 0.9108104109764099, "learning_rate": 1.7499957290610707e-05, "loss": 0.0094, "step": 6588 }, { "epoch": 6.335576923076923, "grad_norm": 3.379657745361328, "learning_rate": 1.7499133289745946e-05, "loss": 0.1491, "step": 6589 }, { "epoch": 6.336538461538462, "grad_norm": 3.1713147163391113, "learning_rate": 1.7498309172516647e-05, "loss": 0.0635, "step": 6590 }, { "epoch": 6.3375, "grad_norm": 2.5495917797088623, "learning_rate": 1.7497484938935598e-05, "loss": 0.0847, "step": 6591 }, { "epoch": 6.338461538461538, "grad_norm": 3.5451602935791016, "learning_rate": 1.7496660589015587e-05, "loss": 0.0709, "step": 6592 }, { "epoch": 6.339423076923077, "grad_norm": 2.33823823928833, "learning_rate": 1.7495836122769407e-05, "loss": 0.0362, "step": 6593 }, { "epoch": 6.3403846153846155, "grad_norm": 5.481456756591797, "learning_rate": 1.749501154020985e-05, "loss": 0.1381, "step": 6594 }, { "epoch": 6.341346153846154, "grad_norm": 3.135948657989502, "learning_rate": 1.7494186841349713e-05, "loss": 0.0641, "step": 6595 }, { "epoch": 6.342307692307692, "grad_norm": 2.4887638092041016, "learning_rate": 1.749336202620179e-05, "loss": 0.0408, "step": 6596 }, { "epoch": 6.343269230769231, "grad_norm": 2.352057695388794, "learning_rate": 1.749253709477888e-05, "loss": 0.016, "step": 6597 }, { "epoch": 6.344230769230769, "grad_norm": 1.4133554697036743, "learning_rate": 1.7491712047093785e-05, "loss": 0.0073, "step": 6598 }, { "epoch": 6.345192307692308, "grad_norm": 2.907186508178711, "learning_rate": 1.7490886883159306e-05, "loss": 0.046, "step": 6599 }, { "epoch": 6.346153846153846, "grad_norm": 1.975562572479248, "learning_rate": 1.749006160298825e-05, "loss": 0.0218, "step": 6600 }, { "epoch": 6.3471153846153845, "grad_norm": 3.8452975749969482, "learning_rate": 1.748923620659342e-05, "loss": 0.0798, "step": 6601 }, { "epoch": 6.348076923076923, "grad_norm": 4.17004919052124, "learning_rate": 1.7488410693987627e-05, "loss": 0.132, "step": 6602 }, { "epoch": 6.349038461538462, "grad_norm": 2.182598114013672, "learning_rate": 1.7487585065183675e-05, "loss": 0.0202, "step": 6603 }, { "epoch": 6.35, "grad_norm": 1.3588964939117432, "learning_rate": 1.7486759320194383e-05, "loss": 0.0162, "step": 6604 }, { "epoch": 6.350961538461538, "grad_norm": 2.4271178245544434, "learning_rate": 1.7485933459032557e-05, "loss": 0.031, "step": 6605 }, { "epoch": 6.351923076923077, "grad_norm": 1.8264490365982056, "learning_rate": 1.7485107481711014e-05, "loss": 0.0609, "step": 6606 }, { "epoch": 6.352884615384616, "grad_norm": 3.662456512451172, "learning_rate": 1.7484281388242572e-05, "loss": 0.0293, "step": 6607 }, { "epoch": 6.3538461538461535, "grad_norm": 0.7865803837776184, "learning_rate": 1.7483455178640048e-05, "loss": 0.0072, "step": 6608 }, { "epoch": 6.354807692307692, "grad_norm": 1.0141817331314087, "learning_rate": 1.7482628852916263e-05, "loss": 0.0117, "step": 6609 }, { "epoch": 6.355769230769231, "grad_norm": 2.2231132984161377, "learning_rate": 1.748180241108404e-05, "loss": 0.0182, "step": 6610 }, { "epoch": 6.3567307692307695, "grad_norm": 2.3085110187530518, "learning_rate": 1.7480975853156205e-05, "loss": 0.0391, "step": 6611 }, { "epoch": 6.357692307692307, "grad_norm": 2.221604347229004, "learning_rate": 1.7480149179145577e-05, "loss": 0.0244, "step": 6612 }, { "epoch": 6.358653846153846, "grad_norm": 2.5154178142547607, "learning_rate": 1.747932238906499e-05, "loss": 0.049, "step": 6613 }, { "epoch": 6.359615384615385, "grad_norm": 2.891436815261841, "learning_rate": 1.7478495482927275e-05, "loss": 0.0364, "step": 6614 }, { "epoch": 6.360576923076923, "grad_norm": 1.7458199262619019, "learning_rate": 1.747766846074525e-05, "loss": 0.0094, "step": 6615 }, { "epoch": 6.361538461538461, "grad_norm": 3.689143180847168, "learning_rate": 1.7476841322531766e-05, "loss": 0.0568, "step": 6616 }, { "epoch": 6.3625, "grad_norm": 3.8526997566223145, "learning_rate": 1.7476014068299646e-05, "loss": 0.0662, "step": 6617 }, { "epoch": 6.3634615384615385, "grad_norm": 1.2844396829605103, "learning_rate": 1.747518669806173e-05, "loss": 0.0077, "step": 6618 }, { "epoch": 6.364423076923077, "grad_norm": 3.477600336074829, "learning_rate": 1.7474359211830852e-05, "loss": 0.0771, "step": 6619 }, { "epoch": 6.365384615384615, "grad_norm": 4.181346893310547, "learning_rate": 1.7473531609619857e-05, "loss": 0.1005, "step": 6620 }, { "epoch": 6.366346153846154, "grad_norm": 3.197403907775879, "learning_rate": 1.7472703891441588e-05, "loss": 0.0547, "step": 6621 }, { "epoch": 6.367307692307692, "grad_norm": 1.7812275886535645, "learning_rate": 1.7471876057308886e-05, "loss": 0.0211, "step": 6622 }, { "epoch": 6.368269230769231, "grad_norm": 2.4603967666625977, "learning_rate": 1.74710481072346e-05, "loss": 0.0362, "step": 6623 }, { "epoch": 6.36923076923077, "grad_norm": 3.9241786003112793, "learning_rate": 1.747022004123157e-05, "loss": 0.0643, "step": 6624 }, { "epoch": 6.3701923076923075, "grad_norm": 1.1877895593643188, "learning_rate": 1.7469391859312653e-05, "loss": 0.0106, "step": 6625 }, { "epoch": 6.371153846153846, "grad_norm": 2.762777328491211, "learning_rate": 1.7468563561490697e-05, "loss": 0.1231, "step": 6626 }, { "epoch": 6.372115384615385, "grad_norm": 2.005544900894165, "learning_rate": 1.7467735147778553e-05, "loss": 0.0234, "step": 6627 }, { "epoch": 6.373076923076923, "grad_norm": 3.1104047298431396, "learning_rate": 1.7466906618189077e-05, "loss": 0.0273, "step": 6628 }, { "epoch": 6.374038461538461, "grad_norm": 0.36766546964645386, "learning_rate": 1.7466077972735126e-05, "loss": 0.0036, "step": 6629 }, { "epoch": 6.375, "grad_norm": 0.5692585706710815, "learning_rate": 1.746524921142956e-05, "loss": 0.0045, "step": 6630 }, { "epoch": 6.375961538461539, "grad_norm": 3.4921576976776123, "learning_rate": 1.7464420334285232e-05, "loss": 0.0354, "step": 6631 }, { "epoch": 6.376923076923077, "grad_norm": 2.1094777584075928, "learning_rate": 1.7463591341315008e-05, "loss": 0.017, "step": 6632 }, { "epoch": 6.377884615384615, "grad_norm": 2.59245228767395, "learning_rate": 1.7462762232531755e-05, "loss": 0.0219, "step": 6633 }, { "epoch": 6.378846153846154, "grad_norm": 3.5605592727661133, "learning_rate": 1.7461933007948333e-05, "loss": 0.0272, "step": 6634 }, { "epoch": 6.3798076923076925, "grad_norm": 1.4257185459136963, "learning_rate": 1.7461103667577616e-05, "loss": 0.0134, "step": 6635 }, { "epoch": 6.38076923076923, "grad_norm": 2.0988235473632812, "learning_rate": 1.7460274211432463e-05, "loss": 0.0174, "step": 6636 }, { "epoch": 6.381730769230769, "grad_norm": 0.8461649417877197, "learning_rate": 1.745944463952575e-05, "loss": 0.0057, "step": 6637 }, { "epoch": 6.382692307692308, "grad_norm": 2.734372615814209, "learning_rate": 1.745861495187035e-05, "loss": 0.032, "step": 6638 }, { "epoch": 6.383653846153846, "grad_norm": 3.067094087600708, "learning_rate": 1.745778514847914e-05, "loss": 0.0597, "step": 6639 }, { "epoch": 6.384615384615385, "grad_norm": 1.6614220142364502, "learning_rate": 1.7456955229364988e-05, "loss": 0.0154, "step": 6640 }, { "epoch": 6.385576923076923, "grad_norm": 1.4392365217208862, "learning_rate": 1.745612519454078e-05, "loss": 0.0092, "step": 6641 }, { "epoch": 6.3865384615384615, "grad_norm": 3.404984474182129, "learning_rate": 1.745529504401939e-05, "loss": 0.1029, "step": 6642 }, { "epoch": 6.3875, "grad_norm": 2.8484530448913574, "learning_rate": 1.7454464777813705e-05, "loss": 0.0419, "step": 6643 }, { "epoch": 6.388461538461539, "grad_norm": 5.554009437561035, "learning_rate": 1.74536343959366e-05, "loss": 0.0948, "step": 6644 }, { "epoch": 6.389423076923077, "grad_norm": 3.130781888961792, "learning_rate": 1.745280389840097e-05, "loss": 0.0443, "step": 6645 }, { "epoch": 6.390384615384615, "grad_norm": 1.8422439098358154, "learning_rate": 1.7451973285219698e-05, "loss": 0.0321, "step": 6646 }, { "epoch": 6.391346153846154, "grad_norm": 1.6452101469039917, "learning_rate": 1.7451142556405668e-05, "loss": 0.0108, "step": 6647 }, { "epoch": 6.392307692307693, "grad_norm": 0.8227193355560303, "learning_rate": 1.7450311711971777e-05, "loss": 0.0093, "step": 6648 }, { "epoch": 6.3932692307692305, "grad_norm": 3.347360372543335, "learning_rate": 1.7449480751930915e-05, "loss": 0.0452, "step": 6649 }, { "epoch": 6.394230769230769, "grad_norm": 4.122896671295166, "learning_rate": 1.7448649676295972e-05, "loss": 0.0436, "step": 6650 }, { "epoch": 6.395192307692308, "grad_norm": 2.50614070892334, "learning_rate": 1.744781848507985e-05, "loss": 0.0297, "step": 6651 }, { "epoch": 6.3961538461538465, "grad_norm": 3.151120901107788, "learning_rate": 1.7446987178295443e-05, "loss": 0.0368, "step": 6652 }, { "epoch": 6.397115384615384, "grad_norm": 1.064801812171936, "learning_rate": 1.7446155755955654e-05, "loss": 0.0109, "step": 6653 }, { "epoch": 6.398076923076923, "grad_norm": 5.11804723739624, "learning_rate": 1.7445324218073382e-05, "loss": 0.1015, "step": 6654 }, { "epoch": 6.399038461538462, "grad_norm": 2.6952624320983887, "learning_rate": 1.7444492564661528e-05, "loss": 0.0348, "step": 6655 }, { "epoch": 6.4, "grad_norm": 1.6740710735321045, "learning_rate": 1.7443660795733e-05, "loss": 0.0172, "step": 6656 }, { "epoch": 6.400961538461538, "grad_norm": 1.0617070198059082, "learning_rate": 1.7442828911300703e-05, "loss": 0.0072, "step": 6657 }, { "epoch": 6.401923076923077, "grad_norm": 3.5462467670440674, "learning_rate": 1.7441996911377547e-05, "loss": 0.0903, "step": 6658 }, { "epoch": 6.4028846153846155, "grad_norm": 0.9846839904785156, "learning_rate": 1.744116479597644e-05, "loss": 0.0073, "step": 6659 }, { "epoch": 6.403846153846154, "grad_norm": 1.6659833192825317, "learning_rate": 1.7440332565110293e-05, "loss": 0.0349, "step": 6660 }, { "epoch": 6.404807692307692, "grad_norm": 1.4483085870742798, "learning_rate": 1.7439500218792023e-05, "loss": 0.0101, "step": 6661 }, { "epoch": 6.405769230769231, "grad_norm": 2.8247592449188232, "learning_rate": 1.7438667757034547e-05, "loss": 0.0455, "step": 6662 }, { "epoch": 6.406730769230769, "grad_norm": 2.2444944381713867, "learning_rate": 1.7437835179850773e-05, "loss": 0.0354, "step": 6663 }, { "epoch": 6.407692307692308, "grad_norm": 3.5163917541503906, "learning_rate": 1.743700248725363e-05, "loss": 0.0917, "step": 6664 }, { "epoch": 6.408653846153846, "grad_norm": 0.12283799052238464, "learning_rate": 1.7436169679256042e-05, "loss": 0.001, "step": 6665 }, { "epoch": 6.4096153846153845, "grad_norm": 1.0903736352920532, "learning_rate": 1.743533675587092e-05, "loss": 0.0099, "step": 6666 }, { "epoch": 6.410576923076923, "grad_norm": 1.651725172996521, "learning_rate": 1.743450371711119e-05, "loss": 0.021, "step": 6667 }, { "epoch": 6.411538461538462, "grad_norm": 1.4570426940917969, "learning_rate": 1.743367056298979e-05, "loss": 0.014, "step": 6668 }, { "epoch": 6.4125, "grad_norm": 2.4538815021514893, "learning_rate": 1.743283729351963e-05, "loss": 0.0209, "step": 6669 }, { "epoch": 6.413461538461538, "grad_norm": 1.9164353609085083, "learning_rate": 1.7432003908713663e-05, "loss": 0.0355, "step": 6670 }, { "epoch": 6.414423076923077, "grad_norm": 2.2042531967163086, "learning_rate": 1.7431170408584802e-05, "loss": 0.0303, "step": 6671 }, { "epoch": 6.415384615384616, "grad_norm": 0.9198943376541138, "learning_rate": 1.7430336793145985e-05, "loss": 0.0045, "step": 6672 }, { "epoch": 6.4163461538461535, "grad_norm": 3.0122320652008057, "learning_rate": 1.742950306241015e-05, "loss": 0.1138, "step": 6673 }, { "epoch": 6.417307692307692, "grad_norm": 3.8172590732574463, "learning_rate": 1.7428669216390236e-05, "loss": 0.1093, "step": 6674 }, { "epoch": 6.418269230769231, "grad_norm": 2.2483205795288086, "learning_rate": 1.7427835255099173e-05, "loss": 0.0188, "step": 6675 }, { "epoch": 6.4192307692307695, "grad_norm": 3.5654897689819336, "learning_rate": 1.742700117854991e-05, "loss": 0.0352, "step": 6676 }, { "epoch": 6.420192307692307, "grad_norm": 3.8048336505889893, "learning_rate": 1.742616698675539e-05, "loss": 0.1612, "step": 6677 }, { "epoch": 6.421153846153846, "grad_norm": 2.5710017681121826, "learning_rate": 1.742533267972855e-05, "loss": 0.0266, "step": 6678 }, { "epoch": 6.422115384615385, "grad_norm": 1.5894768238067627, "learning_rate": 1.742449825748234e-05, "loss": 0.0129, "step": 6679 }, { "epoch": 6.423076923076923, "grad_norm": 2.5916988849639893, "learning_rate": 1.7423663720029707e-05, "loss": 0.0124, "step": 6680 }, { "epoch": 6.424038461538461, "grad_norm": 2.761484384536743, "learning_rate": 1.7422829067383603e-05, "loss": 0.0632, "step": 6681 }, { "epoch": 6.425, "grad_norm": 2.1328554153442383, "learning_rate": 1.7421994299556977e-05, "loss": 0.0581, "step": 6682 }, { "epoch": 6.4259615384615385, "grad_norm": 2.4526283740997314, "learning_rate": 1.7421159416562783e-05, "loss": 0.0493, "step": 6683 }, { "epoch": 6.426923076923077, "grad_norm": 2.5003066062927246, "learning_rate": 1.7420324418413973e-05, "loss": 0.0859, "step": 6684 }, { "epoch": 6.427884615384615, "grad_norm": 2.0557098388671875, "learning_rate": 1.7419489305123512e-05, "loss": 0.0183, "step": 6685 }, { "epoch": 6.428846153846154, "grad_norm": 3.3422634601593018, "learning_rate": 1.741865407670435e-05, "loss": 0.036, "step": 6686 }, { "epoch": 6.429807692307692, "grad_norm": 3.255674123764038, "learning_rate": 1.7417818733169453e-05, "loss": 0.0538, "step": 6687 }, { "epoch": 6.430769230769231, "grad_norm": 2.8088011741638184, "learning_rate": 1.7416983274531777e-05, "loss": 0.0516, "step": 6688 }, { "epoch": 6.43173076923077, "grad_norm": 0.8776519894599915, "learning_rate": 1.7416147700804286e-05, "loss": 0.0051, "step": 6689 }, { "epoch": 6.4326923076923075, "grad_norm": 1.9955343008041382, "learning_rate": 1.741531201199995e-05, "loss": 0.0414, "step": 6690 }, { "epoch": 6.433653846153846, "grad_norm": 3.4797301292419434, "learning_rate": 1.741447620813174e-05, "loss": 0.081, "step": 6691 }, { "epoch": 6.434615384615385, "grad_norm": 3.257488250732422, "learning_rate": 1.741364028921262e-05, "loss": 0.0367, "step": 6692 }, { "epoch": 6.435576923076923, "grad_norm": 4.037185192108154, "learning_rate": 1.741280425525556e-05, "loss": 0.0413, "step": 6693 }, { "epoch": 6.436538461538461, "grad_norm": 2.198143243789673, "learning_rate": 1.7411968106273532e-05, "loss": 0.0717, "step": 6694 }, { "epoch": 6.4375, "grad_norm": 0.4904644191265106, "learning_rate": 1.7411131842279515e-05, "loss": 0.0056, "step": 6695 }, { "epoch": 6.438461538461539, "grad_norm": 2.28407883644104, "learning_rate": 1.7410295463286484e-05, "loss": 0.054, "step": 6696 }, { "epoch": 6.439423076923077, "grad_norm": 1.263028860092163, "learning_rate": 1.7409458969307413e-05, "loss": 0.0411, "step": 6697 }, { "epoch": 6.440384615384615, "grad_norm": 3.3642189502716064, "learning_rate": 1.7408622360355288e-05, "loss": 0.0192, "step": 6698 }, { "epoch": 6.441346153846154, "grad_norm": 0.7239986658096313, "learning_rate": 1.740778563644309e-05, "loss": 0.0069, "step": 6699 }, { "epoch": 6.4423076923076925, "grad_norm": 2.2594106197357178, "learning_rate": 1.7406948797583796e-05, "loss": 0.0143, "step": 6700 }, { "epoch": 6.44326923076923, "grad_norm": 2.071225166320801, "learning_rate": 1.74061118437904e-05, "loss": 0.0242, "step": 6701 }, { "epoch": 6.444230769230769, "grad_norm": 0.557861864566803, "learning_rate": 1.7405274775075882e-05, "loss": 0.0057, "step": 6702 }, { "epoch": 6.445192307692308, "grad_norm": 0.8060851097106934, "learning_rate": 1.7404437591453237e-05, "loss": 0.0072, "step": 6703 }, { "epoch": 6.446153846153846, "grad_norm": 2.5481534004211426, "learning_rate": 1.740360029293545e-05, "loss": 0.0139, "step": 6704 }, { "epoch": 6.447115384615385, "grad_norm": 1.5301873683929443, "learning_rate": 1.7402762879535514e-05, "loss": 0.0155, "step": 6705 }, { "epoch": 6.448076923076923, "grad_norm": 3.1041407585144043, "learning_rate": 1.7401925351266425e-05, "loss": 0.0809, "step": 6706 }, { "epoch": 6.4490384615384615, "grad_norm": 5.571041107177734, "learning_rate": 1.7401087708141183e-05, "loss": 0.0776, "step": 6707 }, { "epoch": 6.45, "grad_norm": 1.7332113981246948, "learning_rate": 1.7400249950172775e-05, "loss": 0.015, "step": 6708 }, { "epoch": 6.450961538461539, "grad_norm": 0.40974465012550354, "learning_rate": 1.7399412077374214e-05, "loss": 0.0033, "step": 6709 }, { "epoch": 6.451923076923077, "grad_norm": 1.0514451265335083, "learning_rate": 1.739857408975849e-05, "loss": 0.0086, "step": 6710 }, { "epoch": 6.452884615384615, "grad_norm": 2.542724370956421, "learning_rate": 1.7397735987338615e-05, "loss": 0.0425, "step": 6711 }, { "epoch": 6.453846153846154, "grad_norm": 2.307675838470459, "learning_rate": 1.7396897770127588e-05, "loss": 0.0332, "step": 6712 }, { "epoch": 6.454807692307693, "grad_norm": 0.34007057547569275, "learning_rate": 1.7396059438138416e-05, "loss": 0.0025, "step": 6713 }, { "epoch": 6.4557692307692305, "grad_norm": 0.3774815499782562, "learning_rate": 1.739522099138411e-05, "loss": 0.0034, "step": 6714 }, { "epoch": 6.456730769230769, "grad_norm": 3.877819776535034, "learning_rate": 1.7394382429877676e-05, "loss": 0.0539, "step": 6715 }, { "epoch": 6.457692307692308, "grad_norm": 0.033775694668293, "learning_rate": 1.739354375363213e-05, "loss": 0.0003, "step": 6716 }, { "epoch": 6.4586538461538465, "grad_norm": 4.7169189453125, "learning_rate": 1.7392704962660488e-05, "loss": 0.059, "step": 6717 }, { "epoch": 6.459615384615384, "grad_norm": 2.1419942378997803, "learning_rate": 1.739186605697576e-05, "loss": 0.0421, "step": 6718 }, { "epoch": 6.460576923076923, "grad_norm": 3.059727191925049, "learning_rate": 1.7391027036590967e-05, "loss": 0.0602, "step": 6719 }, { "epoch": 6.461538461538462, "grad_norm": 1.601090669631958, "learning_rate": 1.7390187901519124e-05, "loss": 0.0095, "step": 6720 }, { "epoch": 6.4625, "grad_norm": 3.691542625427246, "learning_rate": 1.738934865177326e-05, "loss": 0.0614, "step": 6721 }, { "epoch": 6.463461538461538, "grad_norm": 1.988686442375183, "learning_rate": 1.7388509287366386e-05, "loss": 0.0217, "step": 6722 }, { "epoch": 6.464423076923077, "grad_norm": 1.4592866897583008, "learning_rate": 1.7387669808311535e-05, "loss": 0.0129, "step": 6723 }, { "epoch": 6.4653846153846155, "grad_norm": 1.6097341775894165, "learning_rate": 1.7386830214621733e-05, "loss": 0.0188, "step": 6724 }, { "epoch": 6.466346153846154, "grad_norm": 2.2878448963165283, "learning_rate": 1.7385990506310006e-05, "loss": 0.0241, "step": 6725 }, { "epoch": 6.467307692307692, "grad_norm": 2.798905372619629, "learning_rate": 1.738515068338938e-05, "loss": 0.0223, "step": 6726 }, { "epoch": 6.468269230769231, "grad_norm": 1.3202991485595703, "learning_rate": 1.7384310745872896e-05, "loss": 0.0088, "step": 6727 }, { "epoch": 6.469230769230769, "grad_norm": 1.4949880838394165, "learning_rate": 1.738347069377358e-05, "loss": 0.0103, "step": 6728 }, { "epoch": 6.470192307692308, "grad_norm": 1.2881557941436768, "learning_rate": 1.7382630527104467e-05, "loss": 0.0272, "step": 6729 }, { "epoch": 6.471153846153846, "grad_norm": 3.171999931335449, "learning_rate": 1.7381790245878597e-05, "loss": 0.028, "step": 6730 }, { "epoch": 6.4721153846153845, "grad_norm": 1.7199949026107788, "learning_rate": 1.7380949850109007e-05, "loss": 0.0119, "step": 6731 }, { "epoch": 6.473076923076923, "grad_norm": 1.0728927850723267, "learning_rate": 1.7380109339808742e-05, "loss": 0.0115, "step": 6732 }, { "epoch": 6.474038461538462, "grad_norm": 1.6221458911895752, "learning_rate": 1.7379268714990835e-05, "loss": 0.0112, "step": 6733 }, { "epoch": 6.475, "grad_norm": 3.119415283203125, "learning_rate": 1.737842797566834e-05, "loss": 0.0549, "step": 6734 }, { "epoch": 6.475961538461538, "grad_norm": 3.6513242721557617, "learning_rate": 1.7377587121854294e-05, "loss": 0.0624, "step": 6735 }, { "epoch": 6.476923076923077, "grad_norm": 3.077562093734741, "learning_rate": 1.7376746153561752e-05, "loss": 0.0388, "step": 6736 }, { "epoch": 6.477884615384616, "grad_norm": 2.977224588394165, "learning_rate": 1.737590507080376e-05, "loss": 0.0189, "step": 6737 }, { "epoch": 6.4788461538461535, "grad_norm": 0.7899975180625916, "learning_rate": 1.737506387359337e-05, "loss": 0.0065, "step": 6738 }, { "epoch": 6.479807692307692, "grad_norm": 2.349147319793701, "learning_rate": 1.7374222561943633e-05, "loss": 0.061, "step": 6739 }, { "epoch": 6.480769230769231, "grad_norm": 2.8097193241119385, "learning_rate": 1.7373381135867605e-05, "loss": 0.0528, "step": 6740 }, { "epoch": 6.4817307692307695, "grad_norm": 4.36846923828125, "learning_rate": 1.737253959537834e-05, "loss": 0.056, "step": 6741 }, { "epoch": 6.482692307692307, "grad_norm": 3.560925006866455, "learning_rate": 1.73716979404889e-05, "loss": 0.0932, "step": 6742 }, { "epoch": 6.483653846153846, "grad_norm": 1.988840103149414, "learning_rate": 1.7370856171212348e-05, "loss": 0.025, "step": 6743 }, { "epoch": 6.484615384615385, "grad_norm": 2.0656824111938477, "learning_rate": 1.7370014287561736e-05, "loss": 0.052, "step": 6744 }, { "epoch": 6.485576923076923, "grad_norm": 3.5262625217437744, "learning_rate": 1.7369172289550132e-05, "loss": 0.0816, "step": 6745 }, { "epoch": 6.486538461538461, "grad_norm": 2.203878402709961, "learning_rate": 1.736833017719061e-05, "loss": 0.0305, "step": 6746 }, { "epoch": 6.4875, "grad_norm": 3.176569938659668, "learning_rate": 1.7367487950496223e-05, "loss": 0.0234, "step": 6747 }, { "epoch": 6.4884615384615385, "grad_norm": 1.0065487623214722, "learning_rate": 1.7366645609480047e-05, "loss": 0.0065, "step": 6748 }, { "epoch": 6.489423076923077, "grad_norm": 2.210235595703125, "learning_rate": 1.7365803154155152e-05, "loss": 0.0613, "step": 6749 }, { "epoch": 6.490384615384615, "grad_norm": 0.9645440578460693, "learning_rate": 1.7364960584534612e-05, "loss": 0.006, "step": 6750 }, { "epoch": 6.491346153846154, "grad_norm": 1.4523358345031738, "learning_rate": 1.73641179006315e-05, "loss": 0.0268, "step": 6751 }, { "epoch": 6.492307692307692, "grad_norm": 1.9282081127166748, "learning_rate": 1.736327510245889e-05, "loss": 0.0229, "step": 6752 }, { "epoch": 6.493269230769231, "grad_norm": 3.768616199493408, "learning_rate": 1.7362432190029862e-05, "loss": 0.0486, "step": 6753 }, { "epoch": 6.49423076923077, "grad_norm": 3.2809221744537354, "learning_rate": 1.7361589163357497e-05, "loss": 0.048, "step": 6754 }, { "epoch": 6.4951923076923075, "grad_norm": 3.804316997528076, "learning_rate": 1.7360746022454868e-05, "loss": 0.0916, "step": 6755 }, { "epoch": 6.496153846153846, "grad_norm": 1.7667754888534546, "learning_rate": 1.735990276733507e-05, "loss": 0.0209, "step": 6756 }, { "epoch": 6.497115384615385, "grad_norm": 3.1336936950683594, "learning_rate": 1.7359059398011182e-05, "loss": 0.0191, "step": 6757 }, { "epoch": 6.498076923076923, "grad_norm": 2.164717674255371, "learning_rate": 1.735821591449629e-05, "loss": 0.0603, "step": 6758 }, { "epoch": 6.499038461538461, "grad_norm": 2.2340667247772217, "learning_rate": 1.735737231680348e-05, "loss": 0.0237, "step": 6759 }, { "epoch": 6.5, "grad_norm": 2.3152503967285156, "learning_rate": 1.7356528604945848e-05, "loss": 0.0177, "step": 6760 }, { "epoch": 6.500961538461539, "grad_norm": 3.224550724029541, "learning_rate": 1.7355684778936482e-05, "loss": 0.0424, "step": 6761 }, { "epoch": 6.501923076923077, "grad_norm": 5.453657150268555, "learning_rate": 1.735484083878848e-05, "loss": 0.1543, "step": 6762 }, { "epoch": 6.502884615384615, "grad_norm": 1.452823519706726, "learning_rate": 1.7353996784514932e-05, "loss": 0.0104, "step": 6763 }, { "epoch": 6.503846153846154, "grad_norm": 7.599140167236328, "learning_rate": 1.7353152616128937e-05, "loss": 0.0161, "step": 6764 }, { "epoch": 6.5048076923076925, "grad_norm": 0.9861971735954285, "learning_rate": 1.7352308333643596e-05, "loss": 0.0108, "step": 6765 }, { "epoch": 6.50576923076923, "grad_norm": 2.249114990234375, "learning_rate": 1.7351463937072008e-05, "loss": 0.0593, "step": 6766 }, { "epoch": 6.506730769230769, "grad_norm": 5.3164381980896, "learning_rate": 1.7350619426427273e-05, "loss": 0.1543, "step": 6767 }, { "epoch": 6.507692307692308, "grad_norm": 2.179933786392212, "learning_rate": 1.7349774801722502e-05, "loss": 0.0146, "step": 6768 }, { "epoch": 6.508653846153846, "grad_norm": 2.6471850872039795, "learning_rate": 1.7348930062970795e-05, "loss": 0.027, "step": 6769 }, { "epoch": 6.509615384615385, "grad_norm": 3.0163261890411377, "learning_rate": 1.7348085210185263e-05, "loss": 0.036, "step": 6770 }, { "epoch": 6.510576923076923, "grad_norm": 4.563403129577637, "learning_rate": 1.7347240243379015e-05, "loss": 0.1017, "step": 6771 }, { "epoch": 6.5115384615384615, "grad_norm": 2.59269118309021, "learning_rate": 1.7346395162565165e-05, "loss": 0.0487, "step": 6772 }, { "epoch": 6.5125, "grad_norm": 1.3537626266479492, "learning_rate": 1.7345549967756822e-05, "loss": 0.0108, "step": 6773 }, { "epoch": 6.513461538461538, "grad_norm": 2.518332004547119, "learning_rate": 1.73447046589671e-05, "loss": 0.1293, "step": 6774 }, { "epoch": 6.514423076923077, "grad_norm": 2.57430362701416, "learning_rate": 1.734385923620912e-05, "loss": 0.0235, "step": 6775 }, { "epoch": 6.515384615384615, "grad_norm": 4.052264213562012, "learning_rate": 1.7343013699496e-05, "loss": 0.0495, "step": 6776 }, { "epoch": 6.516346153846154, "grad_norm": 3.5743472576141357, "learning_rate": 1.734216804884086e-05, "loss": 0.0453, "step": 6777 }, { "epoch": 6.517307692307693, "grad_norm": 4.190402030944824, "learning_rate": 1.7341322284256816e-05, "loss": 0.0631, "step": 6778 }, { "epoch": 6.5182692307692305, "grad_norm": 1.648640513420105, "learning_rate": 1.7340476405757e-05, "loss": 0.0222, "step": 6779 }, { "epoch": 6.519230769230769, "grad_norm": 0.5718839764595032, "learning_rate": 1.7339630413354532e-05, "loss": 0.0048, "step": 6780 }, { "epoch": 6.520192307692308, "grad_norm": 1.4065568447113037, "learning_rate": 1.7338784307062542e-05, "loss": 0.0105, "step": 6781 }, { "epoch": 6.5211538461538465, "grad_norm": 2.6528639793395996, "learning_rate": 1.733793808689416e-05, "loss": 0.0257, "step": 6782 }, { "epoch": 6.522115384615384, "grad_norm": 2.896223306655884, "learning_rate": 1.7337091752862517e-05, "loss": 0.0573, "step": 6783 }, { "epoch": 6.523076923076923, "grad_norm": 3.3015289306640625, "learning_rate": 1.733624530498074e-05, "loss": 0.0362, "step": 6784 }, { "epoch": 6.524038461538462, "grad_norm": 0.8140242099761963, "learning_rate": 1.733539874326197e-05, "loss": 0.0079, "step": 6785 }, { "epoch": 6.525, "grad_norm": 2.890333890914917, "learning_rate": 1.733455206771934e-05, "loss": 0.0537, "step": 6786 }, { "epoch": 6.525961538461538, "grad_norm": 3.1849145889282227, "learning_rate": 1.7333705278365987e-05, "loss": 0.0662, "step": 6787 }, { "epoch": 6.526923076923077, "grad_norm": 2.5172808170318604, "learning_rate": 1.7332858375215057e-05, "loss": 0.0721, "step": 6788 }, { "epoch": 6.5278846153846155, "grad_norm": 3.1013219356536865, "learning_rate": 1.7332011358279683e-05, "loss": 0.0444, "step": 6789 }, { "epoch": 6.528846153846154, "grad_norm": 2.296703815460205, "learning_rate": 1.7331164227573012e-05, "loss": 0.0264, "step": 6790 }, { "epoch": 6.529807692307692, "grad_norm": 3.192556619644165, "learning_rate": 1.733031698310819e-05, "loss": 0.0767, "step": 6791 }, { "epoch": 6.530769230769231, "grad_norm": 3.244093656539917, "learning_rate": 1.732946962489836e-05, "loss": 0.0298, "step": 6792 }, { "epoch": 6.531730769230769, "grad_norm": 2.631835699081421, "learning_rate": 1.7328622152956677e-05, "loss": 0.0171, "step": 6793 }, { "epoch": 6.532692307692308, "grad_norm": 3.9044837951660156, "learning_rate": 1.7327774567296284e-05, "loss": 0.0674, "step": 6794 }, { "epoch": 6.533653846153846, "grad_norm": 2.32381010055542, "learning_rate": 1.7326926867930337e-05, "loss": 0.0363, "step": 6795 }, { "epoch": 6.5346153846153845, "grad_norm": 1.8676396608352661, "learning_rate": 1.7326079054871993e-05, "loss": 0.0352, "step": 6796 }, { "epoch": 6.535576923076923, "grad_norm": 3.1083431243896484, "learning_rate": 1.73252311281344e-05, "loss": 0.0209, "step": 6797 }, { "epoch": 6.536538461538462, "grad_norm": 1.710040807723999, "learning_rate": 1.732438308773072e-05, "loss": 0.0129, "step": 6798 }, { "epoch": 6.5375, "grad_norm": 3.4532079696655273, "learning_rate": 1.7323534933674112e-05, "loss": 0.0485, "step": 6799 }, { "epoch": 6.538461538461538, "grad_norm": 1.7371506690979004, "learning_rate": 1.7322686665977738e-05, "loss": 0.0257, "step": 6800 }, { "epoch": 6.539423076923077, "grad_norm": 2.7884795665740967, "learning_rate": 1.7321838284654754e-05, "loss": 0.0229, "step": 6801 }, { "epoch": 6.540384615384616, "grad_norm": 2.443708658218384, "learning_rate": 1.7320989789718333e-05, "loss": 0.018, "step": 6802 }, { "epoch": 6.5413461538461535, "grad_norm": 1.89469575881958, "learning_rate": 1.7320141181181634e-05, "loss": 0.0183, "step": 6803 }, { "epoch": 6.542307692307692, "grad_norm": 0.3329109847545624, "learning_rate": 1.7319292459057834e-05, "loss": 0.0018, "step": 6804 }, { "epoch": 6.543269230769231, "grad_norm": 1.7061454057693481, "learning_rate": 1.7318443623360092e-05, "loss": 0.0121, "step": 6805 }, { "epoch": 6.5442307692307695, "grad_norm": 1.2960432767868042, "learning_rate": 1.7317594674101585e-05, "loss": 0.0117, "step": 6806 }, { "epoch": 6.545192307692307, "grad_norm": 2.035968780517578, "learning_rate": 1.731674561129549e-05, "loss": 0.0058, "step": 6807 }, { "epoch": 6.546153846153846, "grad_norm": 2.27229642868042, "learning_rate": 1.731589643495497e-05, "loss": 0.0152, "step": 6808 }, { "epoch": 6.547115384615385, "grad_norm": 2.6391849517822266, "learning_rate": 1.7315047145093215e-05, "loss": 0.031, "step": 6809 }, { "epoch": 6.548076923076923, "grad_norm": 2.6891629695892334, "learning_rate": 1.7314197741723394e-05, "loss": 0.0499, "step": 6810 }, { "epoch": 6.549038461538462, "grad_norm": 4.50255823135376, "learning_rate": 1.7313348224858693e-05, "loss": 0.0577, "step": 6811 }, { "epoch": 6.55, "grad_norm": 1.632317304611206, "learning_rate": 1.7312498594512292e-05, "loss": 0.0107, "step": 6812 }, { "epoch": 6.5509615384615385, "grad_norm": 1.129665732383728, "learning_rate": 1.7311648850697374e-05, "loss": 0.0082, "step": 6813 }, { "epoch": 6.551923076923077, "grad_norm": 3.3418712615966797, "learning_rate": 1.7310798993427124e-05, "loss": 0.0421, "step": 6814 }, { "epoch": 6.552884615384615, "grad_norm": 5.0156474113464355, "learning_rate": 1.7309949022714733e-05, "loss": 0.1131, "step": 6815 }, { "epoch": 6.553846153846154, "grad_norm": 3.2545740604400635, "learning_rate": 1.7309098938573385e-05, "loss": 0.0183, "step": 6816 }, { "epoch": 6.554807692307692, "grad_norm": 3.133998394012451, "learning_rate": 1.7308248741016277e-05, "loss": 0.0306, "step": 6817 }, { "epoch": 6.555769230769231, "grad_norm": 3.135735273361206, "learning_rate": 1.7307398430056595e-05, "loss": 0.053, "step": 6818 }, { "epoch": 6.55673076923077, "grad_norm": 1.3581050634384155, "learning_rate": 1.7306548005707537e-05, "loss": 0.009, "step": 6819 }, { "epoch": 6.5576923076923075, "grad_norm": 0.9227078557014465, "learning_rate": 1.73056974679823e-05, "loss": 0.0056, "step": 6820 }, { "epoch": 6.558653846153846, "grad_norm": 2.709047317504883, "learning_rate": 1.730484681689408e-05, "loss": 0.0237, "step": 6821 }, { "epoch": 6.559615384615385, "grad_norm": 3.4854609966278076, "learning_rate": 1.7303996052456074e-05, "loss": 0.0543, "step": 6822 }, { "epoch": 6.560576923076923, "grad_norm": 4.436407089233398, "learning_rate": 1.7303145174681487e-05, "loss": 0.0473, "step": 6823 }, { "epoch": 6.561538461538461, "grad_norm": 4.236645698547363, "learning_rate": 1.7302294183583524e-05, "loss": 0.065, "step": 6824 }, { "epoch": 6.5625, "grad_norm": 2.9939961433410645, "learning_rate": 1.730144307917539e-05, "loss": 0.0199, "step": 6825 }, { "epoch": 6.563461538461539, "grad_norm": 1.0028855800628662, "learning_rate": 1.7300591861470282e-05, "loss": 0.0092, "step": 6826 }, { "epoch": 6.564423076923077, "grad_norm": 2.6492466926574707, "learning_rate": 1.729974053048142e-05, "loss": 0.0185, "step": 6827 }, { "epoch": 6.565384615384615, "grad_norm": 2.068941831588745, "learning_rate": 1.7298889086222006e-05, "loss": 0.0144, "step": 6828 }, { "epoch": 6.566346153846154, "grad_norm": 4.464074611663818, "learning_rate": 1.7298037528705257e-05, "loss": 0.0631, "step": 6829 }, { "epoch": 6.5673076923076925, "grad_norm": 2.5552728176116943, "learning_rate": 1.7297185857944385e-05, "loss": 0.0555, "step": 6830 }, { "epoch": 6.56826923076923, "grad_norm": 1.8256744146347046, "learning_rate": 1.7296334073952606e-05, "loss": 0.029, "step": 6831 }, { "epoch": 6.569230769230769, "grad_norm": 1.5670192241668701, "learning_rate": 1.7295482176743136e-05, "loss": 0.0139, "step": 6832 }, { "epoch": 6.570192307692308, "grad_norm": 2.9134857654571533, "learning_rate": 1.7294630166329194e-05, "loss": 0.0275, "step": 6833 }, { "epoch": 6.571153846153846, "grad_norm": 3.0034053325653076, "learning_rate": 1.7293778042724e-05, "loss": 0.0366, "step": 6834 }, { "epoch": 6.572115384615385, "grad_norm": 4.092484474182129, "learning_rate": 1.729292580594078e-05, "loss": 0.0324, "step": 6835 }, { "epoch": 6.573076923076923, "grad_norm": 1.9082013368606567, "learning_rate": 1.7292073455992756e-05, "loss": 0.0164, "step": 6836 }, { "epoch": 6.5740384615384615, "grad_norm": 2.90398907661438, "learning_rate": 1.7291220992893155e-05, "loss": 0.0374, "step": 6837 }, { "epoch": 6.575, "grad_norm": 1.5423195362091064, "learning_rate": 1.72903684166552e-05, "loss": 0.0112, "step": 6838 }, { "epoch": 6.575961538461538, "grad_norm": 0.6349000930786133, "learning_rate": 1.7289515727292127e-05, "loss": 0.0032, "step": 6839 }, { "epoch": 6.576923076923077, "grad_norm": 0.9012309312820435, "learning_rate": 1.7288662924817164e-05, "loss": 0.005, "step": 6840 }, { "epoch": 6.577884615384615, "grad_norm": 2.27304744720459, "learning_rate": 1.7287810009243543e-05, "loss": 0.0442, "step": 6841 }, { "epoch": 6.578846153846154, "grad_norm": 3.4875669479370117, "learning_rate": 1.7286956980584505e-05, "loss": 0.0263, "step": 6842 }, { "epoch": 6.579807692307693, "grad_norm": 5.064981460571289, "learning_rate": 1.7286103838853274e-05, "loss": 0.0572, "step": 6843 }, { "epoch": 6.5807692307692305, "grad_norm": 0.9715607166290283, "learning_rate": 1.72852505840631e-05, "loss": 0.006, "step": 6844 }, { "epoch": 6.581730769230769, "grad_norm": 3.768428087234497, "learning_rate": 1.728439721622722e-05, "loss": 0.1117, "step": 6845 }, { "epoch": 6.582692307692308, "grad_norm": 3.4314186573028564, "learning_rate": 1.7283543735358872e-05, "loss": 0.0596, "step": 6846 }, { "epoch": 6.5836538461538465, "grad_norm": 2.360201120376587, "learning_rate": 1.7282690141471303e-05, "loss": 0.0128, "step": 6847 }, { "epoch": 6.584615384615384, "grad_norm": 1.0793205499649048, "learning_rate": 1.7281836434577758e-05, "loss": 0.0062, "step": 6848 }, { "epoch": 6.585576923076923, "grad_norm": 3.7245800495147705, "learning_rate": 1.7280982614691485e-05, "loss": 0.0405, "step": 6849 }, { "epoch": 6.586538461538462, "grad_norm": 1.1763033866882324, "learning_rate": 1.728012868182573e-05, "loss": 0.0275, "step": 6850 }, { "epoch": 6.5875, "grad_norm": 4.685609817504883, "learning_rate": 1.7279274635993744e-05, "loss": 0.0845, "step": 6851 }, { "epoch": 6.588461538461538, "grad_norm": 4.220608234405518, "learning_rate": 1.727842047720878e-05, "loss": 0.043, "step": 6852 }, { "epoch": 6.589423076923077, "grad_norm": 2.048858880996704, "learning_rate": 1.7277566205484093e-05, "loss": 0.017, "step": 6853 }, { "epoch": 6.5903846153846155, "grad_norm": 4.08575439453125, "learning_rate": 1.7276711820832938e-05, "loss": 0.0525, "step": 6854 }, { "epoch": 6.591346153846154, "grad_norm": 2.003818988800049, "learning_rate": 1.727585732326857e-05, "loss": 0.0618, "step": 6855 }, { "epoch": 6.592307692307692, "grad_norm": 2.340517520904541, "learning_rate": 1.7275002712804255e-05, "loss": 0.0453, "step": 6856 }, { "epoch": 6.593269230769231, "grad_norm": 2.7365167140960693, "learning_rate": 1.7274147989453246e-05, "loss": 0.0241, "step": 6857 }, { "epoch": 6.594230769230769, "grad_norm": 3.244269847869873, "learning_rate": 1.7273293153228813e-05, "loss": 0.0192, "step": 6858 }, { "epoch": 6.595192307692308, "grad_norm": 1.51091730594635, "learning_rate": 1.7272438204144215e-05, "loss": 0.008, "step": 6859 }, { "epoch": 6.596153846153846, "grad_norm": 5.579986095428467, "learning_rate": 1.727158314221272e-05, "loss": 0.08, "step": 6860 }, { "epoch": 6.5971153846153845, "grad_norm": 1.0919830799102783, "learning_rate": 1.7270727967447596e-05, "loss": 0.0077, "step": 6861 }, { "epoch": 6.598076923076923, "grad_norm": 1.4186153411865234, "learning_rate": 1.7269872679862114e-05, "loss": 0.0093, "step": 6862 }, { "epoch": 6.599038461538462, "grad_norm": 3.952909469604492, "learning_rate": 1.7269017279469546e-05, "loss": 0.0486, "step": 6863 }, { "epoch": 6.6, "grad_norm": 2.2737410068511963, "learning_rate": 1.7268161766283164e-05, "loss": 0.0247, "step": 6864 }, { "epoch": 6.600961538461538, "grad_norm": 0.7983156442642212, "learning_rate": 1.7267306140316242e-05, "loss": 0.0046, "step": 6865 }, { "epoch": 6.601923076923077, "grad_norm": 3.55301570892334, "learning_rate": 1.726645040158206e-05, "loss": 0.0231, "step": 6866 }, { "epoch": 6.602884615384616, "grad_norm": 5.828713417053223, "learning_rate": 1.7265594550093892e-05, "loss": 0.0496, "step": 6867 }, { "epoch": 6.6038461538461535, "grad_norm": 1.0519806146621704, "learning_rate": 1.726473858586502e-05, "loss": 0.0407, "step": 6868 }, { "epoch": 6.604807692307692, "grad_norm": 1.1697860956192017, "learning_rate": 1.726388250890873e-05, "loss": 0.0092, "step": 6869 }, { "epoch": 6.605769230769231, "grad_norm": 3.0696849822998047, "learning_rate": 1.72630263192383e-05, "loss": 0.0343, "step": 6870 }, { "epoch": 6.6067307692307695, "grad_norm": 1.0631343126296997, "learning_rate": 1.726217001686702e-05, "loss": 0.0064, "step": 6871 }, { "epoch": 6.607692307692307, "grad_norm": 3.063026189804077, "learning_rate": 1.7261313601808177e-05, "loss": 0.044, "step": 6872 }, { "epoch": 6.608653846153846, "grad_norm": 2.33548641204834, "learning_rate": 1.7260457074075058e-05, "loss": 0.0327, "step": 6873 }, { "epoch": 6.609615384615385, "grad_norm": 3.9595608711242676, "learning_rate": 1.7259600433680954e-05, "loss": 0.1083, "step": 6874 }, { "epoch": 6.610576923076923, "grad_norm": 2.877025604248047, "learning_rate": 1.7258743680639155e-05, "loss": 0.0271, "step": 6875 }, { "epoch": 6.611538461538462, "grad_norm": 3.451817512512207, "learning_rate": 1.725788681496296e-05, "loss": 0.0745, "step": 6876 }, { "epoch": 6.6125, "grad_norm": 1.3241093158721924, "learning_rate": 1.725702983666566e-05, "loss": 0.0088, "step": 6877 }, { "epoch": 6.6134615384615385, "grad_norm": 2.8986425399780273, "learning_rate": 1.7256172745760566e-05, "loss": 0.1611, "step": 6878 }, { "epoch": 6.614423076923077, "grad_norm": 1.4095109701156616, "learning_rate": 1.725531554226096e-05, "loss": 0.0105, "step": 6879 }, { "epoch": 6.615384615384615, "grad_norm": 4.316768646240234, "learning_rate": 1.725445822618015e-05, "loss": 0.0837, "step": 6880 }, { "epoch": 6.616346153846154, "grad_norm": 1.1571890115737915, "learning_rate": 1.725360079753144e-05, "loss": 0.0086, "step": 6881 }, { "epoch": 6.617307692307692, "grad_norm": 2.918578624725342, "learning_rate": 1.725274325632814e-05, "loss": 0.0404, "step": 6882 }, { "epoch": 6.618269230769231, "grad_norm": 1.798244833946228, "learning_rate": 1.7251885602583547e-05, "loss": 0.045, "step": 6883 }, { "epoch": 6.61923076923077, "grad_norm": 5.27461051940918, "learning_rate": 1.7251027836310976e-05, "loss": 0.1278, "step": 6884 }, { "epoch": 6.6201923076923075, "grad_norm": 2.2550201416015625, "learning_rate": 1.725016995752373e-05, "loss": 0.0537, "step": 6885 }, { "epoch": 6.621153846153846, "grad_norm": 0.7476505041122437, "learning_rate": 1.724931196623513e-05, "loss": 0.0066, "step": 6886 }, { "epoch": 6.622115384615385, "grad_norm": 4.376140594482422, "learning_rate": 1.7248453862458482e-05, "loss": 0.0964, "step": 6887 }, { "epoch": 6.623076923076923, "grad_norm": 0.7790513634681702, "learning_rate": 1.7247595646207104e-05, "loss": 0.0043, "step": 6888 }, { "epoch": 6.624038461538461, "grad_norm": 4.149867057800293, "learning_rate": 1.7246737317494315e-05, "loss": 0.0545, "step": 6889 }, { "epoch": 6.625, "grad_norm": 3.690744400024414, "learning_rate": 1.724587887633343e-05, "loss": 0.0427, "step": 6890 }, { "epoch": 6.625961538461539, "grad_norm": 4.150428295135498, "learning_rate": 1.7245020322737774e-05, "loss": 0.0356, "step": 6891 }, { "epoch": 6.626923076923077, "grad_norm": 2.1804394721984863, "learning_rate": 1.7244161656720666e-05, "loss": 0.0101, "step": 6892 }, { "epoch": 6.627884615384615, "grad_norm": 2.414658308029175, "learning_rate": 1.7243302878295426e-05, "loss": 0.0275, "step": 6893 }, { "epoch": 6.628846153846154, "grad_norm": 4.236049652099609, "learning_rate": 1.7242443987475388e-05, "loss": 0.0767, "step": 6894 }, { "epoch": 6.6298076923076925, "grad_norm": 4.706640243530273, "learning_rate": 1.7241584984273875e-05, "loss": 0.0532, "step": 6895 }, { "epoch": 6.63076923076923, "grad_norm": 4.3741888999938965, "learning_rate": 1.7240725868704218e-05, "loss": 0.0827, "step": 6896 }, { "epoch": 6.631730769230769, "grad_norm": 4.517917156219482, "learning_rate": 1.7239866640779745e-05, "loss": 0.1632, "step": 6897 }, { "epoch": 6.632692307692308, "grad_norm": 1.3537484407424927, "learning_rate": 1.723900730051379e-05, "loss": 0.0132, "step": 6898 }, { "epoch": 6.633653846153846, "grad_norm": 2.338116407394409, "learning_rate": 1.7238147847919688e-05, "loss": 0.03, "step": 6899 }, { "epoch": 6.634615384615385, "grad_norm": 4.658620357513428, "learning_rate": 1.7237288283010776e-05, "loss": 0.0763, "step": 6900 }, { "epoch": 6.635576923076923, "grad_norm": 0.7078801989555359, "learning_rate": 1.7236428605800393e-05, "loss": 0.0065, "step": 6901 }, { "epoch": 6.6365384615384615, "grad_norm": 1.9217227697372437, "learning_rate": 1.7235568816301874e-05, "loss": 0.0222, "step": 6902 }, { "epoch": 6.6375, "grad_norm": 3.55368709564209, "learning_rate": 1.7234708914528562e-05, "loss": 0.0514, "step": 6903 }, { "epoch": 6.638461538461538, "grad_norm": 1.9742854833602905, "learning_rate": 1.7233848900493803e-05, "loss": 0.0143, "step": 6904 }, { "epoch": 6.639423076923077, "grad_norm": 2.3250350952148438, "learning_rate": 1.7232988774210943e-05, "loss": 0.0159, "step": 6905 }, { "epoch": 6.640384615384615, "grad_norm": 1.311429500579834, "learning_rate": 1.723212853569332e-05, "loss": 0.0108, "step": 6906 }, { "epoch": 6.641346153846154, "grad_norm": 3.0381901264190674, "learning_rate": 1.723126818495429e-05, "loss": 0.0335, "step": 6907 }, { "epoch": 6.642307692307693, "grad_norm": 0.7380444407463074, "learning_rate": 1.7230407722007204e-05, "loss": 0.0064, "step": 6908 }, { "epoch": 6.6432692307692305, "grad_norm": 1.4392098188400269, "learning_rate": 1.722954714686541e-05, "loss": 0.0122, "step": 6909 }, { "epoch": 6.644230769230769, "grad_norm": 4.113431930541992, "learning_rate": 1.7228686459542264e-05, "loss": 0.0444, "step": 6910 }, { "epoch": 6.645192307692308, "grad_norm": 3.1033453941345215, "learning_rate": 1.722782566005112e-05, "loss": 0.0837, "step": 6911 }, { "epoch": 6.6461538461538465, "grad_norm": 1.2864956855773926, "learning_rate": 1.722696474840533e-05, "loss": 0.0115, "step": 6912 }, { "epoch": 6.647115384615384, "grad_norm": 5.232944488525391, "learning_rate": 1.7226103724618264e-05, "loss": 0.102, "step": 6913 }, { "epoch": 6.648076923076923, "grad_norm": 1.2017289400100708, "learning_rate": 1.722524258870327e-05, "loss": 0.0079, "step": 6914 }, { "epoch": 6.649038461538462, "grad_norm": 1.9099279642105103, "learning_rate": 1.7224381340673723e-05, "loss": 0.0319, "step": 6915 }, { "epoch": 6.65, "grad_norm": 1.150242805480957, "learning_rate": 1.722351998054298e-05, "loss": 0.0371, "step": 6916 }, { "epoch": 6.650961538461538, "grad_norm": 3.367063045501709, "learning_rate": 1.7222658508324403e-05, "loss": 0.0236, "step": 6917 }, { "epoch": 6.651923076923077, "grad_norm": 3.668748617172241, "learning_rate": 1.722179692403137e-05, "loss": 0.0499, "step": 6918 }, { "epoch": 6.6528846153846155, "grad_norm": 2.0426456928253174, "learning_rate": 1.722093522767724e-05, "loss": 0.0219, "step": 6919 }, { "epoch": 6.653846153846154, "grad_norm": 0.9068637490272522, "learning_rate": 1.7220073419275394e-05, "loss": 0.0077, "step": 6920 }, { "epoch": 6.654807692307692, "grad_norm": 2.5821762084960938, "learning_rate": 1.7219211498839193e-05, "loss": 0.0612, "step": 6921 }, { "epoch": 6.655769230769231, "grad_norm": 2.2799956798553467, "learning_rate": 1.7218349466382024e-05, "loss": 0.0247, "step": 6922 }, { "epoch": 6.656730769230769, "grad_norm": 2.729982376098633, "learning_rate": 1.721748732191725e-05, "loss": 0.0302, "step": 6923 }, { "epoch": 6.657692307692308, "grad_norm": 2.8183858394622803, "learning_rate": 1.721662506545826e-05, "loss": 0.0426, "step": 6924 }, { "epoch": 6.658653846153846, "grad_norm": 2.7874948978424072, "learning_rate": 1.721576269701843e-05, "loss": 0.0265, "step": 6925 }, { "epoch": 6.6596153846153845, "grad_norm": 2.6303791999816895, "learning_rate": 1.721490021661114e-05, "loss": 0.0409, "step": 6926 }, { "epoch": 6.660576923076923, "grad_norm": 1.9400105476379395, "learning_rate": 1.7214037624249773e-05, "loss": 0.0254, "step": 6927 }, { "epoch": 6.661538461538462, "grad_norm": 2.3136963844299316, "learning_rate": 1.7213174919947717e-05, "loss": 0.0218, "step": 6928 }, { "epoch": 6.6625, "grad_norm": 3.9941039085388184, "learning_rate": 1.7212312103718355e-05, "loss": 0.0679, "step": 6929 }, { "epoch": 6.663461538461538, "grad_norm": 2.088675022125244, "learning_rate": 1.7211449175575076e-05, "loss": 0.0475, "step": 6930 }, { "epoch": 6.664423076923077, "grad_norm": 2.16996431350708, "learning_rate": 1.7210586135531275e-05, "loss": 0.0504, "step": 6931 }, { "epoch": 6.665384615384616, "grad_norm": 1.6910021305084229, "learning_rate": 1.7209722983600336e-05, "loss": 0.0104, "step": 6932 }, { "epoch": 6.6663461538461535, "grad_norm": 1.5914169549942017, "learning_rate": 1.7208859719795658e-05, "loss": 0.0057, "step": 6933 }, { "epoch": 6.667307692307692, "grad_norm": 3.5557525157928467, "learning_rate": 1.7207996344130636e-05, "loss": 0.024, "step": 6934 }, { "epoch": 6.668269230769231, "grad_norm": 1.9337728023529053, "learning_rate": 1.7207132856618668e-05, "loss": 0.0237, "step": 6935 }, { "epoch": 6.6692307692307695, "grad_norm": 0.377610981464386, "learning_rate": 1.7206269257273146e-05, "loss": 0.0026, "step": 6936 }, { "epoch": 6.670192307692307, "grad_norm": 1.9345767498016357, "learning_rate": 1.7205405546107474e-05, "loss": 0.024, "step": 6937 }, { "epoch": 6.671153846153846, "grad_norm": 0.6503114700317383, "learning_rate": 1.720454172313506e-05, "loss": 0.0037, "step": 6938 }, { "epoch": 6.672115384615385, "grad_norm": 5.828881740570068, "learning_rate": 1.7203677788369302e-05, "loss": 0.1065, "step": 6939 }, { "epoch": 6.673076923076923, "grad_norm": 0.8360196352005005, "learning_rate": 1.7202813741823608e-05, "loss": 0.0246, "step": 6940 }, { "epoch": 6.674038461538462, "grad_norm": 3.102315664291382, "learning_rate": 1.7201949583511385e-05, "loss": 0.0666, "step": 6941 }, { "epoch": 6.675, "grad_norm": 5.035252571105957, "learning_rate": 1.7201085313446042e-05, "loss": 0.093, "step": 6942 }, { "epoch": 6.6759615384615385, "grad_norm": 4.249536991119385, "learning_rate": 1.7200220931640988e-05, "loss": 0.1299, "step": 6943 }, { "epoch": 6.676923076923077, "grad_norm": 2.364250898361206, "learning_rate": 1.7199356438109638e-05, "loss": 0.0212, "step": 6944 }, { "epoch": 6.677884615384615, "grad_norm": 4.999189853668213, "learning_rate": 1.7198491832865407e-05, "loss": 0.058, "step": 6945 }, { "epoch": 6.678846153846154, "grad_norm": 2.2108471393585205, "learning_rate": 1.719762711592171e-05, "loss": 0.0146, "step": 6946 }, { "epoch": 6.679807692307692, "grad_norm": 3.228830337524414, "learning_rate": 1.7196762287291963e-05, "loss": 0.0588, "step": 6947 }, { "epoch": 6.680769230769231, "grad_norm": 4.034938335418701, "learning_rate": 1.719589734698959e-05, "loss": 0.0527, "step": 6948 }, { "epoch": 6.68173076923077, "grad_norm": 3.2070934772491455, "learning_rate": 1.7195032295028008e-05, "loss": 0.0196, "step": 6949 }, { "epoch": 6.6826923076923075, "grad_norm": 1.786243200302124, "learning_rate": 1.7194167131420643e-05, "loss": 0.0232, "step": 6950 }, { "epoch": 6.683653846153846, "grad_norm": 3.287976026535034, "learning_rate": 1.719330185618092e-05, "loss": 0.0222, "step": 6951 }, { "epoch": 6.684615384615385, "grad_norm": 5.222612380981445, "learning_rate": 1.7192436469322264e-05, "loss": 0.07, "step": 6952 }, { "epoch": 6.685576923076923, "grad_norm": 6.019970417022705, "learning_rate": 1.71915709708581e-05, "loss": 0.1031, "step": 6953 }, { "epoch": 6.686538461538461, "grad_norm": 1.8220055103302002, "learning_rate": 1.7190705360801864e-05, "loss": 0.009, "step": 6954 }, { "epoch": 6.6875, "grad_norm": 3.3440701961517334, "learning_rate": 1.7189839639166985e-05, "loss": 0.0395, "step": 6955 }, { "epoch": 6.688461538461539, "grad_norm": 4.258077144622803, "learning_rate": 1.7188973805966896e-05, "loss": 0.0759, "step": 6956 }, { "epoch": 6.689423076923077, "grad_norm": 2.8664135932922363, "learning_rate": 1.7188107861215035e-05, "loss": 0.0882, "step": 6957 }, { "epoch": 6.690384615384615, "grad_norm": 3.644190549850464, "learning_rate": 1.7187241804924838e-05, "loss": 0.0181, "step": 6958 }, { "epoch": 6.691346153846154, "grad_norm": 4.46804141998291, "learning_rate": 1.7186375637109736e-05, "loss": 0.0453, "step": 6959 }, { "epoch": 6.6923076923076925, "grad_norm": 5.6588921546936035, "learning_rate": 1.718550935778318e-05, "loss": 0.1659, "step": 6960 }, { "epoch": 6.69326923076923, "grad_norm": 1.8247251510620117, "learning_rate": 1.718464296695861e-05, "loss": 0.014, "step": 6961 }, { "epoch": 6.694230769230769, "grad_norm": 2.393976926803589, "learning_rate": 1.7183776464649465e-05, "loss": 0.0343, "step": 6962 }, { "epoch": 6.695192307692308, "grad_norm": 2.317796230316162, "learning_rate": 1.7182909850869192e-05, "loss": 0.0208, "step": 6963 }, { "epoch": 6.696153846153846, "grad_norm": 5.504111289978027, "learning_rate": 1.7182043125631246e-05, "loss": 0.0833, "step": 6964 }, { "epoch": 6.697115384615385, "grad_norm": 1.6611275672912598, "learning_rate": 1.7181176288949063e-05, "loss": 0.0194, "step": 6965 }, { "epoch": 6.698076923076923, "grad_norm": 2.0781943798065186, "learning_rate": 1.7180309340836107e-05, "loss": 0.0444, "step": 6966 }, { "epoch": 6.6990384615384615, "grad_norm": 2.999574899673462, "learning_rate": 1.7179442281305817e-05, "loss": 0.0342, "step": 6967 }, { "epoch": 6.7, "grad_norm": 0.883464515209198, "learning_rate": 1.717857511037166e-05, "loss": 0.0064, "step": 6968 }, { "epoch": 6.700961538461538, "grad_norm": 3.3094325065612793, "learning_rate": 1.7177707828047082e-05, "loss": 0.0184, "step": 6969 }, { "epoch": 6.701923076923077, "grad_norm": 2.111304998397827, "learning_rate": 1.7176840434345547e-05, "loss": 0.0329, "step": 6970 }, { "epoch": 6.702884615384615, "grad_norm": 1.1664785146713257, "learning_rate": 1.7175972929280514e-05, "loss": 0.0099, "step": 6971 }, { "epoch": 6.703846153846154, "grad_norm": 3.124643564224243, "learning_rate": 1.7175105312865437e-05, "loss": 0.045, "step": 6972 }, { "epoch": 6.704807692307693, "grad_norm": 1.520215392112732, "learning_rate": 1.717423758511379e-05, "loss": 0.0151, "step": 6973 }, { "epoch": 6.7057692307692305, "grad_norm": 3.5284037590026855, "learning_rate": 1.7173369746039026e-05, "loss": 0.0932, "step": 6974 }, { "epoch": 6.706730769230769, "grad_norm": 3.555347204208374, "learning_rate": 1.717250179565462e-05, "loss": 0.013, "step": 6975 }, { "epoch": 6.707692307692308, "grad_norm": 4.98006010055542, "learning_rate": 1.717163373397404e-05, "loss": 0.076, "step": 6976 }, { "epoch": 6.7086538461538465, "grad_norm": 3.1023831367492676, "learning_rate": 1.7170765561010747e-05, "loss": 0.0639, "step": 6977 }, { "epoch": 6.709615384615384, "grad_norm": 1.0561712980270386, "learning_rate": 1.7169897276778225e-05, "loss": 0.0106, "step": 6978 }, { "epoch": 6.710576923076923, "grad_norm": 2.8914473056793213, "learning_rate": 1.7169028881289935e-05, "loss": 0.0305, "step": 6979 }, { "epoch": 6.711538461538462, "grad_norm": 4.805508613586426, "learning_rate": 1.716816037455936e-05, "loss": 0.2438, "step": 6980 }, { "epoch": 6.7125, "grad_norm": 4.146651268005371, "learning_rate": 1.7167291756599972e-05, "loss": 0.0558, "step": 6981 }, { "epoch": 6.713461538461538, "grad_norm": 3.042081832885742, "learning_rate": 1.7166423027425255e-05, "loss": 0.0356, "step": 6982 }, { "epoch": 6.714423076923077, "grad_norm": 5.472408294677734, "learning_rate": 1.7165554187048688e-05, "loss": 0.0668, "step": 6983 }, { "epoch": 6.7153846153846155, "grad_norm": 4.229529857635498, "learning_rate": 1.7164685235483746e-05, "loss": 0.0624, "step": 6984 }, { "epoch": 6.716346153846154, "grad_norm": 2.88559627532959, "learning_rate": 1.716381617274392e-05, "loss": 0.0277, "step": 6985 }, { "epoch": 6.717307692307692, "grad_norm": 3.365427255630493, "learning_rate": 1.716294699884269e-05, "loss": 0.029, "step": 6986 }, { "epoch": 6.718269230769231, "grad_norm": 2.4188787937164307, "learning_rate": 1.7162077713793547e-05, "loss": 0.0213, "step": 6987 }, { "epoch": 6.719230769230769, "grad_norm": 1.1524957418441772, "learning_rate": 1.7161208317609975e-05, "loss": 0.0112, "step": 6988 }, { "epoch": 6.720192307692308, "grad_norm": 1.9449665546417236, "learning_rate": 1.7160338810305473e-05, "loss": 0.0636, "step": 6989 }, { "epoch": 6.721153846153846, "grad_norm": 3.6297762393951416, "learning_rate": 1.7159469191893524e-05, "loss": 0.0274, "step": 6990 }, { "epoch": 6.7221153846153845, "grad_norm": 2.261972188949585, "learning_rate": 1.715859946238763e-05, "loss": 0.0305, "step": 6991 }, { "epoch": 6.723076923076923, "grad_norm": 3.8414885997772217, "learning_rate": 1.715772962180128e-05, "loss": 0.0476, "step": 6992 }, { "epoch": 6.724038461538462, "grad_norm": 3.8819527626037598, "learning_rate": 1.7156859670147975e-05, "loss": 0.0832, "step": 6993 }, { "epoch": 6.725, "grad_norm": 2.4463140964508057, "learning_rate": 1.715598960744121e-05, "loss": 0.0244, "step": 6994 }, { "epoch": 6.725961538461538, "grad_norm": 5.1197991371154785, "learning_rate": 1.7155119433694492e-05, "loss": 0.1587, "step": 6995 }, { "epoch": 6.726923076923077, "grad_norm": 2.8379855155944824, "learning_rate": 1.715424914892132e-05, "loss": 0.0481, "step": 6996 }, { "epoch": 6.727884615384616, "grad_norm": 4.023687362670898, "learning_rate": 1.71533787531352e-05, "loss": 0.0994, "step": 6997 }, { "epoch": 6.7288461538461535, "grad_norm": 0.3026619255542755, "learning_rate": 1.7152508246349637e-05, "loss": 0.0037, "step": 6998 }, { "epoch": 6.729807692307692, "grad_norm": 4.849809169769287, "learning_rate": 1.7151637628578134e-05, "loss": 0.0797, "step": 6999 }, { "epoch": 6.730769230769231, "grad_norm": 2.9486069679260254, "learning_rate": 1.7150766899834205e-05, "loss": 0.0347, "step": 7000 }, { "epoch": 6.7317307692307695, "grad_norm": 2.4214417934417725, "learning_rate": 1.7149896060131366e-05, "loss": 0.0331, "step": 7001 }, { "epoch": 6.732692307692307, "grad_norm": 1.6229780912399292, "learning_rate": 1.714902510948312e-05, "loss": 0.0646, "step": 7002 }, { "epoch": 6.733653846153846, "grad_norm": 3.1586756706237793, "learning_rate": 1.714815404790299e-05, "loss": 0.08, "step": 7003 }, { "epoch": 6.734615384615385, "grad_norm": 3.3389532566070557, "learning_rate": 1.7147282875404487e-05, "loss": 0.0632, "step": 7004 }, { "epoch": 6.735576923076923, "grad_norm": 3.290780782699585, "learning_rate": 1.714641159200113e-05, "loss": 0.0589, "step": 7005 }, { "epoch": 6.736538461538462, "grad_norm": 2.6802475452423096, "learning_rate": 1.714554019770644e-05, "loss": 0.0582, "step": 7006 }, { "epoch": 6.7375, "grad_norm": 2.6036040782928467, "learning_rate": 1.7144668692533942e-05, "loss": 0.043, "step": 7007 }, { "epoch": 6.7384615384615385, "grad_norm": 3.1535701751708984, "learning_rate": 1.714379707649715e-05, "loss": 0.0259, "step": 7008 }, { "epoch": 6.739423076923077, "grad_norm": 3.6351733207702637, "learning_rate": 1.7142925349609596e-05, "loss": 0.0705, "step": 7009 }, { "epoch": 6.740384615384615, "grad_norm": 2.6891870498657227, "learning_rate": 1.7142053511884807e-05, "loss": 0.1064, "step": 7010 }, { "epoch": 6.741346153846154, "grad_norm": 2.5594990253448486, "learning_rate": 1.7141181563336306e-05, "loss": 0.0324, "step": 7011 }, { "epoch": 6.742307692307692, "grad_norm": 4.140498638153076, "learning_rate": 1.7140309503977625e-05, "loss": 0.0801, "step": 7012 }, { "epoch": 6.743269230769231, "grad_norm": 1.6608059406280518, "learning_rate": 1.7139437333822303e-05, "loss": 0.0117, "step": 7013 }, { "epoch": 6.74423076923077, "grad_norm": 2.321103811264038, "learning_rate": 1.713856505288386e-05, "loss": 0.0215, "step": 7014 }, { "epoch": 6.7451923076923075, "grad_norm": 2.0184507369995117, "learning_rate": 1.7137692661175848e-05, "loss": 0.0266, "step": 7015 }, { "epoch": 6.746153846153846, "grad_norm": 1.0299427509307861, "learning_rate": 1.7136820158711785e-05, "loss": 0.0073, "step": 7016 }, { "epoch": 6.747115384615385, "grad_norm": 2.101456880569458, "learning_rate": 1.713594754550523e-05, "loss": 0.0411, "step": 7017 }, { "epoch": 6.748076923076923, "grad_norm": 2.505126476287842, "learning_rate": 1.7135074821569704e-05, "loss": 0.031, "step": 7018 }, { "epoch": 6.749038461538461, "grad_norm": 3.8449132442474365, "learning_rate": 1.713420198691876e-05, "loss": 0.0441, "step": 7019 }, { "epoch": 6.75, "grad_norm": 3.442798137664795, "learning_rate": 1.713332904156594e-05, "loss": 0.0406, "step": 7020 }, { "epoch": 6.750961538461539, "grad_norm": 1.1545889377593994, "learning_rate": 1.713245598552479e-05, "loss": 0.0045, "step": 7021 }, { "epoch": 6.751923076923077, "grad_norm": 3.0503835678100586, "learning_rate": 1.7131582818808855e-05, "loss": 0.0315, "step": 7022 }, { "epoch": 6.752884615384615, "grad_norm": 2.4939427375793457, "learning_rate": 1.7130709541431686e-05, "loss": 0.02, "step": 7023 }, { "epoch": 6.753846153846154, "grad_norm": 6.323622703552246, "learning_rate": 1.7129836153406837e-05, "loss": 0.0852, "step": 7024 }, { "epoch": 6.7548076923076925, "grad_norm": 3.827054023742676, "learning_rate": 1.7128962654747853e-05, "loss": 0.0532, "step": 7025 }, { "epoch": 6.75576923076923, "grad_norm": 3.1020119190216064, "learning_rate": 1.7128089045468294e-05, "loss": 0.0422, "step": 7026 }, { "epoch": 6.756730769230769, "grad_norm": 4.564643859863281, "learning_rate": 1.7127215325581713e-05, "loss": 0.0547, "step": 7027 }, { "epoch": 6.757692307692308, "grad_norm": 0.9539644718170166, "learning_rate": 1.712634149510167e-05, "loss": 0.0061, "step": 7028 }, { "epoch": 6.758653846153846, "grad_norm": 2.8812096118927, "learning_rate": 1.712546755404172e-05, "loss": 0.0314, "step": 7029 }, { "epoch": 6.759615384615385, "grad_norm": 5.495929718017578, "learning_rate": 1.7124593502415424e-05, "loss": 0.1287, "step": 7030 }, { "epoch": 6.760576923076923, "grad_norm": 2.0916409492492676, "learning_rate": 1.7123719340236352e-05, "loss": 0.0137, "step": 7031 }, { "epoch": 6.7615384615384615, "grad_norm": 3.495457649230957, "learning_rate": 1.712284506751806e-05, "loss": 0.0967, "step": 7032 }, { "epoch": 6.7625, "grad_norm": 0.8847543001174927, "learning_rate": 1.712197068427412e-05, "loss": 0.0093, "step": 7033 }, { "epoch": 6.763461538461538, "grad_norm": 3.200629472732544, "learning_rate": 1.71210961905181e-05, "loss": 0.0859, "step": 7034 }, { "epoch": 6.764423076923077, "grad_norm": 3.323255777359009, "learning_rate": 1.7120221586263565e-05, "loss": 0.0686, "step": 7035 }, { "epoch": 6.765384615384615, "grad_norm": 0.8901961445808411, "learning_rate": 1.7119346871524088e-05, "loss": 0.0089, "step": 7036 }, { "epoch": 6.766346153846154, "grad_norm": 2.227726459503174, "learning_rate": 1.7118472046313243e-05, "loss": 0.0199, "step": 7037 }, { "epoch": 6.767307692307693, "grad_norm": 1.8503100872039795, "learning_rate": 1.7117597110644606e-05, "loss": 0.0177, "step": 7038 }, { "epoch": 6.7682692307692305, "grad_norm": 2.493516683578491, "learning_rate": 1.711672206453175e-05, "loss": 0.0825, "step": 7039 }, { "epoch": 6.769230769230769, "grad_norm": 1.6220060586929321, "learning_rate": 1.7115846907988253e-05, "loss": 0.0111, "step": 7040 }, { "epoch": 6.770192307692308, "grad_norm": 4.610054969787598, "learning_rate": 1.71149716410277e-05, "loss": 0.0819, "step": 7041 }, { "epoch": 6.7711538461538465, "grad_norm": 0.9111939668655396, "learning_rate": 1.7114096263663665e-05, "loss": 0.0147, "step": 7042 }, { "epoch": 6.772115384615384, "grad_norm": 3.8828792572021484, "learning_rate": 1.711322077590974e-05, "loss": 0.1321, "step": 7043 }, { "epoch": 6.773076923076923, "grad_norm": 6.946933746337891, "learning_rate": 1.7112345177779505e-05, "loss": 0.1803, "step": 7044 }, { "epoch": 6.774038461538462, "grad_norm": 2.643562078475952, "learning_rate": 1.7111469469286545e-05, "loss": 0.0316, "step": 7045 }, { "epoch": 6.775, "grad_norm": 3.642073392868042, "learning_rate": 1.7110593650444457e-05, "loss": 0.1028, "step": 7046 }, { "epoch": 6.775961538461538, "grad_norm": 3.8467917442321777, "learning_rate": 1.7109717721266816e-05, "loss": 0.0913, "step": 7047 }, { "epoch": 6.776923076923077, "grad_norm": 2.3429253101348877, "learning_rate": 1.710884168176723e-05, "loss": 0.043, "step": 7048 }, { "epoch": 6.7778846153846155, "grad_norm": 2.567047357559204, "learning_rate": 1.710796553195928e-05, "loss": 0.0305, "step": 7049 }, { "epoch": 6.778846153846154, "grad_norm": 1.6738362312316895, "learning_rate": 1.710708927185657e-05, "loss": 0.0203, "step": 7050 }, { "epoch": 6.779807692307692, "grad_norm": 1.7366585731506348, "learning_rate": 1.7106212901472692e-05, "loss": 0.0118, "step": 7051 }, { "epoch": 6.780769230769231, "grad_norm": 1.2803759574890137, "learning_rate": 1.7105336420821247e-05, "loss": 0.0116, "step": 7052 }, { "epoch": 6.781730769230769, "grad_norm": 2.9179697036743164, "learning_rate": 1.7104459829915834e-05, "loss": 0.0616, "step": 7053 }, { "epoch": 6.782692307692308, "grad_norm": 3.813446044921875, "learning_rate": 1.7103583128770055e-05, "loss": 0.0571, "step": 7054 }, { "epoch": 6.783653846153846, "grad_norm": 1.0175868272781372, "learning_rate": 1.7102706317397517e-05, "loss": 0.0081, "step": 7055 }, { "epoch": 6.7846153846153845, "grad_norm": 3.1360397338867188, "learning_rate": 1.710182939581182e-05, "loss": 0.0586, "step": 7056 }, { "epoch": 6.785576923076923, "grad_norm": 3.035757303237915, "learning_rate": 1.7100952364026578e-05, "loss": 0.0753, "step": 7057 }, { "epoch": 6.786538461538462, "grad_norm": 1.896849513053894, "learning_rate": 1.7100075222055394e-05, "loss": 0.0282, "step": 7058 }, { "epoch": 6.7875, "grad_norm": 3.121622085571289, "learning_rate": 1.7099197969911884e-05, "loss": 0.0522, "step": 7059 }, { "epoch": 6.788461538461538, "grad_norm": 0.888568103313446, "learning_rate": 1.7098320607609652e-05, "loss": 0.0063, "step": 7060 }, { "epoch": 6.789423076923077, "grad_norm": 3.188758373260498, "learning_rate": 1.7097443135162322e-05, "loss": 0.0445, "step": 7061 }, { "epoch": 6.790384615384616, "grad_norm": 5.429752826690674, "learning_rate": 1.7096565552583504e-05, "loss": 0.0852, "step": 7062 }, { "epoch": 6.7913461538461535, "grad_norm": 4.309671878814697, "learning_rate": 1.709568785988682e-05, "loss": 0.0681, "step": 7063 }, { "epoch": 6.792307692307692, "grad_norm": 3.1127190589904785, "learning_rate": 1.709481005708588e-05, "loss": 0.0418, "step": 7064 }, { "epoch": 6.793269230769231, "grad_norm": 1.048953652381897, "learning_rate": 1.709393214419431e-05, "loss": 0.0112, "step": 7065 }, { "epoch": 6.7942307692307695, "grad_norm": 4.619563102722168, "learning_rate": 1.709305412122574e-05, "loss": 0.1567, "step": 7066 }, { "epoch": 6.795192307692307, "grad_norm": 2.374314069747925, "learning_rate": 1.7092175988193784e-05, "loss": 0.0253, "step": 7067 }, { "epoch": 6.796153846153846, "grad_norm": 2.692469835281372, "learning_rate": 1.7091297745112077e-05, "loss": 0.0293, "step": 7068 }, { "epoch": 6.797115384615385, "grad_norm": 3.5835723876953125, "learning_rate": 1.7090419391994234e-05, "loss": 0.0413, "step": 7069 }, { "epoch": 6.798076923076923, "grad_norm": 3.4766929149627686, "learning_rate": 1.70895409288539e-05, "loss": 0.0585, "step": 7070 }, { "epoch": 6.799038461538462, "grad_norm": 0.6318560838699341, "learning_rate": 1.708866235570469e-05, "loss": 0.0059, "step": 7071 }, { "epoch": 6.8, "grad_norm": 3.9494597911834717, "learning_rate": 1.708778367256025e-05, "loss": 0.0307, "step": 7072 }, { "epoch": 6.8009615384615385, "grad_norm": 4.741206645965576, "learning_rate": 1.7086904879434212e-05, "loss": 0.0996, "step": 7073 }, { "epoch": 6.801923076923077, "grad_norm": 2.4652626514434814, "learning_rate": 1.7086025976340205e-05, "loss": 0.0367, "step": 7074 }, { "epoch": 6.802884615384615, "grad_norm": 1.4825224876403809, "learning_rate": 1.7085146963291874e-05, "loss": 0.0129, "step": 7075 }, { "epoch": 6.803846153846154, "grad_norm": 3.226184606552124, "learning_rate": 1.708426784030286e-05, "loss": 0.0851, "step": 7076 }, { "epoch": 6.804807692307692, "grad_norm": 3.679596185684204, "learning_rate": 1.7083388607386798e-05, "loss": 0.1011, "step": 7077 }, { "epoch": 6.805769230769231, "grad_norm": 0.6926946043968201, "learning_rate": 1.7082509264557333e-05, "loss": 0.0053, "step": 7078 }, { "epoch": 6.80673076923077, "grad_norm": 1.8503117561340332, "learning_rate": 1.7081629811828112e-05, "loss": 0.0229, "step": 7079 }, { "epoch": 6.8076923076923075, "grad_norm": 2.654587507247925, "learning_rate": 1.7080750249212784e-05, "loss": 0.0421, "step": 7080 }, { "epoch": 6.808653846153846, "grad_norm": 2.6877517700195312, "learning_rate": 1.707987057672499e-05, "loss": 0.041, "step": 7081 }, { "epoch": 6.809615384615385, "grad_norm": 3.035937786102295, "learning_rate": 1.7078990794378384e-05, "loss": 0.0251, "step": 7082 }, { "epoch": 6.810576923076923, "grad_norm": 1.102607011795044, "learning_rate": 1.707811090218662e-05, "loss": 0.0056, "step": 7083 }, { "epoch": 6.811538461538461, "grad_norm": 2.231639862060547, "learning_rate": 1.7077230900163345e-05, "loss": 0.0291, "step": 7084 }, { "epoch": 6.8125, "grad_norm": 1.824698805809021, "learning_rate": 1.7076350788322222e-05, "loss": 0.0125, "step": 7085 }, { "epoch": 6.813461538461539, "grad_norm": 2.168226718902588, "learning_rate": 1.70754705666769e-05, "loss": 0.0263, "step": 7086 }, { "epoch": 6.814423076923077, "grad_norm": 2.251429796218872, "learning_rate": 1.7074590235241046e-05, "loss": 0.0202, "step": 7087 }, { "epoch": 6.815384615384615, "grad_norm": 2.7539379596710205, "learning_rate": 1.7073709794028312e-05, "loss": 0.0403, "step": 7088 }, { "epoch": 6.816346153846154, "grad_norm": 3.405616044998169, "learning_rate": 1.7072829243052363e-05, "loss": 0.0836, "step": 7089 }, { "epoch": 6.8173076923076925, "grad_norm": 3.0142509937286377, "learning_rate": 1.7071948582326863e-05, "loss": 0.0863, "step": 7090 }, { "epoch": 6.81826923076923, "grad_norm": 2.8274989128112793, "learning_rate": 1.7071067811865477e-05, "loss": 0.0278, "step": 7091 }, { "epoch": 6.819230769230769, "grad_norm": 1.7168817520141602, "learning_rate": 1.7070186931681872e-05, "loss": 0.0293, "step": 7092 }, { "epoch": 6.820192307692308, "grad_norm": 2.292757272720337, "learning_rate": 1.7069305941789716e-05, "loss": 0.0435, "step": 7093 }, { "epoch": 6.821153846153846, "grad_norm": 4.515440464019775, "learning_rate": 1.706842484220268e-05, "loss": 0.1455, "step": 7094 }, { "epoch": 6.822115384615385, "grad_norm": 3.6553502082824707, "learning_rate": 1.7067543632934436e-05, "loss": 0.0731, "step": 7095 }, { "epoch": 6.823076923076923, "grad_norm": 1.9431673288345337, "learning_rate": 1.7066662313998657e-05, "loss": 0.0112, "step": 7096 }, { "epoch": 6.8240384615384615, "grad_norm": 1.7621190547943115, "learning_rate": 1.7065780885409022e-05, "loss": 0.0174, "step": 7097 }, { "epoch": 6.825, "grad_norm": 4.222501277923584, "learning_rate": 1.7064899347179205e-05, "loss": 0.0411, "step": 7098 }, { "epoch": 6.825961538461538, "grad_norm": 5.887914180755615, "learning_rate": 1.7064017699322884e-05, "loss": 0.1091, "step": 7099 }, { "epoch": 6.826923076923077, "grad_norm": 3.4621002674102783, "learning_rate": 1.706313594185374e-05, "loss": 0.0797, "step": 7100 }, { "epoch": 6.827884615384615, "grad_norm": 2.79057240486145, "learning_rate": 1.706225407478546e-05, "loss": 0.017, "step": 7101 }, { "epoch": 6.828846153846154, "grad_norm": 5.5418381690979, "learning_rate": 1.706137209813172e-05, "loss": 0.0993, "step": 7102 }, { "epoch": 6.829807692307693, "grad_norm": 2.4670026302337646, "learning_rate": 1.7060490011906213e-05, "loss": 0.0316, "step": 7103 }, { "epoch": 6.8307692307692305, "grad_norm": 3.1700010299682617, "learning_rate": 1.705960781612262e-05, "loss": 0.0243, "step": 7104 }, { "epoch": 6.831730769230769, "grad_norm": 4.890570163726807, "learning_rate": 1.7058725510794635e-05, "loss": 0.0975, "step": 7105 }, { "epoch": 6.832692307692308, "grad_norm": 1.547156810760498, "learning_rate": 1.705784309593595e-05, "loss": 0.0197, "step": 7106 }, { "epoch": 6.8336538461538465, "grad_norm": 4.027763366699219, "learning_rate": 1.7056960571560252e-05, "loss": 0.1038, "step": 7107 }, { "epoch": 6.834615384615384, "grad_norm": 3.9088807106018066, "learning_rate": 1.7056077937681236e-05, "loss": 0.0286, "step": 7108 }, { "epoch": 6.835576923076923, "grad_norm": 2.744873285293579, "learning_rate": 1.7055195194312603e-05, "loss": 0.0342, "step": 7109 }, { "epoch": 6.836538461538462, "grad_norm": 3.779719114303589, "learning_rate": 1.7054312341468044e-05, "loss": 0.0618, "step": 7110 }, { "epoch": 6.8375, "grad_norm": 2.252436637878418, "learning_rate": 1.7053429379161262e-05, "loss": 0.0263, "step": 7111 }, { "epoch": 6.838461538461538, "grad_norm": 3.643470525741577, "learning_rate": 1.7052546307405962e-05, "loss": 0.057, "step": 7112 }, { "epoch": 6.839423076923077, "grad_norm": 3.0732626914978027, "learning_rate": 1.705166312621584e-05, "loss": 0.0571, "step": 7113 }, { "epoch": 6.8403846153846155, "grad_norm": 2.924802780151367, "learning_rate": 1.70507798356046e-05, "loss": 0.0458, "step": 7114 }, { "epoch": 6.841346153846154, "grad_norm": 1.6805018186569214, "learning_rate": 1.7049896435585954e-05, "loss": 0.0113, "step": 7115 }, { "epoch": 6.842307692307692, "grad_norm": 2.213127613067627, "learning_rate": 1.7049012926173606e-05, "loss": 0.0474, "step": 7116 }, { "epoch": 6.843269230769231, "grad_norm": 2.6150965690612793, "learning_rate": 1.7048129307381266e-05, "loss": 0.0185, "step": 7117 }, { "epoch": 6.844230769230769, "grad_norm": 2.43005108833313, "learning_rate": 1.7047245579222645e-05, "loss": 0.0283, "step": 7118 }, { "epoch": 6.845192307692308, "grad_norm": 2.499457836151123, "learning_rate": 1.7046361741711454e-05, "loss": 0.0359, "step": 7119 }, { "epoch": 6.846153846153846, "grad_norm": 2.5298919677734375, "learning_rate": 1.704547779486141e-05, "loss": 0.0244, "step": 7120 }, { "epoch": 6.8471153846153845, "grad_norm": 2.8966257572174072, "learning_rate": 1.7044593738686232e-05, "loss": 0.0324, "step": 7121 }, { "epoch": 6.848076923076923, "grad_norm": 5.474308490753174, "learning_rate": 1.7043709573199635e-05, "loss": 0.1018, "step": 7122 }, { "epoch": 6.849038461538462, "grad_norm": 4.618661880493164, "learning_rate": 1.7042825298415336e-05, "loss": 0.0428, "step": 7123 }, { "epoch": 6.85, "grad_norm": 2.757751703262329, "learning_rate": 1.704194091434706e-05, "loss": 0.0553, "step": 7124 }, { "epoch": 6.850961538461538, "grad_norm": 1.3087743520736694, "learning_rate": 1.7041056421008522e-05, "loss": 0.0104, "step": 7125 }, { "epoch": 6.851923076923077, "grad_norm": 2.918778896331787, "learning_rate": 1.7040171818413463e-05, "loss": 0.0933, "step": 7126 }, { "epoch": 6.852884615384616, "grad_norm": 2.787118434906006, "learning_rate": 1.70392871065756e-05, "loss": 0.0486, "step": 7127 }, { "epoch": 6.8538461538461535, "grad_norm": 3.7990362644195557, "learning_rate": 1.7038402285508655e-05, "loss": 0.0708, "step": 7128 }, { "epoch": 6.854807692307692, "grad_norm": 9.323714256286621, "learning_rate": 1.7037517355226367e-05, "loss": 0.2476, "step": 7129 }, { "epoch": 6.855769230769231, "grad_norm": 1.9326595067977905, "learning_rate": 1.7036632315742464e-05, "loss": 0.0245, "step": 7130 }, { "epoch": 6.8567307692307695, "grad_norm": 2.9586424827575684, "learning_rate": 1.703574716707068e-05, "loss": 0.0431, "step": 7131 }, { "epoch": 6.857692307692307, "grad_norm": 1.8655807971954346, "learning_rate": 1.703486190922475e-05, "loss": 0.0493, "step": 7132 }, { "epoch": 6.858653846153846, "grad_norm": 2.528289318084717, "learning_rate": 1.703397654221841e-05, "loss": 0.0304, "step": 7133 }, { "epoch": 6.859615384615385, "grad_norm": 2.8733232021331787, "learning_rate": 1.70330910660654e-05, "loss": 0.0354, "step": 7134 }, { "epoch": 6.860576923076923, "grad_norm": 0.910214900970459, "learning_rate": 1.7032205480779455e-05, "loss": 0.0082, "step": 7135 }, { "epoch": 6.861538461538462, "grad_norm": 2.9424118995666504, "learning_rate": 1.7031319786374322e-05, "loss": 0.0314, "step": 7136 }, { "epoch": 6.8625, "grad_norm": 0.3297310173511505, "learning_rate": 1.7030433982863743e-05, "loss": 0.004, "step": 7137 }, { "epoch": 6.8634615384615385, "grad_norm": 1.816348671913147, "learning_rate": 1.7029548070261462e-05, "loss": 0.0192, "step": 7138 }, { "epoch": 6.864423076923077, "grad_norm": 2.7566699981689453, "learning_rate": 1.7028662048581227e-05, "loss": 0.0687, "step": 7139 }, { "epoch": 6.865384615384615, "grad_norm": 3.953742027282715, "learning_rate": 1.7027775917836785e-05, "loss": 0.1324, "step": 7140 }, { "epoch": 6.866346153846154, "grad_norm": 4.005918502807617, "learning_rate": 1.702688967804189e-05, "loss": 0.0688, "step": 7141 }, { "epoch": 6.867307692307692, "grad_norm": 4.23971700668335, "learning_rate": 1.702600332921029e-05, "loss": 0.0292, "step": 7142 }, { "epoch": 6.868269230769231, "grad_norm": 3.831174373626709, "learning_rate": 1.7025116871355737e-05, "loss": 0.0792, "step": 7143 }, { "epoch": 6.86923076923077, "grad_norm": 1.9816703796386719, "learning_rate": 1.7024230304491987e-05, "loss": 0.0984, "step": 7144 }, { "epoch": 6.8701923076923075, "grad_norm": 1.482731580734253, "learning_rate": 1.7023343628632804e-05, "loss": 0.0289, "step": 7145 }, { "epoch": 6.871153846153846, "grad_norm": 1.7141388654708862, "learning_rate": 1.702245684379194e-05, "loss": 0.0277, "step": 7146 }, { "epoch": 6.872115384615385, "grad_norm": 2.960179567337036, "learning_rate": 1.7021569949983153e-05, "loss": 0.0292, "step": 7147 }, { "epoch": 6.873076923076923, "grad_norm": 2.9312257766723633, "learning_rate": 1.702068294722021e-05, "loss": 0.0431, "step": 7148 }, { "epoch": 6.874038461538461, "grad_norm": 1.2952412366867065, "learning_rate": 1.7019795835516873e-05, "loss": 0.0155, "step": 7149 }, { "epoch": 6.875, "grad_norm": 2.6641366481781006, "learning_rate": 1.7018908614886908e-05, "loss": 0.0424, "step": 7150 }, { "epoch": 6.875961538461539, "grad_norm": 3.2725830078125, "learning_rate": 1.701802128534408e-05, "loss": 0.0531, "step": 7151 }, { "epoch": 6.876923076923077, "grad_norm": 4.556220531463623, "learning_rate": 1.7017133846902162e-05, "loss": 0.0792, "step": 7152 }, { "epoch": 6.877884615384615, "grad_norm": 3.896845579147339, "learning_rate": 1.701624629957492e-05, "loss": 0.1304, "step": 7153 }, { "epoch": 6.878846153846154, "grad_norm": 1.1314430236816406, "learning_rate": 1.7015358643376128e-05, "loss": 0.0097, "step": 7154 }, { "epoch": 6.8798076923076925, "grad_norm": 2.7136588096618652, "learning_rate": 1.7014470878319562e-05, "loss": 0.037, "step": 7155 }, { "epoch": 6.88076923076923, "grad_norm": 3.0986177921295166, "learning_rate": 1.7013583004418994e-05, "loss": 0.0521, "step": 7156 }, { "epoch": 6.881730769230769, "grad_norm": 2.00750732421875, "learning_rate": 1.70126950216882e-05, "loss": 0.0129, "step": 7157 }, { "epoch": 6.882692307692308, "grad_norm": 2.4611120223999023, "learning_rate": 1.7011806930140962e-05, "loss": 0.0411, "step": 7158 }, { "epoch": 6.883653846153846, "grad_norm": 3.0316758155822754, "learning_rate": 1.7010918729791064e-05, "loss": 0.06, "step": 7159 }, { "epoch": 6.884615384615385, "grad_norm": 2.391464948654175, "learning_rate": 1.701003042065228e-05, "loss": 0.0605, "step": 7160 }, { "epoch": 6.885576923076923, "grad_norm": 2.731940984725952, "learning_rate": 1.7009142002738398e-05, "loss": 0.0232, "step": 7161 }, { "epoch": 6.8865384615384615, "grad_norm": 3.1418955326080322, "learning_rate": 1.7008253476063202e-05, "loss": 0.0584, "step": 7162 }, { "epoch": 6.8875, "grad_norm": 3.877270221710205, "learning_rate": 1.7007364840640485e-05, "loss": 0.066, "step": 7163 }, { "epoch": 6.888461538461538, "grad_norm": 2.6592838764190674, "learning_rate": 1.7006476096484034e-05, "loss": 0.0248, "step": 7164 }, { "epoch": 6.889423076923077, "grad_norm": 3.050607919692993, "learning_rate": 1.7005587243607634e-05, "loss": 0.0564, "step": 7165 }, { "epoch": 6.890384615384615, "grad_norm": 3.776986598968506, "learning_rate": 1.700469828202508e-05, "loss": 0.062, "step": 7166 }, { "epoch": 6.891346153846154, "grad_norm": 0.3076942563056946, "learning_rate": 1.700380921175017e-05, "loss": 0.0023, "step": 7167 }, { "epoch": 6.892307692307693, "grad_norm": 1.7848644256591797, "learning_rate": 1.7002920032796696e-05, "loss": 0.0161, "step": 7168 }, { "epoch": 6.8932692307692305, "grad_norm": 2.1041951179504395, "learning_rate": 1.7002030745178455e-05, "loss": 0.0292, "step": 7169 }, { "epoch": 6.894230769230769, "grad_norm": 2.654197931289673, "learning_rate": 1.700114134890925e-05, "loss": 0.039, "step": 7170 }, { "epoch": 6.895192307692308, "grad_norm": 1.9308301210403442, "learning_rate": 1.7000251844002876e-05, "loss": 0.0155, "step": 7171 }, { "epoch": 6.8961538461538465, "grad_norm": 5.9873738288879395, "learning_rate": 1.6999362230473143e-05, "loss": 0.0578, "step": 7172 }, { "epoch": 6.897115384615384, "grad_norm": 2.2480475902557373, "learning_rate": 1.699847250833385e-05, "loss": 0.0182, "step": 7173 }, { "epoch": 6.898076923076923, "grad_norm": 3.0320234298706055, "learning_rate": 1.69975826775988e-05, "loss": 0.0903, "step": 7174 }, { "epoch": 6.899038461538462, "grad_norm": 2.601938486099243, "learning_rate": 1.699669273828181e-05, "loss": 0.0373, "step": 7175 }, { "epoch": 6.9, "grad_norm": 0.7753565311431885, "learning_rate": 1.699580269039668e-05, "loss": 0.0042, "step": 7176 }, { "epoch": 6.900961538461538, "grad_norm": 4.063664436340332, "learning_rate": 1.6994912533957225e-05, "loss": 0.0782, "step": 7177 }, { "epoch": 6.901923076923077, "grad_norm": 2.5652554035186768, "learning_rate": 1.699402226897726e-05, "loss": 0.0306, "step": 7178 }, { "epoch": 6.9028846153846155, "grad_norm": 4.574602127075195, "learning_rate": 1.6993131895470594e-05, "loss": 0.0675, "step": 7179 }, { "epoch": 6.903846153846154, "grad_norm": 2.175647497177124, "learning_rate": 1.6992241413451047e-05, "loss": 0.0155, "step": 7180 }, { "epoch": 6.904807692307692, "grad_norm": 3.303365707397461, "learning_rate": 1.6991350822932435e-05, "loss": 0.082, "step": 7181 }, { "epoch": 6.905769230769231, "grad_norm": 3.2324612140655518, "learning_rate": 1.6990460123928577e-05, "loss": 0.0327, "step": 7182 }, { "epoch": 6.906730769230769, "grad_norm": 2.6072442531585693, "learning_rate": 1.6989569316453294e-05, "loss": 0.0446, "step": 7183 }, { "epoch": 6.907692307692308, "grad_norm": 3.074270486831665, "learning_rate": 1.6988678400520412e-05, "loss": 0.1417, "step": 7184 }, { "epoch": 6.908653846153846, "grad_norm": 2.6795473098754883, "learning_rate": 1.698778737614375e-05, "loss": 0.0475, "step": 7185 }, { "epoch": 6.9096153846153845, "grad_norm": 1.3068695068359375, "learning_rate": 1.6986896243337137e-05, "loss": 0.0173, "step": 7186 }, { "epoch": 6.910576923076923, "grad_norm": 4.466561794281006, "learning_rate": 1.69860050021144e-05, "loss": 0.0568, "step": 7187 }, { "epoch": 6.911538461538462, "grad_norm": 2.506927251815796, "learning_rate": 1.6985113652489374e-05, "loss": 0.0229, "step": 7188 }, { "epoch": 6.9125, "grad_norm": 3.0487191677093506, "learning_rate": 1.6984222194475877e-05, "loss": 0.0634, "step": 7189 }, { "epoch": 6.913461538461538, "grad_norm": 2.071024179458618, "learning_rate": 1.6983330628087757e-05, "loss": 0.0137, "step": 7190 }, { "epoch": 6.914423076923077, "grad_norm": 3.838421583175659, "learning_rate": 1.6982438953338837e-05, "loss": 0.0449, "step": 7191 }, { "epoch": 6.915384615384616, "grad_norm": 4.18673849105835, "learning_rate": 1.6981547170242962e-05, "loss": 0.0456, "step": 7192 }, { "epoch": 6.9163461538461535, "grad_norm": 3.832897186279297, "learning_rate": 1.698065527881396e-05, "loss": 0.0649, "step": 7193 }, { "epoch": 6.917307692307692, "grad_norm": 2.9148755073547363, "learning_rate": 1.6979763279065682e-05, "loss": 0.0398, "step": 7194 }, { "epoch": 6.918269230769231, "grad_norm": 4.141116142272949, "learning_rate": 1.6978871171011963e-05, "loss": 0.0974, "step": 7195 }, { "epoch": 6.9192307692307695, "grad_norm": 2.7911503314971924, "learning_rate": 1.697797895466664e-05, "loss": 0.0212, "step": 7196 }, { "epoch": 6.920192307692307, "grad_norm": 7.747021675109863, "learning_rate": 1.697708663004357e-05, "loss": 0.1422, "step": 7197 }, { "epoch": 6.921153846153846, "grad_norm": 1.1677156686782837, "learning_rate": 1.6976194197156587e-05, "loss": 0.0138, "step": 7198 }, { "epoch": 6.922115384615385, "grad_norm": 0.5094335079193115, "learning_rate": 1.6975301656019553e-05, "loss": 0.0035, "step": 7199 }, { "epoch": 6.923076923076923, "grad_norm": 3.6961522102355957, "learning_rate": 1.6974409006646304e-05, "loss": 0.0295, "step": 7200 }, { "epoch": 6.924038461538462, "grad_norm": 1.952799677848816, "learning_rate": 1.69735162490507e-05, "loss": 0.0156, "step": 7201 }, { "epoch": 6.925, "grad_norm": 3.8407464027404785, "learning_rate": 1.6972623383246586e-05, "loss": 0.0359, "step": 7202 }, { "epoch": 6.9259615384615385, "grad_norm": 2.741382122039795, "learning_rate": 1.697173040924782e-05, "loss": 0.0386, "step": 7203 }, { "epoch": 6.926923076923077, "grad_norm": 2.9918088912963867, "learning_rate": 1.697083732706827e-05, "loss": 0.0281, "step": 7204 }, { "epoch": 6.927884615384615, "grad_norm": 7.5680365562438965, "learning_rate": 1.6969944136721775e-05, "loss": 0.075, "step": 7205 }, { "epoch": 6.928846153846154, "grad_norm": 2.3488106727600098, "learning_rate": 1.696905083822221e-05, "loss": 0.0264, "step": 7206 }, { "epoch": 6.929807692307692, "grad_norm": 5.415262222290039, "learning_rate": 1.6968157431583423e-05, "loss": 0.0871, "step": 7207 }, { "epoch": 6.930769230769231, "grad_norm": 1.1032336950302124, "learning_rate": 1.696726391681929e-05, "loss": 0.0098, "step": 7208 }, { "epoch": 6.93173076923077, "grad_norm": 4.04144811630249, "learning_rate": 1.6966370293943667e-05, "loss": 0.1008, "step": 7209 }, { "epoch": 6.9326923076923075, "grad_norm": 2.619791030883789, "learning_rate": 1.696547656297042e-05, "loss": 0.0266, "step": 7210 }, { "epoch": 6.933653846153846, "grad_norm": 3.6193251609802246, "learning_rate": 1.6964582723913423e-05, "loss": 0.0927, "step": 7211 }, { "epoch": 6.934615384615385, "grad_norm": 3.0106141567230225, "learning_rate": 1.6963688776786545e-05, "loss": 0.0542, "step": 7212 }, { "epoch": 6.935576923076923, "grad_norm": 2.338977336883545, "learning_rate": 1.696279472160365e-05, "loss": 0.0421, "step": 7213 }, { "epoch": 6.936538461538461, "grad_norm": 1.1675902605056763, "learning_rate": 1.696190055837862e-05, "loss": 0.0101, "step": 7214 }, { "epoch": 6.9375, "grad_norm": 2.6778652667999268, "learning_rate": 1.6961006287125327e-05, "loss": 0.0193, "step": 7215 }, { "epoch": 6.938461538461539, "grad_norm": 3.5599746704101562, "learning_rate": 1.696011190785765e-05, "loss": 0.0772, "step": 7216 }, { "epoch": 6.939423076923077, "grad_norm": 2.5166683197021484, "learning_rate": 1.6959217420589458e-05, "loss": 0.0254, "step": 7217 }, { "epoch": 6.940384615384615, "grad_norm": 4.291004657745361, "learning_rate": 1.695832282533464e-05, "loss": 0.0684, "step": 7218 }, { "epoch": 6.941346153846154, "grad_norm": 3.948674440383911, "learning_rate": 1.695742812210707e-05, "loss": 0.0345, "step": 7219 }, { "epoch": 6.9423076923076925, "grad_norm": 1.5011762380599976, "learning_rate": 1.695653331092064e-05, "loss": 0.0138, "step": 7220 }, { "epoch": 6.94326923076923, "grad_norm": 1.3343820571899414, "learning_rate": 1.695563839178923e-05, "loss": 0.0091, "step": 7221 }, { "epoch": 6.944230769230769, "grad_norm": 2.4866273403167725, "learning_rate": 1.6954743364726722e-05, "loss": 0.0253, "step": 7222 }, { "epoch": 6.945192307692308, "grad_norm": 3.0480902194976807, "learning_rate": 1.6953848229747012e-05, "loss": 0.0639, "step": 7223 }, { "epoch": 6.946153846153846, "grad_norm": 3.8706753253936768, "learning_rate": 1.6952952986863986e-05, "loss": 0.0353, "step": 7224 }, { "epoch": 6.947115384615385, "grad_norm": 3.93873929977417, "learning_rate": 1.695205763609154e-05, "loss": 0.0393, "step": 7225 }, { "epoch": 6.948076923076923, "grad_norm": 2.013171911239624, "learning_rate": 1.6951162177443557e-05, "loss": 0.0135, "step": 7226 }, { "epoch": 6.9490384615384615, "grad_norm": 2.073823928833008, "learning_rate": 1.6950266610933944e-05, "loss": 0.016, "step": 7227 }, { "epoch": 6.95, "grad_norm": 2.868518829345703, "learning_rate": 1.694937093657659e-05, "loss": 0.0381, "step": 7228 }, { "epoch": 6.950961538461538, "grad_norm": 2.3691515922546387, "learning_rate": 1.6948475154385392e-05, "loss": 0.0452, "step": 7229 }, { "epoch": 6.951923076923077, "grad_norm": 3.933544874191284, "learning_rate": 1.694757926437426e-05, "loss": 0.0687, "step": 7230 }, { "epoch": 6.952884615384615, "grad_norm": 0.8371196985244751, "learning_rate": 1.6946683266557086e-05, "loss": 0.0038, "step": 7231 }, { "epoch": 6.953846153846154, "grad_norm": 2.301244020462036, "learning_rate": 1.6945787160947774e-05, "loss": 0.0416, "step": 7232 }, { "epoch": 6.954807692307693, "grad_norm": 2.868109941482544, "learning_rate": 1.694489094756023e-05, "loss": 0.0867, "step": 7233 }, { "epoch": 6.9557692307692305, "grad_norm": 3.459928035736084, "learning_rate": 1.6943994626408365e-05, "loss": 0.0319, "step": 7234 }, { "epoch": 6.956730769230769, "grad_norm": 3.9789481163024902, "learning_rate": 1.694309819750608e-05, "loss": 0.0243, "step": 7235 }, { "epoch": 6.957692307692308, "grad_norm": 4.495654582977295, "learning_rate": 1.6942201660867294e-05, "loss": 0.08, "step": 7236 }, { "epoch": 6.9586538461538465, "grad_norm": 1.796750545501709, "learning_rate": 1.6941305016505908e-05, "loss": 0.0215, "step": 7237 }, { "epoch": 6.959615384615384, "grad_norm": 0.6908000707626343, "learning_rate": 1.6940408264435842e-05, "loss": 0.0066, "step": 7238 }, { "epoch": 6.960576923076923, "grad_norm": 3.4452223777770996, "learning_rate": 1.6939511404671013e-05, "loss": 0.025, "step": 7239 }, { "epoch": 6.961538461538462, "grad_norm": 4.96945333480835, "learning_rate": 1.693861443722533e-05, "loss": 0.1165, "step": 7240 }, { "epoch": 6.9625, "grad_norm": 2.9288854598999023, "learning_rate": 1.693771736211272e-05, "loss": 0.0629, "step": 7241 }, { "epoch": 6.963461538461538, "grad_norm": 3.003803253173828, "learning_rate": 1.6936820179347092e-05, "loss": 0.083, "step": 7242 }, { "epoch": 6.964423076923077, "grad_norm": 3.8304264545440674, "learning_rate": 1.6935922888942377e-05, "loss": 0.1182, "step": 7243 }, { "epoch": 6.9653846153846155, "grad_norm": 2.9753060340881348, "learning_rate": 1.69350254909125e-05, "loss": 0.0346, "step": 7244 }, { "epoch": 6.966346153846154, "grad_norm": 5.919450759887695, "learning_rate": 1.693412798527138e-05, "loss": 0.1645, "step": 7245 }, { "epoch": 6.967307692307692, "grad_norm": 2.929158926010132, "learning_rate": 1.6933230372032938e-05, "loss": 0.0419, "step": 7246 }, { "epoch": 6.968269230769231, "grad_norm": 2.2799124717712402, "learning_rate": 1.6932332651211115e-05, "loss": 0.0203, "step": 7247 }, { "epoch": 6.969230769230769, "grad_norm": 4.060588359832764, "learning_rate": 1.6931434822819834e-05, "loss": 0.1068, "step": 7248 }, { "epoch": 6.970192307692308, "grad_norm": 4.103173732757568, "learning_rate": 1.6930536886873032e-05, "loss": 0.0661, "step": 7249 }, { "epoch": 6.971153846153846, "grad_norm": 6.052581310272217, "learning_rate": 1.6929638843384633e-05, "loss": 0.0734, "step": 7250 }, { "epoch": 6.9721153846153845, "grad_norm": 0.5085223317146301, "learning_rate": 1.692874069236858e-05, "loss": 0.0048, "step": 7251 }, { "epoch": 6.973076923076923, "grad_norm": 0.7268038392066956, "learning_rate": 1.692784243383881e-05, "loss": 0.0065, "step": 7252 }, { "epoch": 6.974038461538462, "grad_norm": 4.160561561584473, "learning_rate": 1.692694406780925e-05, "loss": 0.0656, "step": 7253 }, { "epoch": 6.975, "grad_norm": 5.134509086608887, "learning_rate": 1.6926045594293854e-05, "loss": 0.1387, "step": 7254 }, { "epoch": 6.975961538461538, "grad_norm": 3.2004587650299072, "learning_rate": 1.692514701330656e-05, "loss": 0.0327, "step": 7255 }, { "epoch": 6.976923076923077, "grad_norm": 2.209791660308838, "learning_rate": 1.6924248324861304e-05, "loss": 0.0423, "step": 7256 }, { "epoch": 6.977884615384616, "grad_norm": 3.834095001220703, "learning_rate": 1.692334952897204e-05, "loss": 0.0759, "step": 7257 }, { "epoch": 6.9788461538461535, "grad_norm": 3.7463626861572266, "learning_rate": 1.692245062565271e-05, "loss": 0.0242, "step": 7258 }, { "epoch": 6.979807692307692, "grad_norm": 0.9166072010993958, "learning_rate": 1.6921551614917263e-05, "loss": 0.009, "step": 7259 }, { "epoch": 6.980769230769231, "grad_norm": 2.6910111904144287, "learning_rate": 1.692065249677965e-05, "loss": 0.0318, "step": 7260 }, { "epoch": 6.9817307692307695, "grad_norm": 3.8541629314422607, "learning_rate": 1.6919753271253826e-05, "loss": 0.1076, "step": 7261 }, { "epoch": 6.982692307692307, "grad_norm": 1.4838114976882935, "learning_rate": 1.6918853938353734e-05, "loss": 0.047, "step": 7262 }, { "epoch": 6.983653846153846, "grad_norm": 4.14892578125, "learning_rate": 1.6917954498093337e-05, "loss": 0.1277, "step": 7263 }, { "epoch": 6.984615384615385, "grad_norm": 0.8983811140060425, "learning_rate": 1.6917054950486592e-05, "loss": 0.0104, "step": 7264 }, { "epoch": 6.985576923076923, "grad_norm": 3.0995101928710938, "learning_rate": 1.6916155295547457e-05, "loss": 0.0635, "step": 7265 }, { "epoch": 6.986538461538462, "grad_norm": 2.9608542919158936, "learning_rate": 1.6915255533289888e-05, "loss": 0.0558, "step": 7266 }, { "epoch": 6.9875, "grad_norm": 1.940702199935913, "learning_rate": 1.691435566372785e-05, "loss": 0.0184, "step": 7267 }, { "epoch": 6.9884615384615385, "grad_norm": 2.2919516563415527, "learning_rate": 1.6913455686875307e-05, "loss": 0.0278, "step": 7268 }, { "epoch": 6.989423076923077, "grad_norm": 2.6187803745269775, "learning_rate": 1.6912555602746218e-05, "loss": 0.0237, "step": 7269 }, { "epoch": 6.990384615384615, "grad_norm": 1.4675929546356201, "learning_rate": 1.6911655411354557e-05, "loss": 0.0146, "step": 7270 }, { "epoch": 6.991346153846154, "grad_norm": 1.4899029731750488, "learning_rate": 1.691075511271429e-05, "loss": 0.0129, "step": 7271 }, { "epoch": 6.992307692307692, "grad_norm": 0.5582616925239563, "learning_rate": 1.6909854706839385e-05, "loss": 0.0039, "step": 7272 }, { "epoch": 6.993269230769231, "grad_norm": 3.023552417755127, "learning_rate": 1.6908954193743816e-05, "loss": 0.0569, "step": 7273 }, { "epoch": 6.99423076923077, "grad_norm": 2.6198172569274902, "learning_rate": 1.6908053573441556e-05, "loss": 0.0281, "step": 7274 }, { "epoch": 6.9951923076923075, "grad_norm": 4.620250225067139, "learning_rate": 1.690715284594658e-05, "loss": 0.0564, "step": 7275 }, { "epoch": 6.996153846153846, "grad_norm": 3.0998146533966064, "learning_rate": 1.690625201127286e-05, "loss": 0.0332, "step": 7276 }, { "epoch": 6.997115384615385, "grad_norm": 2.4699387550354004, "learning_rate": 1.6905351069434382e-05, "loss": 0.0208, "step": 7277 }, { "epoch": 6.998076923076923, "grad_norm": 3.004265308380127, "learning_rate": 1.6904450020445123e-05, "loss": 0.0533, "step": 7278 }, { "epoch": 6.999038461538461, "grad_norm": 2.9670708179473877, "learning_rate": 1.6903548864319063e-05, "loss": 0.0294, "step": 7279 }, { "epoch": 7.0, "grad_norm": 3.333247661590576, "learning_rate": 1.6902647601070183e-05, "loss": 0.0562, "step": 7280 }, { "epoch": 7.000961538461539, "grad_norm": 1.5104539394378662, "learning_rate": 1.6901746230712476e-05, "loss": 0.0166, "step": 7281 }, { "epoch": 7.001923076923077, "grad_norm": 1.7373547554016113, "learning_rate": 1.690084475325992e-05, "loss": 0.0101, "step": 7282 }, { "epoch": 7.002884615384615, "grad_norm": 2.4939346313476562, "learning_rate": 1.6899943168726508e-05, "loss": 0.0402, "step": 7283 }, { "epoch": 7.003846153846154, "grad_norm": 0.19821670651435852, "learning_rate": 1.6899041477126233e-05, "loss": 0.002, "step": 7284 }, { "epoch": 7.0048076923076925, "grad_norm": 2.919705390930176, "learning_rate": 1.689813967847308e-05, "loss": 0.0197, "step": 7285 }, { "epoch": 7.005769230769231, "grad_norm": 2.401500701904297, "learning_rate": 1.6897237772781046e-05, "loss": 0.0241, "step": 7286 }, { "epoch": 7.006730769230769, "grad_norm": 0.9190427660942078, "learning_rate": 1.6896335760064123e-05, "loss": 0.0081, "step": 7287 }, { "epoch": 7.007692307692308, "grad_norm": 1.1063673496246338, "learning_rate": 1.689543364033631e-05, "loss": 0.0237, "step": 7288 }, { "epoch": 7.008653846153846, "grad_norm": 3.4831690788269043, "learning_rate": 1.6894531413611603e-05, "loss": 0.08, "step": 7289 }, { "epoch": 7.009615384615385, "grad_norm": 0.8827788829803467, "learning_rate": 1.6893629079904006e-05, "loss": 0.0059, "step": 7290 }, { "epoch": 7.010576923076923, "grad_norm": 1.6513789892196655, "learning_rate": 1.6892726639227518e-05, "loss": 0.0111, "step": 7291 }, { "epoch": 7.0115384615384615, "grad_norm": 3.748382806777954, "learning_rate": 1.6891824091596143e-05, "loss": 0.0837, "step": 7292 }, { "epoch": 7.0125, "grad_norm": 3.9743711948394775, "learning_rate": 1.6890921437023883e-05, "loss": 0.0693, "step": 7293 }, { "epoch": 7.013461538461539, "grad_norm": 1.540157675743103, "learning_rate": 1.6890018675524752e-05, "loss": 0.0196, "step": 7294 }, { "epoch": 7.014423076923077, "grad_norm": 1.989328145980835, "learning_rate": 1.6889115807112747e-05, "loss": 0.0131, "step": 7295 }, { "epoch": 7.015384615384615, "grad_norm": 0.9563029408454895, "learning_rate": 1.6888212831801887e-05, "loss": 0.0091, "step": 7296 }, { "epoch": 7.016346153846154, "grad_norm": 2.0197558403015137, "learning_rate": 1.6887309749606182e-05, "loss": 0.0393, "step": 7297 }, { "epoch": 7.017307692307693, "grad_norm": 5.232424259185791, "learning_rate": 1.688640656053964e-05, "loss": 0.0845, "step": 7298 }, { "epoch": 7.0182692307692305, "grad_norm": 1.4993228912353516, "learning_rate": 1.6885503264616282e-05, "loss": 0.0234, "step": 7299 }, { "epoch": 7.019230769230769, "grad_norm": 2.1932849884033203, "learning_rate": 1.6884599861850123e-05, "loss": 0.0354, "step": 7300 }, { "epoch": 7.020192307692308, "grad_norm": 0.2690395414829254, "learning_rate": 1.688369635225518e-05, "loss": 0.0017, "step": 7301 }, { "epoch": 7.0211538461538465, "grad_norm": 2.1435048580169678, "learning_rate": 1.6882792735845472e-05, "loss": 0.0144, "step": 7302 }, { "epoch": 7.022115384615384, "grad_norm": 3.181657552719116, "learning_rate": 1.688188901263502e-05, "loss": 0.1496, "step": 7303 }, { "epoch": 7.023076923076923, "grad_norm": 2.9774208068847656, "learning_rate": 1.688098518263785e-05, "loss": 0.0138, "step": 7304 }, { "epoch": 7.024038461538462, "grad_norm": 0.9575802683830261, "learning_rate": 1.688008124586799e-05, "loss": 0.0059, "step": 7305 }, { "epoch": 7.025, "grad_norm": 1.7624629735946655, "learning_rate": 1.687917720233945e-05, "loss": 0.0196, "step": 7306 }, { "epoch": 7.025961538461538, "grad_norm": 1.001130223274231, "learning_rate": 1.6878273052066282e-05, "loss": 0.0065, "step": 7307 }, { "epoch": 7.026923076923077, "grad_norm": 0.1054275780916214, "learning_rate": 1.68773687950625e-05, "loss": 0.001, "step": 7308 }, { "epoch": 7.0278846153846155, "grad_norm": 0.6162631511688232, "learning_rate": 1.6876464431342135e-05, "loss": 0.0049, "step": 7309 }, { "epoch": 7.028846153846154, "grad_norm": 4.529012203216553, "learning_rate": 1.6875559960919227e-05, "loss": 0.0837, "step": 7310 }, { "epoch": 7.029807692307692, "grad_norm": 0.7877856492996216, "learning_rate": 1.687465538380781e-05, "loss": 0.0041, "step": 7311 }, { "epoch": 7.030769230769231, "grad_norm": 1.57015061378479, "learning_rate": 1.6873750700021917e-05, "loss": 0.006, "step": 7312 }, { "epoch": 7.031730769230769, "grad_norm": 0.41593554615974426, "learning_rate": 1.6872845909575585e-05, "loss": 0.0023, "step": 7313 }, { "epoch": 7.032692307692308, "grad_norm": 2.0293118953704834, "learning_rate": 1.687194101248286e-05, "loss": 0.0193, "step": 7314 }, { "epoch": 7.033653846153846, "grad_norm": 2.084883213043213, "learning_rate": 1.6871036008757775e-05, "loss": 0.0149, "step": 7315 }, { "epoch": 7.0346153846153845, "grad_norm": 1.6077239513397217, "learning_rate": 1.687013089841438e-05, "loss": 0.0128, "step": 7316 }, { "epoch": 7.035576923076923, "grad_norm": 3.47530460357666, "learning_rate": 1.6869225681466717e-05, "loss": 0.0132, "step": 7317 }, { "epoch": 7.036538461538462, "grad_norm": 1.7865819931030273, "learning_rate": 1.6868320357928833e-05, "loss": 0.0248, "step": 7318 }, { "epoch": 7.0375, "grad_norm": 2.5195870399475098, "learning_rate": 1.6867414927814773e-05, "loss": 0.0156, "step": 7319 }, { "epoch": 7.038461538461538, "grad_norm": 0.35231006145477295, "learning_rate": 1.686650939113859e-05, "loss": 0.0032, "step": 7320 }, { "epoch": 7.039423076923077, "grad_norm": 2.068540573120117, "learning_rate": 1.6865603747914336e-05, "loss": 0.0759, "step": 7321 }, { "epoch": 7.040384615384616, "grad_norm": 4.1196208000183105, "learning_rate": 1.6864697998156063e-05, "loss": 0.0619, "step": 7322 }, { "epoch": 7.0413461538461535, "grad_norm": 2.3664278984069824, "learning_rate": 1.686379214187782e-05, "loss": 0.024, "step": 7323 }, { "epoch": 7.042307692307692, "grad_norm": 1.9854681491851807, "learning_rate": 1.686288617909367e-05, "loss": 0.0293, "step": 7324 }, { "epoch": 7.043269230769231, "grad_norm": 5.35850715637207, "learning_rate": 1.686198010981767e-05, "loss": 0.0337, "step": 7325 }, { "epoch": 7.0442307692307695, "grad_norm": 2.701603889465332, "learning_rate": 1.6861073934063878e-05, "loss": 0.0232, "step": 7326 }, { "epoch": 7.045192307692307, "grad_norm": 1.3357921838760376, "learning_rate": 1.6860167651846356e-05, "loss": 0.0126, "step": 7327 }, { "epoch": 7.046153846153846, "grad_norm": 3.8512158393859863, "learning_rate": 1.6859261263179164e-05, "loss": 0.0237, "step": 7328 }, { "epoch": 7.047115384615385, "grad_norm": 0.17653770744800568, "learning_rate": 1.685835476807637e-05, "loss": 0.0018, "step": 7329 }, { "epoch": 7.048076923076923, "grad_norm": 2.621598482131958, "learning_rate": 1.685744816655204e-05, "loss": 0.027, "step": 7330 }, { "epoch": 7.049038461538461, "grad_norm": 2.4583494663238525, "learning_rate": 1.6856541458620242e-05, "loss": 0.0431, "step": 7331 }, { "epoch": 7.05, "grad_norm": 0.3883029818534851, "learning_rate": 1.685563464429504e-05, "loss": 0.0029, "step": 7332 }, { "epoch": 7.0509615384615385, "grad_norm": 3.4931082725524902, "learning_rate": 1.6854727723590513e-05, "loss": 0.0656, "step": 7333 }, { "epoch": 7.051923076923077, "grad_norm": 0.58498615026474, "learning_rate": 1.6853820696520728e-05, "loss": 0.0052, "step": 7334 }, { "epoch": 7.052884615384615, "grad_norm": 2.6599643230438232, "learning_rate": 1.6852913563099763e-05, "loss": 0.032, "step": 7335 }, { "epoch": 7.053846153846154, "grad_norm": 4.053743839263916, "learning_rate": 1.6852006323341692e-05, "loss": 0.0438, "step": 7336 }, { "epoch": 7.054807692307692, "grad_norm": 4.741054058074951, "learning_rate": 1.6851098977260597e-05, "loss": 0.1202, "step": 7337 }, { "epoch": 7.055769230769231, "grad_norm": 0.20839960873126984, "learning_rate": 1.6850191524870548e-05, "loss": 0.0013, "step": 7338 }, { "epoch": 7.056730769230769, "grad_norm": 2.8958616256713867, "learning_rate": 1.6849283966185633e-05, "loss": 0.0295, "step": 7339 }, { "epoch": 7.0576923076923075, "grad_norm": 3.1724112033843994, "learning_rate": 1.6848376301219933e-05, "loss": 0.0367, "step": 7340 }, { "epoch": 7.058653846153846, "grad_norm": 3.594905376434326, "learning_rate": 1.6847468529987533e-05, "loss": 0.1241, "step": 7341 }, { "epoch": 7.059615384615385, "grad_norm": 2.7641286849975586, "learning_rate": 1.684656065250252e-05, "loss": 0.0173, "step": 7342 }, { "epoch": 7.060576923076923, "grad_norm": 3.657553195953369, "learning_rate": 1.684565266877898e-05, "loss": 0.1289, "step": 7343 }, { "epoch": 7.061538461538461, "grad_norm": 1.8015583753585815, "learning_rate": 1.6844744578831e-05, "loss": 0.0129, "step": 7344 }, { "epoch": 7.0625, "grad_norm": 1.0345910787582397, "learning_rate": 1.6843836382672675e-05, "loss": 0.0048, "step": 7345 }, { "epoch": 7.063461538461539, "grad_norm": 2.5547103881835938, "learning_rate": 1.6842928080318094e-05, "loss": 0.0325, "step": 7346 }, { "epoch": 7.064423076923077, "grad_norm": 1.5240639448165894, "learning_rate": 1.6842019671781357e-05, "loss": 0.0199, "step": 7347 }, { "epoch": 7.065384615384615, "grad_norm": 3.351881265640259, "learning_rate": 1.684111115707655e-05, "loss": 0.0288, "step": 7348 }, { "epoch": 7.066346153846154, "grad_norm": 3.9064815044403076, "learning_rate": 1.6840202536217782e-05, "loss": 0.0637, "step": 7349 }, { "epoch": 7.0673076923076925, "grad_norm": 1.822672724723816, "learning_rate": 1.683929380921914e-05, "loss": 0.0171, "step": 7350 }, { "epoch": 7.068269230769231, "grad_norm": 1.89170503616333, "learning_rate": 1.6838384976094738e-05, "loss": 0.0135, "step": 7351 }, { "epoch": 7.069230769230769, "grad_norm": 3.8119540214538574, "learning_rate": 1.683747603685867e-05, "loss": 0.031, "step": 7352 }, { "epoch": 7.070192307692308, "grad_norm": 2.7710583209991455, "learning_rate": 1.683656699152504e-05, "loss": 0.019, "step": 7353 }, { "epoch": 7.071153846153846, "grad_norm": 2.7710583209991455, "learning_rate": 1.6835657840107953e-05, "loss": 0.108, "step": 7354 }, { "epoch": 7.072115384615385, "grad_norm": 6.969161033630371, "learning_rate": 1.6835657840107953e-05, "loss": 0.0111, "step": 7355 }, { "epoch": 7.073076923076923, "grad_norm": 3.0055360794067383, "learning_rate": 1.683474858262152e-05, "loss": 0.0698, "step": 7356 }, { "epoch": 7.0740384615384615, "grad_norm": 3.6451058387756348, "learning_rate": 1.6833839219079852e-05, "loss": 0.0917, "step": 7357 }, { "epoch": 7.075, "grad_norm": 5.514656066894531, "learning_rate": 1.6832929749497055e-05, "loss": 0.1059, "step": 7358 }, { "epoch": 7.075961538461539, "grad_norm": 2.7456283569335938, "learning_rate": 1.683202017388724e-05, "loss": 0.0489, "step": 7359 }, { "epoch": 7.076923076923077, "grad_norm": 4.255626678466797, "learning_rate": 1.6831110492264527e-05, "loss": 0.0263, "step": 7360 }, { "epoch": 7.077884615384615, "grad_norm": 1.4207191467285156, "learning_rate": 1.6830200704643026e-05, "loss": 0.0075, "step": 7361 }, { "epoch": 7.078846153846154, "grad_norm": 2.3853588104248047, "learning_rate": 1.6829290811036857e-05, "loss": 0.0233, "step": 7362 }, { "epoch": 7.079807692307693, "grad_norm": 2.113698720932007, "learning_rate": 1.682838081146014e-05, "loss": 0.0201, "step": 7363 }, { "epoch": 7.0807692307692305, "grad_norm": 2.4773926734924316, "learning_rate": 1.6827470705926995e-05, "loss": 0.0129, "step": 7364 }, { "epoch": 7.081730769230769, "grad_norm": 1.2657474279403687, "learning_rate": 1.682656049445154e-05, "loss": 0.0108, "step": 7365 }, { "epoch": 7.082692307692308, "grad_norm": 4.718565940856934, "learning_rate": 1.68256501770479e-05, "loss": 0.0441, "step": 7366 }, { "epoch": 7.0836538461538465, "grad_norm": 6.018820762634277, "learning_rate": 1.6824739753730206e-05, "loss": 0.1241, "step": 7367 }, { "epoch": 7.084615384615384, "grad_norm": 1.8643053770065308, "learning_rate": 1.682382922451258e-05, "loss": 0.033, "step": 7368 }, { "epoch": 7.085576923076923, "grad_norm": 2.711031198501587, "learning_rate": 1.682291858940915e-05, "loss": 0.0435, "step": 7369 }, { "epoch": 7.086538461538462, "grad_norm": 1.596494197845459, "learning_rate": 1.6822007848434053e-05, "loss": 0.0102, "step": 7370 }, { "epoch": 7.0875, "grad_norm": 4.105428218841553, "learning_rate": 1.6821097001601413e-05, "loss": 0.0894, "step": 7371 }, { "epoch": 7.088461538461538, "grad_norm": 1.3727086782455444, "learning_rate": 1.682018604892537e-05, "loss": 0.0053, "step": 7372 }, { "epoch": 7.089423076923077, "grad_norm": 2.2729110717773438, "learning_rate": 1.6819274990420056e-05, "loss": 0.0178, "step": 7373 }, { "epoch": 7.0903846153846155, "grad_norm": 3.7844600677490234, "learning_rate": 1.6818363826099606e-05, "loss": 0.0475, "step": 7374 }, { "epoch": 7.091346153846154, "grad_norm": 2.7476794719696045, "learning_rate": 1.681745255597816e-05, "loss": 0.0371, "step": 7375 }, { "epoch": 7.092307692307692, "grad_norm": 1.8156098127365112, "learning_rate": 1.6816541180069866e-05, "loss": 0.0159, "step": 7376 }, { "epoch": 7.093269230769231, "grad_norm": 3.1346795558929443, "learning_rate": 1.6815629698388855e-05, "loss": 0.0402, "step": 7377 }, { "epoch": 7.094230769230769, "grad_norm": 2.395104169845581, "learning_rate": 1.6814718110949274e-05, "loss": 0.0331, "step": 7378 }, { "epoch": 7.095192307692308, "grad_norm": 0.8673678040504456, "learning_rate": 1.681380641776527e-05, "loss": 0.0048, "step": 7379 }, { "epoch": 7.096153846153846, "grad_norm": 1.2686989307403564, "learning_rate": 1.681289461885099e-05, "loss": 0.0061, "step": 7380 }, { "epoch": 7.0971153846153845, "grad_norm": 3.2491626739501953, "learning_rate": 1.6811982714220578e-05, "loss": 0.0298, "step": 7381 }, { "epoch": 7.098076923076923, "grad_norm": 0.3663393259048462, "learning_rate": 1.6811070703888188e-05, "loss": 0.0032, "step": 7382 }, { "epoch": 7.099038461538462, "grad_norm": 2.709033727645874, "learning_rate": 1.6810158587867973e-05, "loss": 0.0295, "step": 7383 }, { "epoch": 7.1, "grad_norm": 5.573629856109619, "learning_rate": 1.6809246366174082e-05, "loss": 0.1546, "step": 7384 }, { "epoch": 7.100961538461538, "grad_norm": 0.16183356940746307, "learning_rate": 1.680833403882067e-05, "loss": 0.0009, "step": 7385 }, { "epoch": 7.101923076923077, "grad_norm": 1.0700103044509888, "learning_rate": 1.6807421605821903e-05, "loss": 0.0083, "step": 7386 }, { "epoch": 7.102884615384616, "grad_norm": 2.682384729385376, "learning_rate": 1.6806509067191926e-05, "loss": 0.038, "step": 7387 }, { "epoch": 7.1038461538461535, "grad_norm": 1.9735064506530762, "learning_rate": 1.680559642294491e-05, "loss": 0.0148, "step": 7388 }, { "epoch": 7.104807692307692, "grad_norm": 3.355130195617676, "learning_rate": 1.680468367309501e-05, "loss": 0.0546, "step": 7389 }, { "epoch": 7.105769230769231, "grad_norm": 1.7489323616027832, "learning_rate": 1.680377081765639e-05, "loss": 0.0154, "step": 7390 }, { "epoch": 7.1067307692307695, "grad_norm": 2.9381721019744873, "learning_rate": 1.6802857856643214e-05, "loss": 0.0512, "step": 7391 }, { "epoch": 7.107692307692307, "grad_norm": 2.825444221496582, "learning_rate": 1.6801944790069656e-05, "loss": 0.0503, "step": 7392 }, { "epoch": 7.108653846153846, "grad_norm": 5.780921459197998, "learning_rate": 1.6801031617949873e-05, "loss": 0.0674, "step": 7393 }, { "epoch": 7.109615384615385, "grad_norm": 1.1391862630844116, "learning_rate": 1.680011834029804e-05, "loss": 0.0139, "step": 7394 }, { "epoch": 7.110576923076923, "grad_norm": 1.9211211204528809, "learning_rate": 1.6799204957128332e-05, "loss": 0.0252, "step": 7395 }, { "epoch": 7.111538461538461, "grad_norm": 1.0840226411819458, "learning_rate": 1.6798291468454916e-05, "loss": 0.0082, "step": 7396 }, { "epoch": 7.1125, "grad_norm": 2.0249998569488525, "learning_rate": 1.679737787429197e-05, "loss": 0.0138, "step": 7397 }, { "epoch": 7.1134615384615385, "grad_norm": 0.8189821243286133, "learning_rate": 1.679646417465367e-05, "loss": 0.0063, "step": 7398 }, { "epoch": 7.114423076923077, "grad_norm": 1.2684907913208008, "learning_rate": 1.679555036955419e-05, "loss": 0.0107, "step": 7399 }, { "epoch": 7.115384615384615, "grad_norm": 1.4760668277740479, "learning_rate": 1.6794636459007715e-05, "loss": 0.0113, "step": 7400 }, { "epoch": 7.116346153846154, "grad_norm": 1.3741166591644287, "learning_rate": 1.6793722443028425e-05, "loss": 0.0155, "step": 7401 }, { "epoch": 7.117307692307692, "grad_norm": 1.859445571899414, "learning_rate": 1.6792808321630504e-05, "loss": 0.0202, "step": 7402 }, { "epoch": 7.118269230769231, "grad_norm": 1.8407412767410278, "learning_rate": 1.679189409482813e-05, "loss": 0.0192, "step": 7403 }, { "epoch": 7.119230769230769, "grad_norm": 0.24221228063106537, "learning_rate": 1.6790979762635497e-05, "loss": 0.0026, "step": 7404 }, { "epoch": 7.1201923076923075, "grad_norm": 2.1050729751586914, "learning_rate": 1.6790065325066788e-05, "loss": 0.0087, "step": 7405 }, { "epoch": 7.121153846153846, "grad_norm": 2.8362507820129395, "learning_rate": 1.678915078213619e-05, "loss": 0.0259, "step": 7406 }, { "epoch": 7.122115384615385, "grad_norm": 5.891193389892578, "learning_rate": 1.6788236133857906e-05, "loss": 0.1594, "step": 7407 }, { "epoch": 7.123076923076923, "grad_norm": 2.2720980644226074, "learning_rate": 1.6787321380246116e-05, "loss": 0.0405, "step": 7408 }, { "epoch": 7.124038461538461, "grad_norm": 1.9033129215240479, "learning_rate": 1.678640652131502e-05, "loss": 0.0194, "step": 7409 }, { "epoch": 7.125, "grad_norm": 1.5726490020751953, "learning_rate": 1.678549155707881e-05, "loss": 0.0111, "step": 7410 }, { "epoch": 7.125961538461539, "grad_norm": 1.387114405632019, "learning_rate": 1.6784576487551686e-05, "loss": 0.0065, "step": 7411 }, { "epoch": 7.126923076923077, "grad_norm": 1.2191740274429321, "learning_rate": 1.678366131274785e-05, "loss": 0.0078, "step": 7412 }, { "epoch": 7.127884615384615, "grad_norm": 3.334386110305786, "learning_rate": 1.67827460326815e-05, "loss": 0.0444, "step": 7413 }, { "epoch": 7.128846153846154, "grad_norm": 1.5764131546020508, "learning_rate": 1.6781830647366835e-05, "loss": 0.0169, "step": 7414 }, { "epoch": 7.1298076923076925, "grad_norm": 1.9296863079071045, "learning_rate": 1.678091515681807e-05, "loss": 0.0105, "step": 7415 }, { "epoch": 7.130769230769231, "grad_norm": 3.034006118774414, "learning_rate": 1.6779999561049395e-05, "loss": 0.0259, "step": 7416 }, { "epoch": 7.131730769230769, "grad_norm": 4.355965614318848, "learning_rate": 1.6779083860075032e-05, "loss": 0.032, "step": 7417 }, { "epoch": 7.132692307692308, "grad_norm": 4.356139183044434, "learning_rate": 1.6778168053909182e-05, "loss": 0.0434, "step": 7418 }, { "epoch": 7.133653846153846, "grad_norm": 2.5547540187835693, "learning_rate": 1.677725214256606e-05, "loss": 0.0219, "step": 7419 }, { "epoch": 7.134615384615385, "grad_norm": 5.435250759124756, "learning_rate": 1.677633612605987e-05, "loss": 0.0702, "step": 7420 }, { "epoch": 7.135576923076923, "grad_norm": 2.7386693954467773, "learning_rate": 1.6775420004404836e-05, "loss": 0.0295, "step": 7421 }, { "epoch": 7.1365384615384615, "grad_norm": 3.1898062229156494, "learning_rate": 1.6774503777615167e-05, "loss": 0.0686, "step": 7422 }, { "epoch": 7.1375, "grad_norm": 4.46256160736084, "learning_rate": 1.6773587445705087e-05, "loss": 0.1146, "step": 7423 }, { "epoch": 7.138461538461539, "grad_norm": 2.905296802520752, "learning_rate": 1.6772671008688807e-05, "loss": 0.0457, "step": 7424 }, { "epoch": 7.139423076923077, "grad_norm": 4.163933277130127, "learning_rate": 1.677175446658055e-05, "loss": 0.0728, "step": 7425 }, { "epoch": 7.140384615384615, "grad_norm": 3.768616199493408, "learning_rate": 1.6770837819394537e-05, "loss": 0.1192, "step": 7426 }, { "epoch": 7.141346153846154, "grad_norm": 0.3736225664615631, "learning_rate": 1.6769921067144994e-05, "loss": 0.0033, "step": 7427 }, { "epoch": 7.142307692307693, "grad_norm": 3.147514820098877, "learning_rate": 1.6769004209846147e-05, "loss": 0.0514, "step": 7428 }, { "epoch": 7.1432692307692305, "grad_norm": 0.6779277920722961, "learning_rate": 1.676808724751222e-05, "loss": 0.0042, "step": 7429 }, { "epoch": 7.144230769230769, "grad_norm": 0.5313619375228882, "learning_rate": 1.6767170180157442e-05, "loss": 0.0051, "step": 7430 }, { "epoch": 7.145192307692308, "grad_norm": 2.6131961345672607, "learning_rate": 1.676625300779605e-05, "loss": 0.0409, "step": 7431 }, { "epoch": 7.1461538461538465, "grad_norm": 2.3298377990722656, "learning_rate": 1.6765335730442266e-05, "loss": 0.0755, "step": 7432 }, { "epoch": 7.147115384615384, "grad_norm": 3.985602855682373, "learning_rate": 1.6764418348110325e-05, "loss": 0.1205, "step": 7433 }, { "epoch": 7.148076923076923, "grad_norm": 4.000908851623535, "learning_rate": 1.6763500860814468e-05, "loss": 0.0627, "step": 7434 }, { "epoch": 7.149038461538462, "grad_norm": 0.9414312243461609, "learning_rate": 1.6762583268568928e-05, "loss": 0.0067, "step": 7435 }, { "epoch": 7.15, "grad_norm": 0.21612459421157837, "learning_rate": 1.676166557138794e-05, "loss": 0.0029, "step": 7436 }, { "epoch": 7.150961538461538, "grad_norm": 0.7790057063102722, "learning_rate": 1.6760747769285754e-05, "loss": 0.008, "step": 7437 }, { "epoch": 7.151923076923077, "grad_norm": 0.3517242670059204, "learning_rate": 1.67598298622766e-05, "loss": 0.0034, "step": 7438 }, { "epoch": 7.1528846153846155, "grad_norm": 2.631847620010376, "learning_rate": 1.675891185037473e-05, "loss": 0.0387, "step": 7439 }, { "epoch": 7.153846153846154, "grad_norm": 2.0665016174316406, "learning_rate": 1.6757993733594384e-05, "loss": 0.0933, "step": 7440 }, { "epoch": 7.154807692307692, "grad_norm": 2.3493335247039795, "learning_rate": 1.6757075511949804e-05, "loss": 0.0108, "step": 7441 }, { "epoch": 7.155769230769231, "grad_norm": 3.6934263706207275, "learning_rate": 1.675615718545525e-05, "loss": 0.0788, "step": 7442 }, { "epoch": 7.156730769230769, "grad_norm": 1.365593671798706, "learning_rate": 1.6755238754124965e-05, "loss": 0.0118, "step": 7443 }, { "epoch": 7.157692307692308, "grad_norm": 4.357581615447998, "learning_rate": 1.67543202179732e-05, "loss": 0.0551, "step": 7444 }, { "epoch": 7.158653846153846, "grad_norm": 3.638126850128174, "learning_rate": 1.675340157701421e-05, "loss": 0.0714, "step": 7445 }, { "epoch": 7.1596153846153845, "grad_norm": 2.718214750289917, "learning_rate": 1.6752482831262248e-05, "loss": 0.0278, "step": 7446 }, { "epoch": 7.160576923076923, "grad_norm": 2.9135546684265137, "learning_rate": 1.675156398073157e-05, "loss": 0.0855, "step": 7447 }, { "epoch": 7.161538461538462, "grad_norm": 1.3373295068740845, "learning_rate": 1.6750645025436435e-05, "loss": 0.0101, "step": 7448 }, { "epoch": 7.1625, "grad_norm": 4.2607502937316895, "learning_rate": 1.6749725965391102e-05, "loss": 0.0775, "step": 7449 }, { "epoch": 7.163461538461538, "grad_norm": 2.5162956714630127, "learning_rate": 1.6748806800609834e-05, "loss": 0.0218, "step": 7450 }, { "epoch": 7.164423076923077, "grad_norm": 2.782700777053833, "learning_rate": 1.674788753110689e-05, "loss": 0.0279, "step": 7451 }, { "epoch": 7.165384615384616, "grad_norm": 3.111765146255493, "learning_rate": 1.6746968156896536e-05, "loss": 0.0889, "step": 7452 }, { "epoch": 7.1663461538461535, "grad_norm": 0.30841121077537537, "learning_rate": 1.674604867799304e-05, "loss": 0.0033, "step": 7453 }, { "epoch": 7.167307692307692, "grad_norm": 3.5375843048095703, "learning_rate": 1.6745129094410664e-05, "loss": 0.052, "step": 7454 }, { "epoch": 7.168269230769231, "grad_norm": 1.337774395942688, "learning_rate": 1.674420940616369e-05, "loss": 0.0079, "step": 7455 }, { "epoch": 7.1692307692307695, "grad_norm": 2.338129758834839, "learning_rate": 1.674328961326637e-05, "loss": 0.0411, "step": 7456 }, { "epoch": 7.170192307692307, "grad_norm": 4.47578239440918, "learning_rate": 1.6742369715732994e-05, "loss": 0.0745, "step": 7457 }, { "epoch": 7.171153846153846, "grad_norm": 0.1513325273990631, "learning_rate": 1.6741449713577824e-05, "loss": 0.0017, "step": 7458 }, { "epoch": 7.172115384615385, "grad_norm": 2.4581146240234375, "learning_rate": 1.674052960681514e-05, "loss": 0.0282, "step": 7459 }, { "epoch": 7.173076923076923, "grad_norm": 1.9218878746032715, "learning_rate": 1.673960939545922e-05, "loss": 0.0125, "step": 7460 }, { "epoch": 7.174038461538461, "grad_norm": 5.317139148712158, "learning_rate": 1.673868907952435e-05, "loss": 0.1541, "step": 7461 }, { "epoch": 7.175, "grad_norm": 4.590349197387695, "learning_rate": 1.6737768659024793e-05, "loss": 0.1044, "step": 7462 }, { "epoch": 7.1759615384615385, "grad_norm": 2.759895086288452, "learning_rate": 1.673684813397485e-05, "loss": 0.027, "step": 7463 }, { "epoch": 7.176923076923077, "grad_norm": 1.6219242811203003, "learning_rate": 1.6735927504388788e-05, "loss": 0.0228, "step": 7464 }, { "epoch": 7.177884615384615, "grad_norm": 1.5878491401672363, "learning_rate": 1.6735006770280908e-05, "loss": 0.014, "step": 7465 }, { "epoch": 7.178846153846154, "grad_norm": 2.597033977508545, "learning_rate": 1.6734085931665486e-05, "loss": 0.0309, "step": 7466 }, { "epoch": 7.179807692307692, "grad_norm": 1.8487069606781006, "learning_rate": 1.6733164988556817e-05, "loss": 0.0181, "step": 7467 }, { "epoch": 7.180769230769231, "grad_norm": 1.9634184837341309, "learning_rate": 1.6732243940969185e-05, "loss": 0.0116, "step": 7468 }, { "epoch": 7.181730769230769, "grad_norm": 1.889106035232544, "learning_rate": 1.6731322788916892e-05, "loss": 0.0203, "step": 7469 }, { "epoch": 7.1826923076923075, "grad_norm": 0.36106133460998535, "learning_rate": 1.6730401532414222e-05, "loss": 0.0024, "step": 7470 }, { "epoch": 7.183653846153846, "grad_norm": 0.456906259059906, "learning_rate": 1.6729480171475472e-05, "loss": 0.0037, "step": 7471 }, { "epoch": 7.184615384615385, "grad_norm": 4.464239597320557, "learning_rate": 1.6728558706114945e-05, "loss": 0.0357, "step": 7472 }, { "epoch": 7.185576923076923, "grad_norm": 2.4992854595184326, "learning_rate": 1.672763713634693e-05, "loss": 0.0399, "step": 7473 }, { "epoch": 7.186538461538461, "grad_norm": 2.9017117023468018, "learning_rate": 1.6726715462185738e-05, "loss": 0.0268, "step": 7474 }, { "epoch": 7.1875, "grad_norm": 1.6688615083694458, "learning_rate": 1.6725793683645658e-05, "loss": 0.0114, "step": 7475 }, { "epoch": 7.188461538461539, "grad_norm": 1.6494370698928833, "learning_rate": 1.6724871800741005e-05, "loss": 0.0156, "step": 7476 }, { "epoch": 7.189423076923077, "grad_norm": 2.462251663208008, "learning_rate": 1.6723949813486077e-05, "loss": 0.0108, "step": 7477 }, { "epoch": 7.190384615384615, "grad_norm": 1.9474456310272217, "learning_rate": 1.6723027721895182e-05, "loss": 0.027, "step": 7478 }, { "epoch": 7.191346153846154, "grad_norm": 2.2001302242279053, "learning_rate": 1.672210552598263e-05, "loss": 0.0466, "step": 7479 }, { "epoch": 7.1923076923076925, "grad_norm": 3.794550895690918, "learning_rate": 1.6721183225762726e-05, "loss": 0.0556, "step": 7480 }, { "epoch": 7.193269230769231, "grad_norm": 3.642944097518921, "learning_rate": 1.672026082124979e-05, "loss": 0.0324, "step": 7481 }, { "epoch": 7.194230769230769, "grad_norm": 2.242870569229126, "learning_rate": 1.6719338312458123e-05, "loss": 0.0312, "step": 7482 }, { "epoch": 7.195192307692308, "grad_norm": 3.6238317489624023, "learning_rate": 1.6718415699402054e-05, "loss": 0.0483, "step": 7483 }, { "epoch": 7.196153846153846, "grad_norm": 2.5756478309631348, "learning_rate": 1.671749298209589e-05, "loss": 0.0444, "step": 7484 }, { "epoch": 7.197115384615385, "grad_norm": 3.471151351928711, "learning_rate": 1.6716570160553946e-05, "loss": 0.0249, "step": 7485 }, { "epoch": 7.198076923076923, "grad_norm": 1.4817882776260376, "learning_rate": 1.671564723479055e-05, "loss": 0.012, "step": 7486 }, { "epoch": 7.1990384615384615, "grad_norm": 0.8295800685882568, "learning_rate": 1.6714724204820017e-05, "loss": 0.0115, "step": 7487 }, { "epoch": 7.2, "grad_norm": 2.3051702976226807, "learning_rate": 1.671380107065667e-05, "loss": 0.043, "step": 7488 }, { "epoch": 7.200961538461539, "grad_norm": 1.9986227750778198, "learning_rate": 1.671287783231484e-05, "loss": 0.0263, "step": 7489 }, { "epoch": 7.201923076923077, "grad_norm": 1.2152074575424194, "learning_rate": 1.6711954489808846e-05, "loss": 0.011, "step": 7490 }, { "epoch": 7.202884615384615, "grad_norm": 2.6497645378112793, "learning_rate": 1.671103104315302e-05, "loss": 0.0368, "step": 7491 }, { "epoch": 7.203846153846154, "grad_norm": 2.940943717956543, "learning_rate": 1.6710107492361686e-05, "loss": 0.0436, "step": 7492 }, { "epoch": 7.204807692307693, "grad_norm": 0.267861008644104, "learning_rate": 1.6709183837449175e-05, "loss": 0.0014, "step": 7493 }, { "epoch": 7.2057692307692305, "grad_norm": 1.7530388832092285, "learning_rate": 1.6708260078429828e-05, "loss": 0.0157, "step": 7494 }, { "epoch": 7.206730769230769, "grad_norm": 0.8195852637290955, "learning_rate": 1.6707336215317968e-05, "loss": 0.0065, "step": 7495 }, { "epoch": 7.207692307692308, "grad_norm": 2.2678909301757812, "learning_rate": 1.6706412248127937e-05, "loss": 0.0251, "step": 7496 }, { "epoch": 7.2086538461538465, "grad_norm": 4.2924017906188965, "learning_rate": 1.6705488176874074e-05, "loss": 0.0397, "step": 7497 }, { "epoch": 7.209615384615384, "grad_norm": 1.5221999883651733, "learning_rate": 1.670456400157071e-05, "loss": 0.0122, "step": 7498 }, { "epoch": 7.210576923076923, "grad_norm": 0.2905971109867096, "learning_rate": 1.6703639722232194e-05, "loss": 0.0017, "step": 7499 }, { "epoch": 7.211538461538462, "grad_norm": 2.588233709335327, "learning_rate": 1.6702715338872867e-05, "loss": 0.0352, "step": 7500 }, { "epoch": 7.2125, "grad_norm": 1.4471402168273926, "learning_rate": 1.6701790851507066e-05, "loss": 0.0072, "step": 7501 }, { "epoch": 7.213461538461538, "grad_norm": 3.0489742755889893, "learning_rate": 1.670086626014914e-05, "loss": 0.0593, "step": 7502 }, { "epoch": 7.214423076923077, "grad_norm": 1.219924807548523, "learning_rate": 1.669994156481344e-05, "loss": 0.0053, "step": 7503 }, { "epoch": 7.2153846153846155, "grad_norm": 3.008413553237915, "learning_rate": 1.6699016765514308e-05, "loss": 0.0285, "step": 7504 }, { "epoch": 7.216346153846154, "grad_norm": 1.787843108177185, "learning_rate": 1.6698091862266097e-05, "loss": 0.0497, "step": 7505 }, { "epoch": 7.217307692307692, "grad_norm": 2.5809319019317627, "learning_rate": 1.669716685508316e-05, "loss": 0.1065, "step": 7506 }, { "epoch": 7.218269230769231, "grad_norm": 0.3938421308994293, "learning_rate": 1.669624174397985e-05, "loss": 0.0025, "step": 7507 }, { "epoch": 7.219230769230769, "grad_norm": 1.5127172470092773, "learning_rate": 1.6695316528970517e-05, "loss": 0.0216, "step": 7508 }, { "epoch": 7.220192307692308, "grad_norm": 1.6250879764556885, "learning_rate": 1.6694391210069527e-05, "loss": 0.0181, "step": 7509 }, { "epoch": 7.221153846153846, "grad_norm": 2.152467966079712, "learning_rate": 1.6693465787291232e-05, "loss": 0.0164, "step": 7510 }, { "epoch": 7.2221153846153845, "grad_norm": 2.9240493774414062, "learning_rate": 1.669254026064999e-05, "loss": 0.0762, "step": 7511 }, { "epoch": 7.223076923076923, "grad_norm": 2.4277334213256836, "learning_rate": 1.6691614630160173e-05, "loss": 0.0428, "step": 7512 }, { "epoch": 7.224038461538462, "grad_norm": 2.7650511264801025, "learning_rate": 1.669068889583613e-05, "loss": 0.0635, "step": 7513 }, { "epoch": 7.225, "grad_norm": 4.010443210601807, "learning_rate": 1.6689763057692237e-05, "loss": 0.0526, "step": 7514 }, { "epoch": 7.225961538461538, "grad_norm": 0.5231905579566956, "learning_rate": 1.668883711574285e-05, "loss": 0.0044, "step": 7515 }, { "epoch": 7.226923076923077, "grad_norm": 2.2153916358947754, "learning_rate": 1.6687911070002343e-05, "loss": 0.053, "step": 7516 }, { "epoch": 7.227884615384616, "grad_norm": 2.2103092670440674, "learning_rate": 1.668698492048509e-05, "loss": 0.0245, "step": 7517 }, { "epoch": 7.2288461538461535, "grad_norm": 0.8192870616912842, "learning_rate": 1.6686058667205455e-05, "loss": 0.0056, "step": 7518 }, { "epoch": 7.229807692307692, "grad_norm": 0.3607310950756073, "learning_rate": 1.6685132310177812e-05, "loss": 0.0021, "step": 7519 }, { "epoch": 7.230769230769231, "grad_norm": 0.621950626373291, "learning_rate": 1.6684205849416538e-05, "loss": 0.0041, "step": 7520 }, { "epoch": 7.2317307692307695, "grad_norm": 2.8667612075805664, "learning_rate": 1.6683279284936004e-05, "loss": 0.0849, "step": 7521 }, { "epoch": 7.232692307692307, "grad_norm": 1.582812786102295, "learning_rate": 1.668235261675059e-05, "loss": 0.011, "step": 7522 }, { "epoch": 7.233653846153846, "grad_norm": 0.8097621202468872, "learning_rate": 1.6681425844874678e-05, "loss": 0.0068, "step": 7523 }, { "epoch": 7.234615384615385, "grad_norm": 4.31506872177124, "learning_rate": 1.668049896932265e-05, "loss": 0.1121, "step": 7524 }, { "epoch": 7.235576923076923, "grad_norm": 0.46563437581062317, "learning_rate": 1.6679571990108877e-05, "loss": 0.0044, "step": 7525 }, { "epoch": 7.236538461538461, "grad_norm": 1.6237190961837769, "learning_rate": 1.6678644907247757e-05, "loss": 0.0152, "step": 7526 }, { "epoch": 7.2375, "grad_norm": 1.3491551876068115, "learning_rate": 1.667771772075367e-05, "loss": 0.0049, "step": 7527 }, { "epoch": 7.2384615384615385, "grad_norm": 2.252009868621826, "learning_rate": 1.6676790430641002e-05, "loss": 0.045, "step": 7528 }, { "epoch": 7.239423076923077, "grad_norm": 3.098637342453003, "learning_rate": 1.667586303692414e-05, "loss": 0.0341, "step": 7529 }, { "epoch": 7.240384615384615, "grad_norm": 1.884629249572754, "learning_rate": 1.667493553961748e-05, "loss": 0.0158, "step": 7530 }, { "epoch": 7.241346153846154, "grad_norm": 1.9946279525756836, "learning_rate": 1.667400793873541e-05, "loss": 0.021, "step": 7531 }, { "epoch": 7.242307692307692, "grad_norm": 1.64272141456604, "learning_rate": 1.6673080234292325e-05, "loss": 0.015, "step": 7532 }, { "epoch": 7.243269230769231, "grad_norm": 1.0816000699996948, "learning_rate": 1.667215242630262e-05, "loss": 0.0067, "step": 7533 }, { "epoch": 7.244230769230769, "grad_norm": 0.6669939756393433, "learning_rate": 1.6671224514780692e-05, "loss": 0.007, "step": 7534 }, { "epoch": 7.2451923076923075, "grad_norm": 0.09436783194541931, "learning_rate": 1.667029649974094e-05, "loss": 0.0009, "step": 7535 }, { "epoch": 7.246153846153846, "grad_norm": 4.171426773071289, "learning_rate": 1.666936838119776e-05, "loss": 0.0609, "step": 7536 }, { "epoch": 7.247115384615385, "grad_norm": 1.8343074321746826, "learning_rate": 1.666844015916556e-05, "loss": 0.022, "step": 7537 }, { "epoch": 7.248076923076923, "grad_norm": 1.4345818758010864, "learning_rate": 1.666751183365874e-05, "loss": 0.0219, "step": 7538 }, { "epoch": 7.249038461538461, "grad_norm": 1.3280181884765625, "learning_rate": 1.6666583404691706e-05, "loss": 0.0716, "step": 7539 }, { "epoch": 7.25, "grad_norm": 5.07439661026001, "learning_rate": 1.6665654872278862e-05, "loss": 0.0577, "step": 7540 }, { "epoch": 7.250961538461539, "grad_norm": 3.8864736557006836, "learning_rate": 1.666472623643462e-05, "loss": 0.067, "step": 7541 }, { "epoch": 7.251923076923077, "grad_norm": 3.097949266433716, "learning_rate": 1.6663797497173388e-05, "loss": 0.1359, "step": 7542 }, { "epoch": 7.252884615384615, "grad_norm": 2.835500717163086, "learning_rate": 1.6662868654509573e-05, "loss": 0.0658, "step": 7543 }, { "epoch": 7.253846153846154, "grad_norm": 7.152682304382324, "learning_rate": 1.6661939708457592e-05, "loss": 0.1448, "step": 7544 }, { "epoch": 7.2548076923076925, "grad_norm": 1.4803165197372437, "learning_rate": 1.6661010659031863e-05, "loss": 0.0306, "step": 7545 }, { "epoch": 7.25576923076923, "grad_norm": 2.0587542057037354, "learning_rate": 1.6660081506246796e-05, "loss": 0.0149, "step": 7546 }, { "epoch": 7.256730769230769, "grad_norm": 4.352960109710693, "learning_rate": 1.665915225011681e-05, "loss": 0.061, "step": 7547 }, { "epoch": 7.257692307692308, "grad_norm": 1.17228102684021, "learning_rate": 1.665822289065633e-05, "loss": 0.005, "step": 7548 }, { "epoch": 7.258653846153846, "grad_norm": 3.0668694972991943, "learning_rate": 1.665729342787977e-05, "loss": 0.0768, "step": 7549 }, { "epoch": 7.259615384615385, "grad_norm": 0.507124125957489, "learning_rate": 1.6656363861801554e-05, "loss": 0.0041, "step": 7550 }, { "epoch": 7.260576923076923, "grad_norm": 1.8725028038024902, "learning_rate": 1.6655434192436107e-05, "loss": 0.0351, "step": 7551 }, { "epoch": 7.2615384615384615, "grad_norm": 1.1391528844833374, "learning_rate": 1.6654504419797855e-05, "loss": 0.0071, "step": 7552 }, { "epoch": 7.2625, "grad_norm": 1.4714281558990479, "learning_rate": 1.6653574543901226e-05, "loss": 0.0284, "step": 7553 }, { "epoch": 7.263461538461539, "grad_norm": 2.4753241539001465, "learning_rate": 1.6652644564760648e-05, "loss": 0.0404, "step": 7554 }, { "epoch": 7.264423076923077, "grad_norm": 0.9986577033996582, "learning_rate": 1.6651714482390552e-05, "loss": 0.0122, "step": 7555 }, { "epoch": 7.265384615384615, "grad_norm": 1.2907755374908447, "learning_rate": 1.665078429680537e-05, "loss": 0.0118, "step": 7556 }, { "epoch": 7.266346153846154, "grad_norm": 3.2046327590942383, "learning_rate": 1.6649854008019534e-05, "loss": 0.0697, "step": 7557 }, { "epoch": 7.267307692307693, "grad_norm": 2.3266518115997314, "learning_rate": 1.664892361604748e-05, "loss": 0.0239, "step": 7558 }, { "epoch": 7.2682692307692305, "grad_norm": 0.5293348431587219, "learning_rate": 1.6647993120903648e-05, "loss": 0.0042, "step": 7559 }, { "epoch": 7.269230769230769, "grad_norm": 2.578521251678467, "learning_rate": 1.6647062522602474e-05, "loss": 0.0273, "step": 7560 }, { "epoch": 7.270192307692308, "grad_norm": 2.9948463439941406, "learning_rate": 1.66461318211584e-05, "loss": 0.0632, "step": 7561 }, { "epoch": 7.2711538461538465, "grad_norm": 1.8377891778945923, "learning_rate": 1.664520101658586e-05, "loss": 0.0352, "step": 7562 }, { "epoch": 7.272115384615384, "grad_norm": 0.5876540541648865, "learning_rate": 1.6644270108899313e-05, "loss": 0.0046, "step": 7563 }, { "epoch": 7.273076923076923, "grad_norm": 2.4862635135650635, "learning_rate": 1.664333909811319e-05, "loss": 0.0508, "step": 7564 }, { "epoch": 7.274038461538462, "grad_norm": 1.7939099073410034, "learning_rate": 1.6642407984241938e-05, "loss": 0.0183, "step": 7565 }, { "epoch": 7.275, "grad_norm": 1.448204755783081, "learning_rate": 1.6641476767300012e-05, "loss": 0.0112, "step": 7566 }, { "epoch": 7.275961538461538, "grad_norm": 2.2499911785125732, "learning_rate": 1.664054544730186e-05, "loss": 0.0246, "step": 7567 }, { "epoch": 7.276923076923077, "grad_norm": 3.5714778900146484, "learning_rate": 1.6639614024261935e-05, "loss": 0.0288, "step": 7568 }, { "epoch": 7.2778846153846155, "grad_norm": 2.490370512008667, "learning_rate": 1.6638682498194684e-05, "loss": 0.0228, "step": 7569 }, { "epoch": 7.278846153846154, "grad_norm": 1.4024004936218262, "learning_rate": 1.663775086911457e-05, "loss": 0.0139, "step": 7570 }, { "epoch": 7.279807692307692, "grad_norm": 0.8937489986419678, "learning_rate": 1.6636819137036038e-05, "loss": 0.01, "step": 7571 }, { "epoch": 7.280769230769231, "grad_norm": 2.459829807281494, "learning_rate": 1.6635887301973553e-05, "loss": 0.0714, "step": 7572 }, { "epoch": 7.281730769230769, "grad_norm": 2.0657246112823486, "learning_rate": 1.6634955363941573e-05, "loss": 0.0731, "step": 7573 }, { "epoch": 7.282692307692308, "grad_norm": 3.4768922328948975, "learning_rate": 1.6634023322954564e-05, "loss": 0.0312, "step": 7574 }, { "epoch": 7.283653846153846, "grad_norm": 1.7772833108901978, "learning_rate": 1.663309117902698e-05, "loss": 0.0143, "step": 7575 }, { "epoch": 7.2846153846153845, "grad_norm": 2.2938072681427, "learning_rate": 1.663215893217329e-05, "loss": 0.0189, "step": 7576 }, { "epoch": 7.285576923076923, "grad_norm": 2.5593090057373047, "learning_rate": 1.6631226582407954e-05, "loss": 0.0341, "step": 7577 }, { "epoch": 7.286538461538462, "grad_norm": 1.235360860824585, "learning_rate": 1.6630294129745447e-05, "loss": 0.0116, "step": 7578 }, { "epoch": 7.2875, "grad_norm": 3.6255433559417725, "learning_rate": 1.6629361574200237e-05, "loss": 0.0625, "step": 7579 }, { "epoch": 7.288461538461538, "grad_norm": 3.062239170074463, "learning_rate": 1.6628428915786786e-05, "loss": 0.047, "step": 7580 }, { "epoch": 7.289423076923077, "grad_norm": 4.02795934677124, "learning_rate": 1.6627496154519577e-05, "loss": 0.0365, "step": 7581 }, { "epoch": 7.290384615384616, "grad_norm": 2.4746124744415283, "learning_rate": 1.662656329041308e-05, "loss": 0.0457, "step": 7582 }, { "epoch": 7.2913461538461535, "grad_norm": 1.13642418384552, "learning_rate": 1.6625630323481768e-05, "loss": 0.0127, "step": 7583 }, { "epoch": 7.292307692307692, "grad_norm": 1.2261565923690796, "learning_rate": 1.662469725374012e-05, "loss": 0.0093, "step": 7584 }, { "epoch": 7.293269230769231, "grad_norm": 2.1867644786834717, "learning_rate": 1.6623764081202612e-05, "loss": 0.0359, "step": 7585 }, { "epoch": 7.2942307692307695, "grad_norm": 3.68183970451355, "learning_rate": 1.662283080588373e-05, "loss": 0.0269, "step": 7586 }, { "epoch": 7.295192307692307, "grad_norm": 2.2027595043182373, "learning_rate": 1.6621897427797947e-05, "loss": 0.0176, "step": 7587 }, { "epoch": 7.296153846153846, "grad_norm": 6.0756635665893555, "learning_rate": 1.6620963946959753e-05, "loss": 0.104, "step": 7588 }, { "epoch": 7.297115384615385, "grad_norm": 3.4373159408569336, "learning_rate": 1.662003036338363e-05, "loss": 0.0456, "step": 7589 }, { "epoch": 7.298076923076923, "grad_norm": 3.3125600814819336, "learning_rate": 1.661909667708407e-05, "loss": 0.0462, "step": 7590 }, { "epoch": 7.299038461538461, "grad_norm": 1.7709425687789917, "learning_rate": 1.661816288807555e-05, "loss": 0.0605, "step": 7591 }, { "epoch": 7.3, "grad_norm": 2.250044584274292, "learning_rate": 1.661722899637257e-05, "loss": 0.0319, "step": 7592 }, { "epoch": 7.3009615384615385, "grad_norm": 1.8360384702682495, "learning_rate": 1.6616295001989614e-05, "loss": 0.0168, "step": 7593 }, { "epoch": 7.301923076923077, "grad_norm": 3.395515203475952, "learning_rate": 1.6615360904941182e-05, "loss": 0.0462, "step": 7594 }, { "epoch": 7.302884615384615, "grad_norm": 1.3518280982971191, "learning_rate": 1.6614426705241766e-05, "loss": 0.0126, "step": 7595 }, { "epoch": 7.303846153846154, "grad_norm": 6.055699348449707, "learning_rate": 1.6613492402905852e-05, "loss": 0.0378, "step": 7596 }, { "epoch": 7.304807692307692, "grad_norm": 2.81855845451355, "learning_rate": 1.6612557997947954e-05, "loss": 0.0945, "step": 7597 }, { "epoch": 7.305769230769231, "grad_norm": 2.194190740585327, "learning_rate": 1.661162349038256e-05, "loss": 0.0349, "step": 7598 }, { "epoch": 7.30673076923077, "grad_norm": 2.426734447479248, "learning_rate": 1.6610688880224178e-05, "loss": 0.0217, "step": 7599 }, { "epoch": 7.3076923076923075, "grad_norm": 1.0793029069900513, "learning_rate": 1.6609754167487303e-05, "loss": 0.0079, "step": 7600 }, { "epoch": 7.308653846153846, "grad_norm": 3.9860708713531494, "learning_rate": 1.660881935218644e-05, "loss": 0.0484, "step": 7601 }, { "epoch": 7.309615384615385, "grad_norm": 1.9918078184127808, "learning_rate": 1.6607884434336107e-05, "loss": 0.0211, "step": 7602 }, { "epoch": 7.310576923076923, "grad_norm": 1.5853302478790283, "learning_rate": 1.660694941395079e-05, "loss": 0.0174, "step": 7603 }, { "epoch": 7.311538461538461, "grad_norm": 4.706449508666992, "learning_rate": 1.6606014291045018e-05, "loss": 0.0912, "step": 7604 }, { "epoch": 7.3125, "grad_norm": 3.361886978149414, "learning_rate": 1.660507906563329e-05, "loss": 0.1113, "step": 7605 }, { "epoch": 7.313461538461539, "grad_norm": 0.3027788996696472, "learning_rate": 1.6604143737730122e-05, "loss": 0.0028, "step": 7606 }, { "epoch": 7.314423076923077, "grad_norm": 2.7949841022491455, "learning_rate": 1.6603208307350024e-05, "loss": 0.0243, "step": 7607 }, { "epoch": 7.315384615384615, "grad_norm": 3.416271448135376, "learning_rate": 1.660227277450751e-05, "loss": 0.0327, "step": 7608 }, { "epoch": 7.316346153846154, "grad_norm": 1.8509892225265503, "learning_rate": 1.6601337139217103e-05, "loss": 0.017, "step": 7609 }, { "epoch": 7.3173076923076925, "grad_norm": 1.6450297832489014, "learning_rate": 1.6600401401493323e-05, "loss": 0.0366, "step": 7610 }, { "epoch": 7.31826923076923, "grad_norm": 3.7959377765655518, "learning_rate": 1.659946556135068e-05, "loss": 0.0557, "step": 7611 }, { "epoch": 7.319230769230769, "grad_norm": 3.302722454071045, "learning_rate": 1.65985296188037e-05, "loss": 0.0828, "step": 7612 }, { "epoch": 7.320192307692308, "grad_norm": 5.023414134979248, "learning_rate": 1.6597593573866908e-05, "loss": 0.108, "step": 7613 }, { "epoch": 7.321153846153846, "grad_norm": 3.992828130722046, "learning_rate": 1.659665742655483e-05, "loss": 0.1509, "step": 7614 }, { "epoch": 7.322115384615385, "grad_norm": 2.5738892555236816, "learning_rate": 1.6595721176881986e-05, "loss": 0.0357, "step": 7615 }, { "epoch": 7.323076923076923, "grad_norm": 1.1384837627410889, "learning_rate": 1.6594784824862912e-05, "loss": 0.013, "step": 7616 }, { "epoch": 7.3240384615384615, "grad_norm": 1.1930820941925049, "learning_rate": 1.6593848370512128e-05, "loss": 0.0133, "step": 7617 }, { "epoch": 7.325, "grad_norm": 2.764697790145874, "learning_rate": 1.659291181384417e-05, "loss": 0.0249, "step": 7618 }, { "epoch": 7.325961538461539, "grad_norm": 1.2545833587646484, "learning_rate": 1.6591975154873576e-05, "loss": 0.0099, "step": 7619 }, { "epoch": 7.326923076923077, "grad_norm": 0.16659802198410034, "learning_rate": 1.659103839361487e-05, "loss": 0.0018, "step": 7620 }, { "epoch": 7.327884615384615, "grad_norm": 0.9128257632255554, "learning_rate": 1.6590101530082595e-05, "loss": 0.0045, "step": 7621 }, { "epoch": 7.328846153846154, "grad_norm": 3.0345661640167236, "learning_rate": 1.6589164564291285e-05, "loss": 0.0511, "step": 7622 }, { "epoch": 7.329807692307693, "grad_norm": 2.415734052658081, "learning_rate": 1.6588227496255477e-05, "loss": 0.0448, "step": 7623 }, { "epoch": 7.3307692307692305, "grad_norm": 0.5429709553718567, "learning_rate": 1.658729032598972e-05, "loss": 0.0065, "step": 7624 }, { "epoch": 7.331730769230769, "grad_norm": 0.07039512693881989, "learning_rate": 1.6586353053508548e-05, "loss": 0.001, "step": 7625 }, { "epoch": 7.332692307692308, "grad_norm": 4.497085094451904, "learning_rate": 1.658541567882651e-05, "loss": 0.0958, "step": 7626 }, { "epoch": 7.3336538461538465, "grad_norm": 1.7011116743087769, "learning_rate": 1.6584478201958144e-05, "loss": 0.0187, "step": 7627 }, { "epoch": 7.334615384615384, "grad_norm": 4.842002868652344, "learning_rate": 1.6583540622918005e-05, "loss": 0.1394, "step": 7628 }, { "epoch": 7.335576923076923, "grad_norm": 4.188808917999268, "learning_rate": 1.6582602941720634e-05, "loss": 0.0782, "step": 7629 }, { "epoch": 7.336538461538462, "grad_norm": 2.3156309127807617, "learning_rate": 1.6581665158380587e-05, "loss": 0.0703, "step": 7630 }, { "epoch": 7.3375, "grad_norm": 2.8671817779541016, "learning_rate": 1.6580727272912412e-05, "loss": 0.0388, "step": 7631 }, { "epoch": 7.338461538461538, "grad_norm": 0.5381327271461487, "learning_rate": 1.6579789285330668e-05, "loss": 0.0055, "step": 7632 }, { "epoch": 7.339423076923077, "grad_norm": 3.477771520614624, "learning_rate": 1.6578851195649907e-05, "loss": 0.1159, "step": 7633 }, { "epoch": 7.3403846153846155, "grad_norm": 0.8379768133163452, "learning_rate": 1.657791300388468e-05, "loss": 0.0108, "step": 7634 }, { "epoch": 7.341346153846154, "grad_norm": 1.2576900720596313, "learning_rate": 1.657697471004955e-05, "loss": 0.0106, "step": 7635 }, { "epoch": 7.342307692307692, "grad_norm": 3.453507661819458, "learning_rate": 1.6576036314159076e-05, "loss": 0.0652, "step": 7636 }, { "epoch": 7.343269230769231, "grad_norm": 2.3391380310058594, "learning_rate": 1.6575097816227818e-05, "loss": 0.0109, "step": 7637 }, { "epoch": 7.344230769230769, "grad_norm": 1.4485427141189575, "learning_rate": 1.657415921627034e-05, "loss": 0.0148, "step": 7638 }, { "epoch": 7.345192307692308, "grad_norm": 3.9182605743408203, "learning_rate": 1.6573220514301208e-05, "loss": 0.1, "step": 7639 }, { "epoch": 7.346153846153846, "grad_norm": 2.6040687561035156, "learning_rate": 1.6572281710334984e-05, "loss": 0.0278, "step": 7640 }, { "epoch": 7.3471153846153845, "grad_norm": 1.998773455619812, "learning_rate": 1.6571342804386238e-05, "loss": 0.048, "step": 7641 }, { "epoch": 7.348076923076923, "grad_norm": 2.7782084941864014, "learning_rate": 1.6570403796469537e-05, "loss": 0.058, "step": 7642 }, { "epoch": 7.349038461538462, "grad_norm": 2.045595645904541, "learning_rate": 1.656946468659945e-05, "loss": 0.0418, "step": 7643 }, { "epoch": 7.35, "grad_norm": 1.0184227228164673, "learning_rate": 1.6568525474790557e-05, "loss": 0.0086, "step": 7644 }, { "epoch": 7.350961538461538, "grad_norm": 3.704566478729248, "learning_rate": 1.656758616105743e-05, "loss": 0.0312, "step": 7645 }, { "epoch": 7.351923076923077, "grad_norm": 0.9653027057647705, "learning_rate": 1.6566646745414632e-05, "loss": 0.0061, "step": 7646 }, { "epoch": 7.352884615384616, "grad_norm": 1.765262484550476, "learning_rate": 1.6565707227876758e-05, "loss": 0.019, "step": 7647 }, { "epoch": 7.3538461538461535, "grad_norm": 0.6316194534301758, "learning_rate": 1.6564767608458373e-05, "loss": 0.0044, "step": 7648 }, { "epoch": 7.354807692307692, "grad_norm": 0.7662065029144287, "learning_rate": 1.6563827887174065e-05, "loss": 0.0128, "step": 7649 }, { "epoch": 7.355769230769231, "grad_norm": 3.5408132076263428, "learning_rate": 1.6562888064038412e-05, "loss": 0.0276, "step": 7650 }, { "epoch": 7.3567307692307695, "grad_norm": 2.340924024581909, "learning_rate": 1.6561948139065997e-05, "loss": 0.0255, "step": 7651 }, { "epoch": 7.357692307692307, "grad_norm": 1.396388053894043, "learning_rate": 1.6561008112271406e-05, "loss": 0.0064, "step": 7652 }, { "epoch": 7.358653846153846, "grad_norm": 3.518264055252075, "learning_rate": 1.6560067983669225e-05, "loss": 0.1108, "step": 7653 }, { "epoch": 7.359615384615385, "grad_norm": 3.4311470985412598, "learning_rate": 1.6559127753274046e-05, "loss": 0.0953, "step": 7654 }, { "epoch": 7.360576923076923, "grad_norm": 1.6385068893432617, "learning_rate": 1.6558187421100452e-05, "loss": 0.018, "step": 7655 }, { "epoch": 7.361538461538461, "grad_norm": 5.9898200035095215, "learning_rate": 1.6557246987163036e-05, "loss": 0.0692, "step": 7656 }, { "epoch": 7.3625, "grad_norm": 1.7009001970291138, "learning_rate": 1.6556306451476393e-05, "loss": 0.0205, "step": 7657 }, { "epoch": 7.3634615384615385, "grad_norm": 1.9674270153045654, "learning_rate": 1.655536581405512e-05, "loss": 0.0186, "step": 7658 }, { "epoch": 7.364423076923077, "grad_norm": 1.4937247037887573, "learning_rate": 1.6554425074913803e-05, "loss": 0.009, "step": 7659 }, { "epoch": 7.365384615384615, "grad_norm": 3.088763475418091, "learning_rate": 1.655348423406705e-05, "loss": 0.0652, "step": 7660 }, { "epoch": 7.366346153846154, "grad_norm": 4.1290483474731445, "learning_rate": 1.6552543291529457e-05, "loss": 0.0874, "step": 7661 }, { "epoch": 7.367307692307692, "grad_norm": 2.143932580947876, "learning_rate": 1.6551602247315624e-05, "loss": 0.0658, "step": 7662 }, { "epoch": 7.368269230769231, "grad_norm": 0.5684295892715454, "learning_rate": 1.6550661101440148e-05, "loss": 0.0036, "step": 7663 }, { "epoch": 7.36923076923077, "grad_norm": 1.090516209602356, "learning_rate": 1.654971985391764e-05, "loss": 0.0059, "step": 7664 }, { "epoch": 7.3701923076923075, "grad_norm": 3.4470772743225098, "learning_rate": 1.6548778504762705e-05, "loss": 0.0793, "step": 7665 }, { "epoch": 7.371153846153846, "grad_norm": 3.7930104732513428, "learning_rate": 1.6547837053989947e-05, "loss": 0.041, "step": 7666 }, { "epoch": 7.372115384615385, "grad_norm": 0.3045136332511902, "learning_rate": 1.6546895501613974e-05, "loss": 0.0025, "step": 7667 }, { "epoch": 7.373076923076923, "grad_norm": 1.7478890419006348, "learning_rate": 1.65459538476494e-05, "loss": 0.0167, "step": 7668 }, { "epoch": 7.374038461538461, "grad_norm": 2.949533462524414, "learning_rate": 1.6545012092110833e-05, "loss": 0.1106, "step": 7669 }, { "epoch": 7.375, "grad_norm": 3.3524274826049805, "learning_rate": 1.6544070235012887e-05, "loss": 0.038, "step": 7670 }, { "epoch": 7.375961538461539, "grad_norm": 2.75227427482605, "learning_rate": 1.654312827637018e-05, "loss": 0.0665, "step": 7671 }, { "epoch": 7.376923076923077, "grad_norm": 2.5316922664642334, "learning_rate": 1.6542186216197324e-05, "loss": 0.024, "step": 7672 }, { "epoch": 7.377884615384615, "grad_norm": 2.876100540161133, "learning_rate": 1.654124405450894e-05, "loss": 0.0577, "step": 7673 }, { "epoch": 7.378846153846154, "grad_norm": 2.397385358810425, "learning_rate": 1.6540301791319647e-05, "loss": 0.0582, "step": 7674 }, { "epoch": 7.3798076923076925, "grad_norm": 3.7254855632781982, "learning_rate": 1.6539359426644062e-05, "loss": 0.0646, "step": 7675 }, { "epoch": 7.38076923076923, "grad_norm": 2.015686511993408, "learning_rate": 1.6538416960496813e-05, "loss": 0.0213, "step": 7676 }, { "epoch": 7.381730769230769, "grad_norm": 2.190155267715454, "learning_rate": 1.6537474392892527e-05, "loss": 0.0235, "step": 7677 }, { "epoch": 7.382692307692308, "grad_norm": 0.8188439607620239, "learning_rate": 1.653653172384582e-05, "loss": 0.0044, "step": 7678 }, { "epoch": 7.383653846153846, "grad_norm": 2.8475587368011475, "learning_rate": 1.6535588953371332e-05, "loss": 0.0174, "step": 7679 }, { "epoch": 7.384615384615385, "grad_norm": 0.5963507294654846, "learning_rate": 1.6534646081483677e-05, "loss": 0.0039, "step": 7680 }, { "epoch": 7.385576923076923, "grad_norm": 0.488040030002594, "learning_rate": 1.65337031081975e-05, "loss": 0.0031, "step": 7681 }, { "epoch": 7.3865384615384615, "grad_norm": 2.922346591949463, "learning_rate": 1.6532760033527427e-05, "loss": 0.0312, "step": 7682 }, { "epoch": 7.3875, "grad_norm": 1.0766220092773438, "learning_rate": 1.653181685748809e-05, "loss": 0.0074, "step": 7683 }, { "epoch": 7.388461538461539, "grad_norm": 2.961430311203003, "learning_rate": 1.6530873580094123e-05, "loss": 0.0481, "step": 7684 }, { "epoch": 7.389423076923077, "grad_norm": 1.734300136566162, "learning_rate": 1.652993020136017e-05, "loss": 0.0138, "step": 7685 }, { "epoch": 7.390384615384615, "grad_norm": 5.569676876068115, "learning_rate": 1.6528986721300863e-05, "loss": 0.05, "step": 7686 }, { "epoch": 7.391346153846154, "grad_norm": 0.8948124647140503, "learning_rate": 1.6528043139930845e-05, "loss": 0.0078, "step": 7687 }, { "epoch": 7.392307692307693, "grad_norm": 0.7934513092041016, "learning_rate": 1.6527099457264757e-05, "loss": 0.0031, "step": 7688 }, { "epoch": 7.3932692307692305, "grad_norm": 2.5897560119628906, "learning_rate": 1.6526155673317245e-05, "loss": 0.0263, "step": 7689 }, { "epoch": 7.394230769230769, "grad_norm": 2.2643375396728516, "learning_rate": 1.6525211788102946e-05, "loss": 0.0255, "step": 7690 }, { "epoch": 7.395192307692308, "grad_norm": 2.084369421005249, "learning_rate": 1.6524267801636515e-05, "loss": 0.0228, "step": 7691 }, { "epoch": 7.3961538461538465, "grad_norm": 2.316364049911499, "learning_rate": 1.6523323713932593e-05, "loss": 0.0223, "step": 7692 }, { "epoch": 7.397115384615384, "grad_norm": 2.6992244720458984, "learning_rate": 1.652237952500584e-05, "loss": 0.0212, "step": 7693 }, { "epoch": 7.398076923076923, "grad_norm": 3.0675532817840576, "learning_rate": 1.652143523487089e-05, "loss": 0.0478, "step": 7694 }, { "epoch": 7.399038461538462, "grad_norm": 1.585935115814209, "learning_rate": 1.652049084354241e-05, "loss": 0.01, "step": 7695 }, { "epoch": 7.4, "grad_norm": 5.26570987701416, "learning_rate": 1.651954635103505e-05, "loss": 0.0883, "step": 7696 }, { "epoch": 7.400961538461538, "grad_norm": 2.4877710342407227, "learning_rate": 1.6518601757363465e-05, "loss": 0.0577, "step": 7697 }, { "epoch": 7.401923076923077, "grad_norm": 0.7194527387619019, "learning_rate": 1.6517657062542313e-05, "loss": 0.0036, "step": 7698 }, { "epoch": 7.4028846153846155, "grad_norm": 4.393311500549316, "learning_rate": 1.6516712266586253e-05, "loss": 0.0529, "step": 7699 }, { "epoch": 7.403846153846154, "grad_norm": 0.4777916371822357, "learning_rate": 1.6515767369509943e-05, "loss": 0.0028, "step": 7700 }, { "epoch": 7.404807692307692, "grad_norm": 0.35816508531570435, "learning_rate": 1.6514822371328046e-05, "loss": 0.0044, "step": 7701 }, { "epoch": 7.405769230769231, "grad_norm": 3.25277042388916, "learning_rate": 1.651387727205523e-05, "loss": 0.0566, "step": 7702 }, { "epoch": 7.406730769230769, "grad_norm": 3.9095582962036133, "learning_rate": 1.6512932071706153e-05, "loss": 0.1111, "step": 7703 }, { "epoch": 7.407692307692308, "grad_norm": 2.7571375370025635, "learning_rate": 1.6511986770295488e-05, "loss": 0.0311, "step": 7704 }, { "epoch": 7.408653846153846, "grad_norm": 1.837631106376648, "learning_rate": 1.6511041367837902e-05, "loss": 0.0204, "step": 7705 }, { "epoch": 7.4096153846153845, "grad_norm": 0.5687504410743713, "learning_rate": 1.651009586434806e-05, "loss": 0.003, "step": 7706 }, { "epoch": 7.410576923076923, "grad_norm": 1.7594692707061768, "learning_rate": 1.650915025984064e-05, "loss": 0.0229, "step": 7707 }, { "epoch": 7.411538461538462, "grad_norm": 1.7839550971984863, "learning_rate": 1.650820455433031e-05, "loss": 0.0073, "step": 7708 }, { "epoch": 7.4125, "grad_norm": 2.7365825176239014, "learning_rate": 1.650725874783175e-05, "loss": 0.0632, "step": 7709 }, { "epoch": 7.413461538461538, "grad_norm": 3.0610599517822266, "learning_rate": 1.650631284035963e-05, "loss": 0.0304, "step": 7710 }, { "epoch": 7.414423076923077, "grad_norm": 2.2477669715881348, "learning_rate": 1.6505366831928632e-05, "loss": 0.0132, "step": 7711 }, { "epoch": 7.415384615384616, "grad_norm": 1.6825528144836426, "learning_rate": 1.650442072255343e-05, "loss": 0.0238, "step": 7712 }, { "epoch": 7.4163461538461535, "grad_norm": 1.7957366704940796, "learning_rate": 1.6503474512248715e-05, "loss": 0.0066, "step": 7713 }, { "epoch": 7.417307692307692, "grad_norm": 2.857659101486206, "learning_rate": 1.650252820102916e-05, "loss": 0.0356, "step": 7714 }, { "epoch": 7.418269230769231, "grad_norm": 3.7332968711853027, "learning_rate": 1.6501581788909448e-05, "loss": 0.1239, "step": 7715 }, { "epoch": 7.4192307692307695, "grad_norm": 4.054263114929199, "learning_rate": 1.6500635275904274e-05, "loss": 0.088, "step": 7716 }, { "epoch": 7.420192307692307, "grad_norm": 1.432197093963623, "learning_rate": 1.649968866202832e-05, "loss": 0.0159, "step": 7717 }, { "epoch": 7.421153846153846, "grad_norm": 0.4456283748149872, "learning_rate": 1.649874194729627e-05, "loss": 0.0027, "step": 7718 }, { "epoch": 7.422115384615385, "grad_norm": 0.8678861260414124, "learning_rate": 1.6497795131722818e-05, "loss": 0.0048, "step": 7719 }, { "epoch": 7.423076923076923, "grad_norm": 0.9344317317008972, "learning_rate": 1.6496848215322662e-05, "loss": 0.007, "step": 7720 }, { "epoch": 7.424038461538461, "grad_norm": 1.3882774114608765, "learning_rate": 1.6495901198110488e-05, "loss": 0.0143, "step": 7721 }, { "epoch": 7.425, "grad_norm": 0.8640710115432739, "learning_rate": 1.649495408010099e-05, "loss": 0.0058, "step": 7722 }, { "epoch": 7.4259615384615385, "grad_norm": 2.4430668354034424, "learning_rate": 1.649400686130887e-05, "loss": 0.0271, "step": 7723 }, { "epoch": 7.426923076923077, "grad_norm": 3.0252623558044434, "learning_rate": 1.649305954174882e-05, "loss": 0.0116, "step": 7724 }, { "epoch": 7.427884615384615, "grad_norm": 2.0789098739624023, "learning_rate": 1.6492112121435543e-05, "loss": 0.0103, "step": 7725 }, { "epoch": 7.428846153846154, "grad_norm": 2.097597360610962, "learning_rate": 1.6491164600383743e-05, "loss": 0.0134, "step": 7726 }, { "epoch": 7.429807692307692, "grad_norm": 0.7967100739479065, "learning_rate": 1.649021697860812e-05, "loss": 0.0084, "step": 7727 }, { "epoch": 7.430769230769231, "grad_norm": 0.7694404125213623, "learning_rate": 1.6489269256123376e-05, "loss": 0.0062, "step": 7728 }, { "epoch": 7.43173076923077, "grad_norm": 1.7126729488372803, "learning_rate": 1.6488321432944218e-05, "loss": 0.0191, "step": 7729 }, { "epoch": 7.4326923076923075, "grad_norm": 0.5178890228271484, "learning_rate": 1.6487373509085355e-05, "loss": 0.0034, "step": 7730 }, { "epoch": 7.433653846153846, "grad_norm": 4.861431121826172, "learning_rate": 1.6486425484561498e-05, "loss": 0.0468, "step": 7731 }, { "epoch": 7.434615384615385, "grad_norm": 6.4319305419921875, "learning_rate": 1.6485477359387352e-05, "loss": 0.26, "step": 7732 }, { "epoch": 7.435576923076923, "grad_norm": 0.8073949217796326, "learning_rate": 1.648452913357763e-05, "loss": 0.0048, "step": 7733 }, { "epoch": 7.436538461538461, "grad_norm": 1.9104670286178589, "learning_rate": 1.648358080714705e-05, "loss": 0.0175, "step": 7734 }, { "epoch": 7.4375, "grad_norm": 0.5081338286399841, "learning_rate": 1.648263238011033e-05, "loss": 0.0043, "step": 7735 }, { "epoch": 7.438461538461539, "grad_norm": 0.4522683024406433, "learning_rate": 1.6481683852482176e-05, "loss": 0.0041, "step": 7736 }, { "epoch": 7.439423076923077, "grad_norm": 0.6901274919509888, "learning_rate": 1.648073522427731e-05, "loss": 0.0046, "step": 7737 }, { "epoch": 7.440384615384615, "grad_norm": 1.4243634939193726, "learning_rate": 1.647978649551046e-05, "loss": 0.0111, "step": 7738 }, { "epoch": 7.441346153846154, "grad_norm": 1.8369649648666382, "learning_rate": 1.647883766619634e-05, "loss": 0.0099, "step": 7739 }, { "epoch": 7.4423076923076925, "grad_norm": 0.1767854541540146, "learning_rate": 1.6477888736349672e-05, "loss": 0.0014, "step": 7740 }, { "epoch": 7.44326923076923, "grad_norm": 2.492025852203369, "learning_rate": 1.6476939705985185e-05, "loss": 0.0431, "step": 7741 }, { "epoch": 7.444230769230769, "grad_norm": 2.59887433052063, "learning_rate": 1.6475990575117603e-05, "loss": 0.0229, "step": 7742 }, { "epoch": 7.445192307692308, "grad_norm": 3.8026373386383057, "learning_rate": 1.6475041343761657e-05, "loss": 0.0426, "step": 7743 }, { "epoch": 7.446153846153846, "grad_norm": 2.521789073944092, "learning_rate": 1.647409201193207e-05, "loss": 0.0222, "step": 7744 }, { "epoch": 7.447115384615385, "grad_norm": 5.175374984741211, "learning_rate": 1.6473142579643577e-05, "loss": 0.1011, "step": 7745 }, { "epoch": 7.448076923076923, "grad_norm": 2.597712755203247, "learning_rate": 1.6472193046910906e-05, "loss": 0.0816, "step": 7746 }, { "epoch": 7.4490384615384615, "grad_norm": 1.7369816303253174, "learning_rate": 1.6471243413748798e-05, "loss": 0.0113, "step": 7747 }, { "epoch": 7.45, "grad_norm": 3.352018356323242, "learning_rate": 1.6470293680171985e-05, "loss": 0.1402, "step": 7748 }, { "epoch": 7.450961538461539, "grad_norm": 2.0226144790649414, "learning_rate": 1.6469343846195207e-05, "loss": 0.0802, "step": 7749 }, { "epoch": 7.451923076923077, "grad_norm": 0.7458157539367676, "learning_rate": 1.6468393911833194e-05, "loss": 0.0065, "step": 7750 }, { "epoch": 7.452884615384615, "grad_norm": 1.2386113405227661, "learning_rate": 1.6467443877100694e-05, "loss": 0.0061, "step": 7751 }, { "epoch": 7.453846153846154, "grad_norm": 0.22700929641723633, "learning_rate": 1.646649374201245e-05, "loss": 0.0017, "step": 7752 }, { "epoch": 7.454807692307693, "grad_norm": 2.7945525646209717, "learning_rate": 1.64655435065832e-05, "loss": 0.0672, "step": 7753 }, { "epoch": 7.4557692307692305, "grad_norm": 4.710268974304199, "learning_rate": 1.6464593170827693e-05, "loss": 0.074, "step": 7754 }, { "epoch": 7.456730769230769, "grad_norm": 2.665706157684326, "learning_rate": 1.646364273476067e-05, "loss": 0.1946, "step": 7755 }, { "epoch": 7.457692307692308, "grad_norm": 2.4784464836120605, "learning_rate": 1.646269219839688e-05, "loss": 0.0189, "step": 7756 }, { "epoch": 7.4586538461538465, "grad_norm": 3.824551582336426, "learning_rate": 1.646174156175108e-05, "loss": 0.0857, "step": 7757 }, { "epoch": 7.459615384615384, "grad_norm": 2.601270914077759, "learning_rate": 1.6460790824838017e-05, "loss": 0.0163, "step": 7758 }, { "epoch": 7.460576923076923, "grad_norm": 3.917882204055786, "learning_rate": 1.6459839987672436e-05, "loss": 0.0551, "step": 7759 }, { "epoch": 7.461538461538462, "grad_norm": 2.277360677719116, "learning_rate": 1.6458889050269104e-05, "loss": 0.0345, "step": 7760 }, { "epoch": 7.4625, "grad_norm": 0.8840941190719604, "learning_rate": 1.6457938012642767e-05, "loss": 0.0067, "step": 7761 }, { "epoch": 7.463461538461538, "grad_norm": 2.5766472816467285, "learning_rate": 1.6456986874808188e-05, "loss": 0.0299, "step": 7762 }, { "epoch": 7.464423076923077, "grad_norm": 3.1040031909942627, "learning_rate": 1.6456035636780124e-05, "loss": 0.0318, "step": 7763 }, { "epoch": 7.4653846153846155, "grad_norm": 1.3892998695373535, "learning_rate": 1.6455084298573334e-05, "loss": 0.0126, "step": 7764 }, { "epoch": 7.466346153846154, "grad_norm": 1.4495408535003662, "learning_rate": 1.6454132860202578e-05, "loss": 0.0089, "step": 7765 }, { "epoch": 7.467307692307692, "grad_norm": 4.2335405349731445, "learning_rate": 1.6453181321682627e-05, "loss": 0.0895, "step": 7766 }, { "epoch": 7.468269230769231, "grad_norm": 1.4946227073669434, "learning_rate": 1.645222968302824e-05, "loss": 0.0242, "step": 7767 }, { "epoch": 7.469230769230769, "grad_norm": 2.2144157886505127, "learning_rate": 1.6451277944254186e-05, "loss": 0.0105, "step": 7768 }, { "epoch": 7.470192307692308, "grad_norm": 1.05050790309906, "learning_rate": 1.6450326105375234e-05, "loss": 0.0043, "step": 7769 }, { "epoch": 7.471153846153846, "grad_norm": 3.9251797199249268, "learning_rate": 1.644937416640615e-05, "loss": 0.0422, "step": 7770 }, { "epoch": 7.4721153846153845, "grad_norm": 3.51782488822937, "learning_rate": 1.6448422127361707e-05, "loss": 0.0766, "step": 7771 }, { "epoch": 7.473076923076923, "grad_norm": 2.351818084716797, "learning_rate": 1.644746998825668e-05, "loss": 0.0352, "step": 7772 }, { "epoch": 7.474038461538462, "grad_norm": 1.5380113124847412, "learning_rate": 1.6446517749105844e-05, "loss": 0.0084, "step": 7773 }, { "epoch": 7.475, "grad_norm": 2.1582112312316895, "learning_rate": 1.6445565409923968e-05, "loss": 0.0408, "step": 7774 }, { "epoch": 7.475961538461538, "grad_norm": 1.3456995487213135, "learning_rate": 1.6444612970725837e-05, "loss": 0.0123, "step": 7775 }, { "epoch": 7.476923076923077, "grad_norm": 2.7016618251800537, "learning_rate": 1.6443660431526228e-05, "loss": 0.0456, "step": 7776 }, { "epoch": 7.477884615384616, "grad_norm": 2.421360969543457, "learning_rate": 1.6442707792339918e-05, "loss": 0.0212, "step": 7777 }, { "epoch": 7.4788461538461535, "grad_norm": 3.7815980911254883, "learning_rate": 1.6441755053181698e-05, "loss": 0.0313, "step": 7778 }, { "epoch": 7.479807692307692, "grad_norm": 2.125485420227051, "learning_rate": 1.644080221406634e-05, "loss": 0.0306, "step": 7779 }, { "epoch": 7.480769230769231, "grad_norm": 4.068481922149658, "learning_rate": 1.643984927500864e-05, "loss": 0.0591, "step": 7780 }, { "epoch": 7.4817307692307695, "grad_norm": 4.714351177215576, "learning_rate": 1.6438896236023374e-05, "loss": 0.0457, "step": 7781 }, { "epoch": 7.482692307692307, "grad_norm": 0.28599151968955994, "learning_rate": 1.6437943097125343e-05, "loss": 0.0022, "step": 7782 }, { "epoch": 7.483653846153846, "grad_norm": 4.975604057312012, "learning_rate": 1.6436989858329326e-05, "loss": 0.0582, "step": 7783 }, { "epoch": 7.484615384615385, "grad_norm": 3.0971529483795166, "learning_rate": 1.643603651965012e-05, "loss": 0.0608, "step": 7784 }, { "epoch": 7.485576923076923, "grad_norm": 3.3473567962646484, "learning_rate": 1.6435083081102515e-05, "loss": 0.0963, "step": 7785 }, { "epoch": 7.486538461538461, "grad_norm": 0.8242923021316528, "learning_rate": 1.6434129542701313e-05, "loss": 0.006, "step": 7786 }, { "epoch": 7.4875, "grad_norm": 0.7882581949234009, "learning_rate": 1.64331759044613e-05, "loss": 0.0051, "step": 7787 }, { "epoch": 7.4884615384615385, "grad_norm": 0.9679139256477356, "learning_rate": 1.6432222166397276e-05, "loss": 0.0061, "step": 7788 }, { "epoch": 7.489423076923077, "grad_norm": 2.244316577911377, "learning_rate": 1.6431268328524045e-05, "loss": 0.0146, "step": 7789 }, { "epoch": 7.490384615384615, "grad_norm": 3.516324996948242, "learning_rate": 1.6430314390856407e-05, "loss": 0.0602, "step": 7790 }, { "epoch": 7.491346153846154, "grad_norm": 2.9230194091796875, "learning_rate": 1.642936035340916e-05, "loss": 0.0609, "step": 7791 }, { "epoch": 7.492307692307692, "grad_norm": 2.1511576175689697, "learning_rate": 1.642840621619711e-05, "loss": 0.0155, "step": 7792 }, { "epoch": 7.493269230769231, "grad_norm": 1.4347540140151978, "learning_rate": 1.6427451979235062e-05, "loss": 0.011, "step": 7793 }, { "epoch": 7.49423076923077, "grad_norm": 0.5524981617927551, "learning_rate": 1.6426497642537826e-05, "loss": 0.0046, "step": 7794 }, { "epoch": 7.4951923076923075, "grad_norm": 0.7818390130996704, "learning_rate": 1.6425543206120207e-05, "loss": 0.0068, "step": 7795 }, { "epoch": 7.496153846153846, "grad_norm": 0.4843199551105499, "learning_rate": 1.6424588669997016e-05, "loss": 0.0026, "step": 7796 }, { "epoch": 7.497115384615385, "grad_norm": 3.664710283279419, "learning_rate": 1.6423634034183064e-05, "loss": 0.0438, "step": 7797 }, { "epoch": 7.498076923076923, "grad_norm": 0.6810100078582764, "learning_rate": 1.6422679298693166e-05, "loss": 0.0051, "step": 7798 }, { "epoch": 7.499038461538461, "grad_norm": 1.0609368085861206, "learning_rate": 1.6421724463542136e-05, "loss": 0.0094, "step": 7799 }, { "epoch": 7.5, "grad_norm": 1.930014729499817, "learning_rate": 1.642076952874479e-05, "loss": 0.0336, "step": 7800 }, { "epoch": 7.500961538461539, "grad_norm": 3.294062376022339, "learning_rate": 1.6419814494315946e-05, "loss": 0.0484, "step": 7801 }, { "epoch": 7.501923076923077, "grad_norm": 3.389267683029175, "learning_rate": 1.641885936027042e-05, "loss": 0.0421, "step": 7802 }, { "epoch": 7.502884615384615, "grad_norm": 1.4308385848999023, "learning_rate": 1.641790412662304e-05, "loss": 0.0497, "step": 7803 }, { "epoch": 7.503846153846154, "grad_norm": 1.0121225118637085, "learning_rate": 1.641694879338862e-05, "loss": 0.0096, "step": 7804 }, { "epoch": 7.5048076923076925, "grad_norm": 3.219878911972046, "learning_rate": 1.641599336058199e-05, "loss": 0.0872, "step": 7805 }, { "epoch": 7.50576923076923, "grad_norm": 1.2766611576080322, "learning_rate": 1.6415037828217973e-05, "loss": 0.0158, "step": 7806 }, { "epoch": 7.506730769230769, "grad_norm": 2.8003058433532715, "learning_rate": 1.6414082196311402e-05, "loss": 0.0235, "step": 7807 }, { "epoch": 7.507692307692308, "grad_norm": 3.340486526489258, "learning_rate": 1.6413126464877094e-05, "loss": 0.0516, "step": 7808 }, { "epoch": 7.508653846153846, "grad_norm": 0.7927320003509521, "learning_rate": 1.641217063392989e-05, "loss": 0.008, "step": 7809 }, { "epoch": 7.509615384615385, "grad_norm": 1.7053321599960327, "learning_rate": 1.641121470348461e-05, "loss": 0.011, "step": 7810 }, { "epoch": 7.510576923076923, "grad_norm": 2.7603302001953125, "learning_rate": 1.6410258673556107e-05, "loss": 0.021, "step": 7811 }, { "epoch": 7.5115384615384615, "grad_norm": 3.072981119155884, "learning_rate": 1.640930254415919e-05, "loss": 0.0436, "step": 7812 }, { "epoch": 7.5125, "grad_norm": 3.1562750339508057, "learning_rate": 1.6408346315308715e-05, "loss": 0.025, "step": 7813 }, { "epoch": 7.513461538461538, "grad_norm": 1.1391634941101074, "learning_rate": 1.6407389987019516e-05, "loss": 0.0142, "step": 7814 }, { "epoch": 7.514423076923077, "grad_norm": 0.7352020144462585, "learning_rate": 1.6406433559306426e-05, "loss": 0.0091, "step": 7815 }, { "epoch": 7.515384615384615, "grad_norm": 2.937762498855591, "learning_rate": 1.6405477032184292e-05, "loss": 0.0371, "step": 7816 }, { "epoch": 7.516346153846154, "grad_norm": 3.2158944606781006, "learning_rate": 1.640452040566795e-05, "loss": 0.0306, "step": 7817 }, { "epoch": 7.517307692307693, "grad_norm": 3.062983274459839, "learning_rate": 1.6403563679772254e-05, "loss": 0.038, "step": 7818 }, { "epoch": 7.5182692307692305, "grad_norm": 0.4565798044204712, "learning_rate": 1.640260685451204e-05, "loss": 0.0035, "step": 7819 }, { "epoch": 7.519230769230769, "grad_norm": 4.8583478927612305, "learning_rate": 1.640164992990216e-05, "loss": 0.0555, "step": 7820 }, { "epoch": 7.520192307692308, "grad_norm": 2.5641419887542725, "learning_rate": 1.6400692905957464e-05, "loss": 0.0272, "step": 7821 }, { "epoch": 7.5211538461538465, "grad_norm": 3.28047513961792, "learning_rate": 1.6399735782692796e-05, "loss": 0.0332, "step": 7822 }, { "epoch": 7.522115384615384, "grad_norm": 1.8827102184295654, "learning_rate": 1.6398778560123013e-05, "loss": 0.0448, "step": 7823 }, { "epoch": 7.523076923076923, "grad_norm": 3.4564719200134277, "learning_rate": 1.639782123826297e-05, "loss": 0.0663, "step": 7824 }, { "epoch": 7.524038461538462, "grad_norm": 1.5261722803115845, "learning_rate": 1.6396863817127515e-05, "loss": 0.0085, "step": 7825 }, { "epoch": 7.525, "grad_norm": 3.080117702484131, "learning_rate": 1.639590629673151e-05, "loss": 0.0424, "step": 7826 }, { "epoch": 7.525961538461538, "grad_norm": 0.2388523668050766, "learning_rate": 1.639494867708981e-05, "loss": 0.0019, "step": 7827 }, { "epoch": 7.526923076923077, "grad_norm": 4.93770694732666, "learning_rate": 1.6393990958217275e-05, "loss": 0.0935, "step": 7828 }, { "epoch": 7.5278846153846155, "grad_norm": 3.9181711673736572, "learning_rate": 1.6393033140128768e-05, "loss": 0.0687, "step": 7829 }, { "epoch": 7.528846153846154, "grad_norm": 1.2728408575057983, "learning_rate": 1.639207522283915e-05, "loss": 0.0275, "step": 7830 }, { "epoch": 7.529807692307692, "grad_norm": 1.2408407926559448, "learning_rate": 1.6391117206363284e-05, "loss": 0.0049, "step": 7831 }, { "epoch": 7.530769230769231, "grad_norm": 2.2924821376800537, "learning_rate": 1.6390159090716038e-05, "loss": 0.0361, "step": 7832 }, { "epoch": 7.531730769230769, "grad_norm": 2.4375174045562744, "learning_rate": 1.638920087591228e-05, "loss": 0.0153, "step": 7833 }, { "epoch": 7.532692307692308, "grad_norm": 1.8202179670333862, "learning_rate": 1.638824256196687e-05, "loss": 0.034, "step": 7834 }, { "epoch": 7.533653846153846, "grad_norm": 1.602137565612793, "learning_rate": 1.6387284148894688e-05, "loss": 0.0124, "step": 7835 }, { "epoch": 7.5346153846153845, "grad_norm": 1.580424427986145, "learning_rate": 1.6386325636710605e-05, "loss": 0.0092, "step": 7836 }, { "epoch": 7.535576923076923, "grad_norm": 2.497460126876831, "learning_rate": 1.6385367025429485e-05, "loss": 0.0284, "step": 7837 }, { "epoch": 7.536538461538462, "grad_norm": 0.5920858383178711, "learning_rate": 1.6384408315066214e-05, "loss": 0.0043, "step": 7838 }, { "epoch": 7.5375, "grad_norm": 1.536864161491394, "learning_rate": 1.6383449505635668e-05, "loss": 0.0208, "step": 7839 }, { "epoch": 7.538461538461538, "grad_norm": 1.0520211458206177, "learning_rate": 1.6382490597152716e-05, "loss": 0.0055, "step": 7840 }, { "epoch": 7.539423076923077, "grad_norm": 2.4658029079437256, "learning_rate": 1.6381531589632242e-05, "loss": 0.0252, "step": 7841 }, { "epoch": 7.540384615384616, "grad_norm": 4.8936052322387695, "learning_rate": 1.638057248308913e-05, "loss": 0.0669, "step": 7842 }, { "epoch": 7.5413461538461535, "grad_norm": 4.896088123321533, "learning_rate": 1.6379613277538257e-05, "loss": 0.032, "step": 7843 }, { "epoch": 7.542307692307692, "grad_norm": 2.021243095397949, "learning_rate": 1.6378653972994513e-05, "loss": 0.0163, "step": 7844 }, { "epoch": 7.543269230769231, "grad_norm": 1.4243603944778442, "learning_rate": 1.6377694569472774e-05, "loss": 0.0132, "step": 7845 }, { "epoch": 7.5442307692307695, "grad_norm": 5.343995571136475, "learning_rate": 1.637673506698794e-05, "loss": 0.0948, "step": 7846 }, { "epoch": 7.545192307692307, "grad_norm": 1.881281852722168, "learning_rate": 1.6375775465554893e-05, "loss": 0.0306, "step": 7847 }, { "epoch": 7.546153846153846, "grad_norm": 1.4014424085617065, "learning_rate": 1.637481576518852e-05, "loss": 0.0119, "step": 7848 }, { "epoch": 7.547115384615385, "grad_norm": 2.550123453140259, "learning_rate": 1.637385596590372e-05, "loss": 0.0163, "step": 7849 }, { "epoch": 7.548076923076923, "grad_norm": 0.7686355710029602, "learning_rate": 1.637289606771538e-05, "loss": 0.0051, "step": 7850 }, { "epoch": 7.549038461538462, "grad_norm": 5.946466445922852, "learning_rate": 1.6371936070638397e-05, "loss": 0.0228, "step": 7851 }, { "epoch": 7.55, "grad_norm": 3.8814029693603516, "learning_rate": 1.6370975974687668e-05, "loss": 0.0319, "step": 7852 }, { "epoch": 7.5509615384615385, "grad_norm": 1.8585928678512573, "learning_rate": 1.6370015779878093e-05, "loss": 0.0188, "step": 7853 }, { "epoch": 7.551923076923077, "grad_norm": 2.1659443378448486, "learning_rate": 1.6369055486224565e-05, "loss": 0.0272, "step": 7854 }, { "epoch": 7.552884615384615, "grad_norm": 3.6410186290740967, "learning_rate": 1.636809509374199e-05, "loss": 0.1107, "step": 7855 }, { "epoch": 7.553846153846154, "grad_norm": 1.6548911333084106, "learning_rate": 1.636713460244527e-05, "loss": 0.0265, "step": 7856 }, { "epoch": 7.554807692307692, "grad_norm": 4.403834342956543, "learning_rate": 1.6366174012349308e-05, "loss": 0.0727, "step": 7857 }, { "epoch": 7.555769230769231, "grad_norm": 1.627753496170044, "learning_rate": 1.636521332346901e-05, "loss": 0.0156, "step": 7858 }, { "epoch": 7.55673076923077, "grad_norm": 2.322500705718994, "learning_rate": 1.6364252535819284e-05, "loss": 0.0195, "step": 7859 }, { "epoch": 7.5576923076923075, "grad_norm": 3.359468936920166, "learning_rate": 1.6363291649415036e-05, "loss": 0.0486, "step": 7860 }, { "epoch": 7.558653846153846, "grad_norm": 2.80853271484375, "learning_rate": 1.636233066427118e-05, "loss": 0.0331, "step": 7861 }, { "epoch": 7.559615384615385, "grad_norm": 1.6611311435699463, "learning_rate": 1.636136958040262e-05, "loss": 0.0433, "step": 7862 }, { "epoch": 7.560576923076923, "grad_norm": 3.477323055267334, "learning_rate": 1.6360408397824276e-05, "loss": 0.0637, "step": 7863 }, { "epoch": 7.561538461538461, "grad_norm": 0.6606910824775696, "learning_rate": 1.6359447116551066e-05, "loss": 0.0041, "step": 7864 }, { "epoch": 7.5625, "grad_norm": 3.91101336479187, "learning_rate": 1.6358485736597898e-05, "loss": 0.1363, "step": 7865 }, { "epoch": 7.563461538461539, "grad_norm": 0.969722330570221, "learning_rate": 1.6357524257979696e-05, "loss": 0.0104, "step": 7866 }, { "epoch": 7.564423076923077, "grad_norm": 3.8922698497772217, "learning_rate": 1.635656268071137e-05, "loss": 0.0358, "step": 7867 }, { "epoch": 7.565384615384615, "grad_norm": 2.3984949588775635, "learning_rate": 1.6355601004807856e-05, "loss": 0.0285, "step": 7868 }, { "epoch": 7.566346153846154, "grad_norm": 3.683070182800293, "learning_rate": 1.635463923028406e-05, "loss": 0.0477, "step": 7869 }, { "epoch": 7.5673076923076925, "grad_norm": 2.2659919261932373, "learning_rate": 1.6353677357154918e-05, "loss": 0.0177, "step": 7870 }, { "epoch": 7.56826923076923, "grad_norm": 2.183762550354004, "learning_rate": 1.635271538543535e-05, "loss": 0.0535, "step": 7871 }, { "epoch": 7.569230769230769, "grad_norm": 3.972398519515991, "learning_rate": 1.6351753315140285e-05, "loss": 0.034, "step": 7872 }, { "epoch": 7.570192307692308, "grad_norm": 0.6929716467857361, "learning_rate": 1.635079114628465e-05, "loss": 0.0054, "step": 7873 }, { "epoch": 7.571153846153846, "grad_norm": 0.229251891374588, "learning_rate": 1.634982887888338e-05, "loss": 0.0015, "step": 7874 }, { "epoch": 7.572115384615385, "grad_norm": 2.3748154640197754, "learning_rate": 1.63488665129514e-05, "loss": 0.0359, "step": 7875 }, { "epoch": 7.573076923076923, "grad_norm": 2.1083152294158936, "learning_rate": 1.634790404850364e-05, "loss": 0.0211, "step": 7876 }, { "epoch": 7.5740384615384615, "grad_norm": 2.698698043823242, "learning_rate": 1.6346941485555047e-05, "loss": 0.0207, "step": 7877 }, { "epoch": 7.575, "grad_norm": 2.9474258422851562, "learning_rate": 1.6345978824120546e-05, "loss": 0.0346, "step": 7878 }, { "epoch": 7.575961538461538, "grad_norm": 2.419401168823242, "learning_rate": 1.634501606421508e-05, "loss": 0.0311, "step": 7879 }, { "epoch": 7.576923076923077, "grad_norm": 2.137140989303589, "learning_rate": 1.634405320585359e-05, "loss": 0.026, "step": 7880 }, { "epoch": 7.577884615384615, "grad_norm": 1.2426308393478394, "learning_rate": 1.634309024905101e-05, "loss": 0.0093, "step": 7881 }, { "epoch": 7.578846153846154, "grad_norm": 5.451724529266357, "learning_rate": 1.6342127193822288e-05, "loss": 0.0963, "step": 7882 }, { "epoch": 7.579807692307693, "grad_norm": 4.672908306121826, "learning_rate": 1.6341164040182366e-05, "loss": 0.0954, "step": 7883 }, { "epoch": 7.5807692307692305, "grad_norm": 1.3151798248291016, "learning_rate": 1.6340200788146187e-05, "loss": 0.0472, "step": 7884 }, { "epoch": 7.581730769230769, "grad_norm": 4.891580104827881, "learning_rate": 1.63392374377287e-05, "loss": 0.0923, "step": 7885 }, { "epoch": 7.582692307692308, "grad_norm": 4.31657075881958, "learning_rate": 1.6338273988944856e-05, "loss": 0.0502, "step": 7886 }, { "epoch": 7.5836538461538465, "grad_norm": 2.4264121055603027, "learning_rate": 1.63373104418096e-05, "loss": 0.0251, "step": 7887 }, { "epoch": 7.584615384615384, "grad_norm": 3.0658907890319824, "learning_rate": 1.6336346796337887e-05, "loss": 0.0535, "step": 7888 }, { "epoch": 7.585576923076923, "grad_norm": 2.2070913314819336, "learning_rate": 1.633538305254467e-05, "loss": 0.0475, "step": 7889 }, { "epoch": 7.586538461538462, "grad_norm": 3.4310731887817383, "learning_rate": 1.63344192104449e-05, "loss": 0.0312, "step": 7890 }, { "epoch": 7.5875, "grad_norm": 2.7429323196411133, "learning_rate": 1.6333455270053537e-05, "loss": 0.0268, "step": 7891 }, { "epoch": 7.588461538461538, "grad_norm": 1.7241172790527344, "learning_rate": 1.6332491231385534e-05, "loss": 0.0151, "step": 7892 }, { "epoch": 7.589423076923077, "grad_norm": 4.347882270812988, "learning_rate": 1.6331527094455854e-05, "loss": 0.1389, "step": 7893 }, { "epoch": 7.5903846153846155, "grad_norm": 1.2495044469833374, "learning_rate": 1.6330562859279457e-05, "loss": 0.0121, "step": 7894 }, { "epoch": 7.591346153846154, "grad_norm": 1.845320463180542, "learning_rate": 1.6329598525871303e-05, "loss": 0.0164, "step": 7895 }, { "epoch": 7.592307692307692, "grad_norm": 2.832840919494629, "learning_rate": 1.632863409424636e-05, "loss": 0.0359, "step": 7896 }, { "epoch": 7.593269230769231, "grad_norm": 3.182124376296997, "learning_rate": 1.6327669564419586e-05, "loss": 0.0286, "step": 7897 }, { "epoch": 7.594230769230769, "grad_norm": 0.16504207253456116, "learning_rate": 1.6326704936405953e-05, "loss": 0.0015, "step": 7898 }, { "epoch": 7.595192307692308, "grad_norm": 1.9000178575515747, "learning_rate": 1.632574021022043e-05, "loss": 0.0148, "step": 7899 }, { "epoch": 7.596153846153846, "grad_norm": 1.2667598724365234, "learning_rate": 1.6324775385877983e-05, "loss": 0.0088, "step": 7900 }, { "epoch": 7.5971153846153845, "grad_norm": 2.558600664138794, "learning_rate": 1.6323810463393587e-05, "loss": 0.0207, "step": 7901 }, { "epoch": 7.598076923076923, "grad_norm": 2.7253334522247314, "learning_rate": 1.6322845442782213e-05, "loss": 0.0206, "step": 7902 }, { "epoch": 7.599038461538462, "grad_norm": 1.4535106420516968, "learning_rate": 1.6321880324058833e-05, "loss": 0.0133, "step": 7903 }, { "epoch": 7.6, "grad_norm": 3.524388313293457, "learning_rate": 1.6320915107238423e-05, "loss": 0.045, "step": 7904 }, { "epoch": 7.600961538461538, "grad_norm": 3.0744359493255615, "learning_rate": 1.6319949792335968e-05, "loss": 0.0277, "step": 7905 }, { "epoch": 7.601923076923077, "grad_norm": 2.3549933433532715, "learning_rate": 1.6318984379366437e-05, "loss": 0.0212, "step": 7906 }, { "epoch": 7.602884615384616, "grad_norm": 2.7899320125579834, "learning_rate": 1.6318018868344812e-05, "loss": 0.0686, "step": 7907 }, { "epoch": 7.6038461538461535, "grad_norm": 3.780275821685791, "learning_rate": 1.6317053259286084e-05, "loss": 0.085, "step": 7908 }, { "epoch": 7.604807692307692, "grad_norm": 1.3548667430877686, "learning_rate": 1.6316087552205226e-05, "loss": 0.0136, "step": 7909 }, { "epoch": 7.605769230769231, "grad_norm": 2.695866107940674, "learning_rate": 1.6315121747117225e-05, "loss": 0.0292, "step": 7910 }, { "epoch": 7.6067307692307695, "grad_norm": 1.7882441282272339, "learning_rate": 1.6314155844037074e-05, "loss": 0.0248, "step": 7911 }, { "epoch": 7.607692307692307, "grad_norm": 2.42801570892334, "learning_rate": 1.631318984297975e-05, "loss": 0.0178, "step": 7912 }, { "epoch": 7.608653846153846, "grad_norm": 0.2776091992855072, "learning_rate": 1.6312223743960255e-05, "loss": 0.0023, "step": 7913 }, { "epoch": 7.609615384615385, "grad_norm": 3.8889224529266357, "learning_rate": 1.631125754699357e-05, "loss": 0.0718, "step": 7914 }, { "epoch": 7.610576923076923, "grad_norm": 2.9182422161102295, "learning_rate": 1.631029125209469e-05, "loss": 0.0372, "step": 7915 }, { "epoch": 7.611538461538462, "grad_norm": 2.1974194049835205, "learning_rate": 1.6309324859278613e-05, "loss": 0.0202, "step": 7916 }, { "epoch": 7.6125, "grad_norm": 1.6948095560073853, "learning_rate": 1.630835836856033e-05, "loss": 0.016, "step": 7917 }, { "epoch": 7.6134615384615385, "grad_norm": 3.0403220653533936, "learning_rate": 1.630739177995484e-05, "loss": 0.0538, "step": 7918 }, { "epoch": 7.614423076923077, "grad_norm": 4.378208160400391, "learning_rate": 1.6306425093477142e-05, "loss": 0.1089, "step": 7919 }, { "epoch": 7.615384615384615, "grad_norm": 3.163105010986328, "learning_rate": 1.6305458309142238e-05, "loss": 0.0228, "step": 7920 }, { "epoch": 7.616346153846154, "grad_norm": 1.6725883483886719, "learning_rate": 1.6304491426965122e-05, "loss": 0.0114, "step": 7921 }, { "epoch": 7.617307692307692, "grad_norm": 1.9302704334259033, "learning_rate": 1.6303524446960807e-05, "loss": 0.0145, "step": 7922 }, { "epoch": 7.618269230769231, "grad_norm": 1.78840970993042, "learning_rate": 1.630255736914429e-05, "loss": 0.0149, "step": 7923 }, { "epoch": 7.61923076923077, "grad_norm": 2.0646891593933105, "learning_rate": 1.6301590193530585e-05, "loss": 0.0443, "step": 7924 }, { "epoch": 7.6201923076923075, "grad_norm": 1.506747841835022, "learning_rate": 1.630062292013469e-05, "loss": 0.0166, "step": 7925 }, { "epoch": 7.621153846153846, "grad_norm": 0.13224151730537415, "learning_rate": 1.6299655548971624e-05, "loss": 0.0008, "step": 7926 }, { "epoch": 7.622115384615385, "grad_norm": 4.217807292938232, "learning_rate": 1.629868808005639e-05, "loss": 0.0563, "step": 7927 }, { "epoch": 7.623076923076923, "grad_norm": 0.6095119714736938, "learning_rate": 1.6297720513404006e-05, "loss": 0.0044, "step": 7928 }, { "epoch": 7.624038461538461, "grad_norm": 2.023375988006592, "learning_rate": 1.6296752849029483e-05, "loss": 0.0183, "step": 7929 }, { "epoch": 7.625, "grad_norm": 3.8464996814727783, "learning_rate": 1.6295785086947833e-05, "loss": 0.0708, "step": 7930 }, { "epoch": 7.625961538461539, "grad_norm": 2.256742477416992, "learning_rate": 1.629481722717408e-05, "loss": 0.027, "step": 7931 }, { "epoch": 7.626923076923077, "grad_norm": 2.7107431888580322, "learning_rate": 1.6293849269723236e-05, "loss": 0.0371, "step": 7932 }, { "epoch": 7.627884615384615, "grad_norm": 0.8789340853691101, "learning_rate": 1.6292881214610325e-05, "loss": 0.0064, "step": 7933 }, { "epoch": 7.628846153846154, "grad_norm": 3.1529793739318848, "learning_rate": 1.6291913061850367e-05, "loss": 0.0162, "step": 7934 }, { "epoch": 7.6298076923076925, "grad_norm": 1.1237117052078247, "learning_rate": 1.6290944811458387e-05, "loss": 0.0094, "step": 7935 }, { "epoch": 7.63076923076923, "grad_norm": 0.43759214878082275, "learning_rate": 1.6289976463449404e-05, "loss": 0.0037, "step": 7936 }, { "epoch": 7.631730769230769, "grad_norm": 4.733105182647705, "learning_rate": 1.6289008017838447e-05, "loss": 0.1524, "step": 7937 }, { "epoch": 7.632692307692308, "grad_norm": 2.7900569438934326, "learning_rate": 1.6288039474640544e-05, "loss": 0.0724, "step": 7938 }, { "epoch": 7.633653846153846, "grad_norm": 3.1894419193267822, "learning_rate": 1.6287070833870723e-05, "loss": 0.0358, "step": 7939 }, { "epoch": 7.634615384615385, "grad_norm": 1.120944619178772, "learning_rate": 1.6286102095544017e-05, "loss": 0.0179, "step": 7940 }, { "epoch": 7.635576923076923, "grad_norm": 2.160560131072998, "learning_rate": 1.6285133259675454e-05, "loss": 0.0142, "step": 7941 }, { "epoch": 7.6365384615384615, "grad_norm": 2.586662530899048, "learning_rate": 1.6284164326280073e-05, "loss": 0.0287, "step": 7942 }, { "epoch": 7.6375, "grad_norm": 4.884359359741211, "learning_rate": 1.62831952953729e-05, "loss": 0.1494, "step": 7943 }, { "epoch": 7.638461538461538, "grad_norm": 8.79371166229248, "learning_rate": 1.628222616696898e-05, "loss": 0.1371, "step": 7944 }, { "epoch": 7.639423076923077, "grad_norm": 4.22046422958374, "learning_rate": 1.628125694108335e-05, "loss": 0.0699, "step": 7945 }, { "epoch": 7.640384615384615, "grad_norm": 0.47617945075035095, "learning_rate": 1.6280287617731044e-05, "loss": 0.0029, "step": 7946 }, { "epoch": 7.641346153846154, "grad_norm": 1.0614994764328003, "learning_rate": 1.627931819692711e-05, "loss": 0.0061, "step": 7947 }, { "epoch": 7.642307692307693, "grad_norm": 2.45626163482666, "learning_rate": 1.6278348678686583e-05, "loss": 0.0575, "step": 7948 }, { "epoch": 7.6432692307692305, "grad_norm": 2.775839328765869, "learning_rate": 1.6277379063024517e-05, "loss": 0.0285, "step": 7949 }, { "epoch": 7.644230769230769, "grad_norm": 3.2154440879821777, "learning_rate": 1.6276409349955945e-05, "loss": 0.0258, "step": 7950 }, { "epoch": 7.645192307692308, "grad_norm": 1.7126855850219727, "learning_rate": 1.6275439539495925e-05, "loss": 0.013, "step": 7951 }, { "epoch": 7.6461538461538465, "grad_norm": 0.2279919534921646, "learning_rate": 1.6274469631659503e-05, "loss": 0.0019, "step": 7952 }, { "epoch": 7.647115384615384, "grad_norm": 4.912763595581055, "learning_rate": 1.6273499626461726e-05, "loss": 0.1418, "step": 7953 }, { "epoch": 7.648076923076923, "grad_norm": 2.149643898010254, "learning_rate": 1.6272529523917643e-05, "loss": 0.0213, "step": 7954 }, { "epoch": 7.649038461538462, "grad_norm": 1.4749897718429565, "learning_rate": 1.6271559324042317e-05, "loss": 0.0236, "step": 7955 }, { "epoch": 7.65, "grad_norm": 2.1410346031188965, "learning_rate": 1.6270589026850795e-05, "loss": 0.0363, "step": 7956 }, { "epoch": 7.650961538461538, "grad_norm": 3.996265172958374, "learning_rate": 1.626961863235814e-05, "loss": 0.0395, "step": 7957 }, { "epoch": 7.651923076923077, "grad_norm": 1.8148869276046753, "learning_rate": 1.62686481405794e-05, "loss": 0.0145, "step": 7958 }, { "epoch": 7.6528846153846155, "grad_norm": 1.8183553218841553, "learning_rate": 1.6267677551529634e-05, "loss": 0.0459, "step": 7959 }, { "epoch": 7.653846153846154, "grad_norm": 0.09948805719614029, "learning_rate": 1.6266706865223913e-05, "loss": 0.0009, "step": 7960 }, { "epoch": 7.654807692307692, "grad_norm": 2.9887914657592773, "learning_rate": 1.6265736081677298e-05, "loss": 0.0346, "step": 7961 }, { "epoch": 7.655769230769231, "grad_norm": 1.2928732633590698, "learning_rate": 1.6264765200904843e-05, "loss": 0.0157, "step": 7962 }, { "epoch": 7.656730769230769, "grad_norm": 1.9414634704589844, "learning_rate": 1.626379422292162e-05, "loss": 0.0206, "step": 7963 }, { "epoch": 7.657692307692308, "grad_norm": 1.246583342552185, "learning_rate": 1.6262823147742698e-05, "loss": 0.0108, "step": 7964 }, { "epoch": 7.658653846153846, "grad_norm": 1.7121323347091675, "learning_rate": 1.626185197538314e-05, "loss": 0.0281, "step": 7965 }, { "epoch": 7.6596153846153845, "grad_norm": 4.275106430053711, "learning_rate": 1.6260880705858014e-05, "loss": 0.1368, "step": 7966 }, { "epoch": 7.660576923076923, "grad_norm": 2.925536870956421, "learning_rate": 1.6259909339182402e-05, "loss": 0.0325, "step": 7967 }, { "epoch": 7.661538461538462, "grad_norm": 3.775726556777954, "learning_rate": 1.6258937875371362e-05, "loss": 0.1128, "step": 7968 }, { "epoch": 7.6625, "grad_norm": 3.550267219543457, "learning_rate": 1.6257966314439985e-05, "loss": 0.0174, "step": 7969 }, { "epoch": 7.663461538461538, "grad_norm": 1.3990674018859863, "learning_rate": 1.6256994656403328e-05, "loss": 0.0096, "step": 7970 }, { "epoch": 7.664423076923077, "grad_norm": 3.753967046737671, "learning_rate": 1.6256022901276485e-05, "loss": 0.0747, "step": 7971 }, { "epoch": 7.665384615384616, "grad_norm": 0.8409728407859802, "learning_rate": 1.6255051049074523e-05, "loss": 0.0069, "step": 7972 }, { "epoch": 7.6663461538461535, "grad_norm": 2.4179491996765137, "learning_rate": 1.6254079099812533e-05, "loss": 0.0676, "step": 7973 }, { "epoch": 7.667307692307692, "grad_norm": 4.075190544128418, "learning_rate": 1.6253107053505585e-05, "loss": 0.0456, "step": 7974 }, { "epoch": 7.668269230769231, "grad_norm": 1.1363966464996338, "learning_rate": 1.625213491016877e-05, "loss": 0.01, "step": 7975 }, { "epoch": 7.6692307692307695, "grad_norm": 3.879289150238037, "learning_rate": 1.6251162669817172e-05, "loss": 0.0532, "step": 7976 }, { "epoch": 7.670192307692307, "grad_norm": 0.9003033638000488, "learning_rate": 1.6250190332465875e-05, "loss": 0.0036, "step": 7977 }, { "epoch": 7.671153846153846, "grad_norm": 5.211202144622803, "learning_rate": 1.624921789812997e-05, "loss": 0.117, "step": 7978 }, { "epoch": 7.672115384615385, "grad_norm": 5.04176664352417, "learning_rate": 1.6248245366824543e-05, "loss": 0.0431, "step": 7979 }, { "epoch": 7.673076923076923, "grad_norm": 3.5675902366638184, "learning_rate": 1.624727273856469e-05, "loss": 0.0432, "step": 7980 }, { "epoch": 7.674038461538462, "grad_norm": 5.2003173828125, "learning_rate": 1.6246300013365493e-05, "loss": 0.0305, "step": 7981 }, { "epoch": 7.675, "grad_norm": 0.4150409698486328, "learning_rate": 1.6245327191242054e-05, "loss": 0.004, "step": 7982 }, { "epoch": 7.6759615384615385, "grad_norm": 2.95746111869812, "learning_rate": 1.6244354272209468e-05, "loss": 0.0458, "step": 7983 }, { "epoch": 7.676923076923077, "grad_norm": 1.726657748222351, "learning_rate": 1.6243381256282834e-05, "loss": 0.0069, "step": 7984 }, { "epoch": 7.677884615384615, "grad_norm": 3.074568271636963, "learning_rate": 1.6242408143477245e-05, "loss": 0.0277, "step": 7985 }, { "epoch": 7.678846153846154, "grad_norm": 3.0381362438201904, "learning_rate": 1.62414349338078e-05, "loss": 0.0294, "step": 7986 }, { "epoch": 7.679807692307692, "grad_norm": 1.7889044284820557, "learning_rate": 1.6240461627289607e-05, "loss": 0.0171, "step": 7987 }, { "epoch": 7.680769230769231, "grad_norm": 0.9656373858451843, "learning_rate": 1.6239488223937764e-05, "loss": 0.0076, "step": 7988 }, { "epoch": 7.68173076923077, "grad_norm": 5.980212688446045, "learning_rate": 1.6238514723767372e-05, "loss": 0.1597, "step": 7989 }, { "epoch": 7.6826923076923075, "grad_norm": 2.9772274494171143, "learning_rate": 1.6237541126793548e-05, "loss": 0.0364, "step": 7990 }, { "epoch": 7.683653846153846, "grad_norm": 3.2770180702209473, "learning_rate": 1.6236567433031387e-05, "loss": 0.0855, "step": 7991 }, { "epoch": 7.684615384615385, "grad_norm": 2.0477547645568848, "learning_rate": 1.6235593642496007e-05, "loss": 0.0414, "step": 7992 }, { "epoch": 7.685576923076923, "grad_norm": 2.1254310607910156, "learning_rate": 1.6234619755202518e-05, "loss": 0.0191, "step": 7993 }, { "epoch": 7.686538461538461, "grad_norm": 3.172360420227051, "learning_rate": 1.6233645771166022e-05, "loss": 0.061, "step": 7994 }, { "epoch": 7.6875, "grad_norm": 1.489962100982666, "learning_rate": 1.6232671690401643e-05, "loss": 0.0197, "step": 7995 }, { "epoch": 7.688461538461539, "grad_norm": 1.0225117206573486, "learning_rate": 1.6231697512924492e-05, "loss": 0.0055, "step": 7996 }, { "epoch": 7.689423076923077, "grad_norm": 2.8581981658935547, "learning_rate": 1.6230723238749684e-05, "loss": 0.0182, "step": 7997 }, { "epoch": 7.690384615384615, "grad_norm": 0.9802869558334351, "learning_rate": 1.622974886789234e-05, "loss": 0.0041, "step": 7998 }, { "epoch": 7.691346153846154, "grad_norm": 0.42990195751190186, "learning_rate": 1.6228774400367573e-05, "loss": 0.0037, "step": 7999 }, { "epoch": 7.6923076923076925, "grad_norm": 1.4476991891860962, "learning_rate": 1.6227799836190514e-05, "loss": 0.0077, "step": 8000 }, { "epoch": 7.69326923076923, "grad_norm": 1.3411128520965576, "learning_rate": 1.6226825175376278e-05, "loss": 0.0055, "step": 8001 }, { "epoch": 7.694230769230769, "grad_norm": 2.450592279434204, "learning_rate": 1.622585041793999e-05, "loss": 0.0337, "step": 8002 }, { "epoch": 7.695192307692308, "grad_norm": 2.555093765258789, "learning_rate": 1.6224875563896775e-05, "loss": 0.1467, "step": 8003 }, { "epoch": 7.696153846153846, "grad_norm": 3.241830587387085, "learning_rate": 1.6223900613261766e-05, "loss": 0.0418, "step": 8004 }, { "epoch": 7.697115384615385, "grad_norm": 2.6382038593292236, "learning_rate": 1.6222925566050084e-05, "loss": 0.0375, "step": 8005 }, { "epoch": 7.698076923076923, "grad_norm": 4.2275495529174805, "learning_rate": 1.622195042227686e-05, "loss": 0.039, "step": 8006 }, { "epoch": 7.6990384615384615, "grad_norm": 1.7008107900619507, "learning_rate": 1.6220975181957227e-05, "loss": 0.014, "step": 8007 }, { "epoch": 7.7, "grad_norm": 1.01779043674469, "learning_rate": 1.6219999845106318e-05, "loss": 0.0078, "step": 8008 }, { "epoch": 7.700961538461538, "grad_norm": 1.9091264009475708, "learning_rate": 1.6219024411739267e-05, "loss": 0.0207, "step": 8009 }, { "epoch": 7.701923076923077, "grad_norm": 2.407625675201416, "learning_rate": 1.621804888187121e-05, "loss": 0.0339, "step": 8010 }, { "epoch": 7.702884615384615, "grad_norm": 1.0354607105255127, "learning_rate": 1.621707325551728e-05, "loss": 0.008, "step": 8011 }, { "epoch": 7.703846153846154, "grad_norm": 1.8130959272384644, "learning_rate": 1.6216097532692624e-05, "loss": 0.0096, "step": 8012 }, { "epoch": 7.704807692307693, "grad_norm": 3.7841365337371826, "learning_rate": 1.621512171341238e-05, "loss": 0.0668, "step": 8013 }, { "epoch": 7.7057692307692305, "grad_norm": 6.600587368011475, "learning_rate": 1.6214145797691687e-05, "loss": 0.0923, "step": 8014 }, { "epoch": 7.706730769230769, "grad_norm": 2.9379723072052, "learning_rate": 1.6213169785545688e-05, "loss": 0.0349, "step": 8015 }, { "epoch": 7.707692307692308, "grad_norm": 3.3571419715881348, "learning_rate": 1.6212193676989533e-05, "loss": 0.0378, "step": 8016 }, { "epoch": 7.7086538461538465, "grad_norm": 2.316208600997925, "learning_rate": 1.6211217472038358e-05, "loss": 0.0186, "step": 8017 }, { "epoch": 7.709615384615384, "grad_norm": 3.2920868396759033, "learning_rate": 1.6210241170707322e-05, "loss": 0.0256, "step": 8018 }, { "epoch": 7.710576923076923, "grad_norm": 1.267061471939087, "learning_rate": 1.6209264773011567e-05, "loss": 0.0042, "step": 8019 }, { "epoch": 7.711538461538462, "grad_norm": 3.0869369506835938, "learning_rate": 1.620828827896625e-05, "loss": 0.0267, "step": 8020 }, { "epoch": 7.7125, "grad_norm": 3.394812822341919, "learning_rate": 1.620731168858652e-05, "loss": 0.0602, "step": 8021 }, { "epoch": 7.713461538461538, "grad_norm": 2.8356869220733643, "learning_rate": 1.6206335001887528e-05, "loss": 0.0424, "step": 8022 }, { "epoch": 7.714423076923077, "grad_norm": 1.5840107202529907, "learning_rate": 1.6205358218884434e-05, "loss": 0.0084, "step": 8023 }, { "epoch": 7.7153846153846155, "grad_norm": 3.049194812774658, "learning_rate": 1.6204381339592393e-05, "loss": 0.0529, "step": 8024 }, { "epoch": 7.716346153846154, "grad_norm": 3.601475954055786, "learning_rate": 1.6203404364026564e-05, "loss": 0.0624, "step": 8025 }, { "epoch": 7.717307692307692, "grad_norm": 0.9710744619369507, "learning_rate": 1.6202427292202104e-05, "loss": 0.0031, "step": 8026 }, { "epoch": 7.718269230769231, "grad_norm": 3.8231616020202637, "learning_rate": 1.620145012413418e-05, "loss": 0.0844, "step": 8027 }, { "epoch": 7.719230769230769, "grad_norm": 2.0359444618225098, "learning_rate": 1.6200472859837946e-05, "loss": 0.0241, "step": 8028 }, { "epoch": 7.720192307692308, "grad_norm": 5.229976654052734, "learning_rate": 1.6199495499328575e-05, "loss": 0.0927, "step": 8029 }, { "epoch": 7.721153846153846, "grad_norm": 1.7818599939346313, "learning_rate": 1.6198518042621228e-05, "loss": 0.0167, "step": 8030 }, { "epoch": 7.7221153846153845, "grad_norm": 3.626441240310669, "learning_rate": 1.6197540489731073e-05, "loss": 0.0339, "step": 8031 }, { "epoch": 7.723076923076923, "grad_norm": 1.3912211656570435, "learning_rate": 1.619656284067328e-05, "loss": 0.01, "step": 8032 }, { "epoch": 7.724038461538462, "grad_norm": 3.4968295097351074, "learning_rate": 1.619558509546302e-05, "loss": 0.0317, "step": 8033 }, { "epoch": 7.725, "grad_norm": 3.0204031467437744, "learning_rate": 1.6194607254115456e-05, "loss": 0.0453, "step": 8034 }, { "epoch": 7.725961538461538, "grad_norm": 3.439828634262085, "learning_rate": 1.6193629316645777e-05, "loss": 0.033, "step": 8035 }, { "epoch": 7.726923076923077, "grad_norm": 2.1508543491363525, "learning_rate": 1.6192651283069145e-05, "loss": 0.0351, "step": 8036 }, { "epoch": 7.727884615384616, "grad_norm": 3.2028794288635254, "learning_rate": 1.6191673153400742e-05, "loss": 0.0459, "step": 8037 }, { "epoch": 7.7288461538461535, "grad_norm": 1.8746927976608276, "learning_rate": 1.619069492765574e-05, "loss": 0.0222, "step": 8038 }, { "epoch": 7.729807692307692, "grad_norm": 2.0559775829315186, "learning_rate": 1.6189716605849327e-05, "loss": 0.0258, "step": 8039 }, { "epoch": 7.730769230769231, "grad_norm": 1.5790263414382935, "learning_rate": 1.6188738187996676e-05, "loss": 0.012, "step": 8040 }, { "epoch": 7.7317307692307695, "grad_norm": 3.8902878761291504, "learning_rate": 1.6187759674112972e-05, "loss": 0.045, "step": 8041 }, { "epoch": 7.732692307692307, "grad_norm": 5.845954895019531, "learning_rate": 1.6186781064213402e-05, "loss": 0.1383, "step": 8042 }, { "epoch": 7.733653846153846, "grad_norm": 1.462241530418396, "learning_rate": 1.6185802358313147e-05, "loss": 0.0083, "step": 8043 }, { "epoch": 7.734615384615385, "grad_norm": 3.076591730117798, "learning_rate": 1.6184823556427392e-05, "loss": 0.0682, "step": 8044 }, { "epoch": 7.735576923076923, "grad_norm": 1.779697060585022, "learning_rate": 1.6183844658571328e-05, "loss": 0.0454, "step": 8045 }, { "epoch": 7.736538461538462, "grad_norm": 2.102947473526001, "learning_rate": 1.6182865664760146e-05, "loss": 0.019, "step": 8046 }, { "epoch": 7.7375, "grad_norm": 1.2894351482391357, "learning_rate": 1.618188657500903e-05, "loss": 0.0058, "step": 8047 }, { "epoch": 7.7384615384615385, "grad_norm": 2.626718521118164, "learning_rate": 1.6180907389333184e-05, "loss": 0.0463, "step": 8048 }, { "epoch": 7.739423076923077, "grad_norm": 1.3210501670837402, "learning_rate": 1.6179928107747796e-05, "loss": 0.0152, "step": 8049 }, { "epoch": 7.740384615384615, "grad_norm": 1.559281587600708, "learning_rate": 1.617894873026806e-05, "loss": 0.0185, "step": 8050 }, { "epoch": 7.741346153846154, "grad_norm": 3.133263111114502, "learning_rate": 1.6177969256909177e-05, "loss": 0.0197, "step": 8051 }, { "epoch": 7.742307692307692, "grad_norm": 2.5862019062042236, "learning_rate": 1.617698968768634e-05, "loss": 0.0387, "step": 8052 }, { "epoch": 7.743269230769231, "grad_norm": 5.177099704742432, "learning_rate": 1.6176010022614753e-05, "loss": 0.1842, "step": 8053 }, { "epoch": 7.74423076923077, "grad_norm": 3.5609493255615234, "learning_rate": 1.6175030261709615e-05, "loss": 0.0468, "step": 8054 }, { "epoch": 7.7451923076923075, "grad_norm": 2.7715179920196533, "learning_rate": 1.6174050404986135e-05, "loss": 0.0332, "step": 8055 }, { "epoch": 7.746153846153846, "grad_norm": 4.0807647705078125, "learning_rate": 1.617307045245951e-05, "loss": 0.0182, "step": 8056 }, { "epoch": 7.747115384615385, "grad_norm": 0.8630942106246948, "learning_rate": 1.6172090404144952e-05, "loss": 0.004, "step": 8057 }, { "epoch": 7.748076923076923, "grad_norm": 3.806227207183838, "learning_rate": 1.6171110260057664e-05, "loss": 0.0712, "step": 8058 }, { "epoch": 7.749038461538461, "grad_norm": 1.1954545974731445, "learning_rate": 1.6170130020212854e-05, "loss": 0.0062, "step": 8059 }, { "epoch": 7.75, "grad_norm": 1.9651997089385986, "learning_rate": 1.616914968462574e-05, "loss": 0.0135, "step": 8060 }, { "epoch": 7.750961538461539, "grad_norm": 1.8480563163757324, "learning_rate": 1.616816925331153e-05, "loss": 0.0214, "step": 8061 }, { "epoch": 7.751923076923077, "grad_norm": 3.1834633350372314, "learning_rate": 1.6167188726285433e-05, "loss": 0.0214, "step": 8062 }, { "epoch": 7.752884615384615, "grad_norm": 0.7082204222679138, "learning_rate": 1.616620810356267e-05, "loss": 0.0054, "step": 8063 }, { "epoch": 7.753846153846154, "grad_norm": 1.0306556224822998, "learning_rate": 1.6165227385158454e-05, "loss": 0.0051, "step": 8064 }, { "epoch": 7.7548076923076925, "grad_norm": 3.5013110637664795, "learning_rate": 1.6164246571088e-05, "loss": 0.0355, "step": 8065 }, { "epoch": 7.75576923076923, "grad_norm": 3.255098819732666, "learning_rate": 1.6163265661366536e-05, "loss": 0.0389, "step": 8066 }, { "epoch": 7.756730769230769, "grad_norm": 4.260367393493652, "learning_rate": 1.6162284656009276e-05, "loss": 0.0948, "step": 8067 }, { "epoch": 7.757692307692308, "grad_norm": 1.3193855285644531, "learning_rate": 1.6161303555031446e-05, "loss": 0.0085, "step": 8068 }, { "epoch": 7.758653846153846, "grad_norm": 0.4464302659034729, "learning_rate": 1.6160322358448265e-05, "loss": 0.0037, "step": 8069 }, { "epoch": 7.759615384615385, "grad_norm": 2.4818077087402344, "learning_rate": 1.615934106627496e-05, "loss": 0.0578, "step": 8070 }, { "epoch": 7.760576923076923, "grad_norm": 4.439877510070801, "learning_rate": 1.6158359678526763e-05, "loss": 0.0398, "step": 8071 }, { "epoch": 7.7615384615384615, "grad_norm": 3.6368045806884766, "learning_rate": 1.6157378195218897e-05, "loss": 0.0409, "step": 8072 }, { "epoch": 7.7625, "grad_norm": 0.6106494665145874, "learning_rate": 1.6156396616366593e-05, "loss": 0.0059, "step": 8073 }, { "epoch": 7.763461538461538, "grad_norm": 1.9619519710540771, "learning_rate": 1.6155414941985083e-05, "loss": 0.0134, "step": 8074 }, { "epoch": 7.764423076923077, "grad_norm": 1.9076039791107178, "learning_rate": 1.61544331720896e-05, "loss": 0.0358, "step": 8075 }, { "epoch": 7.765384615384615, "grad_norm": 2.550788402557373, "learning_rate": 1.6153451306695373e-05, "loss": 0.0263, "step": 8076 }, { "epoch": 7.766346153846154, "grad_norm": 1.8728158473968506, "learning_rate": 1.6152469345817643e-05, "loss": 0.0464, "step": 8077 }, { "epoch": 7.767307692307693, "grad_norm": 2.222407579421997, "learning_rate": 1.6151487289471647e-05, "loss": 0.0145, "step": 8078 }, { "epoch": 7.7682692307692305, "grad_norm": 3.7282543182373047, "learning_rate": 1.6150505137672626e-05, "loss": 0.0904, "step": 8079 }, { "epoch": 7.769230769230769, "grad_norm": 0.4351408779621124, "learning_rate": 1.6149522890435815e-05, "loss": 0.0029, "step": 8080 }, { "epoch": 7.770192307692308, "grad_norm": 5.588055610656738, "learning_rate": 1.6148540547776453e-05, "loss": 0.0791, "step": 8081 }, { "epoch": 7.7711538461538465, "grad_norm": 0.5258685946464539, "learning_rate": 1.614755810970979e-05, "loss": 0.0036, "step": 8082 }, { "epoch": 7.772115384615384, "grad_norm": 4.497735977172852, "learning_rate": 1.614657557625107e-05, "loss": 0.1474, "step": 8083 }, { "epoch": 7.773076923076923, "grad_norm": 4.713917255401611, "learning_rate": 1.6145592947415538e-05, "loss": 0.0648, "step": 8084 }, { "epoch": 7.774038461538462, "grad_norm": 1.1751445531845093, "learning_rate": 1.614461022321844e-05, "loss": 0.0103, "step": 8085 }, { "epoch": 7.775, "grad_norm": 1.1988450288772583, "learning_rate": 1.614362740367502e-05, "loss": 0.0088, "step": 8086 }, { "epoch": 7.775961538461538, "grad_norm": 3.7028632164001465, "learning_rate": 1.614264448880054e-05, "loss": 0.0418, "step": 8087 }, { "epoch": 7.776923076923077, "grad_norm": 2.5111894607543945, "learning_rate": 1.6141661478610246e-05, "loss": 0.0399, "step": 8088 }, { "epoch": 7.7778846153846155, "grad_norm": 1.3118842840194702, "learning_rate": 1.6140678373119388e-05, "loss": 0.015, "step": 8089 }, { "epoch": 7.778846153846154, "grad_norm": 1.431270956993103, "learning_rate": 1.6139695172343226e-05, "loss": 0.0082, "step": 8090 }, { "epoch": 7.779807692307692, "grad_norm": 1.4245425462722778, "learning_rate": 1.6138711876297013e-05, "loss": 0.0121, "step": 8091 }, { "epoch": 7.780769230769231, "grad_norm": 3.7597830295562744, "learning_rate": 1.6137728484996012e-05, "loss": 0.1419, "step": 8092 }, { "epoch": 7.781730769230769, "grad_norm": 2.0973033905029297, "learning_rate": 1.6136744998455477e-05, "loss": 0.0187, "step": 8093 }, { "epoch": 7.782692307692308, "grad_norm": 2.5800044536590576, "learning_rate": 1.613576141669067e-05, "loss": 0.0251, "step": 8094 }, { "epoch": 7.783653846153846, "grad_norm": 0.9600571990013123, "learning_rate": 1.6134777739716855e-05, "loss": 0.0095, "step": 8095 }, { "epoch": 7.7846153846153845, "grad_norm": 3.1758487224578857, "learning_rate": 1.6133793967549295e-05, "loss": 0.0388, "step": 8096 }, { "epoch": 7.785576923076923, "grad_norm": 2.075599431991577, "learning_rate": 1.6132810100203252e-05, "loss": 0.0113, "step": 8097 }, { "epoch": 7.786538461538462, "grad_norm": 4.937253475189209, "learning_rate": 1.6131826137694e-05, "loss": 0.1011, "step": 8098 }, { "epoch": 7.7875, "grad_norm": 3.348914384841919, "learning_rate": 1.6130842080036804e-05, "loss": 0.0541, "step": 8099 }, { "epoch": 7.788461538461538, "grad_norm": 3.9557619094848633, "learning_rate": 1.612985792724693e-05, "loss": 0.0647, "step": 8100 }, { "epoch": 7.789423076923077, "grad_norm": 2.3678841590881348, "learning_rate": 1.612887367933965e-05, "loss": 0.0139, "step": 8101 }, { "epoch": 7.790384615384616, "grad_norm": 3.288130521774292, "learning_rate": 1.612788933633024e-05, "loss": 0.0336, "step": 8102 }, { "epoch": 7.7913461538461535, "grad_norm": 1.6068336963653564, "learning_rate": 1.612690489823398e-05, "loss": 0.0097, "step": 8103 }, { "epoch": 7.792307692307692, "grad_norm": 2.8948049545288086, "learning_rate": 1.6125920365066127e-05, "loss": 0.031, "step": 8104 }, { "epoch": 7.793269230769231, "grad_norm": 2.1691746711730957, "learning_rate": 1.6124935736841975e-05, "loss": 0.0263, "step": 8105 }, { "epoch": 7.7942307692307695, "grad_norm": 1.9560840129852295, "learning_rate": 1.6123951013576796e-05, "loss": 0.0164, "step": 8106 }, { "epoch": 7.795192307692307, "grad_norm": 3.7269606590270996, "learning_rate": 1.612296619528587e-05, "loss": 0.0491, "step": 8107 }, { "epoch": 7.796153846153846, "grad_norm": 1.0083961486816406, "learning_rate": 1.612198128198448e-05, "loss": 0.0085, "step": 8108 }, { "epoch": 7.797115384615385, "grad_norm": 2.659005880355835, "learning_rate": 1.6120996273687905e-05, "loss": 0.0218, "step": 8109 }, { "epoch": 7.798076923076923, "grad_norm": 0.2600357234477997, "learning_rate": 1.612001117041144e-05, "loss": 0.0026, "step": 8110 }, { "epoch": 7.799038461538462, "grad_norm": 2.328158378601074, "learning_rate": 1.6119025972170357e-05, "loss": 0.0204, "step": 8111 }, { "epoch": 7.8, "grad_norm": 1.0808699131011963, "learning_rate": 1.6118040678979954e-05, "loss": 0.0101, "step": 8112 }, { "epoch": 7.8009615384615385, "grad_norm": 6.195700168609619, "learning_rate": 1.6117055290855514e-05, "loss": 0.1703, "step": 8113 }, { "epoch": 7.801923076923077, "grad_norm": 2.1907877922058105, "learning_rate": 1.6116069807812332e-05, "loss": 0.0161, "step": 8114 }, { "epoch": 7.802884615384615, "grad_norm": 3.7292118072509766, "learning_rate": 1.611508422986569e-05, "loss": 0.0524, "step": 8115 }, { "epoch": 7.803846153846154, "grad_norm": 1.6350796222686768, "learning_rate": 1.6114098557030896e-05, "loss": 0.0081, "step": 8116 }, { "epoch": 7.804807692307692, "grad_norm": 2.2719368934631348, "learning_rate": 1.6113112789323233e-05, "loss": 0.0145, "step": 8117 }, { "epoch": 7.805769230769231, "grad_norm": 2.4137399196624756, "learning_rate": 1.6112126926758005e-05, "loss": 0.0495, "step": 8118 }, { "epoch": 7.80673076923077, "grad_norm": 0.9958486557006836, "learning_rate": 1.6111140969350504e-05, "loss": 0.0187, "step": 8119 }, { "epoch": 7.8076923076923075, "grad_norm": 3.5292842388153076, "learning_rate": 1.6110154917116028e-05, "loss": 0.0297, "step": 8120 }, { "epoch": 7.808653846153846, "grad_norm": 1.850777268409729, "learning_rate": 1.6109168770069884e-05, "loss": 0.0299, "step": 8121 }, { "epoch": 7.809615384615385, "grad_norm": 2.5038254261016846, "learning_rate": 1.610818252822737e-05, "loss": 0.04, "step": 8122 }, { "epoch": 7.810576923076923, "grad_norm": 2.1079342365264893, "learning_rate": 1.6107196191603792e-05, "loss": 0.0336, "step": 8123 }, { "epoch": 7.811538461538461, "grad_norm": 3.938854694366455, "learning_rate": 1.610620976021445e-05, "loss": 0.0483, "step": 8124 }, { "epoch": 7.8125, "grad_norm": 1.4661835432052612, "learning_rate": 1.6105223234074657e-05, "loss": 0.0433, "step": 8125 }, { "epoch": 7.813461538461539, "grad_norm": 2.8783724308013916, "learning_rate": 1.6104236613199716e-05, "loss": 0.0599, "step": 8126 }, { "epoch": 7.814423076923077, "grad_norm": 3.0511724948883057, "learning_rate": 1.6103249897604944e-05, "loss": 0.1018, "step": 8127 }, { "epoch": 7.815384615384615, "grad_norm": 2.9207396507263184, "learning_rate": 1.610226308730564e-05, "loss": 0.0356, "step": 8128 }, { "epoch": 7.816346153846154, "grad_norm": 0.920228123664856, "learning_rate": 1.6101276182317125e-05, "loss": 0.006, "step": 8129 }, { "epoch": 7.8173076923076925, "grad_norm": 1.5842081308364868, "learning_rate": 1.6100289182654712e-05, "loss": 0.0132, "step": 8130 }, { "epoch": 7.81826923076923, "grad_norm": 0.31740573048591614, "learning_rate": 1.6099302088333714e-05, "loss": 0.0022, "step": 8131 }, { "epoch": 7.819230769230769, "grad_norm": 3.0914602279663086, "learning_rate": 1.6098314899369446e-05, "loss": 0.0387, "step": 8132 }, { "epoch": 7.820192307692308, "grad_norm": 0.5048596262931824, "learning_rate": 1.6097327615777235e-05, "loss": 0.0052, "step": 8133 }, { "epoch": 7.821153846153846, "grad_norm": 1.0575929880142212, "learning_rate": 1.609634023757239e-05, "loss": 0.0055, "step": 8134 }, { "epoch": 7.822115384615385, "grad_norm": 2.37916898727417, "learning_rate": 1.609535276477024e-05, "loss": 0.0124, "step": 8135 }, { "epoch": 7.823076923076923, "grad_norm": 3.2772603034973145, "learning_rate": 1.60943651973861e-05, "loss": 0.0737, "step": 8136 }, { "epoch": 7.8240384615384615, "grad_norm": 1.9979594945907593, "learning_rate": 1.6093377535435304e-05, "loss": 0.0243, "step": 8137 }, { "epoch": 7.825, "grad_norm": 3.901580810546875, "learning_rate": 1.609238977893317e-05, "loss": 0.0514, "step": 8138 }, { "epoch": 7.825961538461538, "grad_norm": 2.029740333557129, "learning_rate": 1.6091401927895028e-05, "loss": 0.016, "step": 8139 }, { "epoch": 7.826923076923077, "grad_norm": 2.4290473461151123, "learning_rate": 1.6090413982336208e-05, "loss": 0.0249, "step": 8140 }, { "epoch": 7.827884615384615, "grad_norm": 3.922184705734253, "learning_rate": 1.6089425942272035e-05, "loss": 0.0329, "step": 8141 }, { "epoch": 7.828846153846154, "grad_norm": 3.2758779525756836, "learning_rate": 1.6088437807717846e-05, "loss": 0.035, "step": 8142 }, { "epoch": 7.829807692307693, "grad_norm": 0.9004072546958923, "learning_rate": 1.608744957868897e-05, "loss": 0.0084, "step": 8143 }, { "epoch": 7.8307692307692305, "grad_norm": 3.467228412628174, "learning_rate": 1.6086461255200743e-05, "loss": 0.099, "step": 8144 }, { "epoch": 7.831730769230769, "grad_norm": 3.048407793045044, "learning_rate": 1.6085472837268504e-05, "loss": 0.0307, "step": 8145 }, { "epoch": 7.832692307692308, "grad_norm": 2.969224214553833, "learning_rate": 1.6084484324907582e-05, "loss": 0.0453, "step": 8146 }, { "epoch": 7.8336538461538465, "grad_norm": 5.522034168243408, "learning_rate": 1.6083495718133326e-05, "loss": 0.153, "step": 8147 }, { "epoch": 7.834615384615384, "grad_norm": 1.9219025373458862, "learning_rate": 1.608250701696107e-05, "loss": 0.0247, "step": 8148 }, { "epoch": 7.835576923076923, "grad_norm": 0.05156487599015236, "learning_rate": 1.6081518221406153e-05, "loss": 0.0005, "step": 8149 }, { "epoch": 7.836538461538462, "grad_norm": 1.6801491975784302, "learning_rate": 1.6080529331483927e-05, "loss": 0.0123, "step": 8150 }, { "epoch": 7.8375, "grad_norm": 3.0487759113311768, "learning_rate": 1.607954034720973e-05, "loss": 0.0515, "step": 8151 }, { "epoch": 7.838461538461538, "grad_norm": 1.2652333974838257, "learning_rate": 1.607855126859891e-05, "loss": 0.0045, "step": 8152 }, { "epoch": 7.839423076923077, "grad_norm": 2.7709360122680664, "learning_rate": 1.6077562095666818e-05, "loss": 0.0393, "step": 8153 }, { "epoch": 7.8403846153846155, "grad_norm": 2.1088364124298096, "learning_rate": 1.6076572828428795e-05, "loss": 0.0197, "step": 8154 }, { "epoch": 7.841346153846154, "grad_norm": 3.9259541034698486, "learning_rate": 1.6075583466900196e-05, "loss": 0.0498, "step": 8155 }, { "epoch": 7.842307692307692, "grad_norm": 2.8139097690582275, "learning_rate": 1.6074594011096376e-05, "loss": 0.0688, "step": 8156 }, { "epoch": 7.843269230769231, "grad_norm": 4.250286102294922, "learning_rate": 1.6073604461032687e-05, "loss": 0.0614, "step": 8157 }, { "epoch": 7.844230769230769, "grad_norm": 3.573486089706421, "learning_rate": 1.607261481672448e-05, "loss": 0.0704, "step": 8158 }, { "epoch": 7.845192307692308, "grad_norm": 4.121364593505859, "learning_rate": 1.6071625078187113e-05, "loss": 0.0412, "step": 8159 }, { "epoch": 7.846153846153846, "grad_norm": 1.1624014377593994, "learning_rate": 1.6070635245435946e-05, "loss": 0.01, "step": 8160 }, { "epoch": 7.8471153846153845, "grad_norm": 2.408764123916626, "learning_rate": 1.606964531848634e-05, "loss": 0.0268, "step": 8161 }, { "epoch": 7.848076923076923, "grad_norm": 2.445692539215088, "learning_rate": 1.6068655297353645e-05, "loss": 0.0244, "step": 8162 }, { "epoch": 7.849038461538462, "grad_norm": 4.237918853759766, "learning_rate": 1.606766518205324e-05, "loss": 0.1097, "step": 8163 }, { "epoch": 7.85, "grad_norm": 2.224148750305176, "learning_rate": 1.6066674972600474e-05, "loss": 0.021, "step": 8164 }, { "epoch": 7.850961538461538, "grad_norm": 3.413735866546631, "learning_rate": 1.6065684669010722e-05, "loss": 0.0409, "step": 8165 }, { "epoch": 7.851923076923077, "grad_norm": 2.10075044631958, "learning_rate": 1.6064694271299343e-05, "loss": 0.0274, "step": 8166 }, { "epoch": 7.852884615384616, "grad_norm": 2.611720561981201, "learning_rate": 1.6063703779481712e-05, "loss": 0.0261, "step": 8167 }, { "epoch": 7.8538461538461535, "grad_norm": 1.7030236721038818, "learning_rate": 1.606271319357319e-05, "loss": 0.0193, "step": 8168 }, { "epoch": 7.854807692307692, "grad_norm": 0.3206578493118286, "learning_rate": 1.6061722513589157e-05, "loss": 0.0023, "step": 8169 }, { "epoch": 7.855769230769231, "grad_norm": 2.000990867614746, "learning_rate": 1.6060731739544983e-05, "loss": 0.0271, "step": 8170 }, { "epoch": 7.8567307692307695, "grad_norm": 4.765812873840332, "learning_rate": 1.6059740871456035e-05, "loss": 0.0441, "step": 8171 }, { "epoch": 7.857692307692307, "grad_norm": 1.7297309637069702, "learning_rate": 1.60587499093377e-05, "loss": 0.0075, "step": 8172 }, { "epoch": 7.858653846153846, "grad_norm": 6.498708724975586, "learning_rate": 1.6057758853205343e-05, "loss": 0.0599, "step": 8173 }, { "epoch": 7.859615384615385, "grad_norm": 0.6025789976119995, "learning_rate": 1.6056767703074353e-05, "loss": 0.0039, "step": 8174 }, { "epoch": 7.860576923076923, "grad_norm": 4.076010227203369, "learning_rate": 1.60557764589601e-05, "loss": 0.0434, "step": 8175 }, { "epoch": 7.861538461538462, "grad_norm": 1.425586223602295, "learning_rate": 1.605478512087797e-05, "loss": 0.022, "step": 8176 }, { "epoch": 7.8625, "grad_norm": 2.6618895530700684, "learning_rate": 1.605379368884335e-05, "loss": 0.0727, "step": 8177 }, { "epoch": 7.8634615384615385, "grad_norm": 0.6166961193084717, "learning_rate": 1.6052802162871617e-05, "loss": 0.0045, "step": 8178 }, { "epoch": 7.864423076923077, "grad_norm": 3.1778385639190674, "learning_rate": 1.6051810542978157e-05, "loss": 0.0648, "step": 8179 }, { "epoch": 7.865384615384615, "grad_norm": 2.243638515472412, "learning_rate": 1.605081882917836e-05, "loss": 0.0324, "step": 8180 }, { "epoch": 7.866346153846154, "grad_norm": 5.071863174438477, "learning_rate": 1.6049827021487617e-05, "loss": 0.0226, "step": 8181 }, { "epoch": 7.867307692307692, "grad_norm": 3.7070770263671875, "learning_rate": 1.6048835119921312e-05, "loss": 0.0251, "step": 8182 }, { "epoch": 7.868269230769231, "grad_norm": 2.9129743576049805, "learning_rate": 1.6047843124494843e-05, "loss": 0.0486, "step": 8183 }, { "epoch": 7.86923076923077, "grad_norm": 2.151581048965454, "learning_rate": 1.6046851035223594e-05, "loss": 0.0114, "step": 8184 }, { "epoch": 7.8701923076923075, "grad_norm": 3.3855061531066895, "learning_rate": 1.6045858852122965e-05, "loss": 0.0258, "step": 8185 }, { "epoch": 7.871153846153846, "grad_norm": 5.1986403465271, "learning_rate": 1.6044866575208355e-05, "loss": 0.0888, "step": 8186 }, { "epoch": 7.872115384615385, "grad_norm": 2.353830575942993, "learning_rate": 1.6043874204495152e-05, "loss": 0.0134, "step": 8187 }, { "epoch": 7.873076923076923, "grad_norm": 3.804332971572876, "learning_rate": 1.6042881739998764e-05, "loss": 0.1306, "step": 8188 }, { "epoch": 7.874038461538461, "grad_norm": 2.6997106075286865, "learning_rate": 1.6041889181734584e-05, "loss": 0.0575, "step": 8189 }, { "epoch": 7.875, "grad_norm": 3.2527878284454346, "learning_rate": 1.6040896529718017e-05, "loss": 0.0247, "step": 8190 }, { "epoch": 7.875961538461539, "grad_norm": 2.966540813446045, "learning_rate": 1.6039903783964467e-05, "loss": 0.0249, "step": 8191 }, { "epoch": 7.876923076923077, "grad_norm": 4.663567066192627, "learning_rate": 1.6038910944489337e-05, "loss": 0.0595, "step": 8192 }, { "epoch": 7.877884615384615, "grad_norm": 3.7013018131256104, "learning_rate": 1.6037918011308032e-05, "loss": 0.0778, "step": 8193 }, { "epoch": 7.878846153846154, "grad_norm": 0.4677358567714691, "learning_rate": 1.6036924984435963e-05, "loss": 0.004, "step": 8194 }, { "epoch": 7.8798076923076925, "grad_norm": 3.1461381912231445, "learning_rate": 1.6035931863888535e-05, "loss": 0.0413, "step": 8195 }, { "epoch": 7.88076923076923, "grad_norm": 6.650115489959717, "learning_rate": 1.6034938649681157e-05, "loss": 0.1375, "step": 8196 }, { "epoch": 7.881730769230769, "grad_norm": 3.0600147247314453, "learning_rate": 1.603394534182925e-05, "loss": 0.0951, "step": 8197 }, { "epoch": 7.882692307692308, "grad_norm": 3.3237152099609375, "learning_rate": 1.6032951940348212e-05, "loss": 0.0302, "step": 8198 }, { "epoch": 7.883653846153846, "grad_norm": 1.442319393157959, "learning_rate": 1.6031958445253474e-05, "loss": 0.0096, "step": 8199 }, { "epoch": 7.884615384615385, "grad_norm": 5.349106788635254, "learning_rate": 1.603096485656044e-05, "loss": 0.1128, "step": 8200 }, { "epoch": 7.885576923076923, "grad_norm": 0.8535543084144592, "learning_rate": 1.6029971174284535e-05, "loss": 0.0099, "step": 8201 }, { "epoch": 7.8865384615384615, "grad_norm": 1.915257215499878, "learning_rate": 1.6028977398441173e-05, "loss": 0.0182, "step": 8202 }, { "epoch": 7.8875, "grad_norm": 1.993101954460144, "learning_rate": 1.6027983529045777e-05, "loss": 0.0409, "step": 8203 }, { "epoch": 7.888461538461538, "grad_norm": 5.98414945602417, "learning_rate": 1.602698956611377e-05, "loss": 0.0717, "step": 8204 }, { "epoch": 7.889423076923077, "grad_norm": 3.3821966648101807, "learning_rate": 1.6025995509660573e-05, "loss": 0.0493, "step": 8205 }, { "epoch": 7.890384615384615, "grad_norm": 2.205456256866455, "learning_rate": 1.602500135970161e-05, "loss": 0.0167, "step": 8206 }, { "epoch": 7.891346153846154, "grad_norm": 1.88550865650177, "learning_rate": 1.602400711625231e-05, "loss": 0.0345, "step": 8207 }, { "epoch": 7.892307692307693, "grad_norm": 2.315155506134033, "learning_rate": 1.60230127793281e-05, "loss": 0.0201, "step": 8208 }, { "epoch": 7.8932692307692305, "grad_norm": 3.520841598510742, "learning_rate": 1.6022018348944414e-05, "loss": 0.0529, "step": 8209 }, { "epoch": 7.894230769230769, "grad_norm": 2.2905616760253906, "learning_rate": 1.6021023825116672e-05, "loss": 0.0196, "step": 8210 }, { "epoch": 7.895192307692308, "grad_norm": 1.2174654006958008, "learning_rate": 1.602002920786031e-05, "loss": 0.0138, "step": 8211 }, { "epoch": 7.8961538461538465, "grad_norm": 0.6821128129959106, "learning_rate": 1.601903449719077e-05, "loss": 0.005, "step": 8212 }, { "epoch": 7.897115384615384, "grad_norm": 2.3527398109436035, "learning_rate": 1.6018039693123474e-05, "loss": 0.0306, "step": 8213 }, { "epoch": 7.898076923076923, "grad_norm": 2.8540236949920654, "learning_rate": 1.601704479567387e-05, "loss": 0.0197, "step": 8214 }, { "epoch": 7.899038461538462, "grad_norm": 1.776222586631775, "learning_rate": 1.6016049804857386e-05, "loss": 0.0234, "step": 8215 }, { "epoch": 7.9, "grad_norm": 2.982741594314575, "learning_rate": 1.6015054720689468e-05, "loss": 0.0352, "step": 8216 }, { "epoch": 7.900961538461538, "grad_norm": 1.63387131690979, "learning_rate": 1.6014059543185554e-05, "loss": 0.0166, "step": 8217 }, { "epoch": 7.901923076923077, "grad_norm": 3.026926279067993, "learning_rate": 1.601306427236109e-05, "loss": 0.0417, "step": 8218 }, { "epoch": 7.9028846153846155, "grad_norm": 3.208735466003418, "learning_rate": 1.601206890823151e-05, "loss": 0.048, "step": 8219 }, { "epoch": 7.903846153846154, "grad_norm": 1.8914051055908203, "learning_rate": 1.6011073450812272e-05, "loss": 0.0154, "step": 8220 }, { "epoch": 7.904807692307692, "grad_norm": 2.5425429344177246, "learning_rate": 1.601007790011881e-05, "loss": 0.0162, "step": 8221 }, { "epoch": 7.905769230769231, "grad_norm": 5.288308620452881, "learning_rate": 1.6009082256166585e-05, "loss": 0.0409, "step": 8222 }, { "epoch": 7.906730769230769, "grad_norm": 2.8148438930511475, "learning_rate": 1.6008086518971037e-05, "loss": 0.0161, "step": 8223 }, { "epoch": 7.907692307692308, "grad_norm": 2.99078631401062, "learning_rate": 1.600709068854762e-05, "loss": 0.0209, "step": 8224 }, { "epoch": 7.908653846153846, "grad_norm": 2.8261454105377197, "learning_rate": 1.6006094764911784e-05, "loss": 0.0321, "step": 8225 }, { "epoch": 7.9096153846153845, "grad_norm": 3.228323221206665, "learning_rate": 1.600509874807899e-05, "loss": 0.0139, "step": 8226 }, { "epoch": 7.910576923076923, "grad_norm": 3.312211036682129, "learning_rate": 1.6004102638064685e-05, "loss": 0.054, "step": 8227 }, { "epoch": 7.911538461538462, "grad_norm": 7.180749893188477, "learning_rate": 1.600310643488433e-05, "loss": 0.082, "step": 8228 }, { "epoch": 7.9125, "grad_norm": 3.0490708351135254, "learning_rate": 1.6002110138553378e-05, "loss": 0.0855, "step": 8229 }, { "epoch": 7.913461538461538, "grad_norm": 0.9069132208824158, "learning_rate": 1.6001113749087296e-05, "loss": 0.0096, "step": 8230 }, { "epoch": 7.914423076923077, "grad_norm": 3.852966070175171, "learning_rate": 1.6000117266501542e-05, "loss": 0.0479, "step": 8231 }, { "epoch": 7.915384615384616, "grad_norm": 2.918743371963501, "learning_rate": 1.599912069081158e-05, "loss": 0.0373, "step": 8232 }, { "epoch": 7.9163461538461535, "grad_norm": 2.830904245376587, "learning_rate": 1.599812402203287e-05, "loss": 0.0241, "step": 8233 }, { "epoch": 7.917307692307692, "grad_norm": 4.265012264251709, "learning_rate": 1.5997127260180878e-05, "loss": 0.0811, "step": 8234 }, { "epoch": 7.918269230769231, "grad_norm": 1.1553014516830444, "learning_rate": 1.5996130405271072e-05, "loss": 0.0057, "step": 8235 }, { "epoch": 7.9192307692307695, "grad_norm": 3.152608871459961, "learning_rate": 1.599513345731892e-05, "loss": 0.0198, "step": 8236 }, { "epoch": 7.920192307692307, "grad_norm": 1.0277049541473389, "learning_rate": 1.5994136416339897e-05, "loss": 0.0059, "step": 8237 }, { "epoch": 7.921153846153846, "grad_norm": 2.775926113128662, "learning_rate": 1.5993139282349466e-05, "loss": 0.0247, "step": 8238 }, { "epoch": 7.922115384615385, "grad_norm": 0.9461855292320251, "learning_rate": 1.5992142055363104e-05, "loss": 0.0054, "step": 8239 }, { "epoch": 7.923076923076923, "grad_norm": 2.903397560119629, "learning_rate": 1.5991144735396282e-05, "loss": 0.0226, "step": 8240 }, { "epoch": 7.924038461538462, "grad_norm": 1.7406364679336548, "learning_rate": 1.599014732246448e-05, "loss": 0.0108, "step": 8241 }, { "epoch": 7.925, "grad_norm": 1.011969804763794, "learning_rate": 1.5989149816583173e-05, "loss": 0.0093, "step": 8242 }, { "epoch": 7.9259615384615385, "grad_norm": 3.080037832260132, "learning_rate": 1.5988152217767837e-05, "loss": 0.0302, "step": 8243 }, { "epoch": 7.926923076923077, "grad_norm": 2.5255773067474365, "learning_rate": 1.598715452603395e-05, "loss": 0.0573, "step": 8244 }, { "epoch": 7.927884615384615, "grad_norm": 3.4934582710266113, "learning_rate": 1.5986156741397004e-05, "loss": 0.0298, "step": 8245 }, { "epoch": 7.928846153846154, "grad_norm": 4.540456295013428, "learning_rate": 1.5985158863872473e-05, "loss": 0.0559, "step": 8246 }, { "epoch": 7.929807692307692, "grad_norm": 2.0021278858184814, "learning_rate": 1.598416089347584e-05, "loss": 0.0138, "step": 8247 }, { "epoch": 7.930769230769231, "grad_norm": 1.9220802783966064, "learning_rate": 1.598316283022259e-05, "loss": 0.0204, "step": 8248 }, { "epoch": 7.93173076923077, "grad_norm": 2.2353498935699463, "learning_rate": 1.598216467412822e-05, "loss": 0.0146, "step": 8249 }, { "epoch": 7.9326923076923075, "grad_norm": 2.097883939743042, "learning_rate": 1.5981166425208208e-05, "loss": 0.0106, "step": 8250 }, { "epoch": 7.933653846153846, "grad_norm": 3.3805336952209473, "learning_rate": 1.598016808347805e-05, "loss": 0.0883, "step": 8251 }, { "epoch": 7.934615384615385, "grad_norm": 1.5988255739212036, "learning_rate": 1.5979169648953235e-05, "loss": 0.0087, "step": 8252 }, { "epoch": 7.935576923076923, "grad_norm": 4.937220573425293, "learning_rate": 1.5978171121649252e-05, "loss": 0.0347, "step": 8253 }, { "epoch": 7.936538461538461, "grad_norm": 2.1858792304992676, "learning_rate": 1.5977172501581605e-05, "loss": 0.0188, "step": 8254 }, { "epoch": 7.9375, "grad_norm": 0.39354386925697327, "learning_rate": 1.5976173788765775e-05, "loss": 0.0034, "step": 8255 }, { "epoch": 7.938461538461539, "grad_norm": 2.4879002571105957, "learning_rate": 1.5975174983217273e-05, "loss": 0.08, "step": 8256 }, { "epoch": 7.939423076923077, "grad_norm": 2.9604997634887695, "learning_rate": 1.5974176084951595e-05, "loss": 0.0245, "step": 8257 }, { "epoch": 7.940384615384615, "grad_norm": 3.3580238819122314, "learning_rate": 1.5973177093984233e-05, "loss": 0.0607, "step": 8258 }, { "epoch": 7.941346153846154, "grad_norm": 4.38834285736084, "learning_rate": 1.5972178010330694e-05, "loss": 0.0284, "step": 8259 }, { "epoch": 7.9423076923076925, "grad_norm": 3.3609659671783447, "learning_rate": 1.5971178834006482e-05, "loss": 0.0439, "step": 8260 }, { "epoch": 7.94326923076923, "grad_norm": 2.629223108291626, "learning_rate": 1.59701795650271e-05, "loss": 0.0702, "step": 8261 }, { "epoch": 7.944230769230769, "grad_norm": 0.7988322377204895, "learning_rate": 1.5969180203408052e-05, "loss": 0.0074, "step": 8262 }, { "epoch": 7.945192307692308, "grad_norm": 2.276977300643921, "learning_rate": 1.596818074916485e-05, "loss": 0.0163, "step": 8263 }, { "epoch": 7.946153846153846, "grad_norm": 1.3581836223602295, "learning_rate": 1.5967181202312996e-05, "loss": 0.0073, "step": 8264 }, { "epoch": 7.947115384615385, "grad_norm": 6.088513374328613, "learning_rate": 1.5966181562868004e-05, "loss": 0.0783, "step": 8265 }, { "epoch": 7.948076923076923, "grad_norm": 0.3512720763683319, "learning_rate": 1.5965181830845384e-05, "loss": 0.0032, "step": 8266 }, { "epoch": 7.9490384615384615, "grad_norm": 2.8760738372802734, "learning_rate": 1.596418200626065e-05, "loss": 0.0381, "step": 8267 }, { "epoch": 7.95, "grad_norm": 4.568723201751709, "learning_rate": 1.5963182089129317e-05, "loss": 0.0561, "step": 8268 }, { "epoch": 7.950961538461538, "grad_norm": 4.885130882263184, "learning_rate": 1.5962182079466898e-05, "loss": 0.0439, "step": 8269 }, { "epoch": 7.951923076923077, "grad_norm": 4.137868404388428, "learning_rate": 1.5961181977288912e-05, "loss": 0.1456, "step": 8270 }, { "epoch": 7.952884615384615, "grad_norm": 1.9489643573760986, "learning_rate": 1.596018178261088e-05, "loss": 0.0285, "step": 8271 }, { "epoch": 7.953846153846154, "grad_norm": 2.0375304222106934, "learning_rate": 1.5959181495448317e-05, "loss": 0.0396, "step": 8272 }, { "epoch": 7.954807692307693, "grad_norm": 0.8914142847061157, "learning_rate": 1.5958181115816747e-05, "loss": 0.0057, "step": 8273 }, { "epoch": 7.9557692307692305, "grad_norm": 0.41478297114372253, "learning_rate": 1.5957180643731696e-05, "loss": 0.0032, "step": 8274 }, { "epoch": 7.956730769230769, "grad_norm": 0.7949101328849792, "learning_rate": 1.5956180079208684e-05, "loss": 0.0046, "step": 8275 }, { "epoch": 7.957692307692308, "grad_norm": 0.7403222918510437, "learning_rate": 1.595517942226324e-05, "loss": 0.0056, "step": 8276 }, { "epoch": 7.9586538461538465, "grad_norm": 4.1838603019714355, "learning_rate": 1.5954178672910885e-05, "loss": 0.1173, "step": 8277 }, { "epoch": 7.959615384615384, "grad_norm": 2.669301986694336, "learning_rate": 1.5953177831167155e-05, "loss": 0.0394, "step": 8278 }, { "epoch": 7.960576923076923, "grad_norm": 2.26366925239563, "learning_rate": 1.5952176897047577e-05, "loss": 0.0421, "step": 8279 }, { "epoch": 7.961538461538462, "grad_norm": 2.221813917160034, "learning_rate": 1.5951175870567682e-05, "loss": 0.0638, "step": 8280 }, { "epoch": 7.9625, "grad_norm": 1.2788050174713135, "learning_rate": 1.595017475174301e-05, "loss": 0.0143, "step": 8281 }, { "epoch": 7.963461538461538, "grad_norm": 3.6346850395202637, "learning_rate": 1.594917354058908e-05, "loss": 0.0487, "step": 8282 }, { "epoch": 7.964423076923077, "grad_norm": 0.9925442337989807, "learning_rate": 1.5948172237121443e-05, "loss": 0.0078, "step": 8283 }, { "epoch": 7.9653846153846155, "grad_norm": 3.124147891998291, "learning_rate": 1.594717084135563e-05, "loss": 0.0414, "step": 8284 }, { "epoch": 7.966346153846154, "grad_norm": 3.2965221405029297, "learning_rate": 1.594616935330718e-05, "loss": 0.0392, "step": 8285 }, { "epoch": 7.967307692307692, "grad_norm": 3.872453212738037, "learning_rate": 1.5945167772991633e-05, "loss": 0.0287, "step": 8286 }, { "epoch": 7.968269230769231, "grad_norm": 2.544628143310547, "learning_rate": 1.5944166100424534e-05, "loss": 0.0807, "step": 8287 }, { "epoch": 7.969230769230769, "grad_norm": 0.6910704970359802, "learning_rate": 1.5943164335621418e-05, "loss": 0.0054, "step": 8288 }, { "epoch": 7.970192307692308, "grad_norm": 2.670196771621704, "learning_rate": 1.5942162478597838e-05, "loss": 0.0466, "step": 8289 }, { "epoch": 7.971153846153846, "grad_norm": 2.4310851097106934, "learning_rate": 1.5941160529369334e-05, "loss": 0.021, "step": 8290 }, { "epoch": 7.9721153846153845, "grad_norm": 2.611623764038086, "learning_rate": 1.5940158487951454e-05, "loss": 0.0263, "step": 8291 }, { "epoch": 7.973076923076923, "grad_norm": 4.044985294342041, "learning_rate": 1.5939156354359755e-05, "loss": 0.0413, "step": 8292 }, { "epoch": 7.974038461538462, "grad_norm": 4.242997169494629, "learning_rate": 1.5938154128609776e-05, "loss": 0.0689, "step": 8293 }, { "epoch": 7.975, "grad_norm": 3.7844884395599365, "learning_rate": 1.5937151810717077e-05, "loss": 0.0271, "step": 8294 }, { "epoch": 7.975961538461538, "grad_norm": 6.749919414520264, "learning_rate": 1.59361494006972e-05, "loss": 0.0247, "step": 8295 }, { "epoch": 7.976923076923077, "grad_norm": 0.09080912172794342, "learning_rate": 1.5935146898565715e-05, "loss": 0.0012, "step": 8296 }, { "epoch": 7.977884615384616, "grad_norm": 3.1803722381591797, "learning_rate": 1.5934144304338163e-05, "loss": 0.0661, "step": 8297 }, { "epoch": 7.9788461538461535, "grad_norm": 3.4994914531707764, "learning_rate": 1.5933141618030113e-05, "loss": 0.0498, "step": 8298 }, { "epoch": 7.979807692307692, "grad_norm": 1.6331636905670166, "learning_rate": 1.5932138839657113e-05, "loss": 0.022, "step": 8299 }, { "epoch": 7.980769230769231, "grad_norm": 1.4138500690460205, "learning_rate": 1.5931135969234736e-05, "loss": 0.0133, "step": 8300 }, { "epoch": 7.9817307692307695, "grad_norm": 3.301060438156128, "learning_rate": 1.593013300677853e-05, "loss": 0.0587, "step": 8301 }, { "epoch": 7.982692307692307, "grad_norm": 1.0437276363372803, "learning_rate": 1.5929129952304068e-05, "loss": 0.0054, "step": 8302 }, { "epoch": 7.983653846153846, "grad_norm": 2.1712234020233154, "learning_rate": 1.592812680582691e-05, "loss": 0.0631, "step": 8303 }, { "epoch": 7.984615384615385, "grad_norm": 2.5005249977111816, "learning_rate": 1.5927123567362622e-05, "loss": 0.0266, "step": 8304 }, { "epoch": 7.985576923076923, "grad_norm": 3.4651808738708496, "learning_rate": 1.5926120236926772e-05, "loss": 0.0441, "step": 8305 }, { "epoch": 7.986538461538462, "grad_norm": 3.4495255947113037, "learning_rate": 1.592511681453493e-05, "loss": 0.0304, "step": 8306 }, { "epoch": 7.9875, "grad_norm": 3.224544048309326, "learning_rate": 1.5924113300202667e-05, "loss": 0.0144, "step": 8307 }, { "epoch": 7.9884615384615385, "grad_norm": 3.5678772926330566, "learning_rate": 1.592310969394555e-05, "loss": 0.0456, "step": 8308 }, { "epoch": 7.989423076923077, "grad_norm": 1.650198221206665, "learning_rate": 1.5922105995779155e-05, "loss": 0.0082, "step": 8309 }, { "epoch": 7.990384615384615, "grad_norm": 3.6238112449645996, "learning_rate": 1.5921102205719056e-05, "loss": 0.0772, "step": 8310 }, { "epoch": 7.991346153846154, "grad_norm": 6.1580891609191895, "learning_rate": 1.592009832378083e-05, "loss": 0.0775, "step": 8311 }, { "epoch": 7.992307692307692, "grad_norm": 3.0461721420288086, "learning_rate": 1.591909434998005e-05, "loss": 0.0964, "step": 8312 }, { "epoch": 7.993269230769231, "grad_norm": 2.6676173210144043, "learning_rate": 1.5918090284332303e-05, "loss": 0.0378, "step": 8313 }, { "epoch": 7.99423076923077, "grad_norm": 0.6491154432296753, "learning_rate": 1.591708612685316e-05, "loss": 0.0053, "step": 8314 }, { "epoch": 7.9951923076923075, "grad_norm": 6.822248458862305, "learning_rate": 1.591608187755821e-05, "loss": 0.0478, "step": 8315 }, { "epoch": 7.996153846153846, "grad_norm": 3.2280678749084473, "learning_rate": 1.591507753646303e-05, "loss": 0.0468, "step": 8316 }, { "epoch": 7.997115384615385, "grad_norm": 1.8193756341934204, "learning_rate": 1.591407310358321e-05, "loss": 0.0112, "step": 8317 }, { "epoch": 7.998076923076923, "grad_norm": 0.9593809247016907, "learning_rate": 1.591306857893433e-05, "loss": 0.0094, "step": 8318 }, { "epoch": 7.999038461538461, "grad_norm": 3.3846869468688965, "learning_rate": 1.591206396253198e-05, "loss": 0.0255, "step": 8319 }, { "epoch": 8.0, "grad_norm": 3.779266834259033, "learning_rate": 1.591105925439175e-05, "loss": 0.0414, "step": 8320 }, { "epoch": 8.000961538461539, "grad_norm": 0.6144025921821594, "learning_rate": 1.5910054454529233e-05, "loss": 0.0037, "step": 8321 }, { "epoch": 8.001923076923077, "grad_norm": 0.652991771697998, "learning_rate": 1.590904956296001e-05, "loss": 0.0049, "step": 8322 }, { "epoch": 8.002884615384616, "grad_norm": 0.1955224871635437, "learning_rate": 1.5908044579699687e-05, "loss": 0.0015, "step": 8323 }, { "epoch": 8.003846153846155, "grad_norm": 0.9094949960708618, "learning_rate": 1.590703950476385e-05, "loss": 0.0049, "step": 8324 }, { "epoch": 8.004807692307692, "grad_norm": 2.2613368034362793, "learning_rate": 1.5906034338168095e-05, "loss": 0.0175, "step": 8325 }, { "epoch": 8.00576923076923, "grad_norm": 0.6612601280212402, "learning_rate": 1.590502907992802e-05, "loss": 0.0039, "step": 8326 }, { "epoch": 8.006730769230769, "grad_norm": 1.1236478090286255, "learning_rate": 1.5904023730059227e-05, "loss": 0.009, "step": 8327 }, { "epoch": 8.007692307692308, "grad_norm": 0.4718210697174072, "learning_rate": 1.5903018288577314e-05, "loss": 0.0035, "step": 8328 }, { "epoch": 8.008653846153846, "grad_norm": 6.769756317138672, "learning_rate": 1.5902012755497885e-05, "loss": 0.0336, "step": 8329 }, { "epoch": 8.009615384615385, "grad_norm": 0.48767316341400146, "learning_rate": 1.5901007130836536e-05, "loss": 0.0035, "step": 8330 }, { "epoch": 8.010576923076924, "grad_norm": 1.406339168548584, "learning_rate": 1.5900001414608875e-05, "loss": 0.0066, "step": 8331 }, { "epoch": 8.011538461538462, "grad_norm": 1.3467458486557007, "learning_rate": 1.589899560683051e-05, "loss": 0.0113, "step": 8332 }, { "epoch": 8.0125, "grad_norm": 4.181005954742432, "learning_rate": 1.589798970751705e-05, "loss": 0.114, "step": 8333 }, { "epoch": 8.013461538461538, "grad_norm": 4.863245487213135, "learning_rate": 1.5896983716684095e-05, "loss": 0.1155, "step": 8334 }, { "epoch": 8.014423076923077, "grad_norm": 2.2981033325195312, "learning_rate": 1.5895977634347262e-05, "loss": 0.0193, "step": 8335 }, { "epoch": 8.015384615384615, "grad_norm": 1.4139151573181152, "learning_rate": 1.589497146052216e-05, "loss": 0.0064, "step": 8336 }, { "epoch": 8.016346153846154, "grad_norm": 3.880075454711914, "learning_rate": 1.5893965195224403e-05, "loss": 0.0881, "step": 8337 }, { "epoch": 8.017307692307693, "grad_norm": 3.5264089107513428, "learning_rate": 1.5892958838469607e-05, "loss": 0.1175, "step": 8338 }, { "epoch": 8.018269230769231, "grad_norm": 4.507894992828369, "learning_rate": 1.589195239027338e-05, "loss": 0.0691, "step": 8339 }, { "epoch": 8.01923076923077, "grad_norm": 0.817520797252655, "learning_rate": 1.5890945850651347e-05, "loss": 0.0066, "step": 8340 }, { "epoch": 8.020192307692307, "grad_norm": 3.5145585536956787, "learning_rate": 1.5889939219619125e-05, "loss": 0.0731, "step": 8341 }, { "epoch": 8.021153846153846, "grad_norm": 1.6987968683242798, "learning_rate": 1.588893249719233e-05, "loss": 0.0077, "step": 8342 }, { "epoch": 8.022115384615384, "grad_norm": 3.2846732139587402, "learning_rate": 1.5887925683386594e-05, "loss": 0.077, "step": 8343 }, { "epoch": 8.023076923076923, "grad_norm": 2.453211784362793, "learning_rate": 1.5886918778217525e-05, "loss": 0.032, "step": 8344 }, { "epoch": 8.024038461538462, "grad_norm": 2.7351057529449463, "learning_rate": 1.5885911781700757e-05, "loss": 0.0245, "step": 8345 }, { "epoch": 8.025, "grad_norm": 3.649730682373047, "learning_rate": 1.5884904693851916e-05, "loss": 0.0218, "step": 8346 }, { "epoch": 8.025961538461539, "grad_norm": 0.05672658234834671, "learning_rate": 1.5883897514686625e-05, "loss": 0.0009, "step": 8347 }, { "epoch": 8.026923076923078, "grad_norm": 2.249889612197876, "learning_rate": 1.588289024422051e-05, "loss": 0.0161, "step": 8348 }, { "epoch": 8.027884615384615, "grad_norm": 4.190976619720459, "learning_rate": 1.5881882882469206e-05, "loss": 0.1223, "step": 8349 }, { "epoch": 8.028846153846153, "grad_norm": 1.7610265016555786, "learning_rate": 1.5880875429448344e-05, "loss": 0.0135, "step": 8350 }, { "epoch": 8.029807692307692, "grad_norm": 0.1749831587076187, "learning_rate": 1.5879867885173553e-05, "loss": 0.0013, "step": 8351 }, { "epoch": 8.03076923076923, "grad_norm": 3.5574402809143066, "learning_rate": 1.5878860249660473e-05, "loss": 0.0183, "step": 8352 }, { "epoch": 8.03173076923077, "grad_norm": 3.722949981689453, "learning_rate": 1.5877852522924733e-05, "loss": 0.0675, "step": 8353 }, { "epoch": 8.032692307692308, "grad_norm": 2.4475347995758057, "learning_rate": 1.5876844704981975e-05, "loss": 0.047, "step": 8354 }, { "epoch": 8.033653846153847, "grad_norm": 4.947295188903809, "learning_rate": 1.5875836795847834e-05, "loss": 0.0311, "step": 8355 }, { "epoch": 8.034615384615385, "grad_norm": 7.314836025238037, "learning_rate": 1.5874828795537954e-05, "loss": 0.1954, "step": 8356 }, { "epoch": 8.035576923076922, "grad_norm": 2.225203037261963, "learning_rate": 1.5873820704067974e-05, "loss": 0.02, "step": 8357 }, { "epoch": 8.036538461538461, "grad_norm": 3.2830352783203125, "learning_rate": 1.587281252145353e-05, "loss": 0.0296, "step": 8358 }, { "epoch": 8.0375, "grad_norm": 3.7495486736297607, "learning_rate": 1.587180424771028e-05, "loss": 0.1026, "step": 8359 }, { "epoch": 8.038461538461538, "grad_norm": 0.9549773335456848, "learning_rate": 1.5870795882853856e-05, "loss": 0.011, "step": 8360 }, { "epoch": 8.039423076923077, "grad_norm": 4.281935691833496, "learning_rate": 1.5869787426899912e-05, "loss": 0.0902, "step": 8361 }, { "epoch": 8.040384615384616, "grad_norm": 3.6452391147613525, "learning_rate": 1.5868778879864093e-05, "loss": 0.0399, "step": 8362 }, { "epoch": 8.041346153846154, "grad_norm": 3.944788932800293, "learning_rate": 1.5867770241762053e-05, "loss": 0.0212, "step": 8363 }, { "epoch": 8.042307692307693, "grad_norm": 2.4809036254882812, "learning_rate": 1.586676151260944e-05, "loss": 0.0296, "step": 8364 }, { "epoch": 8.04326923076923, "grad_norm": 2.844817638397217, "learning_rate": 1.5865752692421908e-05, "loss": 0.031, "step": 8365 }, { "epoch": 8.044230769230769, "grad_norm": 2.519181966781616, "learning_rate": 1.586474378121511e-05, "loss": 0.0287, "step": 8366 }, { "epoch": 8.045192307692307, "grad_norm": 3.232710838317871, "learning_rate": 1.58637347790047e-05, "loss": 0.0919, "step": 8367 }, { "epoch": 8.046153846153846, "grad_norm": 1.5959713459014893, "learning_rate": 1.5862725685806336e-05, "loss": 0.0114, "step": 8368 }, { "epoch": 8.047115384615385, "grad_norm": 1.5943347215652466, "learning_rate": 1.5861716501635678e-05, "loss": 0.0525, "step": 8369 }, { "epoch": 8.048076923076923, "grad_norm": 3.139108657836914, "learning_rate": 1.5860707226508382e-05, "loss": 0.0867, "step": 8370 }, { "epoch": 8.049038461538462, "grad_norm": 2.29105544090271, "learning_rate": 1.5859697860440112e-05, "loss": 0.0641, "step": 8371 }, { "epoch": 8.05, "grad_norm": 2.425459384918213, "learning_rate": 1.5858688403446528e-05, "loss": 0.0362, "step": 8372 }, { "epoch": 8.050961538461538, "grad_norm": 1.9998282194137573, "learning_rate": 1.5857678855543296e-05, "loss": 0.0431, "step": 8373 }, { "epoch": 8.051923076923076, "grad_norm": 2.9578680992126465, "learning_rate": 1.585666921674608e-05, "loss": 0.0087, "step": 8374 }, { "epoch": 8.052884615384615, "grad_norm": 2.9161195755004883, "learning_rate": 1.5855659487070547e-05, "loss": 0.0247, "step": 8375 }, { "epoch": 8.053846153846154, "grad_norm": 2.3311939239501953, "learning_rate": 1.5854649666532364e-05, "loss": 0.0312, "step": 8376 }, { "epoch": 8.054807692307692, "grad_norm": 1.449188470840454, "learning_rate": 1.5853639755147203e-05, "loss": 0.0072, "step": 8377 }, { "epoch": 8.055769230769231, "grad_norm": 2.407484769821167, "learning_rate": 1.585262975293073e-05, "loss": 0.0124, "step": 8378 }, { "epoch": 8.05673076923077, "grad_norm": 3.6480817794799805, "learning_rate": 1.5851619659898623e-05, "loss": 0.0839, "step": 8379 }, { "epoch": 8.057692307692308, "grad_norm": 1.2705203294754028, "learning_rate": 1.5850609476066555e-05, "loss": 0.0137, "step": 8380 }, { "epoch": 8.058653846153845, "grad_norm": 1.2483075857162476, "learning_rate": 1.5849599201450197e-05, "loss": 0.0176, "step": 8381 }, { "epoch": 8.059615384615384, "grad_norm": 0.7162018418312073, "learning_rate": 1.584858883606523e-05, "loss": 0.0081, "step": 8382 }, { "epoch": 8.060576923076923, "grad_norm": 0.4893990755081177, "learning_rate": 1.5847578379927325e-05, "loss": 0.0047, "step": 8383 }, { "epoch": 8.061538461538461, "grad_norm": 3.788491725921631, "learning_rate": 1.584656783305217e-05, "loss": 0.0887, "step": 8384 }, { "epoch": 8.0625, "grad_norm": 1.6595022678375244, "learning_rate": 1.584555719545544e-05, "loss": 0.046, "step": 8385 }, { "epoch": 8.063461538461539, "grad_norm": 3.2780725955963135, "learning_rate": 1.584454646715282e-05, "loss": 0.0628, "step": 8386 }, { "epoch": 8.064423076923077, "grad_norm": 2.483212471008301, "learning_rate": 1.584353564815999e-05, "loss": 0.0109, "step": 8387 }, { "epoch": 8.065384615384616, "grad_norm": 1.3214664459228516, "learning_rate": 1.5842524738492636e-05, "loss": 0.0128, "step": 8388 }, { "epoch": 8.066346153846155, "grad_norm": 2.475459575653076, "learning_rate": 1.584151373816645e-05, "loss": 0.0412, "step": 8389 }, { "epoch": 8.067307692307692, "grad_norm": 2.903172731399536, "learning_rate": 1.5840502647197117e-05, "loss": 0.0896, "step": 8390 }, { "epoch": 8.06826923076923, "grad_norm": 2.349876642227173, "learning_rate": 1.5839491465600318e-05, "loss": 0.065, "step": 8391 }, { "epoch": 8.069230769230769, "grad_norm": 2.470897912979126, "learning_rate": 1.5838480193391753e-05, "loss": 0.0874, "step": 8392 }, { "epoch": 8.070192307692308, "grad_norm": 2.051968574523926, "learning_rate": 1.5837468830587113e-05, "loss": 0.0355, "step": 8393 }, { "epoch": 8.071153846153846, "grad_norm": 2.3884458541870117, "learning_rate": 1.583645737720209e-05, "loss": 0.0154, "step": 8394 }, { "epoch": 8.072115384615385, "grad_norm": 0.19777053594589233, "learning_rate": 1.5835445833252376e-05, "loss": 0.002, "step": 8395 }, { "epoch": 8.073076923076924, "grad_norm": 2.741036891937256, "learning_rate": 1.583443419875367e-05, "loss": 0.0744, "step": 8396 }, { "epoch": 8.074038461538462, "grad_norm": 5.964816093444824, "learning_rate": 1.5833422473721668e-05, "loss": 0.0881, "step": 8397 }, { "epoch": 8.075, "grad_norm": 1.8454656600952148, "learning_rate": 1.5832410658172076e-05, "loss": 0.0401, "step": 8398 }, { "epoch": 8.075961538461538, "grad_norm": 1.6675457954406738, "learning_rate": 1.583139875212058e-05, "loss": 0.0448, "step": 8399 }, { "epoch": 8.076923076923077, "grad_norm": 2.9616312980651855, "learning_rate": 1.5830386755582895e-05, "loss": 0.0556, "step": 8400 }, { "epoch": 8.077884615384615, "grad_norm": 3.0521273612976074, "learning_rate": 1.5829374668574717e-05, "loss": 0.1478, "step": 8401 }, { "epoch": 8.078846153846154, "grad_norm": 3.8091111183166504, "learning_rate": 1.5828362491111758e-05, "loss": 0.1851, "step": 8402 }, { "epoch": 8.079807692307693, "grad_norm": 1.8524385690689087, "learning_rate": 1.5827350223209716e-05, "loss": 0.0106, "step": 8403 }, { "epoch": 8.080769230769231, "grad_norm": 3.5704808235168457, "learning_rate": 1.58263378648843e-05, "loss": 0.1203, "step": 8404 }, { "epoch": 8.08173076923077, "grad_norm": 1.6360844373703003, "learning_rate": 1.582532541615122e-05, "loss": 0.0156, "step": 8405 }, { "epoch": 8.082692307692307, "grad_norm": 2.0027244091033936, "learning_rate": 1.582431287702619e-05, "loss": 0.0142, "step": 8406 }, { "epoch": 8.083653846153846, "grad_norm": 3.1380603313446045, "learning_rate": 1.582330024752492e-05, "loss": 0.063, "step": 8407 }, { "epoch": 8.084615384615384, "grad_norm": 2.8961453437805176, "learning_rate": 1.5822287527663115e-05, "loss": 0.1391, "step": 8408 }, { "epoch": 8.085576923076923, "grad_norm": 4.342606544494629, "learning_rate": 1.58212747174565e-05, "loss": 0.0385, "step": 8409 }, { "epoch": 8.086538461538462, "grad_norm": 4.167107105255127, "learning_rate": 1.5820261816920785e-05, "loss": 0.0402, "step": 8410 }, { "epoch": 8.0875, "grad_norm": 0.40332335233688354, "learning_rate": 1.581924882607169e-05, "loss": 0.0035, "step": 8411 }, { "epoch": 8.088461538461539, "grad_norm": 4.059520244598389, "learning_rate": 1.581823574492493e-05, "loss": 0.039, "step": 8412 }, { "epoch": 8.089423076923078, "grad_norm": 1.353361964225769, "learning_rate": 1.5817222573496232e-05, "loss": 0.0162, "step": 8413 }, { "epoch": 8.090384615384615, "grad_norm": 1.2835081815719604, "learning_rate": 1.5816209311801313e-05, "loss": 0.0119, "step": 8414 }, { "epoch": 8.091346153846153, "grad_norm": 1.4250613451004028, "learning_rate": 1.5815195959855893e-05, "loss": 0.0168, "step": 8415 }, { "epoch": 8.092307692307692, "grad_norm": 2.4784882068634033, "learning_rate": 1.5814182517675698e-05, "loss": 0.0602, "step": 8416 }, { "epoch": 8.09326923076923, "grad_norm": 1.1222712993621826, "learning_rate": 1.5813168985276458e-05, "loss": 0.0146, "step": 8417 }, { "epoch": 8.09423076923077, "grad_norm": 1.1043462753295898, "learning_rate": 1.5812155362673895e-05, "loss": 0.0167, "step": 8418 }, { "epoch": 8.095192307692308, "grad_norm": 2.9049575328826904, "learning_rate": 1.5811141649883736e-05, "loss": 0.0378, "step": 8419 }, { "epoch": 8.096153846153847, "grad_norm": 0.8451196551322937, "learning_rate": 1.581012784692172e-05, "loss": 0.0071, "step": 8420 }, { "epoch": 8.097115384615385, "grad_norm": 2.5902137756347656, "learning_rate": 1.580911395380357e-05, "loss": 0.0449, "step": 8421 }, { "epoch": 8.098076923076922, "grad_norm": 1.9261524677276611, "learning_rate": 1.580809997054502e-05, "loss": 0.0239, "step": 8422 }, { "epoch": 8.099038461538461, "grad_norm": 2.96528697013855, "learning_rate": 1.5807085897161806e-05, "loss": 0.0743, "step": 8423 }, { "epoch": 8.1, "grad_norm": 2.213089942932129, "learning_rate": 1.5806071733669664e-05, "loss": 0.0618, "step": 8424 }, { "epoch": 8.100961538461538, "grad_norm": 1.3505909442901611, "learning_rate": 1.5805057480084327e-05, "loss": 0.0168, "step": 8425 }, { "epoch": 8.101923076923077, "grad_norm": 0.3977002203464508, "learning_rate": 1.5804043136421535e-05, "loss": 0.004, "step": 8426 }, { "epoch": 8.102884615384616, "grad_norm": 1.5522401332855225, "learning_rate": 1.580302870269703e-05, "loss": 0.0178, "step": 8427 }, { "epoch": 8.103846153846154, "grad_norm": 0.8547126054763794, "learning_rate": 1.580201417892655e-05, "loss": 0.0091, "step": 8428 }, { "epoch": 8.104807692307693, "grad_norm": 1.8050997257232666, "learning_rate": 1.580099956512584e-05, "loss": 0.0132, "step": 8429 }, { "epoch": 8.10576923076923, "grad_norm": 1.6774795055389404, "learning_rate": 1.5799984861310644e-05, "loss": 0.0224, "step": 8430 }, { "epoch": 8.106730769230769, "grad_norm": 4.293046951293945, "learning_rate": 1.57989700674967e-05, "loss": 0.0436, "step": 8431 }, { "epoch": 8.107692307692307, "grad_norm": 1.380704402923584, "learning_rate": 1.5797955183699766e-05, "loss": 0.0161, "step": 8432 }, { "epoch": 8.108653846153846, "grad_norm": 1.3810229301452637, "learning_rate": 1.5796940209935582e-05, "loss": 0.0133, "step": 8433 }, { "epoch": 8.109615384615385, "grad_norm": 3.3364005088806152, "learning_rate": 1.57959251462199e-05, "loss": 0.0203, "step": 8434 }, { "epoch": 8.110576923076923, "grad_norm": 0.5099472403526306, "learning_rate": 1.579490999256847e-05, "loss": 0.004, "step": 8435 }, { "epoch": 8.111538461538462, "grad_norm": 0.32807502150535583, "learning_rate": 1.5793894748997044e-05, "loss": 0.0028, "step": 8436 }, { "epoch": 8.1125, "grad_norm": 3.662914514541626, "learning_rate": 1.5792879415521375e-05, "loss": 0.0184, "step": 8437 }, { "epoch": 8.113461538461538, "grad_norm": 1.245973825454712, "learning_rate": 1.5791863992157222e-05, "loss": 0.0164, "step": 8438 }, { "epoch": 8.114423076923076, "grad_norm": 2.8790740966796875, "learning_rate": 1.579084847892034e-05, "loss": 0.1037, "step": 8439 }, { "epoch": 8.115384615384615, "grad_norm": 1.4661500453948975, "learning_rate": 1.5789832875826484e-05, "loss": 0.015, "step": 8440 }, { "epoch": 8.116346153846154, "grad_norm": 2.1808395385742188, "learning_rate": 1.5788817182891415e-05, "loss": 0.0148, "step": 8441 }, { "epoch": 8.117307692307692, "grad_norm": 0.2293960303068161, "learning_rate": 1.5787801400130894e-05, "loss": 0.002, "step": 8442 }, { "epoch": 8.118269230769231, "grad_norm": 0.8855037093162537, "learning_rate": 1.5786785527560683e-05, "loss": 0.0055, "step": 8443 }, { "epoch": 8.11923076923077, "grad_norm": 2.705012559890747, "learning_rate": 1.5785769565196543e-05, "loss": 0.0333, "step": 8444 }, { "epoch": 8.120192307692308, "grad_norm": 0.2542017996311188, "learning_rate": 1.578475351305424e-05, "loss": 0.0016, "step": 8445 }, { "epoch": 8.121153846153845, "grad_norm": 5.791164398193359, "learning_rate": 1.578373737114954e-05, "loss": 0.1614, "step": 8446 }, { "epoch": 8.122115384615384, "grad_norm": 1.8152334690093994, "learning_rate": 1.5782721139498213e-05, "loss": 0.0288, "step": 8447 }, { "epoch": 8.123076923076923, "grad_norm": 2.5992674827575684, "learning_rate": 1.5781704818116024e-05, "loss": 0.0324, "step": 8448 }, { "epoch": 8.124038461538461, "grad_norm": 1.1904048919677734, "learning_rate": 1.578068840701875e-05, "loss": 0.0088, "step": 8449 }, { "epoch": 8.125, "grad_norm": 3.0610768795013428, "learning_rate": 1.577967190622215e-05, "loss": 0.0228, "step": 8450 }, { "epoch": 8.125961538461539, "grad_norm": 0.5097993016242981, "learning_rate": 1.577865531574201e-05, "loss": 0.0051, "step": 8451 }, { "epoch": 8.126923076923077, "grad_norm": 3.085939407348633, "learning_rate": 1.5777638635594103e-05, "loss": 0.064, "step": 8452 }, { "epoch": 8.127884615384616, "grad_norm": 4.5259175300598145, "learning_rate": 1.5776621865794197e-05, "loss": 0.0472, "step": 8453 }, { "epoch": 8.128846153846155, "grad_norm": 2.303018569946289, "learning_rate": 1.5775605006358072e-05, "loss": 0.0422, "step": 8454 }, { "epoch": 8.129807692307692, "grad_norm": 0.7933651804924011, "learning_rate": 1.577458805730151e-05, "loss": 0.0031, "step": 8455 }, { "epoch": 8.13076923076923, "grad_norm": 2.5905728340148926, "learning_rate": 1.5773571018640293e-05, "loss": 0.0382, "step": 8456 }, { "epoch": 8.131730769230769, "grad_norm": 1.801020622253418, "learning_rate": 1.5772553890390196e-05, "loss": 0.0183, "step": 8457 }, { "epoch": 8.132692307692308, "grad_norm": 2.6516122817993164, "learning_rate": 1.5771536672567008e-05, "loss": 0.0465, "step": 8458 }, { "epoch": 8.133653846153846, "grad_norm": 2.224860906600952, "learning_rate": 1.5770519365186508e-05, "loss": 0.1054, "step": 8459 }, { "epoch": 8.134615384615385, "grad_norm": 1.8454334735870361, "learning_rate": 1.5769501968264482e-05, "loss": 0.0201, "step": 8460 }, { "epoch": 8.135576923076924, "grad_norm": 1.9779629707336426, "learning_rate": 1.576848448181672e-05, "loss": 0.0362, "step": 8461 }, { "epoch": 8.136538461538462, "grad_norm": 1.3219705820083618, "learning_rate": 1.5767466905859007e-05, "loss": 0.0056, "step": 8462 }, { "epoch": 8.1375, "grad_norm": 2.1126866340637207, "learning_rate": 1.5766449240407137e-05, "loss": 0.0218, "step": 8463 }, { "epoch": 8.138461538461538, "grad_norm": 1.2214930057525635, "learning_rate": 1.57654314854769e-05, "loss": 0.0135, "step": 8464 }, { "epoch": 8.139423076923077, "grad_norm": 1.5817373991012573, "learning_rate": 1.576441364108409e-05, "loss": 0.0086, "step": 8465 }, { "epoch": 8.140384615384615, "grad_norm": 2.797714948654175, "learning_rate": 1.5763395707244496e-05, "loss": 0.0505, "step": 8466 }, { "epoch": 8.141346153846154, "grad_norm": 0.5922094583511353, "learning_rate": 1.5762377683973915e-05, "loss": 0.0036, "step": 8467 }, { "epoch": 8.142307692307693, "grad_norm": 2.469298839569092, "learning_rate": 1.5761359571288143e-05, "loss": 0.0333, "step": 8468 }, { "epoch": 8.143269230769231, "grad_norm": 2.9669318199157715, "learning_rate": 1.5760341369202984e-05, "loss": 0.0432, "step": 8469 }, { "epoch": 8.14423076923077, "grad_norm": 3.090594530105591, "learning_rate": 1.5759323077734233e-05, "loss": 0.0528, "step": 8470 }, { "epoch": 8.145192307692307, "grad_norm": 1.2945878505706787, "learning_rate": 1.5758304696897688e-05, "loss": 0.01, "step": 8471 }, { "epoch": 8.146153846153846, "grad_norm": 1.656499981880188, "learning_rate": 1.5757286226709155e-05, "loss": 0.0153, "step": 8472 }, { "epoch": 8.147115384615384, "grad_norm": 4.311193466186523, "learning_rate": 1.5756267667184437e-05, "loss": 0.0668, "step": 8473 }, { "epoch": 8.148076923076923, "grad_norm": 2.387040853500366, "learning_rate": 1.575524901833934e-05, "loss": 0.0159, "step": 8474 }, { "epoch": 8.149038461538462, "grad_norm": 0.830683708190918, "learning_rate": 1.5754230280189674e-05, "loss": 0.0054, "step": 8475 }, { "epoch": 8.15, "grad_norm": 0.13275489211082458, "learning_rate": 1.5753211452751237e-05, "loss": 0.0007, "step": 8476 }, { "epoch": 8.150961538461539, "grad_norm": 1.614640235900879, "learning_rate": 1.5752192536039847e-05, "loss": 0.0105, "step": 8477 }, { "epoch": 8.151923076923078, "grad_norm": 1.5473276376724243, "learning_rate": 1.575117353007131e-05, "loss": 0.0091, "step": 8478 }, { "epoch": 8.152884615384615, "grad_norm": 1.0377289056777954, "learning_rate": 1.575015443486144e-05, "loss": 0.0084, "step": 8479 }, { "epoch": 8.153846153846153, "grad_norm": 3.5766820907592773, "learning_rate": 1.5749135250426047e-05, "loss": 0.0268, "step": 8480 }, { "epoch": 8.154807692307692, "grad_norm": 1.204282283782959, "learning_rate": 1.574811597678095e-05, "loss": 0.011, "step": 8481 }, { "epoch": 8.15576923076923, "grad_norm": 1.6330602169036865, "learning_rate": 1.5747096613941965e-05, "loss": 0.0119, "step": 8482 }, { "epoch": 8.15673076923077, "grad_norm": 2.3372230529785156, "learning_rate": 1.5746077161924905e-05, "loss": 0.0273, "step": 8483 }, { "epoch": 8.157692307692308, "grad_norm": 0.6888970136642456, "learning_rate": 1.5745057620745597e-05, "loss": 0.0076, "step": 8484 }, { "epoch": 8.158653846153847, "grad_norm": 1.746100664138794, "learning_rate": 1.574403799041985e-05, "loss": 0.0268, "step": 8485 }, { "epoch": 8.159615384615385, "grad_norm": 2.38752818107605, "learning_rate": 1.5743018270963496e-05, "loss": 0.021, "step": 8486 }, { "epoch": 8.160576923076922, "grad_norm": 1.149933934211731, "learning_rate": 1.5741998462392352e-05, "loss": 0.0125, "step": 8487 }, { "epoch": 8.161538461538461, "grad_norm": 2.9141476154327393, "learning_rate": 1.574097856472225e-05, "loss": 0.0626, "step": 8488 }, { "epoch": 8.1625, "grad_norm": 0.3642544448375702, "learning_rate": 1.5739958577969005e-05, "loss": 0.0024, "step": 8489 }, { "epoch": 8.163461538461538, "grad_norm": 2.707888603210449, "learning_rate": 1.573893850214845e-05, "loss": 0.0741, "step": 8490 }, { "epoch": 8.164423076923077, "grad_norm": 0.2335919439792633, "learning_rate": 1.5737918337276414e-05, "loss": 0.0017, "step": 8491 }, { "epoch": 8.165384615384616, "grad_norm": 1.5937087535858154, "learning_rate": 1.5736898083368723e-05, "loss": 0.0381, "step": 8492 }, { "epoch": 8.166346153846154, "grad_norm": 0.054368507117033005, "learning_rate": 1.5735877740441213e-05, "loss": 0.0004, "step": 8493 }, { "epoch": 8.167307692307693, "grad_norm": 0.7076345682144165, "learning_rate": 1.5734857308509718e-05, "loss": 0.0039, "step": 8494 }, { "epoch": 8.16826923076923, "grad_norm": 1.581374168395996, "learning_rate": 1.5733836787590066e-05, "loss": 0.0414, "step": 8495 }, { "epoch": 8.169230769230769, "grad_norm": 2.7188591957092285, "learning_rate": 1.5732816177698097e-05, "loss": 0.0099, "step": 8496 }, { "epoch": 8.170192307692307, "grad_norm": 0.6263406872749329, "learning_rate": 1.5731795478849647e-05, "loss": 0.0052, "step": 8497 }, { "epoch": 8.171153846153846, "grad_norm": 0.8693374991416931, "learning_rate": 1.5730774691060552e-05, "loss": 0.0064, "step": 8498 }, { "epoch": 8.172115384615385, "grad_norm": 3.366767406463623, "learning_rate": 1.5729753814346655e-05, "loss": 0.019, "step": 8499 }, { "epoch": 8.173076923076923, "grad_norm": 1.0829209089279175, "learning_rate": 1.5728732848723796e-05, "loss": 0.0081, "step": 8500 }, { "epoch": 8.174038461538462, "grad_norm": 2.0937836170196533, "learning_rate": 1.5727711794207814e-05, "loss": 0.0166, "step": 8501 }, { "epoch": 8.175, "grad_norm": 1.985098123550415, "learning_rate": 1.572669065081456e-05, "loss": 0.0288, "step": 8502 }, { "epoch": 8.175961538461538, "grad_norm": 2.8086652755737305, "learning_rate": 1.5725669418559867e-05, "loss": 0.0504, "step": 8503 }, { "epoch": 8.176923076923076, "grad_norm": 1.5021047592163086, "learning_rate": 1.5724648097459597e-05, "loss": 0.0077, "step": 8504 }, { "epoch": 8.177884615384615, "grad_norm": 1.4079214334487915, "learning_rate": 1.5723626687529587e-05, "loss": 0.0062, "step": 8505 }, { "epoch": 8.178846153846154, "grad_norm": 1.2525103092193604, "learning_rate": 1.572260518878569e-05, "loss": 0.011, "step": 8506 }, { "epoch": 8.179807692307692, "grad_norm": 1.3275907039642334, "learning_rate": 1.5721583601243755e-05, "loss": 0.01, "step": 8507 }, { "epoch": 8.180769230769231, "grad_norm": 0.2401643842458725, "learning_rate": 1.5720561924919636e-05, "loss": 0.0018, "step": 8508 }, { "epoch": 8.18173076923077, "grad_norm": 2.343552350997925, "learning_rate": 1.5719540159829185e-05, "loss": 0.0229, "step": 8509 }, { "epoch": 8.182692307692308, "grad_norm": 2.346646308898926, "learning_rate": 1.5718518305988258e-05, "loss": 0.1004, "step": 8510 }, { "epoch": 8.183653846153845, "grad_norm": 5.587529182434082, "learning_rate": 1.5717496363412712e-05, "loss": 0.0712, "step": 8511 }, { "epoch": 8.184615384615384, "grad_norm": 2.925095796585083, "learning_rate": 1.57164743321184e-05, "loss": 0.0814, "step": 8512 }, { "epoch": 8.185576923076923, "grad_norm": 2.1882119178771973, "learning_rate": 1.5715452212121188e-05, "loss": 0.0194, "step": 8513 }, { "epoch": 8.186538461538461, "grad_norm": 0.2466847002506256, "learning_rate": 1.571443000343693e-05, "loss": 0.0011, "step": 8514 }, { "epoch": 8.1875, "grad_norm": 0.8590470552444458, "learning_rate": 1.5713407706081488e-05, "loss": 0.0068, "step": 8515 }, { "epoch": 8.188461538461539, "grad_norm": 2.163015127182007, "learning_rate": 1.571238532007073e-05, "loss": 0.0085, "step": 8516 }, { "epoch": 8.189423076923077, "grad_norm": 1.7634750604629517, "learning_rate": 1.5711362845420518e-05, "loss": 0.0193, "step": 8517 }, { "epoch": 8.190384615384616, "grad_norm": 2.841874837875366, "learning_rate": 1.5710340282146716e-05, "loss": 0.1467, "step": 8518 }, { "epoch": 8.191346153846155, "grad_norm": 1.615851640701294, "learning_rate": 1.570931763026519e-05, "loss": 0.0191, "step": 8519 }, { "epoch": 8.192307692307692, "grad_norm": 3.3347885608673096, "learning_rate": 1.5708294889791814e-05, "loss": 0.0642, "step": 8520 }, { "epoch": 8.19326923076923, "grad_norm": 1.3678432703018188, "learning_rate": 1.5707272060742452e-05, "loss": 0.007, "step": 8521 }, { "epoch": 8.194230769230769, "grad_norm": 1.510719656944275, "learning_rate": 1.5706249143132982e-05, "loss": 0.0065, "step": 8522 }, { "epoch": 8.195192307692308, "grad_norm": 0.1735934615135193, "learning_rate": 1.5705226136979273e-05, "loss": 0.0013, "step": 8523 }, { "epoch": 8.196153846153846, "grad_norm": 2.5988242626190186, "learning_rate": 1.5704203042297196e-05, "loss": 0.0285, "step": 8524 }, { "epoch": 8.197115384615385, "grad_norm": 0.9085572361946106, "learning_rate": 1.570317985910263e-05, "loss": 0.0079, "step": 8525 }, { "epoch": 8.198076923076924, "grad_norm": 3.1036598682403564, "learning_rate": 1.5702156587411453e-05, "loss": 0.0218, "step": 8526 }, { "epoch": 8.199038461538462, "grad_norm": 3.4574413299560547, "learning_rate": 1.570113322723954e-05, "loss": 0.0356, "step": 8527 }, { "epoch": 8.2, "grad_norm": 2.327535629272461, "learning_rate": 1.5700109778602772e-05, "loss": 0.0722, "step": 8528 }, { "epoch": 8.200961538461538, "grad_norm": 4.081518173217773, "learning_rate": 1.569908624151703e-05, "loss": 0.0713, "step": 8529 }, { "epoch": 8.201923076923077, "grad_norm": 0.7387577295303345, "learning_rate": 1.5698062615998196e-05, "loss": 0.0026, "step": 8530 }, { "epoch": 8.202884615384615, "grad_norm": 2.5121686458587646, "learning_rate": 1.5697038902062157e-05, "loss": 0.0325, "step": 8531 }, { "epoch": 8.203846153846154, "grad_norm": 3.259476900100708, "learning_rate": 1.569601509972479e-05, "loss": 0.0192, "step": 8532 }, { "epoch": 8.204807692307693, "grad_norm": 0.21113626658916473, "learning_rate": 1.5694991209001988e-05, "loss": 0.0015, "step": 8533 }, { "epoch": 8.205769230769231, "grad_norm": 0.7987468838691711, "learning_rate": 1.569396722990964e-05, "loss": 0.0054, "step": 8534 }, { "epoch": 8.20673076923077, "grad_norm": 0.6806673407554626, "learning_rate": 1.5692943162463628e-05, "loss": 0.0031, "step": 8535 }, { "epoch": 8.207692307692307, "grad_norm": 1.9974409341812134, "learning_rate": 1.5691919006679847e-05, "loss": 0.0132, "step": 8536 }, { "epoch": 8.208653846153846, "grad_norm": 0.38769376277923584, "learning_rate": 1.5690894762574194e-05, "loss": 0.0026, "step": 8537 }, { "epoch": 8.209615384615384, "grad_norm": 0.9337697625160217, "learning_rate": 1.568987043016255e-05, "loss": 0.0036, "step": 8538 }, { "epoch": 8.210576923076923, "grad_norm": 0.3235160708427429, "learning_rate": 1.568884600946082e-05, "loss": 0.0025, "step": 8539 }, { "epoch": 8.211538461538462, "grad_norm": 2.4130797386169434, "learning_rate": 1.5687821500484894e-05, "loss": 0.0157, "step": 8540 }, { "epoch": 8.2125, "grad_norm": 1.4306970834732056, "learning_rate": 1.5686796903250672e-05, "loss": 0.0055, "step": 8541 }, { "epoch": 8.213461538461539, "grad_norm": 3.093635320663452, "learning_rate": 1.568577221777406e-05, "loss": 0.0407, "step": 8542 }, { "epoch": 8.214423076923078, "grad_norm": 2.83150053024292, "learning_rate": 1.5684747444070942e-05, "loss": 0.0385, "step": 8543 }, { "epoch": 8.215384615384615, "grad_norm": 0.19299839437007904, "learning_rate": 1.5683722582157234e-05, "loss": 0.0013, "step": 8544 }, { "epoch": 8.216346153846153, "grad_norm": 0.5428751111030579, "learning_rate": 1.568269763204883e-05, "loss": 0.0054, "step": 8545 }, { "epoch": 8.217307692307692, "grad_norm": 3.108689069747925, "learning_rate": 1.5681672593761635e-05, "loss": 0.0324, "step": 8546 }, { "epoch": 8.21826923076923, "grad_norm": 0.375868558883667, "learning_rate": 1.568064746731156e-05, "loss": 0.002, "step": 8547 }, { "epoch": 8.21923076923077, "grad_norm": 5.499864101409912, "learning_rate": 1.5679622252714507e-05, "loss": 0.0136, "step": 8548 }, { "epoch": 8.220192307692308, "grad_norm": 0.4276775121688843, "learning_rate": 1.5678596949986385e-05, "loss": 0.0037, "step": 8549 }, { "epoch": 8.221153846153847, "grad_norm": 0.4172000288963318, "learning_rate": 1.567757155914311e-05, "loss": 0.0024, "step": 8550 }, { "epoch": 8.222115384615385, "grad_norm": 2.0623862743377686, "learning_rate": 1.5676546080200587e-05, "loss": 0.0315, "step": 8551 }, { "epoch": 8.223076923076922, "grad_norm": 0.605018138885498, "learning_rate": 1.5675520513174725e-05, "loss": 0.0041, "step": 8552 }, { "epoch": 8.224038461538461, "grad_norm": 3.437633752822876, "learning_rate": 1.5674494858081442e-05, "loss": 0.0277, "step": 8553 }, { "epoch": 8.225, "grad_norm": 4.18513298034668, "learning_rate": 1.5673469114936655e-05, "loss": 0.0287, "step": 8554 }, { "epoch": 8.225961538461538, "grad_norm": 3.3965699672698975, "learning_rate": 1.567244328375628e-05, "loss": 0.0266, "step": 8555 }, { "epoch": 8.226923076923077, "grad_norm": 0.1628262996673584, "learning_rate": 1.5671417364556235e-05, "loss": 0.0011, "step": 8556 }, { "epoch": 8.227884615384616, "grad_norm": 1.2510889768600464, "learning_rate": 1.5670391357352433e-05, "loss": 0.0284, "step": 8557 }, { "epoch": 8.228846153846154, "grad_norm": 0.952182412147522, "learning_rate": 1.5669365262160804e-05, "loss": 0.0074, "step": 8558 }, { "epoch": 8.229807692307693, "grad_norm": 1.2782789468765259, "learning_rate": 1.5668339078997264e-05, "loss": 0.0147, "step": 8559 }, { "epoch": 8.23076923076923, "grad_norm": 0.3609100878238678, "learning_rate": 1.566731280787774e-05, "loss": 0.0013, "step": 8560 }, { "epoch": 8.231730769230769, "grad_norm": 0.4559246599674225, "learning_rate": 1.5666286448818152e-05, "loss": 0.0021, "step": 8561 }, { "epoch": 8.232692307692307, "grad_norm": 1.5324053764343262, "learning_rate": 1.5665260001834427e-05, "loss": 0.0109, "step": 8562 }, { "epoch": 8.233653846153846, "grad_norm": 0.9905279874801636, "learning_rate": 1.56642334669425e-05, "loss": 0.0092, "step": 8563 }, { "epoch": 8.234615384615385, "grad_norm": 1.3959848880767822, "learning_rate": 1.5663206844158286e-05, "loss": 0.0112, "step": 8564 }, { "epoch": 8.235576923076923, "grad_norm": 2.2202765941619873, "learning_rate": 1.5662180133497728e-05, "loss": 0.0198, "step": 8565 }, { "epoch": 8.236538461538462, "grad_norm": 2.614004373550415, "learning_rate": 1.5661153334976753e-05, "loss": 0.0235, "step": 8566 }, { "epoch": 8.2375, "grad_norm": 3.8335113525390625, "learning_rate": 1.5660126448611292e-05, "loss": 0.1366, "step": 8567 }, { "epoch": 8.238461538461538, "grad_norm": 1.7375437021255493, "learning_rate": 1.5659099474417283e-05, "loss": 0.0127, "step": 8568 }, { "epoch": 8.239423076923076, "grad_norm": 0.25556936860084534, "learning_rate": 1.5658072412410654e-05, "loss": 0.001, "step": 8569 }, { "epoch": 8.240384615384615, "grad_norm": 2.347547769546509, "learning_rate": 1.565704526260735e-05, "loss": 0.0181, "step": 8570 }, { "epoch": 8.241346153846154, "grad_norm": 0.4863249659538269, "learning_rate": 1.5656018025023308e-05, "loss": 0.0029, "step": 8571 }, { "epoch": 8.242307692307692, "grad_norm": 1.6773048639297485, "learning_rate": 1.5654990699674467e-05, "loss": 0.0087, "step": 8572 }, { "epoch": 8.243269230769231, "grad_norm": 2.1167943477630615, "learning_rate": 1.565396328657677e-05, "loss": 0.0125, "step": 8573 }, { "epoch": 8.24423076923077, "grad_norm": 4.425620079040527, "learning_rate": 1.565293578574615e-05, "loss": 0.1716, "step": 8574 }, { "epoch": 8.245192307692308, "grad_norm": 0.5208784937858582, "learning_rate": 1.5651908197198564e-05, "loss": 0.0042, "step": 8575 }, { "epoch": 8.246153846153845, "grad_norm": 1.047802209854126, "learning_rate": 1.5650880520949947e-05, "loss": 0.0055, "step": 8576 }, { "epoch": 8.247115384615384, "grad_norm": 3.3145859241485596, "learning_rate": 1.564985275701625e-05, "loss": 0.055, "step": 8577 }, { "epoch": 8.248076923076923, "grad_norm": 1.0612778663635254, "learning_rate": 1.5648824905413423e-05, "loss": 0.0099, "step": 8578 }, { "epoch": 8.249038461538461, "grad_norm": 1.1865314245224, "learning_rate": 1.564779696615741e-05, "loss": 0.009, "step": 8579 }, { "epoch": 8.25, "grad_norm": 1.4829821586608887, "learning_rate": 1.5646768939264166e-05, "loss": 0.014, "step": 8580 }, { "epoch": 8.250961538461539, "grad_norm": 1.3787733316421509, "learning_rate": 1.564574082474964e-05, "loss": 0.0063, "step": 8581 }, { "epoch": 8.251923076923077, "grad_norm": 1.6157891750335693, "learning_rate": 1.5644712622629788e-05, "loss": 0.0091, "step": 8582 }, { "epoch": 8.252884615384616, "grad_norm": 5.72125244140625, "learning_rate": 1.564368433292056e-05, "loss": 0.0549, "step": 8583 }, { "epoch": 8.253846153846155, "grad_norm": 2.0574817657470703, "learning_rate": 1.5642655955637915e-05, "loss": 0.0195, "step": 8584 }, { "epoch": 8.254807692307692, "grad_norm": 3.784043788909912, "learning_rate": 1.5641627490797815e-05, "loss": 0.0567, "step": 8585 }, { "epoch": 8.25576923076923, "grad_norm": 0.7856494784355164, "learning_rate": 1.564059893841621e-05, "loss": 0.0037, "step": 8586 }, { "epoch": 8.256730769230769, "grad_norm": 1.5984654426574707, "learning_rate": 1.5639570298509067e-05, "loss": 0.0131, "step": 8587 }, { "epoch": 8.257692307692308, "grad_norm": 1.0425326824188232, "learning_rate": 1.5638541571092342e-05, "loss": 0.0045, "step": 8588 }, { "epoch": 8.258653846153846, "grad_norm": 2.8569231033325195, "learning_rate": 1.5637512756182003e-05, "loss": 0.0443, "step": 8589 }, { "epoch": 8.259615384615385, "grad_norm": 2.772646188735962, "learning_rate": 1.5636483853794014e-05, "loss": 0.0397, "step": 8590 }, { "epoch": 8.260576923076924, "grad_norm": 2.0276331901550293, "learning_rate": 1.5635454863944333e-05, "loss": 0.0232, "step": 8591 }, { "epoch": 8.261538461538462, "grad_norm": 2.6357479095458984, "learning_rate": 1.5634425786648936e-05, "loss": 0.023, "step": 8592 }, { "epoch": 8.2625, "grad_norm": 3.276996612548828, "learning_rate": 1.5633396621923788e-05, "loss": 0.0313, "step": 8593 }, { "epoch": 8.263461538461538, "grad_norm": 1.9537463188171387, "learning_rate": 1.5632367369784857e-05, "loss": 0.0136, "step": 8594 }, { "epoch": 8.264423076923077, "grad_norm": 0.39324358105659485, "learning_rate": 1.5631338030248117e-05, "loss": 0.0032, "step": 8595 }, { "epoch": 8.265384615384615, "grad_norm": 2.7122809886932373, "learning_rate": 1.5630308603329537e-05, "loss": 0.0477, "step": 8596 }, { "epoch": 8.266346153846154, "grad_norm": 1.002213716506958, "learning_rate": 1.5629279089045093e-05, "loss": 0.0059, "step": 8597 }, { "epoch": 8.267307692307693, "grad_norm": 4.131115436553955, "learning_rate": 1.562824948741076e-05, "loss": 0.0318, "step": 8598 }, { "epoch": 8.268269230769231, "grad_norm": 0.5956870913505554, "learning_rate": 1.562721979844251e-05, "loss": 0.0049, "step": 8599 }, { "epoch": 8.26923076923077, "grad_norm": 1.575439214706421, "learning_rate": 1.5626190022156328e-05, "loss": 0.0086, "step": 8600 }, { "epoch": 8.270192307692307, "grad_norm": 0.2627207338809967, "learning_rate": 1.5625160158568188e-05, "loss": 0.0026, "step": 8601 }, { "epoch": 8.271153846153846, "grad_norm": 2.242055654525757, "learning_rate": 1.562413020769407e-05, "loss": 0.0211, "step": 8602 }, { "epoch": 8.272115384615384, "grad_norm": 1.4419362545013428, "learning_rate": 1.5623100169549965e-05, "loss": 0.0077, "step": 8603 }, { "epoch": 8.273076923076923, "grad_norm": 1.0845435857772827, "learning_rate": 1.5622070044151842e-05, "loss": 0.0065, "step": 8604 }, { "epoch": 8.274038461538462, "grad_norm": 1.1041654348373413, "learning_rate": 1.562103983151569e-05, "loss": 0.0065, "step": 8605 }, { "epoch": 8.275, "grad_norm": 0.9608336687088013, "learning_rate": 1.5620009531657506e-05, "loss": 0.0059, "step": 8606 }, { "epoch": 8.275961538461539, "grad_norm": 0.4577905535697937, "learning_rate": 1.5618979144593264e-05, "loss": 0.0034, "step": 8607 }, { "epoch": 8.276923076923078, "grad_norm": 3.256965160369873, "learning_rate": 1.5617948670338957e-05, "loss": 0.0445, "step": 8608 }, { "epoch": 8.277884615384615, "grad_norm": 3.529857873916626, "learning_rate": 1.5616918108910577e-05, "loss": 0.0726, "step": 8609 }, { "epoch": 8.278846153846153, "grad_norm": 4.6371541023254395, "learning_rate": 1.561588746032411e-05, "loss": 0.0178, "step": 8610 }, { "epoch": 8.279807692307692, "grad_norm": 2.0170700550079346, "learning_rate": 1.5614856724595556e-05, "loss": 0.0524, "step": 8611 }, { "epoch": 8.28076923076923, "grad_norm": 0.5126069784164429, "learning_rate": 1.5613825901740902e-05, "loss": 0.0023, "step": 8612 }, { "epoch": 8.28173076923077, "grad_norm": 0.9175063967704773, "learning_rate": 1.5612794991776147e-05, "loss": 0.0069, "step": 8613 }, { "epoch": 8.282692307692308, "grad_norm": 2.078251838684082, "learning_rate": 1.561176399471729e-05, "loss": 0.0269, "step": 8614 }, { "epoch": 8.283653846153847, "grad_norm": 2.619131326675415, "learning_rate": 1.5610732910580324e-05, "loss": 0.0281, "step": 8615 }, { "epoch": 8.284615384615385, "grad_norm": 3.1284213066101074, "learning_rate": 1.560970173938125e-05, "loss": 0.0694, "step": 8616 }, { "epoch": 8.285576923076922, "grad_norm": 0.7395358085632324, "learning_rate": 1.560867048113607e-05, "loss": 0.0034, "step": 8617 }, { "epoch": 8.286538461538461, "grad_norm": 2.8923847675323486, "learning_rate": 1.5607639135860788e-05, "loss": 0.032, "step": 8618 }, { "epoch": 8.2875, "grad_norm": 2.851262092590332, "learning_rate": 1.56066077035714e-05, "loss": 0.0224, "step": 8619 }, { "epoch": 8.288461538461538, "grad_norm": 5.166256904602051, "learning_rate": 1.5605576184283916e-05, "loss": 0.0959, "step": 8620 }, { "epoch": 8.289423076923077, "grad_norm": 2.89927077293396, "learning_rate": 1.560454457801434e-05, "loss": 0.025, "step": 8621 }, { "epoch": 8.290384615384616, "grad_norm": 6.633941173553467, "learning_rate": 1.560351288477869e-05, "loss": 0.1298, "step": 8622 }, { "epoch": 8.291346153846154, "grad_norm": 2.590176820755005, "learning_rate": 1.560248110459296e-05, "loss": 0.0248, "step": 8623 }, { "epoch": 8.292307692307693, "grad_norm": 1.2776648998260498, "learning_rate": 1.5601449237473168e-05, "loss": 0.0261, "step": 8624 }, { "epoch": 8.29326923076923, "grad_norm": 1.3027667999267578, "learning_rate": 1.560041728343532e-05, "loss": 0.0311, "step": 8625 }, { "epoch": 8.294230769230769, "grad_norm": 0.8197978138923645, "learning_rate": 1.5599385242495437e-05, "loss": 0.0034, "step": 8626 }, { "epoch": 8.295192307692307, "grad_norm": 2.195188045501709, "learning_rate": 1.559835311466953e-05, "loss": 0.0212, "step": 8627 }, { "epoch": 8.296153846153846, "grad_norm": 1.8020036220550537, "learning_rate": 1.559732089997361e-05, "loss": 0.0409, "step": 8628 }, { "epoch": 8.297115384615385, "grad_norm": 2.784212589263916, "learning_rate": 1.5596288598423697e-05, "loss": 0.0233, "step": 8629 }, { "epoch": 8.298076923076923, "grad_norm": 0.44224342703819275, "learning_rate": 1.5595256210035815e-05, "loss": 0.0038, "step": 8630 }, { "epoch": 8.299038461538462, "grad_norm": 2.4570040702819824, "learning_rate": 1.5594223734825974e-05, "loss": 0.0546, "step": 8631 }, { "epoch": 8.3, "grad_norm": 3.150324821472168, "learning_rate": 1.55931911728102e-05, "loss": 0.0248, "step": 8632 }, { "epoch": 8.300961538461538, "grad_norm": 1.2601680755615234, "learning_rate": 1.5592158524004517e-05, "loss": 0.0096, "step": 8633 }, { "epoch": 8.301923076923076, "grad_norm": 2.691281795501709, "learning_rate": 1.5591125788424944e-05, "loss": 0.0793, "step": 8634 }, { "epoch": 8.302884615384615, "grad_norm": 1.0225284099578857, "learning_rate": 1.559009296608751e-05, "loss": 0.0114, "step": 8635 }, { "epoch": 8.303846153846154, "grad_norm": 0.92251056432724, "learning_rate": 1.5589060057008238e-05, "loss": 0.0083, "step": 8636 }, { "epoch": 8.304807692307692, "grad_norm": 0.3188043534755707, "learning_rate": 1.5588027061203163e-05, "loss": 0.0024, "step": 8637 }, { "epoch": 8.305769230769231, "grad_norm": 2.123778820037842, "learning_rate": 1.5586993978688304e-05, "loss": 0.0147, "step": 8638 }, { "epoch": 8.30673076923077, "grad_norm": 2.4824390411376953, "learning_rate": 1.5585960809479698e-05, "loss": 0.018, "step": 8639 }, { "epoch": 8.307692307692308, "grad_norm": 4.149640083312988, "learning_rate": 1.5584927553593373e-05, "loss": 0.0869, "step": 8640 }, { "epoch": 8.308653846153845, "grad_norm": 3.054534912109375, "learning_rate": 1.5583894211045362e-05, "loss": 0.0471, "step": 8641 }, { "epoch": 8.309615384615384, "grad_norm": 3.0534870624542236, "learning_rate": 1.558286078185171e-05, "loss": 0.0713, "step": 8642 }, { "epoch": 8.310576923076923, "grad_norm": 2.8046185970306396, "learning_rate": 1.558182726602843e-05, "loss": 0.0422, "step": 8643 }, { "epoch": 8.311538461538461, "grad_norm": 0.8312775492668152, "learning_rate": 1.5580793663591583e-05, "loss": 0.0066, "step": 8644 }, { "epoch": 8.3125, "grad_norm": 1.0367752313613892, "learning_rate": 1.5579759974557197e-05, "loss": 0.0121, "step": 8645 }, { "epoch": 8.313461538461539, "grad_norm": 0.5158880352973938, "learning_rate": 1.5578726198941315e-05, "loss": 0.0048, "step": 8646 }, { "epoch": 8.314423076923077, "grad_norm": 3.067052125930786, "learning_rate": 1.5577692336759974e-05, "loss": 0.0263, "step": 8647 }, { "epoch": 8.315384615384616, "grad_norm": 2.14111065864563, "learning_rate": 1.5576658388029216e-05, "loss": 0.0889, "step": 8648 }, { "epoch": 8.316346153846155, "grad_norm": 1.935865879058838, "learning_rate": 1.5575624352765085e-05, "loss": 0.0157, "step": 8649 }, { "epoch": 8.317307692307692, "grad_norm": 1.080568552017212, "learning_rate": 1.5574590230983636e-05, "loss": 0.014, "step": 8650 }, { "epoch": 8.31826923076923, "grad_norm": 2.804777145385742, "learning_rate": 1.5573556022700902e-05, "loss": 0.0378, "step": 8651 }, { "epoch": 8.319230769230769, "grad_norm": 1.0445923805236816, "learning_rate": 1.5572521727932937e-05, "loss": 0.0049, "step": 8652 }, { "epoch": 8.320192307692308, "grad_norm": 0.8952280879020691, "learning_rate": 1.5571487346695792e-05, "loss": 0.0057, "step": 8653 }, { "epoch": 8.321153846153846, "grad_norm": 1.613590955734253, "learning_rate": 1.5570452879005512e-05, "loss": 0.0148, "step": 8654 }, { "epoch": 8.322115384615385, "grad_norm": 2.0664618015289307, "learning_rate": 1.5569418324878155e-05, "loss": 0.0146, "step": 8655 }, { "epoch": 8.323076923076924, "grad_norm": 0.2961326241493225, "learning_rate": 1.556838368432977e-05, "loss": 0.0019, "step": 8656 }, { "epoch": 8.324038461538462, "grad_norm": 1.189179539680481, "learning_rate": 1.5567348957376414e-05, "loss": 0.0129, "step": 8657 }, { "epoch": 8.325, "grad_norm": 1.612618327140808, "learning_rate": 1.5566314144034146e-05, "loss": 0.013, "step": 8658 }, { "epoch": 8.325961538461538, "grad_norm": 2.1494526863098145, "learning_rate": 1.5565279244319018e-05, "loss": 0.0123, "step": 8659 }, { "epoch": 8.326923076923077, "grad_norm": 1.6600735187530518, "learning_rate": 1.5564244258247087e-05, "loss": 0.0187, "step": 8660 }, { "epoch": 8.327884615384615, "grad_norm": 0.16357596218585968, "learning_rate": 1.5563209185834416e-05, "loss": 0.0017, "step": 8661 }, { "epoch": 8.328846153846154, "grad_norm": 3.129136085510254, "learning_rate": 1.556217402709707e-05, "loss": 0.0731, "step": 8662 }, { "epoch": 8.329807692307693, "grad_norm": 0.1440412551164627, "learning_rate": 1.5561138782051103e-05, "loss": 0.0011, "step": 8663 }, { "epoch": 8.330769230769231, "grad_norm": 1.327972173690796, "learning_rate": 1.5560103450712586e-05, "loss": 0.0075, "step": 8664 }, { "epoch": 8.33173076923077, "grad_norm": 2.1224114894866943, "learning_rate": 1.5559068033097583e-05, "loss": 0.0213, "step": 8665 }, { "epoch": 8.332692307692307, "grad_norm": 4.929311752319336, "learning_rate": 1.555803252922216e-05, "loss": 0.0451, "step": 8666 }, { "epoch": 8.333653846153846, "grad_norm": 0.10403475910425186, "learning_rate": 1.5556996939102385e-05, "loss": 0.0013, "step": 8667 }, { "epoch": 8.334615384615384, "grad_norm": 2.1328542232513428, "learning_rate": 1.5555961262754324e-05, "loss": 0.0132, "step": 8668 }, { "epoch": 8.335576923076923, "grad_norm": 2.0260496139526367, "learning_rate": 1.5554925500194054e-05, "loss": 0.0702, "step": 8669 }, { "epoch": 8.336538461538462, "grad_norm": 1.9798014163970947, "learning_rate": 1.5553889651437644e-05, "loss": 0.0134, "step": 8670 }, { "epoch": 8.3375, "grad_norm": 0.7681238055229187, "learning_rate": 1.5552853716501165e-05, "loss": 0.0028, "step": 8671 }, { "epoch": 8.338461538461539, "grad_norm": 1.0378820896148682, "learning_rate": 1.5551817695400693e-05, "loss": 0.0263, "step": 8672 }, { "epoch": 8.339423076923078, "grad_norm": 0.7387703657150269, "learning_rate": 1.5550781588152307e-05, "loss": 0.005, "step": 8673 }, { "epoch": 8.340384615384615, "grad_norm": 2.0381171703338623, "learning_rate": 1.554974539477208e-05, "loss": 0.0202, "step": 8674 }, { "epoch": 8.341346153846153, "grad_norm": 2.139709234237671, "learning_rate": 1.5548709115276095e-05, "loss": 0.0389, "step": 8675 }, { "epoch": 8.342307692307692, "grad_norm": 1.000730037689209, "learning_rate": 1.5547672749680425e-05, "loss": 0.0034, "step": 8676 }, { "epoch": 8.34326923076923, "grad_norm": 0.3738476634025574, "learning_rate": 1.5546636298001163e-05, "loss": 0.0024, "step": 8677 }, { "epoch": 8.34423076923077, "grad_norm": 0.12742187082767487, "learning_rate": 1.554559976025438e-05, "loss": 0.0008, "step": 8678 }, { "epoch": 8.345192307692308, "grad_norm": 2.0943515300750732, "learning_rate": 1.5544563136456167e-05, "loss": 0.0058, "step": 8679 }, { "epoch": 8.346153846153847, "grad_norm": 2.2832322120666504, "learning_rate": 1.5543526426622607e-05, "loss": 0.0367, "step": 8680 }, { "epoch": 8.347115384615385, "grad_norm": 0.9180238842964172, "learning_rate": 1.554248963076979e-05, "loss": 0.0056, "step": 8681 }, { "epoch": 8.348076923076922, "grad_norm": 1.417242407798767, "learning_rate": 1.5541452748913798e-05, "loss": 0.0278, "step": 8682 }, { "epoch": 8.349038461538461, "grad_norm": 2.240691900253296, "learning_rate": 1.5540415781070723e-05, "loss": 0.011, "step": 8683 }, { "epoch": 8.35, "grad_norm": 0.2130621373653412, "learning_rate": 1.5539378727256656e-05, "loss": 0.0011, "step": 8684 }, { "epoch": 8.350961538461538, "grad_norm": 2.3201191425323486, "learning_rate": 1.5538341587487685e-05, "loss": 0.0146, "step": 8685 }, { "epoch": 8.351923076923077, "grad_norm": 1.0788973569869995, "learning_rate": 1.5537304361779914e-05, "loss": 0.0052, "step": 8686 }, { "epoch": 8.352884615384616, "grad_norm": 0.021688589826226234, "learning_rate": 1.5536267050149428e-05, "loss": 0.0002, "step": 8687 }, { "epoch": 8.353846153846154, "grad_norm": 0.6540505290031433, "learning_rate": 1.553522965261233e-05, "loss": 0.0042, "step": 8688 }, { "epoch": 8.354807692307693, "grad_norm": 3.4500036239624023, "learning_rate": 1.5534192169184707e-05, "loss": 0.0329, "step": 8689 }, { "epoch": 8.35576923076923, "grad_norm": 2.2220492362976074, "learning_rate": 1.553315459988267e-05, "loss": 0.0139, "step": 8690 }, { "epoch": 8.356730769230769, "grad_norm": 0.8400586843490601, "learning_rate": 1.5532116944722308e-05, "loss": 0.0061, "step": 8691 }, { "epoch": 8.357692307692307, "grad_norm": 1.337946891784668, "learning_rate": 1.5531079203719732e-05, "loss": 0.0516, "step": 8692 }, { "epoch": 8.358653846153846, "grad_norm": 1.9278022050857544, "learning_rate": 1.5530041376891038e-05, "loss": 0.0251, "step": 8693 }, { "epoch": 8.359615384615385, "grad_norm": 3.16998291015625, "learning_rate": 1.5529003464252333e-05, "loss": 0.0466, "step": 8694 }, { "epoch": 8.360576923076923, "grad_norm": 0.21436890959739685, "learning_rate": 1.552796546581972e-05, "loss": 0.0015, "step": 8695 }, { "epoch": 8.361538461538462, "grad_norm": 1.1664665937423706, "learning_rate": 1.5526927381609308e-05, "loss": 0.0095, "step": 8696 }, { "epoch": 8.3625, "grad_norm": 1.5467326641082764, "learning_rate": 1.5525889211637206e-05, "loss": 0.0129, "step": 8697 }, { "epoch": 8.363461538461538, "grad_norm": 0.128243550658226, "learning_rate": 1.552485095591952e-05, "loss": 0.0009, "step": 8698 }, { "epoch": 8.364423076923076, "grad_norm": 2.859326124191284, "learning_rate": 1.5523812614472365e-05, "loss": 0.0337, "step": 8699 }, { "epoch": 8.365384615384615, "grad_norm": 4.399127006530762, "learning_rate": 1.5522774187311848e-05, "loss": 0.1185, "step": 8700 }, { "epoch": 8.366346153846154, "grad_norm": 0.5379676222801208, "learning_rate": 1.5521735674454087e-05, "loss": 0.0023, "step": 8701 }, { "epoch": 8.367307692307692, "grad_norm": 0.796435534954071, "learning_rate": 1.5520697075915193e-05, "loss": 0.0052, "step": 8702 }, { "epoch": 8.368269230769231, "grad_norm": 4.982675552368164, "learning_rate": 1.5519658391711283e-05, "loss": 0.1443, "step": 8703 }, { "epoch": 8.36923076923077, "grad_norm": 0.6089456677436829, "learning_rate": 1.5518619621858474e-05, "loss": 0.0039, "step": 8704 }, { "epoch": 8.370192307692308, "grad_norm": 0.6269102692604065, "learning_rate": 1.5517580766372887e-05, "loss": 0.003, "step": 8705 }, { "epoch": 8.371153846153845, "grad_norm": 0.6550171971321106, "learning_rate": 1.551654182527064e-05, "loss": 0.0031, "step": 8706 }, { "epoch": 8.372115384615384, "grad_norm": 3.2535276412963867, "learning_rate": 1.5515502798567856e-05, "loss": 0.1013, "step": 8707 }, { "epoch": 8.373076923076923, "grad_norm": 3.178060531616211, "learning_rate": 1.5514463686280653e-05, "loss": 0.0308, "step": 8708 }, { "epoch": 8.374038461538461, "grad_norm": 0.7394765019416809, "learning_rate": 1.5513424488425163e-05, "loss": 0.0046, "step": 8709 }, { "epoch": 8.375, "grad_norm": 1.9804985523223877, "learning_rate": 1.5512385205017504e-05, "loss": 0.0315, "step": 8710 }, { "epoch": 8.375961538461539, "grad_norm": 0.47159531712532043, "learning_rate": 1.5511345836073806e-05, "loss": 0.0033, "step": 8711 }, { "epoch": 8.376923076923077, "grad_norm": 1.1655112504959106, "learning_rate": 1.5510306381610195e-05, "loss": 0.0093, "step": 8712 }, { "epoch": 8.377884615384616, "grad_norm": 0.6640762090682983, "learning_rate": 1.55092668416428e-05, "loss": 0.0026, "step": 8713 }, { "epoch": 8.378846153846155, "grad_norm": 0.25831228494644165, "learning_rate": 1.5508227216187757e-05, "loss": 0.0023, "step": 8714 }, { "epoch": 8.379807692307692, "grad_norm": 3.4696037769317627, "learning_rate": 1.550718750526119e-05, "loss": 0.0302, "step": 8715 }, { "epoch": 8.38076923076923, "grad_norm": 2.8019356727600098, "learning_rate": 1.550614770887924e-05, "loss": 0.0172, "step": 8716 }, { "epoch": 8.381730769230769, "grad_norm": 1.5354336500167847, "learning_rate": 1.5505107827058038e-05, "loss": 0.0145, "step": 8717 }, { "epoch": 8.382692307692308, "grad_norm": 2.3231968879699707, "learning_rate": 1.5504067859813718e-05, "loss": 0.0577, "step": 8718 }, { "epoch": 8.383653846153846, "grad_norm": 1.9636497497558594, "learning_rate": 1.550302780716242e-05, "loss": 0.0308, "step": 8719 }, { "epoch": 8.384615384615385, "grad_norm": 2.151348352432251, "learning_rate": 1.550198766912028e-05, "loss": 0.0187, "step": 8720 }, { "epoch": 8.385576923076924, "grad_norm": 0.8523425459861755, "learning_rate": 1.550094744570344e-05, "loss": 0.0033, "step": 8721 }, { "epoch": 8.386538461538462, "grad_norm": 3.9949872493743896, "learning_rate": 1.5499907136928042e-05, "loss": 0.1097, "step": 8722 }, { "epoch": 8.3875, "grad_norm": 1.7286659479141235, "learning_rate": 1.5498866742810224e-05, "loss": 0.0079, "step": 8723 }, { "epoch": 8.388461538461538, "grad_norm": 0.046355415135622025, "learning_rate": 1.549782626336614e-05, "loss": 0.0005, "step": 8724 }, { "epoch": 8.389423076923077, "grad_norm": 0.5146416425704956, "learning_rate": 1.5496785698611924e-05, "loss": 0.0031, "step": 8725 }, { "epoch": 8.390384615384615, "grad_norm": 4.205075740814209, "learning_rate": 1.5495745048563727e-05, "loss": 0.0309, "step": 8726 }, { "epoch": 8.391346153846154, "grad_norm": 1.5904910564422607, "learning_rate": 1.5494704313237693e-05, "loss": 0.0684, "step": 8727 }, { "epoch": 8.392307692307693, "grad_norm": 1.6409319639205933, "learning_rate": 1.549366349264998e-05, "loss": 0.037, "step": 8728 }, { "epoch": 8.393269230769231, "grad_norm": 3.0948452949523926, "learning_rate": 1.5492622586816733e-05, "loss": 0.0469, "step": 8729 }, { "epoch": 8.39423076923077, "grad_norm": 1.8012871742248535, "learning_rate": 1.5491581595754102e-05, "loss": 0.0114, "step": 8730 }, { "epoch": 8.395192307692307, "grad_norm": 0.6047906875610352, "learning_rate": 1.5490540519478242e-05, "loss": 0.0043, "step": 8731 }, { "epoch": 8.396153846153846, "grad_norm": 0.33336377143859863, "learning_rate": 1.5489499358005308e-05, "loss": 0.0021, "step": 8732 }, { "epoch": 8.397115384615384, "grad_norm": 5.002013683319092, "learning_rate": 1.5488458111351457e-05, "loss": 0.0549, "step": 8733 }, { "epoch": 8.398076923076923, "grad_norm": 1.1830188035964966, "learning_rate": 1.548741677953284e-05, "loss": 0.0075, "step": 8734 }, { "epoch": 8.399038461538462, "grad_norm": 0.15198713541030884, "learning_rate": 1.5486375362565626e-05, "loss": 0.0014, "step": 8735 }, { "epoch": 8.4, "grad_norm": 1.7085710763931274, "learning_rate": 1.5485333860465964e-05, "loss": 0.0118, "step": 8736 }, { "epoch": 8.400961538461539, "grad_norm": 1.0910038948059082, "learning_rate": 1.5484292273250025e-05, "loss": 0.005, "step": 8737 }, { "epoch": 8.401923076923078, "grad_norm": 1.5562180280685425, "learning_rate": 1.5483250600933963e-05, "loss": 0.0062, "step": 8738 }, { "epoch": 8.402884615384615, "grad_norm": 1.7193875312805176, "learning_rate": 1.5482208843533946e-05, "loss": 0.005, "step": 8739 }, { "epoch": 8.403846153846153, "grad_norm": 1.4767060279846191, "learning_rate": 1.5481167001066136e-05, "loss": 0.0093, "step": 8740 }, { "epoch": 8.404807692307692, "grad_norm": 1.5939706563949585, "learning_rate": 1.5480125073546705e-05, "loss": 0.0785, "step": 8741 }, { "epoch": 8.40576923076923, "grad_norm": 1.5443671941757202, "learning_rate": 1.547908306099181e-05, "loss": 0.0171, "step": 8742 }, { "epoch": 8.40673076923077, "grad_norm": 1.7804666757583618, "learning_rate": 1.547804096341763e-05, "loss": 0.0106, "step": 8743 }, { "epoch": 8.407692307692308, "grad_norm": 1.481200933456421, "learning_rate": 1.5476998780840333e-05, "loss": 0.0088, "step": 8744 }, { "epoch": 8.408653846153847, "grad_norm": 0.9218351244926453, "learning_rate": 1.5475956513276093e-05, "loss": 0.0031, "step": 8745 }, { "epoch": 8.409615384615385, "grad_norm": 3.0729637145996094, "learning_rate": 1.547491416074107e-05, "loss": 0.0424, "step": 8746 }, { "epoch": 8.410576923076922, "grad_norm": 5.861369609832764, "learning_rate": 1.5473871723251457e-05, "loss": 0.1226, "step": 8747 }, { "epoch": 8.411538461538461, "grad_norm": 3.7582695484161377, "learning_rate": 1.547282920082342e-05, "loss": 0.0429, "step": 8748 }, { "epoch": 8.4125, "grad_norm": 2.380598783493042, "learning_rate": 1.547178659347313e-05, "loss": 0.0222, "step": 8749 }, { "epoch": 8.413461538461538, "grad_norm": 2.3740315437316895, "learning_rate": 1.5470743901216776e-05, "loss": 0.045, "step": 8750 }, { "epoch": 8.414423076923077, "grad_norm": 0.6455535292625427, "learning_rate": 1.5469701124070535e-05, "loss": 0.004, "step": 8751 }, { "epoch": 8.415384615384616, "grad_norm": 0.8369932174682617, "learning_rate": 1.5468658262050582e-05, "loss": 0.0054, "step": 8752 }, { "epoch": 8.416346153846154, "grad_norm": 2.2800216674804688, "learning_rate": 1.5467615315173104e-05, "loss": 0.0512, "step": 8753 }, { "epoch": 8.417307692307693, "grad_norm": 1.3175122737884521, "learning_rate": 1.5466572283454286e-05, "loss": 0.0057, "step": 8754 }, { "epoch": 8.41826923076923, "grad_norm": 2.1792609691619873, "learning_rate": 1.5465529166910308e-05, "loss": 0.0193, "step": 8755 }, { "epoch": 8.419230769230769, "grad_norm": 0.33655253052711487, "learning_rate": 1.546448596555736e-05, "loss": 0.0016, "step": 8756 }, { "epoch": 8.420192307692307, "grad_norm": 6.654140472412109, "learning_rate": 1.5463442679411625e-05, "loss": 0.1485, "step": 8757 }, { "epoch": 8.421153846153846, "grad_norm": 2.4976871013641357, "learning_rate": 1.5462399308489295e-05, "loss": 0.032, "step": 8758 }, { "epoch": 8.422115384615385, "grad_norm": 0.1989831030368805, "learning_rate": 1.5461355852806562e-05, "loss": 0.0007, "step": 8759 }, { "epoch": 8.423076923076923, "grad_norm": 1.748530626296997, "learning_rate": 1.5460312312379616e-05, "loss": 0.0082, "step": 8760 }, { "epoch": 8.424038461538462, "grad_norm": 3.3048977851867676, "learning_rate": 1.5459268687224646e-05, "loss": 0.0416, "step": 8761 }, { "epoch": 8.425, "grad_norm": 2.505018711090088, "learning_rate": 1.545822497735785e-05, "loss": 0.0203, "step": 8762 }, { "epoch": 8.425961538461538, "grad_norm": 2.863009214401245, "learning_rate": 1.5457181182795422e-05, "loss": 0.0386, "step": 8763 }, { "epoch": 8.426923076923076, "grad_norm": 3.40228009223938, "learning_rate": 1.545613730355356e-05, "loss": 0.0369, "step": 8764 }, { "epoch": 8.427884615384615, "grad_norm": 1.7730525732040405, "learning_rate": 1.5455093339648456e-05, "loss": 0.0314, "step": 8765 }, { "epoch": 8.428846153846154, "grad_norm": 2.095384359359741, "learning_rate": 1.545404929109632e-05, "loss": 0.0069, "step": 8766 }, { "epoch": 8.429807692307692, "grad_norm": 5.265395641326904, "learning_rate": 1.5453005157913338e-05, "loss": 0.0648, "step": 8767 }, { "epoch": 8.430769230769231, "grad_norm": 2.340113639831543, "learning_rate": 1.545196094011573e-05, "loss": 0.0779, "step": 8768 }, { "epoch": 8.43173076923077, "grad_norm": 1.4472318887710571, "learning_rate": 1.5450916637719683e-05, "loss": 0.0154, "step": 8769 }, { "epoch": 8.432692307692308, "grad_norm": 0.3936481177806854, "learning_rate": 1.544987225074141e-05, "loss": 0.0026, "step": 8770 }, { "epoch": 8.433653846153845, "grad_norm": 2.733299970626831, "learning_rate": 1.5448827779197115e-05, "loss": 0.0238, "step": 8771 }, { "epoch": 8.434615384615384, "grad_norm": 2.41219425201416, "learning_rate": 1.5447783223103005e-05, "loss": 0.0096, "step": 8772 }, { "epoch": 8.435576923076923, "grad_norm": 2.835867166519165, "learning_rate": 1.544673858247529e-05, "loss": 0.0333, "step": 8773 }, { "epoch": 8.436538461538461, "grad_norm": 2.593644380569458, "learning_rate": 1.5445693857330175e-05, "loss": 0.0256, "step": 8774 }, { "epoch": 8.4375, "grad_norm": 2.424135446548462, "learning_rate": 1.5444649047683878e-05, "loss": 0.0264, "step": 8775 }, { "epoch": 8.438461538461539, "grad_norm": 0.6839600801467896, "learning_rate": 1.5443604153552603e-05, "loss": 0.0026, "step": 8776 }, { "epoch": 8.439423076923077, "grad_norm": 2.8044214248657227, "learning_rate": 1.5442559174952573e-05, "loss": 0.0262, "step": 8777 }, { "epoch": 8.440384615384616, "grad_norm": 3.3111989498138428, "learning_rate": 1.5441514111899994e-05, "loss": 0.035, "step": 8778 }, { "epoch": 8.441346153846155, "grad_norm": 2.3916172981262207, "learning_rate": 1.544046896441109e-05, "loss": 0.0189, "step": 8779 }, { "epoch": 8.442307692307692, "grad_norm": 1.226091742515564, "learning_rate": 1.5439423732502075e-05, "loss": 0.0125, "step": 8780 }, { "epoch": 8.44326923076923, "grad_norm": 1.6803759336471558, "learning_rate": 1.543837841618917e-05, "loss": 0.0229, "step": 8781 }, { "epoch": 8.444230769230769, "grad_norm": 0.7918835282325745, "learning_rate": 1.5437333015488586e-05, "loss": 0.0058, "step": 8782 }, { "epoch": 8.445192307692308, "grad_norm": 0.45118632912635803, "learning_rate": 1.543628753041656e-05, "loss": 0.0021, "step": 8783 }, { "epoch": 8.446153846153846, "grad_norm": 1.6374273300170898, "learning_rate": 1.5435241960989304e-05, "loss": 0.013, "step": 8784 }, { "epoch": 8.447115384615385, "grad_norm": 0.25029873847961426, "learning_rate": 1.5434196307223042e-05, "loss": 0.0019, "step": 8785 }, { "epoch": 8.448076923076924, "grad_norm": 0.1451641470193863, "learning_rate": 1.5433150569134007e-05, "loss": 0.0011, "step": 8786 }, { "epoch": 8.449038461538462, "grad_norm": 1.9907547235488892, "learning_rate": 1.5432104746738417e-05, "loss": 0.0133, "step": 8787 }, { "epoch": 8.45, "grad_norm": 2.9576501846313477, "learning_rate": 1.5431058840052505e-05, "loss": 0.0568, "step": 8788 }, { "epoch": 8.450961538461538, "grad_norm": 3.9056456089019775, "learning_rate": 1.5430012849092504e-05, "loss": 0.0392, "step": 8789 }, { "epoch": 8.451923076923077, "grad_norm": 3.8307058811187744, "learning_rate": 1.5428966773874638e-05, "loss": 0.0511, "step": 8790 }, { "epoch": 8.452884615384615, "grad_norm": 3.299647092819214, "learning_rate": 1.542792061441514e-05, "loss": 0.0354, "step": 8791 }, { "epoch": 8.453846153846154, "grad_norm": 3.148106336593628, "learning_rate": 1.5426874370730246e-05, "loss": 0.0319, "step": 8792 }, { "epoch": 8.454807692307693, "grad_norm": 1.3507522344589233, "learning_rate": 1.542582804283619e-05, "loss": 0.0223, "step": 8793 }, { "epoch": 8.455769230769231, "grad_norm": 1.7656093835830688, "learning_rate": 1.542478163074921e-05, "loss": 0.0122, "step": 8794 }, { "epoch": 8.45673076923077, "grad_norm": 2.3867125511169434, "learning_rate": 1.5423735134485537e-05, "loss": 0.0497, "step": 8795 }, { "epoch": 8.457692307692307, "grad_norm": 2.5267560482025146, "learning_rate": 1.5422688554061413e-05, "loss": 0.016, "step": 8796 }, { "epoch": 8.458653846153846, "grad_norm": 1.6213059425354004, "learning_rate": 1.542164188949308e-05, "loss": 0.0222, "step": 8797 }, { "epoch": 8.459615384615384, "grad_norm": 0.9469143152236938, "learning_rate": 1.5420595140796773e-05, "loss": 0.0042, "step": 8798 }, { "epoch": 8.460576923076923, "grad_norm": 1.9616926908493042, "learning_rate": 1.5419548307988744e-05, "loss": 0.007, "step": 8799 }, { "epoch": 8.461538461538462, "grad_norm": 1.8041764497756958, "learning_rate": 1.5418501391085226e-05, "loss": 0.0212, "step": 8800 }, { "epoch": 8.4625, "grad_norm": 1.8278254270553589, "learning_rate": 1.5417454390102474e-05, "loss": 0.0149, "step": 8801 }, { "epoch": 8.463461538461539, "grad_norm": 4.970765590667725, "learning_rate": 1.541640730505673e-05, "loss": 0.0438, "step": 8802 }, { "epoch": 8.464423076923078, "grad_norm": 0.34447750449180603, "learning_rate": 1.541536013596424e-05, "loss": 0.0015, "step": 8803 }, { "epoch": 8.465384615384615, "grad_norm": 0.5881319046020508, "learning_rate": 1.5414312882841254e-05, "loss": 0.0047, "step": 8804 }, { "epoch": 8.466346153846153, "grad_norm": 2.866201877593994, "learning_rate": 1.5413265545704023e-05, "loss": 0.0374, "step": 8805 }, { "epoch": 8.467307692307692, "grad_norm": 2.3802297115325928, "learning_rate": 1.54122181245688e-05, "loss": 0.021, "step": 8806 }, { "epoch": 8.46826923076923, "grad_norm": 3.321981906890869, "learning_rate": 1.5411170619451837e-05, "loss": 0.0831, "step": 8807 }, { "epoch": 8.46923076923077, "grad_norm": 1.6173670291900635, "learning_rate": 1.5410123030369387e-05, "loss": 0.0136, "step": 8808 }, { "epoch": 8.470192307692308, "grad_norm": 3.2456884384155273, "learning_rate": 1.5409075357337704e-05, "loss": 0.0276, "step": 8809 }, { "epoch": 8.471153846153847, "grad_norm": 2.794821262359619, "learning_rate": 1.540802760037305e-05, "loss": 0.0226, "step": 8810 }, { "epoch": 8.472115384615385, "grad_norm": 4.868372440338135, "learning_rate": 1.5406979759491675e-05, "loss": 0.0741, "step": 8811 }, { "epoch": 8.473076923076922, "grad_norm": 0.47627925872802734, "learning_rate": 1.5405931834709847e-05, "loss": 0.0023, "step": 8812 }, { "epoch": 8.474038461538461, "grad_norm": 3.3088207244873047, "learning_rate": 1.5404883826043822e-05, "loss": 0.0397, "step": 8813 }, { "epoch": 8.475, "grad_norm": 1.630454659461975, "learning_rate": 1.5403835733509867e-05, "loss": 0.0104, "step": 8814 }, { "epoch": 8.475961538461538, "grad_norm": 2.123807191848755, "learning_rate": 1.5402787557124238e-05, "loss": 0.022, "step": 8815 }, { "epoch": 8.476923076923077, "grad_norm": 0.03446822986006737, "learning_rate": 1.5401739296903204e-05, "loss": 0.0004, "step": 8816 }, { "epoch": 8.477884615384616, "grad_norm": 3.007354736328125, "learning_rate": 1.540069095286303e-05, "loss": 0.0264, "step": 8817 }, { "epoch": 8.478846153846154, "grad_norm": 2.173884868621826, "learning_rate": 1.539964252501998e-05, "loss": 0.0247, "step": 8818 }, { "epoch": 8.479807692307693, "grad_norm": 5.279908180236816, "learning_rate": 1.5398594013390326e-05, "loss": 0.0666, "step": 8819 }, { "epoch": 8.48076923076923, "grad_norm": 0.2520839273929596, "learning_rate": 1.5397545417990342e-05, "loss": 0.0017, "step": 8820 }, { "epoch": 8.481730769230769, "grad_norm": 3.1210172176361084, "learning_rate": 1.5396496738836292e-05, "loss": 0.0209, "step": 8821 }, { "epoch": 8.482692307692307, "grad_norm": 2.304288864135742, "learning_rate": 1.5395447975944454e-05, "loss": 0.0193, "step": 8822 }, { "epoch": 8.483653846153846, "grad_norm": 2.9711129665374756, "learning_rate": 1.5394399129331096e-05, "loss": 0.0158, "step": 8823 }, { "epoch": 8.484615384615385, "grad_norm": 2.3610379695892334, "learning_rate": 1.53933501990125e-05, "loss": 0.0304, "step": 8824 }, { "epoch": 8.485576923076923, "grad_norm": 1.8152035474777222, "learning_rate": 1.5392301185004933e-05, "loss": 0.0077, "step": 8825 }, { "epoch": 8.486538461538462, "grad_norm": 1.2405742406845093, "learning_rate": 1.5391252087324677e-05, "loss": 0.0053, "step": 8826 }, { "epoch": 8.4875, "grad_norm": 1.3578463792800903, "learning_rate": 1.5390202905988015e-05, "loss": 0.0094, "step": 8827 }, { "epoch": 8.488461538461538, "grad_norm": 2.871373176574707, "learning_rate": 1.5389153641011224e-05, "loss": 0.0397, "step": 8828 }, { "epoch": 8.489423076923076, "grad_norm": 10.780152320861816, "learning_rate": 1.5388104292410587e-05, "loss": 0.07, "step": 8829 }, { "epoch": 8.490384615384615, "grad_norm": 2.1664459705352783, "learning_rate": 1.5387054860202383e-05, "loss": 0.0302, "step": 8830 }, { "epoch": 8.491346153846154, "grad_norm": 4.0022759437561035, "learning_rate": 1.5386005344402897e-05, "loss": 0.1114, "step": 8831 }, { "epoch": 8.492307692307692, "grad_norm": 1.5099836587905884, "learning_rate": 1.5384955745028418e-05, "loss": 0.0112, "step": 8832 }, { "epoch": 8.493269230769231, "grad_norm": 0.6182374954223633, "learning_rate": 1.538390606209523e-05, "loss": 0.0042, "step": 8833 }, { "epoch": 8.49423076923077, "grad_norm": 4.196985721588135, "learning_rate": 1.5382856295619622e-05, "loss": 0.0495, "step": 8834 }, { "epoch": 8.495192307692308, "grad_norm": 1.6402357816696167, "learning_rate": 1.5381806445617883e-05, "loss": 0.0143, "step": 8835 }, { "epoch": 8.496153846153845, "grad_norm": 2.069551944732666, "learning_rate": 1.53807565121063e-05, "loss": 0.0096, "step": 8836 }, { "epoch": 8.497115384615384, "grad_norm": 2.8987157344818115, "learning_rate": 1.537970649510117e-05, "loss": 0.0307, "step": 8837 }, { "epoch": 8.498076923076923, "grad_norm": 1.2589225769042969, "learning_rate": 1.5378656394618788e-05, "loss": 0.014, "step": 8838 }, { "epoch": 8.499038461538461, "grad_norm": 1.9016292095184326, "learning_rate": 1.537760621067544e-05, "loss": 0.0141, "step": 8839 }, { "epoch": 8.5, "grad_norm": 3.1978180408477783, "learning_rate": 1.537655594328743e-05, "loss": 0.0282, "step": 8840 }, { "epoch": 8.500961538461539, "grad_norm": 1.2659645080566406, "learning_rate": 1.537550559247105e-05, "loss": 0.0075, "step": 8841 }, { "epoch": 8.501923076923077, "grad_norm": 1.308758020401001, "learning_rate": 1.5374455158242604e-05, "loss": 0.0247, "step": 8842 }, { "epoch": 8.502884615384616, "grad_norm": 3.7552709579467773, "learning_rate": 1.5373404640618382e-05, "loss": 0.1001, "step": 8843 }, { "epoch": 8.503846153846155, "grad_norm": 2.0967068672180176, "learning_rate": 1.537235403961469e-05, "loss": 0.0241, "step": 8844 }, { "epoch": 8.504807692307692, "grad_norm": 2.553410768508911, "learning_rate": 1.537130335524784e-05, "loss": 0.013, "step": 8845 }, { "epoch": 8.50576923076923, "grad_norm": 2.168172597885132, "learning_rate": 1.5370252587534117e-05, "loss": 0.0363, "step": 8846 }, { "epoch": 8.506730769230769, "grad_norm": 0.17508289217948914, "learning_rate": 1.536920173648984e-05, "loss": 0.0016, "step": 8847 }, { "epoch": 8.507692307692308, "grad_norm": 2.39164662361145, "learning_rate": 1.5368150802131307e-05, "loss": 0.0277, "step": 8848 }, { "epoch": 8.508653846153846, "grad_norm": 1.828802227973938, "learning_rate": 1.5367099784474832e-05, "loss": 0.0158, "step": 8849 }, { "epoch": 8.509615384615385, "grad_norm": 2.845775842666626, "learning_rate": 1.5366048683536723e-05, "loss": 0.0395, "step": 8850 }, { "epoch": 8.510576923076924, "grad_norm": 0.09411949664354324, "learning_rate": 1.5364997499333286e-05, "loss": 0.001, "step": 8851 }, { "epoch": 8.51153846153846, "grad_norm": 1.2664011716842651, "learning_rate": 1.5363946231880832e-05, "loss": 0.0098, "step": 8852 }, { "epoch": 8.5125, "grad_norm": 3.645420789718628, "learning_rate": 1.5362894881195675e-05, "loss": 0.1195, "step": 8853 }, { "epoch": 8.513461538461538, "grad_norm": 0.6937733888626099, "learning_rate": 1.5361843447294132e-05, "loss": 0.0039, "step": 8854 }, { "epoch": 8.514423076923077, "grad_norm": 3.5625369548797607, "learning_rate": 1.536079193019251e-05, "loss": 0.0352, "step": 8855 }, { "epoch": 8.515384615384615, "grad_norm": 0.3028959631919861, "learning_rate": 1.5359740329907136e-05, "loss": 0.0029, "step": 8856 }, { "epoch": 8.516346153846154, "grad_norm": 2.8029205799102783, "learning_rate": 1.5358688646454322e-05, "loss": 0.0254, "step": 8857 }, { "epoch": 8.517307692307693, "grad_norm": 2.4579358100891113, "learning_rate": 1.5357636879850387e-05, "loss": 0.0256, "step": 8858 }, { "epoch": 8.518269230769231, "grad_norm": 1.5355961322784424, "learning_rate": 1.535658503011165e-05, "loss": 0.0072, "step": 8859 }, { "epoch": 8.51923076923077, "grad_norm": 0.9853239059448242, "learning_rate": 1.535553309725444e-05, "loss": 0.008, "step": 8860 }, { "epoch": 8.520192307692307, "grad_norm": 2.4535868167877197, "learning_rate": 1.5354481081295073e-05, "loss": 0.0779, "step": 8861 }, { "epoch": 8.521153846153846, "grad_norm": 2.9604861736297607, "learning_rate": 1.5353428982249873e-05, "loss": 0.0246, "step": 8862 }, { "epoch": 8.522115384615384, "grad_norm": 1.2995147705078125, "learning_rate": 1.5352376800135166e-05, "loss": 0.0093, "step": 8863 }, { "epoch": 8.523076923076923, "grad_norm": 3.6665003299713135, "learning_rate": 1.5351324534967282e-05, "loss": 0.0203, "step": 8864 }, { "epoch": 8.524038461538462, "grad_norm": 2.241466999053955, "learning_rate": 1.535027218676255e-05, "loss": 0.028, "step": 8865 }, { "epoch": 8.525, "grad_norm": 1.500809669494629, "learning_rate": 1.5349219755537293e-05, "loss": 0.0533, "step": 8866 }, { "epoch": 8.525961538461539, "grad_norm": 0.6075651049613953, "learning_rate": 1.5348167241307845e-05, "loss": 0.0042, "step": 8867 }, { "epoch": 8.526923076923078, "grad_norm": 1.4166340827941895, "learning_rate": 1.5347114644090537e-05, "loss": 0.0078, "step": 8868 }, { "epoch": 8.527884615384615, "grad_norm": 3.5213751792907715, "learning_rate": 1.5346061963901707e-05, "loss": 0.0383, "step": 8869 }, { "epoch": 8.528846153846153, "grad_norm": 4.6801276206970215, "learning_rate": 1.5345009200757682e-05, "loss": 0.0489, "step": 8870 }, { "epoch": 8.529807692307692, "grad_norm": 4.129976272583008, "learning_rate": 1.5343956354674806e-05, "loss": 0.037, "step": 8871 }, { "epoch": 8.53076923076923, "grad_norm": 0.3446692228317261, "learning_rate": 1.5342903425669408e-05, "loss": 0.002, "step": 8872 }, { "epoch": 8.53173076923077, "grad_norm": 2.62662410736084, "learning_rate": 1.5341850413757834e-05, "loss": 0.0207, "step": 8873 }, { "epoch": 8.532692307692308, "grad_norm": 3.7122089862823486, "learning_rate": 1.534079731895641e-05, "loss": 0.0338, "step": 8874 }, { "epoch": 8.533653846153847, "grad_norm": 1.854723334312439, "learning_rate": 1.53397441412815e-05, "loss": 0.0189, "step": 8875 }, { "epoch": 8.534615384615385, "grad_norm": 1.6164222955703735, "learning_rate": 1.5338690880749428e-05, "loss": 0.0101, "step": 8876 }, { "epoch": 8.535576923076922, "grad_norm": 2.594033718109131, "learning_rate": 1.5337637537376538e-05, "loss": 0.0181, "step": 8877 }, { "epoch": 8.536538461538461, "grad_norm": 3.31575870513916, "learning_rate": 1.533658411117918e-05, "loss": 0.0319, "step": 8878 }, { "epoch": 8.5375, "grad_norm": 2.0329091548919678, "learning_rate": 1.5335530602173704e-05, "loss": 0.0079, "step": 8879 }, { "epoch": 8.538461538461538, "grad_norm": 0.26523256301879883, "learning_rate": 1.5334477010376452e-05, "loss": 0.0019, "step": 8880 }, { "epoch": 8.539423076923077, "grad_norm": 1.8525563478469849, "learning_rate": 1.5333423335803773e-05, "loss": 0.0186, "step": 8881 }, { "epoch": 8.540384615384616, "grad_norm": 2.7837302684783936, "learning_rate": 1.533236957847201e-05, "loss": 0.0274, "step": 8882 }, { "epoch": 8.541346153846154, "grad_norm": 1.0282374620437622, "learning_rate": 1.533131573839753e-05, "loss": 0.0112, "step": 8883 }, { "epoch": 8.542307692307693, "grad_norm": 2.1136391162872314, "learning_rate": 1.5330261815596676e-05, "loss": 0.0161, "step": 8884 }, { "epoch": 8.54326923076923, "grad_norm": 4.669015884399414, "learning_rate": 1.53292078100858e-05, "loss": 0.0707, "step": 8885 }, { "epoch": 8.544230769230769, "grad_norm": 1.9888951778411865, "learning_rate": 1.532815372188126e-05, "loss": 0.049, "step": 8886 }, { "epoch": 8.545192307692307, "grad_norm": 0.7095575332641602, "learning_rate": 1.5327099550999416e-05, "loss": 0.0045, "step": 8887 }, { "epoch": 8.546153846153846, "grad_norm": 1.8561111688613892, "learning_rate": 1.5326045297456617e-05, "loss": 0.0099, "step": 8888 }, { "epoch": 8.547115384615385, "grad_norm": 0.8592397570610046, "learning_rate": 1.5324990961269227e-05, "loss": 0.0074, "step": 8889 }, { "epoch": 8.548076923076923, "grad_norm": 2.280717372894287, "learning_rate": 1.532393654245361e-05, "loss": 0.0157, "step": 8890 }, { "epoch": 8.549038461538462, "grad_norm": 2.10127854347229, "learning_rate": 1.532288204102612e-05, "loss": 0.0134, "step": 8891 }, { "epoch": 8.55, "grad_norm": 2.3159379959106445, "learning_rate": 1.5321827457003125e-05, "loss": 0.0323, "step": 8892 }, { "epoch": 8.55096153846154, "grad_norm": 0.44768622517585754, "learning_rate": 1.5320772790400983e-05, "loss": 0.0025, "step": 8893 }, { "epoch": 8.551923076923076, "grad_norm": 1.5095007419586182, "learning_rate": 1.531971804123607e-05, "loss": 0.0377, "step": 8894 }, { "epoch": 8.552884615384615, "grad_norm": 1.2103322744369507, "learning_rate": 1.5318663209524745e-05, "loss": 0.0096, "step": 8895 }, { "epoch": 8.553846153846154, "grad_norm": 5.950124263763428, "learning_rate": 1.5317608295283372e-05, "loss": 0.1188, "step": 8896 }, { "epoch": 8.554807692307692, "grad_norm": 0.11971963942050934, "learning_rate": 1.531655329852833e-05, "loss": 0.0007, "step": 8897 }, { "epoch": 8.555769230769231, "grad_norm": 2.096545457839966, "learning_rate": 1.531549821927598e-05, "loss": 0.0049, "step": 8898 }, { "epoch": 8.55673076923077, "grad_norm": 0.760607123374939, "learning_rate": 1.5314443057542703e-05, "loss": 0.0041, "step": 8899 }, { "epoch": 8.557692307692308, "grad_norm": 2.7492663860321045, "learning_rate": 1.5313387813344867e-05, "loss": 0.1304, "step": 8900 }, { "epoch": 8.558653846153845, "grad_norm": 1.6919116973876953, "learning_rate": 1.5312332486698846e-05, "loss": 0.0187, "step": 8901 }, { "epoch": 8.559615384615384, "grad_norm": 3.347136974334717, "learning_rate": 1.5311277077621016e-05, "loss": 0.0276, "step": 8902 }, { "epoch": 8.560576923076923, "grad_norm": 2.206850051879883, "learning_rate": 1.5310221586127753e-05, "loss": 0.0349, "step": 8903 }, { "epoch": 8.561538461538461, "grad_norm": 5.135500431060791, "learning_rate": 1.5309166012235438e-05, "loss": 0.0316, "step": 8904 }, { "epoch": 8.5625, "grad_norm": 1.891371726989746, "learning_rate": 1.5308110355960447e-05, "loss": 0.0124, "step": 8905 }, { "epoch": 8.563461538461539, "grad_norm": 1.4601093530654907, "learning_rate": 1.5307054617319166e-05, "loss": 0.0095, "step": 8906 }, { "epoch": 8.564423076923077, "grad_norm": 2.391298532485962, "learning_rate": 1.530599879632797e-05, "loss": 0.0472, "step": 8907 }, { "epoch": 8.565384615384616, "grad_norm": 0.28431352972984314, "learning_rate": 1.5304942893003246e-05, "loss": 0.0013, "step": 8908 }, { "epoch": 8.566346153846155, "grad_norm": 5.059742450714111, "learning_rate": 1.530388690736138e-05, "loss": 0.0665, "step": 8909 }, { "epoch": 8.567307692307692, "grad_norm": 2.762127161026001, "learning_rate": 1.5302830839418755e-05, "loss": 0.0381, "step": 8910 }, { "epoch": 8.56826923076923, "grad_norm": 0.34882229566574097, "learning_rate": 1.5301774689191758e-05, "loss": 0.0033, "step": 8911 }, { "epoch": 8.569230769230769, "grad_norm": 3.6395857334136963, "learning_rate": 1.530071845669678e-05, "loss": 0.0451, "step": 8912 }, { "epoch": 8.570192307692308, "grad_norm": 2.154808521270752, "learning_rate": 1.5299662141950208e-05, "loss": 0.0119, "step": 8913 }, { "epoch": 8.571153846153846, "grad_norm": 3.6499226093292236, "learning_rate": 1.5298605744968435e-05, "loss": 0.058, "step": 8914 }, { "epoch": 8.572115384615385, "grad_norm": 6.864863395690918, "learning_rate": 1.5297549265767847e-05, "loss": 0.1406, "step": 8915 }, { "epoch": 8.573076923076924, "grad_norm": 4.733797073364258, "learning_rate": 1.529649270436485e-05, "loss": 0.0587, "step": 8916 }, { "epoch": 8.57403846153846, "grad_norm": 3.5357494354248047, "learning_rate": 1.5295436060775828e-05, "loss": 0.0387, "step": 8917 }, { "epoch": 8.575, "grad_norm": 1.7077629566192627, "learning_rate": 1.529437933501718e-05, "loss": 0.0123, "step": 8918 }, { "epoch": 8.575961538461538, "grad_norm": 4.581937313079834, "learning_rate": 1.52933225271053e-05, "loss": 0.2445, "step": 8919 }, { "epoch": 8.576923076923077, "grad_norm": 2.443157434463501, "learning_rate": 1.5292265637056597e-05, "loss": 0.0185, "step": 8920 }, { "epoch": 8.577884615384615, "grad_norm": 2.695645570755005, "learning_rate": 1.529120866488746e-05, "loss": 0.0179, "step": 8921 }, { "epoch": 8.578846153846154, "grad_norm": 0.7756983637809753, "learning_rate": 1.5290151610614296e-05, "loss": 0.0052, "step": 8922 }, { "epoch": 8.579807692307693, "grad_norm": 2.855191469192505, "learning_rate": 1.5289094474253502e-05, "loss": 0.0204, "step": 8923 }, { "epoch": 8.580769230769231, "grad_norm": 4.074349880218506, "learning_rate": 1.5288037255821488e-05, "loss": 0.019, "step": 8924 }, { "epoch": 8.58173076923077, "grad_norm": 0.5927085280418396, "learning_rate": 1.5286979955334655e-05, "loss": 0.0038, "step": 8925 }, { "epoch": 8.582692307692307, "grad_norm": 4.324835777282715, "learning_rate": 1.5285922572809408e-05, "loss": 0.058, "step": 8926 }, { "epoch": 8.583653846153846, "grad_norm": 1.3597898483276367, "learning_rate": 1.5284865108262162e-05, "loss": 0.0085, "step": 8927 }, { "epoch": 8.584615384615384, "grad_norm": 1.5713762044906616, "learning_rate": 1.5283807561709316e-05, "loss": 0.0149, "step": 8928 }, { "epoch": 8.585576923076923, "grad_norm": 4.599573135375977, "learning_rate": 1.5282749933167283e-05, "loss": 0.143, "step": 8929 }, { "epoch": 8.586538461538462, "grad_norm": 3.4343018531799316, "learning_rate": 1.528169222265248e-05, "loss": 0.036, "step": 8930 }, { "epoch": 8.5875, "grad_norm": 2.0251824855804443, "learning_rate": 1.5280634430181315e-05, "loss": 0.017, "step": 8931 }, { "epoch": 8.588461538461539, "grad_norm": 5.272190093994141, "learning_rate": 1.5279576555770202e-05, "loss": 0.1354, "step": 8932 }, { "epoch": 8.589423076923078, "grad_norm": 3.142031669616699, "learning_rate": 1.5278518599435553e-05, "loss": 0.0916, "step": 8933 }, { "epoch": 8.590384615384615, "grad_norm": 1.032718539237976, "learning_rate": 1.527746056119379e-05, "loss": 0.0074, "step": 8934 }, { "epoch": 8.591346153846153, "grad_norm": 0.8246534466743469, "learning_rate": 1.527640244106133e-05, "loss": 0.0073, "step": 8935 }, { "epoch": 8.592307692307692, "grad_norm": 3.4851341247558594, "learning_rate": 1.5275344239054585e-05, "loss": 0.0867, "step": 8936 }, { "epoch": 8.59326923076923, "grad_norm": 2.0130555629730225, "learning_rate": 1.5274285955189984e-05, "loss": 0.0375, "step": 8937 }, { "epoch": 8.59423076923077, "grad_norm": 2.668365955352783, "learning_rate": 1.5273227589483945e-05, "loss": 0.03, "step": 8938 }, { "epoch": 8.595192307692308, "grad_norm": 0.5968009233474731, "learning_rate": 1.527216914195289e-05, "loss": 0.0042, "step": 8939 }, { "epoch": 8.596153846153847, "grad_norm": 2.6149778366088867, "learning_rate": 1.5271110612613243e-05, "loss": 0.0224, "step": 8940 }, { "epoch": 8.597115384615385, "grad_norm": 1.3265419006347656, "learning_rate": 1.527005200148143e-05, "loss": 0.0261, "step": 8941 }, { "epoch": 8.598076923076922, "grad_norm": 1.77713942527771, "learning_rate": 1.526899330857388e-05, "loss": 0.0294, "step": 8942 }, { "epoch": 8.599038461538461, "grad_norm": 0.4090152084827423, "learning_rate": 1.526793453390701e-05, "loss": 0.003, "step": 8943 }, { "epoch": 8.6, "grad_norm": 2.4497196674346924, "learning_rate": 1.526687567749727e-05, "loss": 0.0228, "step": 8944 }, { "epoch": 8.600961538461538, "grad_norm": 1.4138609170913696, "learning_rate": 1.526581673936107e-05, "loss": 0.0108, "step": 8945 }, { "epoch": 8.601923076923077, "grad_norm": 2.4738192558288574, "learning_rate": 1.5264757719514847e-05, "loss": 0.0293, "step": 8946 }, { "epoch": 8.602884615384616, "grad_norm": 0.6614894270896912, "learning_rate": 1.5263698617975037e-05, "loss": 0.0062, "step": 8947 }, { "epoch": 8.603846153846154, "grad_norm": 3.658256769180298, "learning_rate": 1.5262639434758077e-05, "loss": 0.0742, "step": 8948 }, { "epoch": 8.604807692307693, "grad_norm": 2.7146756649017334, "learning_rate": 1.5261580169880398e-05, "loss": 0.0318, "step": 8949 }, { "epoch": 8.60576923076923, "grad_norm": 2.0448946952819824, "learning_rate": 1.5260520823358436e-05, "loss": 0.0339, "step": 8950 }, { "epoch": 8.606730769230769, "grad_norm": 1.159675121307373, "learning_rate": 1.5259461395208628e-05, "loss": 0.0085, "step": 8951 }, { "epoch": 8.607692307692307, "grad_norm": 1.6057204008102417, "learning_rate": 1.5258401885447418e-05, "loss": 0.0175, "step": 8952 }, { "epoch": 8.608653846153846, "grad_norm": 0.7166149020195007, "learning_rate": 1.5257342294091246e-05, "loss": 0.0083, "step": 8953 }, { "epoch": 8.609615384615385, "grad_norm": 0.8179662227630615, "learning_rate": 1.5256282621156549e-05, "loss": 0.0064, "step": 8954 }, { "epoch": 8.610576923076923, "grad_norm": 3.3343405723571777, "learning_rate": 1.525522286665977e-05, "loss": 0.0412, "step": 8955 }, { "epoch": 8.611538461538462, "grad_norm": 1.7669378519058228, "learning_rate": 1.5254163030617362e-05, "loss": 0.0126, "step": 8956 }, { "epoch": 8.6125, "grad_norm": 5.25461483001709, "learning_rate": 1.5253103113045762e-05, "loss": 0.1438, "step": 8957 }, { "epoch": 8.61346153846154, "grad_norm": 1.1159251928329468, "learning_rate": 1.5252043113961417e-05, "loss": 0.0059, "step": 8958 }, { "epoch": 8.614423076923076, "grad_norm": 0.6332054138183594, "learning_rate": 1.525098303338078e-05, "loss": 0.0065, "step": 8959 }, { "epoch": 8.615384615384615, "grad_norm": 0.9437165856361389, "learning_rate": 1.5249922871320297e-05, "loss": 0.0079, "step": 8960 }, { "epoch": 8.616346153846154, "grad_norm": 2.987243890762329, "learning_rate": 1.5248862627796421e-05, "loss": 0.0285, "step": 8961 }, { "epoch": 8.617307692307692, "grad_norm": 1.8259440660476685, "learning_rate": 1.52478023028256e-05, "loss": 0.0124, "step": 8962 }, { "epoch": 8.618269230769231, "grad_norm": 0.9058039784431458, "learning_rate": 1.524674189642429e-05, "loss": 0.0037, "step": 8963 }, { "epoch": 8.61923076923077, "grad_norm": 2.506107807159424, "learning_rate": 1.5245681408608946e-05, "loss": 0.0223, "step": 8964 }, { "epoch": 8.620192307692308, "grad_norm": 1.2662208080291748, "learning_rate": 1.5244620839396018e-05, "loss": 0.0123, "step": 8965 }, { "epoch": 8.621153846153845, "grad_norm": 0.3242282569408417, "learning_rate": 1.5243560188801972e-05, "loss": 0.0019, "step": 8966 }, { "epoch": 8.622115384615384, "grad_norm": 2.1357169151306152, "learning_rate": 1.5242499456843264e-05, "loss": 0.0199, "step": 8967 }, { "epoch": 8.623076923076923, "grad_norm": 3.335709810256958, "learning_rate": 1.5241438643536346e-05, "loss": 0.0245, "step": 8968 }, { "epoch": 8.624038461538461, "grad_norm": 2.0251657962799072, "learning_rate": 1.5240377748897683e-05, "loss": 0.0506, "step": 8969 }, { "epoch": 8.625, "grad_norm": 3.3878962993621826, "learning_rate": 1.5239316772943741e-05, "loss": 0.0262, "step": 8970 }, { "epoch": 8.625961538461539, "grad_norm": 1.22383713722229, "learning_rate": 1.523825571569098e-05, "loss": 0.0302, "step": 8971 }, { "epoch": 8.626923076923077, "grad_norm": 1.736488938331604, "learning_rate": 1.5237194577155862e-05, "loss": 0.0543, "step": 8972 }, { "epoch": 8.627884615384616, "grad_norm": 0.8105095624923706, "learning_rate": 1.5236133357354854e-05, "loss": 0.0046, "step": 8973 }, { "epoch": 8.628846153846155, "grad_norm": 1.6089082956314087, "learning_rate": 1.5235072056304427e-05, "loss": 0.0317, "step": 8974 }, { "epoch": 8.629807692307692, "grad_norm": 0.29323461651802063, "learning_rate": 1.5234010674021048e-05, "loss": 0.0027, "step": 8975 }, { "epoch": 8.63076923076923, "grad_norm": 4.67387056350708, "learning_rate": 1.523294921052118e-05, "loss": 0.0499, "step": 8976 }, { "epoch": 8.631730769230769, "grad_norm": 1.8012481927871704, "learning_rate": 1.52318876658213e-05, "loss": 0.0307, "step": 8977 }, { "epoch": 8.632692307692308, "grad_norm": 2.407214641571045, "learning_rate": 1.5230826039937882e-05, "loss": 0.07, "step": 8978 }, { "epoch": 8.633653846153846, "grad_norm": 1.951380968093872, "learning_rate": 1.5229764332887395e-05, "loss": 0.0271, "step": 8979 }, { "epoch": 8.634615384615385, "grad_norm": 3.242814064025879, "learning_rate": 1.5228702544686313e-05, "loss": 0.0382, "step": 8980 }, { "epoch": 8.635576923076924, "grad_norm": 3.7299246788024902, "learning_rate": 1.5227640675351115e-05, "loss": 0.0604, "step": 8981 }, { "epoch": 8.63653846153846, "grad_norm": 0.082292839884758, "learning_rate": 1.5226578724898278e-05, "loss": 0.0007, "step": 8982 }, { "epoch": 8.6375, "grad_norm": 1.0814636945724487, "learning_rate": 1.5225516693344276e-05, "loss": 0.0097, "step": 8983 }, { "epoch": 8.638461538461538, "grad_norm": 2.60097336769104, "learning_rate": 1.5224454580705593e-05, "loss": 0.0543, "step": 8984 }, { "epoch": 8.639423076923077, "grad_norm": 1.4820936918258667, "learning_rate": 1.5223392386998709e-05, "loss": 0.0048, "step": 8985 }, { "epoch": 8.640384615384615, "grad_norm": 1.08260977268219, "learning_rate": 1.5222330112240107e-05, "loss": 0.0102, "step": 8986 }, { "epoch": 8.641346153846154, "grad_norm": 0.866987407207489, "learning_rate": 1.5221267756446266e-05, "loss": 0.0053, "step": 8987 }, { "epoch": 8.642307692307693, "grad_norm": 3.3995368480682373, "learning_rate": 1.5220205319633672e-05, "loss": 0.0573, "step": 8988 }, { "epoch": 8.643269230769231, "grad_norm": 0.7835789322853088, "learning_rate": 1.5219142801818816e-05, "loss": 0.0038, "step": 8989 }, { "epoch": 8.64423076923077, "grad_norm": 2.2011916637420654, "learning_rate": 1.5218080203018181e-05, "loss": 0.0755, "step": 8990 }, { "epoch": 8.645192307692307, "grad_norm": 0.6673306226730347, "learning_rate": 1.5217017523248257e-05, "loss": 0.0034, "step": 8991 }, { "epoch": 8.646153846153846, "grad_norm": 3.1752395629882812, "learning_rate": 1.5215954762525531e-05, "loss": 0.0208, "step": 8992 }, { "epoch": 8.647115384615384, "grad_norm": 2.3014328479766846, "learning_rate": 1.5214891920866496e-05, "loss": 0.039, "step": 8993 }, { "epoch": 8.648076923076923, "grad_norm": 0.9522573947906494, "learning_rate": 1.5213828998287646e-05, "loss": 0.0089, "step": 8994 }, { "epoch": 8.649038461538462, "grad_norm": 3.6046154499053955, "learning_rate": 1.5212765994805472e-05, "loss": 0.0238, "step": 8995 }, { "epoch": 8.65, "grad_norm": 1.653562307357788, "learning_rate": 1.5211702910436468e-05, "loss": 0.0118, "step": 8996 }, { "epoch": 8.650961538461539, "grad_norm": 2.765418767929077, "learning_rate": 1.5210639745197132e-05, "loss": 0.0589, "step": 8997 }, { "epoch": 8.651923076923078, "grad_norm": 3.325787305831909, "learning_rate": 1.5209576499103958e-05, "loss": 0.0367, "step": 8998 }, { "epoch": 8.652884615384615, "grad_norm": 4.144985198974609, "learning_rate": 1.5208513172173449e-05, "loss": 0.0284, "step": 8999 }, { "epoch": 8.653846153846153, "grad_norm": 2.277646780014038, "learning_rate": 1.52074497644221e-05, "loss": 0.0248, "step": 9000 }, { "epoch": 8.654807692307692, "grad_norm": 4.746774673461914, "learning_rate": 1.5206386275866419e-05, "loss": 0.0524, "step": 9001 }, { "epoch": 8.65576923076923, "grad_norm": 2.8356642723083496, "learning_rate": 1.5205322706522895e-05, "loss": 0.0163, "step": 9002 }, { "epoch": 8.65673076923077, "grad_norm": 1.1003433465957642, "learning_rate": 1.5204259056408046e-05, "loss": 0.0062, "step": 9003 }, { "epoch": 8.657692307692308, "grad_norm": 1.1871322393417358, "learning_rate": 1.5203195325538372e-05, "loss": 0.0089, "step": 9004 }, { "epoch": 8.658653846153847, "grad_norm": 3.3403186798095703, "learning_rate": 1.5202131513930374e-05, "loss": 0.0417, "step": 9005 }, { "epoch": 8.659615384615385, "grad_norm": 0.18007048964500427, "learning_rate": 1.5201067621600565e-05, "loss": 0.0013, "step": 9006 }, { "epoch": 8.660576923076922, "grad_norm": 4.397294998168945, "learning_rate": 1.5200003648565452e-05, "loss": 0.0514, "step": 9007 }, { "epoch": 8.661538461538461, "grad_norm": 2.6789300441741943, "learning_rate": 1.5198939594841545e-05, "loss": 0.0577, "step": 9008 }, { "epoch": 8.6625, "grad_norm": 2.950369119644165, "learning_rate": 1.519787546044535e-05, "loss": 0.0239, "step": 9009 }, { "epoch": 8.663461538461538, "grad_norm": 5.779106140136719, "learning_rate": 1.5196811245393387e-05, "loss": 0.0521, "step": 9010 }, { "epoch": 8.664423076923077, "grad_norm": 1.5128356218338013, "learning_rate": 1.5195746949702167e-05, "loss": 0.0132, "step": 9011 }, { "epoch": 8.665384615384616, "grad_norm": 3.5400240421295166, "learning_rate": 1.51946825733882e-05, "loss": 0.1348, "step": 9012 }, { "epoch": 8.666346153846154, "grad_norm": 0.24070389568805695, "learning_rate": 1.5193618116468005e-05, "loss": 0.0017, "step": 9013 }, { "epoch": 8.667307692307693, "grad_norm": 2.6633830070495605, "learning_rate": 1.5192553578958103e-05, "loss": 0.0264, "step": 9014 }, { "epoch": 8.66826923076923, "grad_norm": 1.888693928718567, "learning_rate": 1.519148896087501e-05, "loss": 0.0252, "step": 9015 }, { "epoch": 8.669230769230769, "grad_norm": 2.295511245727539, "learning_rate": 1.5190424262235241e-05, "loss": 0.0142, "step": 9016 }, { "epoch": 8.670192307692307, "grad_norm": 4.623761177062988, "learning_rate": 1.5189359483055324e-05, "loss": 0.1074, "step": 9017 }, { "epoch": 8.671153846153846, "grad_norm": 4.910150527954102, "learning_rate": 1.518829462335178e-05, "loss": 0.3358, "step": 9018 }, { "epoch": 8.672115384615385, "grad_norm": 2.3674609661102295, "learning_rate": 1.5187229683141129e-05, "loss": 0.0201, "step": 9019 }, { "epoch": 8.673076923076923, "grad_norm": 2.7864506244659424, "learning_rate": 1.5186164662439898e-05, "loss": 0.0729, "step": 9020 }, { "epoch": 8.674038461538462, "grad_norm": 2.729339361190796, "learning_rate": 1.518509956126461e-05, "loss": 0.0109, "step": 9021 }, { "epoch": 8.675, "grad_norm": 3.0148723125457764, "learning_rate": 1.51840343796318e-05, "loss": 0.0204, "step": 9022 }, { "epoch": 8.67596153846154, "grad_norm": 0.29972392320632935, "learning_rate": 1.5182969117557989e-05, "loss": 0.0023, "step": 9023 }, { "epoch": 8.676923076923076, "grad_norm": 2.05639386177063, "learning_rate": 1.5181903775059708e-05, "loss": 0.0493, "step": 9024 }, { "epoch": 8.677884615384615, "grad_norm": 1.7942345142364502, "learning_rate": 1.5180838352153491e-05, "loss": 0.0191, "step": 9025 }, { "epoch": 8.678846153846154, "grad_norm": 2.2996973991394043, "learning_rate": 1.5179772848855867e-05, "loss": 0.0349, "step": 9026 }, { "epoch": 8.679807692307692, "grad_norm": 1.8156394958496094, "learning_rate": 1.517870726518337e-05, "loss": 0.036, "step": 9027 }, { "epoch": 8.680769230769231, "grad_norm": 1.7103667259216309, "learning_rate": 1.5177641601152538e-05, "loss": 0.0508, "step": 9028 }, { "epoch": 8.68173076923077, "grad_norm": 0.26304247975349426, "learning_rate": 1.5176575856779904e-05, "loss": 0.0022, "step": 9029 }, { "epoch": 8.682692307692308, "grad_norm": 2.9332547187805176, "learning_rate": 1.5175510032082006e-05, "loss": 0.0192, "step": 9030 }, { "epoch": 8.683653846153845, "grad_norm": 4.1889801025390625, "learning_rate": 1.5174444127075378e-05, "loss": 0.0704, "step": 9031 }, { "epoch": 8.684615384615384, "grad_norm": 1.117337942123413, "learning_rate": 1.5173378141776569e-05, "loss": 0.0068, "step": 9032 }, { "epoch": 8.685576923076923, "grad_norm": 0.7794901132583618, "learning_rate": 1.5172312076202113e-05, "loss": 0.0066, "step": 9033 }, { "epoch": 8.686538461538461, "grad_norm": 2.6631574630737305, "learning_rate": 1.5171245930368554e-05, "loss": 0.0274, "step": 9034 }, { "epoch": 8.6875, "grad_norm": 2.6141388416290283, "learning_rate": 1.5170179704292436e-05, "loss": 0.0408, "step": 9035 }, { "epoch": 8.688461538461539, "grad_norm": 0.46946823596954346, "learning_rate": 1.5169113397990303e-05, "loss": 0.0047, "step": 9036 }, { "epoch": 8.689423076923077, "grad_norm": 1.7034997940063477, "learning_rate": 1.5168047011478704e-05, "loss": 0.031, "step": 9037 }, { "epoch": 8.690384615384616, "grad_norm": 1.4507917165756226, "learning_rate": 1.516698054477418e-05, "loss": 0.0109, "step": 9038 }, { "epoch": 8.691346153846155, "grad_norm": 4.944517612457275, "learning_rate": 1.5165913997893285e-05, "loss": 0.0884, "step": 9039 }, { "epoch": 8.692307692307692, "grad_norm": 1.2729758024215698, "learning_rate": 1.5164847370852562e-05, "loss": 0.0056, "step": 9040 }, { "epoch": 8.69326923076923, "grad_norm": 3.3870456218719482, "learning_rate": 1.5163780663668572e-05, "loss": 0.0601, "step": 9041 }, { "epoch": 8.694230769230769, "grad_norm": 2.0792031288146973, "learning_rate": 1.516271387635786e-05, "loss": 0.016, "step": 9042 }, { "epoch": 8.695192307692308, "grad_norm": 1.9019020795822144, "learning_rate": 1.516164700893698e-05, "loss": 0.0196, "step": 9043 }, { "epoch": 8.696153846153846, "grad_norm": 1.4530388116836548, "learning_rate": 1.5160580061422488e-05, "loss": 0.0134, "step": 9044 }, { "epoch": 8.697115384615385, "grad_norm": 1.7429689168930054, "learning_rate": 1.5159513033830936e-05, "loss": 0.0233, "step": 9045 }, { "epoch": 8.698076923076924, "grad_norm": 1.9955408573150635, "learning_rate": 1.5158445926178891e-05, "loss": 0.0129, "step": 9046 }, { "epoch": 8.69903846153846, "grad_norm": 2.076380491256714, "learning_rate": 1.5157378738482902e-05, "loss": 0.0236, "step": 9047 }, { "epoch": 8.7, "grad_norm": 2.661653995513916, "learning_rate": 1.5156311470759532e-05, "loss": 0.0453, "step": 9048 }, { "epoch": 8.700961538461538, "grad_norm": 3.250490665435791, "learning_rate": 1.5155244123025341e-05, "loss": 0.0336, "step": 9049 }, { "epoch": 8.701923076923077, "grad_norm": 1.5785104036331177, "learning_rate": 1.5154176695296894e-05, "loss": 0.0324, "step": 9050 }, { "epoch": 8.702884615384615, "grad_norm": 3.9384396076202393, "learning_rate": 1.515310918759075e-05, "loss": 0.0449, "step": 9051 }, { "epoch": 8.703846153846154, "grad_norm": 2.233146905899048, "learning_rate": 1.5152041599923477e-05, "loss": 0.0254, "step": 9052 }, { "epoch": 8.704807692307693, "grad_norm": 1.8174327611923218, "learning_rate": 1.5150973932311637e-05, "loss": 0.0153, "step": 9053 }, { "epoch": 8.705769230769231, "grad_norm": 2.5512759685516357, "learning_rate": 1.5149906184771801e-05, "loss": 0.0127, "step": 9054 }, { "epoch": 8.70673076923077, "grad_norm": 3.092228412628174, "learning_rate": 1.5148838357320537e-05, "loss": 0.052, "step": 9055 }, { "epoch": 8.707692307692307, "grad_norm": 2.541219711303711, "learning_rate": 1.514777044997441e-05, "loss": 0.0685, "step": 9056 }, { "epoch": 8.708653846153846, "grad_norm": 2.008121967315674, "learning_rate": 1.5146702462749999e-05, "loss": 0.0242, "step": 9057 }, { "epoch": 8.709615384615384, "grad_norm": 2.752368927001953, "learning_rate": 1.5145634395663866e-05, "loss": 0.0386, "step": 9058 }, { "epoch": 8.710576923076923, "grad_norm": 1.144248604774475, "learning_rate": 1.5144566248732593e-05, "loss": 0.0066, "step": 9059 }, { "epoch": 8.711538461538462, "grad_norm": 1.4253287315368652, "learning_rate": 1.5143498021972747e-05, "loss": 0.0122, "step": 9060 }, { "epoch": 8.7125, "grad_norm": 1.0587552785873413, "learning_rate": 1.514242971540091e-05, "loss": 0.0084, "step": 9061 }, { "epoch": 8.713461538461539, "grad_norm": 1.8857872486114502, "learning_rate": 1.5141361329033659e-05, "loss": 0.0271, "step": 9062 }, { "epoch": 8.714423076923078, "grad_norm": 2.857759952545166, "learning_rate": 1.5140292862887567e-05, "loss": 0.0574, "step": 9063 }, { "epoch": 8.715384615384615, "grad_norm": 0.2772725224494934, "learning_rate": 1.5139224316979217e-05, "loss": 0.0021, "step": 9064 }, { "epoch": 8.716346153846153, "grad_norm": 0.4592585861682892, "learning_rate": 1.5138155691325192e-05, "loss": 0.004, "step": 9065 }, { "epoch": 8.717307692307692, "grad_norm": 0.37399524450302124, "learning_rate": 1.513708698594207e-05, "loss": 0.0037, "step": 9066 }, { "epoch": 8.71826923076923, "grad_norm": 4.360682964324951, "learning_rate": 1.513601820084643e-05, "loss": 0.0833, "step": 9067 }, { "epoch": 8.71923076923077, "grad_norm": 2.028245687484741, "learning_rate": 1.5134949336054866e-05, "loss": 0.0085, "step": 9068 }, { "epoch": 8.720192307692308, "grad_norm": 1.3732315301895142, "learning_rate": 1.5133880391583958e-05, "loss": 0.0113, "step": 9069 }, { "epoch": 8.721153846153847, "grad_norm": 0.5161634683609009, "learning_rate": 1.5132811367450296e-05, "loss": 0.0038, "step": 9070 }, { "epoch": 8.722115384615385, "grad_norm": 3.2444422245025635, "learning_rate": 1.5131742263670463e-05, "loss": 0.0372, "step": 9071 }, { "epoch": 8.723076923076922, "grad_norm": 1.9603835344314575, "learning_rate": 1.5130673080261051e-05, "loss": 0.0097, "step": 9072 }, { "epoch": 8.724038461538461, "grad_norm": 1.861885905265808, "learning_rate": 1.5129603817238656e-05, "loss": 0.0298, "step": 9073 }, { "epoch": 8.725, "grad_norm": 2.2943296432495117, "learning_rate": 1.512853447461986e-05, "loss": 0.0267, "step": 9074 }, { "epoch": 8.725961538461538, "grad_norm": 4.785098075866699, "learning_rate": 1.5127465052421265e-05, "loss": 0.1286, "step": 9075 }, { "epoch": 8.726923076923077, "grad_norm": 1.0055965185165405, "learning_rate": 1.5126395550659459e-05, "loss": 0.0079, "step": 9076 }, { "epoch": 8.727884615384616, "grad_norm": 2.0980799198150635, "learning_rate": 1.5125325969351041e-05, "loss": 0.0388, "step": 9077 }, { "epoch": 8.728846153846154, "grad_norm": 1.3785473108291626, "learning_rate": 1.5124256308512606e-05, "loss": 0.0136, "step": 9078 }, { "epoch": 8.729807692307693, "grad_norm": 2.2930774688720703, "learning_rate": 1.512318656816075e-05, "loss": 0.0383, "step": 9079 }, { "epoch": 8.73076923076923, "grad_norm": 0.6437271237373352, "learning_rate": 1.512211674831208e-05, "loss": 0.0053, "step": 9080 }, { "epoch": 8.731730769230769, "grad_norm": 2.2338435649871826, "learning_rate": 1.512104684898319e-05, "loss": 0.0329, "step": 9081 }, { "epoch": 8.732692307692307, "grad_norm": 1.3502966165542603, "learning_rate": 1.5119976870190681e-05, "loss": 0.0058, "step": 9082 }, { "epoch": 8.733653846153846, "grad_norm": 1.6709742546081543, "learning_rate": 1.5118906811951157e-05, "loss": 0.015, "step": 9083 }, { "epoch": 8.734615384615385, "grad_norm": 1.7246214151382446, "learning_rate": 1.5117836674281226e-05, "loss": 0.0304, "step": 9084 }, { "epoch": 8.735576923076923, "grad_norm": 3.994643449783325, "learning_rate": 1.5116766457197488e-05, "loss": 0.0477, "step": 9085 }, { "epoch": 8.736538461538462, "grad_norm": 2.8765203952789307, "learning_rate": 1.5115696160716554e-05, "loss": 0.0273, "step": 9086 }, { "epoch": 8.7375, "grad_norm": 2.2908847332000732, "learning_rate": 1.5114625784855028e-05, "loss": 0.0679, "step": 9087 }, { "epoch": 8.73846153846154, "grad_norm": 2.433270215988159, "learning_rate": 1.5113555329629523e-05, "loss": 0.0355, "step": 9088 }, { "epoch": 8.739423076923076, "grad_norm": 2.8343687057495117, "learning_rate": 1.5112484795056645e-05, "loss": 0.0555, "step": 9089 }, { "epoch": 8.740384615384615, "grad_norm": 3.1044838428497314, "learning_rate": 1.5111414181153011e-05, "loss": 0.052, "step": 9090 }, { "epoch": 8.741346153846154, "grad_norm": 2.2297239303588867, "learning_rate": 1.5110343487935231e-05, "loss": 0.0207, "step": 9091 }, { "epoch": 8.742307692307692, "grad_norm": 0.05553961545228958, "learning_rate": 1.5109272715419919e-05, "loss": 0.0006, "step": 9092 }, { "epoch": 8.743269230769231, "grad_norm": 0.8536881804466248, "learning_rate": 1.5108201863623687e-05, "loss": 0.0029, "step": 9093 }, { "epoch": 8.74423076923077, "grad_norm": 3.048752784729004, "learning_rate": 1.5107130932563151e-05, "loss": 0.0643, "step": 9094 }, { "epoch": 8.745192307692308, "grad_norm": 2.311068296432495, "learning_rate": 1.5106059922254938e-05, "loss": 0.0181, "step": 9095 }, { "epoch": 8.746153846153845, "grad_norm": 2.353022575378418, "learning_rate": 1.5104988832715659e-05, "loss": 0.0294, "step": 9096 }, { "epoch": 8.747115384615384, "grad_norm": 0.3452070355415344, "learning_rate": 1.510391766396194e-05, "loss": 0.0024, "step": 9097 }, { "epoch": 8.748076923076923, "grad_norm": 1.7211947441101074, "learning_rate": 1.5102846416010394e-05, "loss": 0.0067, "step": 9098 }, { "epoch": 8.749038461538461, "grad_norm": 1.052454948425293, "learning_rate": 1.5101775088877653e-05, "loss": 0.0048, "step": 9099 }, { "epoch": 8.75, "grad_norm": 1.0130548477172852, "learning_rate": 1.510070368258033e-05, "loss": 0.0514, "step": 9100 }, { "epoch": 8.750961538461539, "grad_norm": 2.513559579849243, "learning_rate": 1.5099632197135061e-05, "loss": 0.04, "step": 9101 }, { "epoch": 8.751923076923077, "grad_norm": 2.377129077911377, "learning_rate": 1.5098560632558466e-05, "loss": 0.0278, "step": 9102 }, { "epoch": 8.752884615384616, "grad_norm": 2.6040780544281006, "learning_rate": 1.5097488988867176e-05, "loss": 0.0105, "step": 9103 }, { "epoch": 8.753846153846155, "grad_norm": 6.029554843902588, "learning_rate": 1.5096417266077814e-05, "loss": 0.1194, "step": 9104 }, { "epoch": 8.754807692307692, "grad_norm": 0.3367685079574585, "learning_rate": 1.5095345464207016e-05, "loss": 0.0026, "step": 9105 }, { "epoch": 8.75576923076923, "grad_norm": 2.4786171913146973, "learning_rate": 1.509427358327141e-05, "loss": 0.0318, "step": 9106 }, { "epoch": 8.756730769230769, "grad_norm": 0.8964170217514038, "learning_rate": 1.5093201623287631e-05, "loss": 0.0064, "step": 9107 }, { "epoch": 8.757692307692308, "grad_norm": 1.576274037361145, "learning_rate": 1.5092129584272312e-05, "loss": 0.0136, "step": 9108 }, { "epoch": 8.758653846153846, "grad_norm": 1.8548649549484253, "learning_rate": 1.5091057466242085e-05, "loss": 0.013, "step": 9109 }, { "epoch": 8.759615384615385, "grad_norm": 0.7451375126838684, "learning_rate": 1.508998526921359e-05, "loss": 0.0042, "step": 9110 }, { "epoch": 8.760576923076924, "grad_norm": 2.539947032928467, "learning_rate": 1.508891299320346e-05, "loss": 0.0931, "step": 9111 }, { "epoch": 8.76153846153846, "grad_norm": 1.9026658535003662, "learning_rate": 1.5087840638228337e-05, "loss": 0.0062, "step": 9112 }, { "epoch": 8.7625, "grad_norm": 3.3254621028900146, "learning_rate": 1.5086768204304862e-05, "loss": 0.0573, "step": 9113 }, { "epoch": 8.763461538461538, "grad_norm": 4.410919666290283, "learning_rate": 1.5085695691449671e-05, "loss": 0.0327, "step": 9114 }, { "epoch": 8.764423076923077, "grad_norm": 1.3018220663070679, "learning_rate": 1.5084623099679409e-05, "loss": 0.0105, "step": 9115 }, { "epoch": 8.765384615384615, "grad_norm": 1.0426832437515259, "learning_rate": 1.5083550429010725e-05, "loss": 0.0086, "step": 9116 }, { "epoch": 8.766346153846154, "grad_norm": 0.5260860919952393, "learning_rate": 1.5082477679460254e-05, "loss": 0.004, "step": 9117 }, { "epoch": 8.767307692307693, "grad_norm": 2.8161933422088623, "learning_rate": 1.5081404851044645e-05, "loss": 0.0267, "step": 9118 }, { "epoch": 8.768269230769231, "grad_norm": 1.814300537109375, "learning_rate": 1.5080331943780549e-05, "loss": 0.0157, "step": 9119 }, { "epoch": 8.76923076923077, "grad_norm": 2.2699291706085205, "learning_rate": 1.507925895768461e-05, "loss": 0.0288, "step": 9120 }, { "epoch": 8.770192307692307, "grad_norm": 4.753387451171875, "learning_rate": 1.5078185892773483e-05, "loss": 0.0895, "step": 9121 }, { "epoch": 8.771153846153846, "grad_norm": 2.2924184799194336, "learning_rate": 1.5077112749063812e-05, "loss": 0.0115, "step": 9122 }, { "epoch": 8.772115384615384, "grad_norm": 0.908040463924408, "learning_rate": 1.5076039526572253e-05, "loss": 0.0031, "step": 9123 }, { "epoch": 8.773076923076923, "grad_norm": 2.6600992679595947, "learning_rate": 1.5074966225315459e-05, "loss": 0.079, "step": 9124 }, { "epoch": 8.774038461538462, "grad_norm": 2.0374231338500977, "learning_rate": 1.5073892845310084e-05, "loss": 0.0149, "step": 9125 }, { "epoch": 8.775, "grad_norm": 2.1884520053863525, "learning_rate": 1.5072819386572784e-05, "loss": 0.0585, "step": 9126 }, { "epoch": 8.775961538461539, "grad_norm": 1.6903736591339111, "learning_rate": 1.5071745849120216e-05, "loss": 0.0214, "step": 9127 }, { "epoch": 8.776923076923078, "grad_norm": 0.5270634293556213, "learning_rate": 1.5070672232969037e-05, "loss": 0.0037, "step": 9128 }, { "epoch": 8.777884615384615, "grad_norm": 1.644648790359497, "learning_rate": 1.5069598538135905e-05, "loss": 0.0291, "step": 9129 }, { "epoch": 8.778846153846153, "grad_norm": 0.35015326738357544, "learning_rate": 1.5068524764637484e-05, "loss": 0.0021, "step": 9130 }, { "epoch": 8.779807692307692, "grad_norm": 2.4692156314849854, "learning_rate": 1.5067450912490438e-05, "loss": 0.0474, "step": 9131 }, { "epoch": 8.78076923076923, "grad_norm": 1.1283400058746338, "learning_rate": 1.5066376981711424e-05, "loss": 0.0068, "step": 9132 }, { "epoch": 8.78173076923077, "grad_norm": 2.8205556869506836, "learning_rate": 1.5065302972317108e-05, "loss": 0.0431, "step": 9133 }, { "epoch": 8.782692307692308, "grad_norm": 2.954292058944702, "learning_rate": 1.506422888432416e-05, "loss": 0.0629, "step": 9134 }, { "epoch": 8.783653846153847, "grad_norm": 1.7357118129730225, "learning_rate": 1.506315471774924e-05, "loss": 0.0145, "step": 9135 }, { "epoch": 8.784615384615385, "grad_norm": 0.597308337688446, "learning_rate": 1.5062080472609017e-05, "loss": 0.0032, "step": 9136 }, { "epoch": 8.785576923076922, "grad_norm": 1.9932423830032349, "learning_rate": 1.5061006148920166e-05, "loss": 0.0186, "step": 9137 }, { "epoch": 8.786538461538461, "grad_norm": 3.7209553718566895, "learning_rate": 1.5059931746699353e-05, "loss": 0.0573, "step": 9138 }, { "epoch": 8.7875, "grad_norm": 0.6693270802497864, "learning_rate": 1.5058857265963248e-05, "loss": 0.0051, "step": 9139 }, { "epoch": 8.788461538461538, "grad_norm": 2.5206751823425293, "learning_rate": 1.5057782706728525e-05, "loss": 0.0181, "step": 9140 }, { "epoch": 8.789423076923077, "grad_norm": 0.5631694197654724, "learning_rate": 1.5056708069011863e-05, "loss": 0.0043, "step": 9141 }, { "epoch": 8.790384615384616, "grad_norm": 4.078771114349365, "learning_rate": 1.505563335282993e-05, "loss": 0.078, "step": 9142 }, { "epoch": 8.791346153846154, "grad_norm": 0.9674423336982727, "learning_rate": 1.5054558558199408e-05, "loss": 0.0091, "step": 9143 }, { "epoch": 8.792307692307693, "grad_norm": 1.5187652111053467, "learning_rate": 1.5053483685136968e-05, "loss": 0.0096, "step": 9144 }, { "epoch": 8.79326923076923, "grad_norm": 2.9819934368133545, "learning_rate": 1.5052408733659297e-05, "loss": 0.0294, "step": 9145 }, { "epoch": 8.794230769230769, "grad_norm": 5.623887062072754, "learning_rate": 1.5051333703783069e-05, "loss": 0.0938, "step": 9146 }, { "epoch": 8.795192307692307, "grad_norm": 3.1751708984375, "learning_rate": 1.5050258595524968e-05, "loss": 0.0305, "step": 9147 }, { "epoch": 8.796153846153846, "grad_norm": 2.925044298171997, "learning_rate": 1.5049183408901672e-05, "loss": 0.0889, "step": 9148 }, { "epoch": 8.797115384615385, "grad_norm": 0.6087377667427063, "learning_rate": 1.5048108143929874e-05, "loss": 0.0029, "step": 9149 }, { "epoch": 8.798076923076923, "grad_norm": 2.278496265411377, "learning_rate": 1.5047032800626253e-05, "loss": 0.0126, "step": 9150 }, { "epoch": 8.799038461538462, "grad_norm": 2.7143948078155518, "learning_rate": 1.5045957379007491e-05, "loss": 0.0141, "step": 9151 }, { "epoch": 8.8, "grad_norm": 0.33994194865226746, "learning_rate": 1.5044881879090287e-05, "loss": 0.0028, "step": 9152 }, { "epoch": 8.80096153846154, "grad_norm": 2.9097487926483154, "learning_rate": 1.5043806300891319e-05, "loss": 0.0199, "step": 9153 }, { "epoch": 8.801923076923076, "grad_norm": 2.1939361095428467, "learning_rate": 1.5042730644427282e-05, "loss": 0.0159, "step": 9154 }, { "epoch": 8.802884615384615, "grad_norm": 0.4110729396343231, "learning_rate": 1.5041654909714864e-05, "loss": 0.0024, "step": 9155 }, { "epoch": 8.803846153846154, "grad_norm": 1.4386690855026245, "learning_rate": 1.504057909677076e-05, "loss": 0.0059, "step": 9156 }, { "epoch": 8.804807692307692, "grad_norm": 4.416897296905518, "learning_rate": 1.5039503205611663e-05, "loss": 0.0698, "step": 9157 }, { "epoch": 8.805769230769231, "grad_norm": 1.9669595956802368, "learning_rate": 1.5038427236254266e-05, "loss": 0.0125, "step": 9158 }, { "epoch": 8.80673076923077, "grad_norm": 0.7336503863334656, "learning_rate": 1.5037351188715265e-05, "loss": 0.0048, "step": 9159 }, { "epoch": 8.807692307692308, "grad_norm": 0.9908552765846252, "learning_rate": 1.503627506301136e-05, "loss": 0.0117, "step": 9160 }, { "epoch": 8.808653846153845, "grad_norm": 1.864518642425537, "learning_rate": 1.5035198859159248e-05, "loss": 0.0162, "step": 9161 }, { "epoch": 8.809615384615384, "grad_norm": 2.2815935611724854, "learning_rate": 1.5034122577175625e-05, "loss": 0.0477, "step": 9162 }, { "epoch": 8.810576923076923, "grad_norm": 5.36134672164917, "learning_rate": 1.5033046217077196e-05, "loss": 0.0426, "step": 9163 }, { "epoch": 8.811538461538461, "grad_norm": 1.8460190296173096, "learning_rate": 1.5031969778880662e-05, "loss": 0.0192, "step": 9164 }, { "epoch": 8.8125, "grad_norm": 2.1717209815979004, "learning_rate": 1.5030893262602724e-05, "loss": 0.0138, "step": 9165 }, { "epoch": 8.813461538461539, "grad_norm": 2.125457286834717, "learning_rate": 1.502981666826009e-05, "loss": 0.0179, "step": 9166 }, { "epoch": 8.814423076923077, "grad_norm": 5.029026508331299, "learning_rate": 1.5028739995869464e-05, "loss": 0.1044, "step": 9167 }, { "epoch": 8.815384615384616, "grad_norm": 1.1265745162963867, "learning_rate": 1.502766324544755e-05, "loss": 0.0064, "step": 9168 }, { "epoch": 8.816346153846155, "grad_norm": 2.3420960903167725, "learning_rate": 1.502658641701106e-05, "loss": 0.0111, "step": 9169 }, { "epoch": 8.817307692307692, "grad_norm": 3.3130905628204346, "learning_rate": 1.5025509510576699e-05, "loss": 0.0252, "step": 9170 }, { "epoch": 8.81826923076923, "grad_norm": 1.1153676509857178, "learning_rate": 1.5024432526161184e-05, "loss": 0.0075, "step": 9171 }, { "epoch": 8.819230769230769, "grad_norm": 3.965569496154785, "learning_rate": 1.5023355463781221e-05, "loss": 0.0313, "step": 9172 }, { "epoch": 8.820192307692308, "grad_norm": 6.043288230895996, "learning_rate": 1.5022278323453522e-05, "loss": 0.0752, "step": 9173 }, { "epoch": 8.821153846153846, "grad_norm": 0.8413951396942139, "learning_rate": 1.502120110519481e-05, "loss": 0.0031, "step": 9174 }, { "epoch": 8.822115384615385, "grad_norm": 4.737295150756836, "learning_rate": 1.502012380902179e-05, "loss": 0.0228, "step": 9175 }, { "epoch": 8.823076923076924, "grad_norm": 3.045985698699951, "learning_rate": 1.5019046434951182e-05, "loss": 0.0388, "step": 9176 }, { "epoch": 8.82403846153846, "grad_norm": 2.0963759422302246, "learning_rate": 1.5017968982999703e-05, "loss": 0.0102, "step": 9177 }, { "epoch": 8.825, "grad_norm": 0.6732113361358643, "learning_rate": 1.5016891453184074e-05, "loss": 0.0044, "step": 9178 }, { "epoch": 8.825961538461538, "grad_norm": 5.052955150604248, "learning_rate": 1.5015813845521017e-05, "loss": 0.0554, "step": 9179 }, { "epoch": 8.826923076923077, "grad_norm": 1.4949395656585693, "learning_rate": 1.5014736160027249e-05, "loss": 0.0111, "step": 9180 }, { "epoch": 8.827884615384615, "grad_norm": 2.7114574909210205, "learning_rate": 1.501365839671949e-05, "loss": 0.0382, "step": 9181 }, { "epoch": 8.828846153846154, "grad_norm": 1.8667283058166504, "learning_rate": 1.5012580555614472e-05, "loss": 0.0071, "step": 9182 }, { "epoch": 8.829807692307693, "grad_norm": 1.0579360723495483, "learning_rate": 1.5011502636728914e-05, "loss": 0.0085, "step": 9183 }, { "epoch": 8.830769230769231, "grad_norm": 3.1042978763580322, "learning_rate": 1.5010424640079544e-05, "loss": 0.0168, "step": 9184 }, { "epoch": 8.83173076923077, "grad_norm": 2.072458505630493, "learning_rate": 1.5009346565683088e-05, "loss": 0.0237, "step": 9185 }, { "epoch": 8.832692307692307, "grad_norm": 3.0304811000823975, "learning_rate": 1.5008268413556275e-05, "loss": 0.0711, "step": 9186 }, { "epoch": 8.833653846153846, "grad_norm": 2.181539535522461, "learning_rate": 1.5007190183715836e-05, "loss": 0.0207, "step": 9187 }, { "epoch": 8.834615384615384, "grad_norm": 3.189039468765259, "learning_rate": 1.5006111876178504e-05, "loss": 0.0892, "step": 9188 }, { "epoch": 8.835576923076923, "grad_norm": 3.245598316192627, "learning_rate": 1.5005033490961006e-05, "loss": 0.0242, "step": 9189 }, { "epoch": 8.836538461538462, "grad_norm": 3.3582916259765625, "learning_rate": 1.500395502808008e-05, "loss": 0.0636, "step": 9190 }, { "epoch": 8.8375, "grad_norm": 0.8365473747253418, "learning_rate": 1.5002876487552453e-05, "loss": 0.0053, "step": 9191 }, { "epoch": 8.838461538461539, "grad_norm": 2.226012945175171, "learning_rate": 1.5001797869394872e-05, "loss": 0.0271, "step": 9192 }, { "epoch": 8.839423076923078, "grad_norm": 1.4004899263381958, "learning_rate": 1.5000719173624064e-05, "loss": 0.0173, "step": 9193 }, { "epoch": 8.840384615384615, "grad_norm": 5.185926914215088, "learning_rate": 1.4999640400256773e-05, "loss": 0.0746, "step": 9194 }, { "epoch": 8.841346153846153, "grad_norm": 0.877312183380127, "learning_rate": 1.4998561549309739e-05, "loss": 0.0064, "step": 9195 }, { "epoch": 8.842307692307692, "grad_norm": 4.422728061676025, "learning_rate": 1.4997482620799697e-05, "loss": 0.0921, "step": 9196 }, { "epoch": 8.84326923076923, "grad_norm": 1.1164766550064087, "learning_rate": 1.4996403614743393e-05, "loss": 0.0085, "step": 9197 }, { "epoch": 8.84423076923077, "grad_norm": 2.071094512939453, "learning_rate": 1.4995324531157569e-05, "loss": 0.0095, "step": 9198 }, { "epoch": 8.845192307692308, "grad_norm": 1.8924908638000488, "learning_rate": 1.499424537005897e-05, "loss": 0.0125, "step": 9199 }, { "epoch": 8.846153846153847, "grad_norm": 1.4056007862091064, "learning_rate": 1.4993166131464341e-05, "loss": 0.0192, "step": 9200 }, { "epoch": 8.847115384615385, "grad_norm": 2.5374839305877686, "learning_rate": 1.4992086815390429e-05, "loss": 0.0232, "step": 9201 }, { "epoch": 8.848076923076922, "grad_norm": 0.26329439878463745, "learning_rate": 1.499100742185398e-05, "loss": 0.0016, "step": 9202 }, { "epoch": 8.849038461538461, "grad_norm": 2.396069049835205, "learning_rate": 1.4989927950871744e-05, "loss": 0.0752, "step": 9203 }, { "epoch": 8.85, "grad_norm": 4.537631988525391, "learning_rate": 1.4988848402460474e-05, "loss": 0.0663, "step": 9204 }, { "epoch": 8.850961538461538, "grad_norm": 2.2166085243225098, "learning_rate": 1.4987768776636915e-05, "loss": 0.0939, "step": 9205 }, { "epoch": 8.851923076923077, "grad_norm": 2.5441536903381348, "learning_rate": 1.4986689073417826e-05, "loss": 0.0256, "step": 9206 }, { "epoch": 8.852884615384616, "grad_norm": 0.5663043260574341, "learning_rate": 1.4985609292819958e-05, "loss": 0.0027, "step": 9207 }, { "epoch": 8.853846153846154, "grad_norm": 4.361611366271973, "learning_rate": 1.4984529434860067e-05, "loss": 0.0594, "step": 9208 }, { "epoch": 8.854807692307693, "grad_norm": 2.1649835109710693, "learning_rate": 1.4983449499554908e-05, "loss": 0.0396, "step": 9209 }, { "epoch": 8.85576923076923, "grad_norm": 4.860637187957764, "learning_rate": 1.4982369486921238e-05, "loss": 0.1643, "step": 9210 }, { "epoch": 8.856730769230769, "grad_norm": 1.096131443977356, "learning_rate": 1.4981289396975818e-05, "loss": 0.0055, "step": 9211 }, { "epoch": 8.857692307692307, "grad_norm": 2.6555511951446533, "learning_rate": 1.4980209229735406e-05, "loss": 0.0251, "step": 9212 }, { "epoch": 8.858653846153846, "grad_norm": 3.415753126144409, "learning_rate": 1.4979128985216762e-05, "loss": 0.0867, "step": 9213 }, { "epoch": 8.859615384615385, "grad_norm": 1.738039493560791, "learning_rate": 1.4978048663436653e-05, "loss": 0.0212, "step": 9214 }, { "epoch": 8.860576923076923, "grad_norm": 1.9337654113769531, "learning_rate": 1.4976968264411836e-05, "loss": 0.0173, "step": 9215 }, { "epoch": 8.861538461538462, "grad_norm": 2.209766149520874, "learning_rate": 1.4975887788159078e-05, "loss": 0.0507, "step": 9216 }, { "epoch": 8.8625, "grad_norm": 1.0237329006195068, "learning_rate": 1.497480723469515e-05, "loss": 0.0102, "step": 9217 }, { "epoch": 8.86346153846154, "grad_norm": 2.402007579803467, "learning_rate": 1.4973726604036811e-05, "loss": 0.0204, "step": 9218 }, { "epoch": 8.864423076923076, "grad_norm": 2.1149392127990723, "learning_rate": 1.4972645896200834e-05, "loss": 0.0268, "step": 9219 }, { "epoch": 8.865384615384615, "grad_norm": 5.671891689300537, "learning_rate": 1.4971565111203988e-05, "loss": 0.0395, "step": 9220 }, { "epoch": 8.866346153846154, "grad_norm": 4.126801490783691, "learning_rate": 1.4970484249063041e-05, "loss": 0.0501, "step": 9221 }, { "epoch": 8.867307692307692, "grad_norm": 4.339081287384033, "learning_rate": 1.496940330979477e-05, "loss": 0.0337, "step": 9222 }, { "epoch": 8.868269230769231, "grad_norm": 3.3186898231506348, "learning_rate": 1.4968322293415942e-05, "loss": 0.0388, "step": 9223 }, { "epoch": 8.86923076923077, "grad_norm": 0.5718738436698914, "learning_rate": 1.4967241199943332e-05, "loss": 0.0026, "step": 9224 }, { "epoch": 8.870192307692308, "grad_norm": 2.433936595916748, "learning_rate": 1.4966160029393722e-05, "loss": 0.0134, "step": 9225 }, { "epoch": 8.871153846153845, "grad_norm": 2.4808156490325928, "learning_rate": 1.4965078781783882e-05, "loss": 0.0173, "step": 9226 }, { "epoch": 8.872115384615384, "grad_norm": 1.303360104560852, "learning_rate": 1.4963997457130588e-05, "loss": 0.0164, "step": 9227 }, { "epoch": 8.873076923076923, "grad_norm": 0.45948857069015503, "learning_rate": 1.4962916055450626e-05, "loss": 0.0035, "step": 9228 }, { "epoch": 8.874038461538461, "grad_norm": 3.5098047256469727, "learning_rate": 1.4961834576760774e-05, "loss": 0.0508, "step": 9229 }, { "epoch": 8.875, "grad_norm": 3.651345729827881, "learning_rate": 1.4960753021077809e-05, "loss": 0.0411, "step": 9230 }, { "epoch": 8.875961538461539, "grad_norm": 1.1046456098556519, "learning_rate": 1.4959671388418517e-05, "loss": 0.0072, "step": 9231 }, { "epoch": 8.876923076923077, "grad_norm": 1.5094757080078125, "learning_rate": 1.4958589678799684e-05, "loss": 0.0096, "step": 9232 }, { "epoch": 8.877884615384616, "grad_norm": 5.332588195800781, "learning_rate": 1.4957507892238092e-05, "loss": 0.0271, "step": 9233 }, { "epoch": 8.878846153846155, "grad_norm": 1.2492533922195435, "learning_rate": 1.4956426028750528e-05, "loss": 0.0108, "step": 9234 }, { "epoch": 8.879807692307692, "grad_norm": 4.088953018188477, "learning_rate": 1.4955344088353777e-05, "loss": 0.0537, "step": 9235 }, { "epoch": 8.88076923076923, "grad_norm": 3.1091184616088867, "learning_rate": 1.495426207106463e-05, "loss": 0.0256, "step": 9236 }, { "epoch": 8.881730769230769, "grad_norm": 4.117608070373535, "learning_rate": 1.4953179976899878e-05, "loss": 0.0415, "step": 9237 }, { "epoch": 8.882692307692308, "grad_norm": 4.488853931427002, "learning_rate": 1.4952097805876306e-05, "loss": 0.0386, "step": 9238 }, { "epoch": 8.883653846153846, "grad_norm": 5.504233360290527, "learning_rate": 1.4951015558010715e-05, "loss": 0.0835, "step": 9239 }, { "epoch": 8.884615384615385, "grad_norm": 1.6213948726654053, "learning_rate": 1.4949933233319893e-05, "loss": 0.0233, "step": 9240 }, { "epoch": 8.885576923076924, "grad_norm": 1.8752192258834839, "learning_rate": 1.4948850831820634e-05, "loss": 0.0123, "step": 9241 }, { "epoch": 8.88653846153846, "grad_norm": 2.708787679672241, "learning_rate": 1.4947768353529735e-05, "loss": 0.0803, "step": 9242 }, { "epoch": 8.8875, "grad_norm": 0.18632559478282928, "learning_rate": 1.4946685798463991e-05, "loss": 0.0019, "step": 9243 }, { "epoch": 8.888461538461538, "grad_norm": 1.3640815019607544, "learning_rate": 1.4945603166640203e-05, "loss": 0.0073, "step": 9244 }, { "epoch": 8.889423076923077, "grad_norm": 4.802401542663574, "learning_rate": 1.494452045807517e-05, "loss": 0.1143, "step": 9245 }, { "epoch": 8.890384615384615, "grad_norm": 3.3157589435577393, "learning_rate": 1.494343767278569e-05, "loss": 0.0547, "step": 9246 }, { "epoch": 8.891346153846154, "grad_norm": 2.076371669769287, "learning_rate": 1.4942354810788567e-05, "loss": 0.024, "step": 9247 }, { "epoch": 8.892307692307693, "grad_norm": 3.0056943893432617, "learning_rate": 1.4941271872100602e-05, "loss": 0.0371, "step": 9248 }, { "epoch": 8.893269230769231, "grad_norm": 1.8256618976593018, "learning_rate": 1.49401888567386e-05, "loss": 0.0111, "step": 9249 }, { "epoch": 8.89423076923077, "grad_norm": 2.2579779624938965, "learning_rate": 1.4939105764719369e-05, "loss": 0.023, "step": 9250 }, { "epoch": 8.895192307692307, "grad_norm": 1.8902969360351562, "learning_rate": 1.493802259605971e-05, "loss": 0.0078, "step": 9251 }, { "epoch": 8.896153846153846, "grad_norm": 2.9836976528167725, "learning_rate": 1.4936939350776434e-05, "loss": 0.0182, "step": 9252 }, { "epoch": 8.897115384615384, "grad_norm": 3.25940203666687, "learning_rate": 1.4935856028886346e-05, "loss": 0.0438, "step": 9253 }, { "epoch": 8.898076923076923, "grad_norm": 0.7314743995666504, "learning_rate": 1.4934772630406265e-05, "loss": 0.0069, "step": 9254 }, { "epoch": 8.899038461538462, "grad_norm": 1.464348316192627, "learning_rate": 1.4933689155352992e-05, "loss": 0.0236, "step": 9255 }, { "epoch": 8.9, "grad_norm": 2.4922118186950684, "learning_rate": 1.4932605603743343e-05, "loss": 0.0417, "step": 9256 }, { "epoch": 8.900961538461539, "grad_norm": 3.0880677700042725, "learning_rate": 1.4931521975594134e-05, "loss": 0.0169, "step": 9257 }, { "epoch": 8.901923076923078, "grad_norm": 3.269364356994629, "learning_rate": 1.4930438270922178e-05, "loss": 0.0364, "step": 9258 }, { "epoch": 8.902884615384615, "grad_norm": 3.7974421977996826, "learning_rate": 1.492935448974429e-05, "loss": 0.024, "step": 9259 }, { "epoch": 8.903846153846153, "grad_norm": 3.398730516433716, "learning_rate": 1.4928270632077287e-05, "loss": 0.0735, "step": 9260 }, { "epoch": 8.904807692307692, "grad_norm": 4.378533363342285, "learning_rate": 1.4927186697937988e-05, "loss": 0.0761, "step": 9261 }, { "epoch": 8.90576923076923, "grad_norm": 5.208569526672363, "learning_rate": 1.4926102687343213e-05, "loss": 0.0564, "step": 9262 }, { "epoch": 8.90673076923077, "grad_norm": 2.0567545890808105, "learning_rate": 1.4925018600309784e-05, "loss": 0.0343, "step": 9263 }, { "epoch": 8.907692307692308, "grad_norm": 3.2724218368530273, "learning_rate": 1.492393443685452e-05, "loss": 0.0244, "step": 9264 }, { "epoch": 8.908653846153847, "grad_norm": 1.3626271486282349, "learning_rate": 1.4922850196994243e-05, "loss": 0.008, "step": 9265 }, { "epoch": 8.909615384615385, "grad_norm": 0.33523014187812805, "learning_rate": 1.4921765880745783e-05, "loss": 0.003, "step": 9266 }, { "epoch": 8.910576923076922, "grad_norm": 2.8967857360839844, "learning_rate": 1.4920681488125959e-05, "loss": 0.0323, "step": 9267 }, { "epoch": 8.911538461538461, "grad_norm": 3.82918643951416, "learning_rate": 1.4919597019151602e-05, "loss": 0.065, "step": 9268 }, { "epoch": 8.9125, "grad_norm": 4.247437000274658, "learning_rate": 1.4918512473839537e-05, "loss": 0.0746, "step": 9269 }, { "epoch": 8.913461538461538, "grad_norm": 3.8869528770446777, "learning_rate": 1.4917427852206593e-05, "loss": 0.0681, "step": 9270 }, { "epoch": 8.914423076923077, "grad_norm": 4.627351760864258, "learning_rate": 1.4916343154269605e-05, "loss": 0.0349, "step": 9271 }, { "epoch": 8.915384615384616, "grad_norm": 2.025343418121338, "learning_rate": 1.4915258380045397e-05, "loss": 0.0181, "step": 9272 }, { "epoch": 8.916346153846154, "grad_norm": 2.1966819763183594, "learning_rate": 1.4914173529550805e-05, "loss": 0.0261, "step": 9273 }, { "epoch": 8.917307692307693, "grad_norm": 2.8286526203155518, "learning_rate": 1.4913088602802666e-05, "loss": 0.0199, "step": 9274 }, { "epoch": 8.91826923076923, "grad_norm": 2.434407949447632, "learning_rate": 1.4912003599817808e-05, "loss": 0.0499, "step": 9275 }, { "epoch": 8.919230769230769, "grad_norm": 0.7851957082748413, "learning_rate": 1.4910918520613074e-05, "loss": 0.0048, "step": 9276 }, { "epoch": 8.920192307692307, "grad_norm": 1.9651408195495605, "learning_rate": 1.4909833365205297e-05, "loss": 0.0138, "step": 9277 }, { "epoch": 8.921153846153846, "grad_norm": 2.5614798069000244, "learning_rate": 1.4908748133611316e-05, "loss": 0.0469, "step": 9278 }, { "epoch": 8.922115384615385, "grad_norm": 4.2046942710876465, "learning_rate": 1.4907662825847968e-05, "loss": 0.0315, "step": 9279 }, { "epoch": 8.923076923076923, "grad_norm": 1.9548282623291016, "learning_rate": 1.4906577441932099e-05, "loss": 0.0062, "step": 9280 }, { "epoch": 8.924038461538462, "grad_norm": 0.13991601765155792, "learning_rate": 1.490549198188055e-05, "loss": 0.0009, "step": 9281 }, { "epoch": 8.925, "grad_norm": 1.2461862564086914, "learning_rate": 1.4904406445710162e-05, "loss": 0.0134, "step": 9282 }, { "epoch": 8.92596153846154, "grad_norm": 2.7181060314178467, "learning_rate": 1.490332083343778e-05, "loss": 0.0407, "step": 9283 }, { "epoch": 8.926923076923076, "grad_norm": 1.6389492750167847, "learning_rate": 1.490223514508025e-05, "loss": 0.0141, "step": 9284 }, { "epoch": 8.927884615384615, "grad_norm": 2.3338329792022705, "learning_rate": 1.490114938065442e-05, "loss": 0.0172, "step": 9285 }, { "epoch": 8.928846153846154, "grad_norm": 3.3485312461853027, "learning_rate": 1.4900063540177133e-05, "loss": 0.0555, "step": 9286 }, { "epoch": 8.929807692307692, "grad_norm": 3.9535272121429443, "learning_rate": 1.4898977623665243e-05, "loss": 0.0405, "step": 9287 }, { "epoch": 8.930769230769231, "grad_norm": 1.555334448814392, "learning_rate": 1.48978916311356e-05, "loss": 0.0086, "step": 9288 }, { "epoch": 8.93173076923077, "grad_norm": 3.658212184906006, "learning_rate": 1.4896805562605052e-05, "loss": 0.0361, "step": 9289 }, { "epoch": 8.932692307692308, "grad_norm": 2.235146999359131, "learning_rate": 1.4895719418090452e-05, "loss": 0.0261, "step": 9290 }, { "epoch": 8.933653846153845, "grad_norm": 3.214285373687744, "learning_rate": 1.4894633197608661e-05, "loss": 0.0363, "step": 9291 }, { "epoch": 8.934615384615384, "grad_norm": 1.9901846647262573, "learning_rate": 1.4893546901176525e-05, "loss": 0.0141, "step": 9292 }, { "epoch": 8.935576923076923, "grad_norm": 5.1556806564331055, "learning_rate": 1.4892460528810903e-05, "loss": 0.0637, "step": 9293 }, { "epoch": 8.936538461538461, "grad_norm": 3.305910348892212, "learning_rate": 1.4891374080528654e-05, "loss": 0.0183, "step": 9294 }, { "epoch": 8.9375, "grad_norm": 0.7321227192878723, "learning_rate": 1.4890287556346634e-05, "loss": 0.0034, "step": 9295 }, { "epoch": 8.938461538461539, "grad_norm": 2.9106390476226807, "learning_rate": 1.4889200956281705e-05, "loss": 0.0355, "step": 9296 }, { "epoch": 8.939423076923077, "grad_norm": 3.7485291957855225, "learning_rate": 1.4888114280350725e-05, "loss": 0.0827, "step": 9297 }, { "epoch": 8.940384615384616, "grad_norm": 1.9451416730880737, "learning_rate": 1.4887027528570559e-05, "loss": 0.0241, "step": 9298 }, { "epoch": 8.941346153846155, "grad_norm": 2.7837061882019043, "learning_rate": 1.488594070095807e-05, "loss": 0.0799, "step": 9299 }, { "epoch": 8.942307692307692, "grad_norm": 0.14436925947666168, "learning_rate": 1.4884853797530121e-05, "loss": 0.0013, "step": 9300 }, { "epoch": 8.94326923076923, "grad_norm": 0.44474998116493225, "learning_rate": 1.4883766818303575e-05, "loss": 0.0037, "step": 9301 }, { "epoch": 8.944230769230769, "grad_norm": 2.2073616981506348, "learning_rate": 1.4882679763295307e-05, "loss": 0.02, "step": 9302 }, { "epoch": 8.945192307692308, "grad_norm": 1.1394563913345337, "learning_rate": 1.4881592632522175e-05, "loss": 0.0054, "step": 9303 }, { "epoch": 8.946153846153846, "grad_norm": 0.4392046630382538, "learning_rate": 1.4880505426001053e-05, "loss": 0.0034, "step": 9304 }, { "epoch": 8.947115384615385, "grad_norm": 2.172550678253174, "learning_rate": 1.4879418143748814e-05, "loss": 0.0119, "step": 9305 }, { "epoch": 8.948076923076924, "grad_norm": 3.1597750186920166, "learning_rate": 1.4878330785782324e-05, "loss": 0.0609, "step": 9306 }, { "epoch": 8.94903846153846, "grad_norm": 2.826197385787964, "learning_rate": 1.487724335211846e-05, "loss": 0.0293, "step": 9307 }, { "epoch": 8.95, "grad_norm": 0.07181447744369507, "learning_rate": 1.4876155842774091e-05, "loss": 0.0005, "step": 9308 }, { "epoch": 8.950961538461538, "grad_norm": 3.575162410736084, "learning_rate": 1.4875068257766097e-05, "loss": 0.0341, "step": 9309 }, { "epoch": 8.951923076923077, "grad_norm": 3.3559844493865967, "learning_rate": 1.487398059711135e-05, "loss": 0.0338, "step": 9310 }, { "epoch": 8.952884615384615, "grad_norm": 2.884371280670166, "learning_rate": 1.487289286082673e-05, "loss": 0.0229, "step": 9311 }, { "epoch": 8.953846153846154, "grad_norm": 4.230193614959717, "learning_rate": 1.4871805048929113e-05, "loss": 0.0616, "step": 9312 }, { "epoch": 8.954807692307693, "grad_norm": 4.631898403167725, "learning_rate": 1.4870717161435382e-05, "loss": 0.056, "step": 9313 }, { "epoch": 8.955769230769231, "grad_norm": 0.46454450488090515, "learning_rate": 1.4869629198362416e-05, "loss": 0.004, "step": 9314 }, { "epoch": 8.95673076923077, "grad_norm": 1.7896873950958252, "learning_rate": 1.4868541159727097e-05, "loss": 0.0348, "step": 9315 }, { "epoch": 8.957692307692307, "grad_norm": 0.4198269546031952, "learning_rate": 1.4867453045546308e-05, "loss": 0.0025, "step": 9316 }, { "epoch": 8.958653846153846, "grad_norm": 3.7757275104522705, "learning_rate": 1.4866364855836935e-05, "loss": 0.0591, "step": 9317 }, { "epoch": 8.959615384615384, "grad_norm": 2.3185975551605225, "learning_rate": 1.486527659061586e-05, "loss": 0.0132, "step": 9318 }, { "epoch": 8.960576923076923, "grad_norm": 2.1255557537078857, "learning_rate": 1.486418824989997e-05, "loss": 0.0102, "step": 9319 }, { "epoch": 8.961538461538462, "grad_norm": 3.1513431072235107, "learning_rate": 1.4863099833706158e-05, "loss": 0.0405, "step": 9320 }, { "epoch": 8.9625, "grad_norm": 4.1774821281433105, "learning_rate": 1.486201134205131e-05, "loss": 0.074, "step": 9321 }, { "epoch": 8.963461538461539, "grad_norm": 1.0002334117889404, "learning_rate": 1.4860922774952312e-05, "loss": 0.0191, "step": 9322 }, { "epoch": 8.964423076923078, "grad_norm": 1.55147123336792, "learning_rate": 1.485983413242606e-05, "loss": 0.0164, "step": 9323 }, { "epoch": 8.965384615384615, "grad_norm": 2.7936322689056396, "learning_rate": 1.4858745414489449e-05, "loss": 0.0251, "step": 9324 }, { "epoch": 8.966346153846153, "grad_norm": 5.08111047744751, "learning_rate": 1.4857656621159369e-05, "loss": 0.0777, "step": 9325 }, { "epoch": 8.967307692307692, "grad_norm": 2.468698024749756, "learning_rate": 1.485656775245271e-05, "loss": 0.0076, "step": 9326 }, { "epoch": 8.96826923076923, "grad_norm": 2.2818784713745117, "learning_rate": 1.4855478808386378e-05, "loss": 0.0206, "step": 9327 }, { "epoch": 8.96923076923077, "grad_norm": 2.355652093887329, "learning_rate": 1.4854389788977266e-05, "loss": 0.0182, "step": 9328 }, { "epoch": 8.970192307692308, "grad_norm": 1.8755805492401123, "learning_rate": 1.4853300694242272e-05, "loss": 0.0183, "step": 9329 }, { "epoch": 8.971153846153847, "grad_norm": 3.0880632400512695, "learning_rate": 1.4852211524198293e-05, "loss": 0.0238, "step": 9330 }, { "epoch": 8.972115384615385, "grad_norm": 2.57610821723938, "learning_rate": 1.4851122278862233e-05, "loss": 0.0228, "step": 9331 }, { "epoch": 8.973076923076922, "grad_norm": 0.43024811148643494, "learning_rate": 1.4850032958250993e-05, "loss": 0.0021, "step": 9332 }, { "epoch": 8.974038461538461, "grad_norm": 0.9451514482498169, "learning_rate": 1.4848943562381478e-05, "loss": 0.0072, "step": 9333 }, { "epoch": 8.975, "grad_norm": 0.7467938661575317, "learning_rate": 1.4847854091270587e-05, "loss": 0.0045, "step": 9334 }, { "epoch": 8.975961538461538, "grad_norm": 1.4380069971084595, "learning_rate": 1.4846764544935233e-05, "loss": 0.0174, "step": 9335 }, { "epoch": 8.976923076923077, "grad_norm": 2.571587562561035, "learning_rate": 1.4845674923392315e-05, "loss": 0.0751, "step": 9336 }, { "epoch": 8.977884615384616, "grad_norm": 1.2874915599822998, "learning_rate": 1.4844585226658743e-05, "loss": 0.0111, "step": 9337 }, { "epoch": 8.978846153846154, "grad_norm": 2.703355312347412, "learning_rate": 1.4843495454751431e-05, "loss": 0.0439, "step": 9338 }, { "epoch": 8.979807692307693, "grad_norm": 0.6257863640785217, "learning_rate": 1.4842405607687282e-05, "loss": 0.0049, "step": 9339 }, { "epoch": 8.98076923076923, "grad_norm": 2.312191963195801, "learning_rate": 1.4841315685483213e-05, "loss": 0.0212, "step": 9340 }, { "epoch": 8.981730769230769, "grad_norm": 6.432795524597168, "learning_rate": 1.4840225688156132e-05, "loss": 0.1954, "step": 9341 }, { "epoch": 8.982692307692307, "grad_norm": 1.5443850755691528, "learning_rate": 1.4839135615722953e-05, "loss": 0.0078, "step": 9342 }, { "epoch": 8.983653846153846, "grad_norm": 1.1237341165542603, "learning_rate": 1.4838045468200595e-05, "loss": 0.0081, "step": 9343 }, { "epoch": 8.984615384615385, "grad_norm": 0.9228258728981018, "learning_rate": 1.4836955245605969e-05, "loss": 0.0049, "step": 9344 }, { "epoch": 8.985576923076923, "grad_norm": 2.8806378841400146, "learning_rate": 1.483586494795599e-05, "loss": 0.041, "step": 9345 }, { "epoch": 8.986538461538462, "grad_norm": 3.205483913421631, "learning_rate": 1.4834774575267587e-05, "loss": 0.0635, "step": 9346 }, { "epoch": 8.9875, "grad_norm": 5.674612522125244, "learning_rate": 1.4833684127557671e-05, "loss": 0.1482, "step": 9347 }, { "epoch": 8.98846153846154, "grad_norm": 6.1712751388549805, "learning_rate": 1.483259360484316e-05, "loss": 0.1102, "step": 9348 }, { "epoch": 8.989423076923076, "grad_norm": 2.0475103855133057, "learning_rate": 1.4831503007140984e-05, "loss": 0.0322, "step": 9349 }, { "epoch": 8.990384615384615, "grad_norm": 0.4976373016834259, "learning_rate": 1.4830412334468058e-05, "loss": 0.0045, "step": 9350 }, { "epoch": 8.991346153846154, "grad_norm": 2.9082858562469482, "learning_rate": 1.4829321586841312e-05, "loss": 0.048, "step": 9351 }, { "epoch": 8.992307692307692, "grad_norm": 3.5499472618103027, "learning_rate": 1.482823076427767e-05, "loss": 0.0484, "step": 9352 }, { "epoch": 8.993269230769231, "grad_norm": 4.495427131652832, "learning_rate": 1.4827139866794054e-05, "loss": 0.037, "step": 9353 }, { "epoch": 8.99423076923077, "grad_norm": 2.3571581840515137, "learning_rate": 1.4826048894407396e-05, "loss": 0.025, "step": 9354 }, { "epoch": 8.995192307692308, "grad_norm": 1.2999179363250732, "learning_rate": 1.4824957847134625e-05, "loss": 0.0072, "step": 9355 }, { "epoch": 8.996153846153845, "grad_norm": 2.7126357555389404, "learning_rate": 1.4823866724992668e-05, "loss": 0.0309, "step": 9356 }, { "epoch": 8.997115384615384, "grad_norm": 2.667079210281372, "learning_rate": 1.4822775527998458e-05, "loss": 0.0209, "step": 9357 }, { "epoch": 8.998076923076923, "grad_norm": 1.3364043235778809, "learning_rate": 1.4821684256168927e-05, "loss": 0.0131, "step": 9358 }, { "epoch": 8.999038461538461, "grad_norm": 2.8056387901306152, "learning_rate": 1.4820592909521004e-05, "loss": 0.0381, "step": 9359 }, { "epoch": 9.0, "grad_norm": 7.1159443855285645, "learning_rate": 1.481950148807163e-05, "loss": 0.083, "step": 9360 }, { "epoch": 9.000961538461539, "grad_norm": 0.3823285400867462, "learning_rate": 1.4818409991837739e-05, "loss": 0.0033, "step": 9361 }, { "epoch": 9.001923076923077, "grad_norm": 3.2644495964050293, "learning_rate": 1.4817318420836267e-05, "loss": 0.0295, "step": 9362 }, { "epoch": 9.002884615384616, "grad_norm": 0.8563216924667358, "learning_rate": 1.4816226775084148e-05, "loss": 0.0035, "step": 9363 }, { "epoch": 9.003846153846155, "grad_norm": 2.2827200889587402, "learning_rate": 1.4815135054598328e-05, "loss": 0.0153, "step": 9364 }, { "epoch": 9.004807692307692, "grad_norm": 0.41984352469444275, "learning_rate": 1.4814043259395744e-05, "loss": 0.0028, "step": 9365 }, { "epoch": 9.00576923076923, "grad_norm": 3.8925647735595703, "learning_rate": 1.4812951389493337e-05, "loss": 0.0444, "step": 9366 }, { "epoch": 9.006730769230769, "grad_norm": 1.4708274602890015, "learning_rate": 1.4811859444908053e-05, "loss": 0.0288, "step": 9367 }, { "epoch": 9.007692307692308, "grad_norm": 0.4870598316192627, "learning_rate": 1.481076742565683e-05, "loss": 0.003, "step": 9368 }, { "epoch": 9.008653846153846, "grad_norm": 1.5309317111968994, "learning_rate": 1.4809675331756617e-05, "loss": 0.0098, "step": 9369 }, { "epoch": 9.009615384615385, "grad_norm": 2.0639522075653076, "learning_rate": 1.4808583163224358e-05, "loss": 0.0192, "step": 9370 }, { "epoch": 9.010576923076924, "grad_norm": 1.2557297945022583, "learning_rate": 1.4807490920077e-05, "loss": 0.0452, "step": 9371 }, { "epoch": 9.011538461538462, "grad_norm": 1.5329124927520752, "learning_rate": 1.4806398602331497e-05, "loss": 0.0102, "step": 9372 }, { "epoch": 9.0125, "grad_norm": 0.33748185634613037, "learning_rate": 1.4805306210004792e-05, "loss": 0.0025, "step": 9373 }, { "epoch": 9.013461538461538, "grad_norm": 3.471757173538208, "learning_rate": 1.4804213743113838e-05, "loss": 0.0266, "step": 9374 }, { "epoch": 9.014423076923077, "grad_norm": 2.913311243057251, "learning_rate": 1.4803121201675588e-05, "loss": 0.0453, "step": 9375 }, { "epoch": 9.015384615384615, "grad_norm": 2.2217156887054443, "learning_rate": 1.4802028585706993e-05, "loss": 0.0182, "step": 9376 }, { "epoch": 9.016346153846154, "grad_norm": 2.592987298965454, "learning_rate": 1.4800935895225005e-05, "loss": 0.0231, "step": 9377 }, { "epoch": 9.017307692307693, "grad_norm": 1.5103380680084229, "learning_rate": 1.4799843130246587e-05, "loss": 0.0158, "step": 9378 }, { "epoch": 9.018269230769231, "grad_norm": 4.007298946380615, "learning_rate": 1.479875029078869e-05, "loss": 0.065, "step": 9379 }, { "epoch": 9.01923076923077, "grad_norm": 1.2202556133270264, "learning_rate": 1.4797657376868273e-05, "loss": 0.0056, "step": 9380 }, { "epoch": 9.020192307692307, "grad_norm": 1.110629677772522, "learning_rate": 1.4796564388502293e-05, "loss": 0.0108, "step": 9381 }, { "epoch": 9.021153846153846, "grad_norm": 1.2175524234771729, "learning_rate": 1.479547132570771e-05, "loss": 0.0121, "step": 9382 }, { "epoch": 9.022115384615384, "grad_norm": 2.095684289932251, "learning_rate": 1.479437818850149e-05, "loss": 0.0162, "step": 9383 }, { "epoch": 9.023076923076923, "grad_norm": 1.2445987462997437, "learning_rate": 1.4793284976900593e-05, "loss": 0.009, "step": 9384 }, { "epoch": 9.024038461538462, "grad_norm": 1.865451455116272, "learning_rate": 1.4792191690921977e-05, "loss": 0.0436, "step": 9385 }, { "epoch": 9.025, "grad_norm": 0.9307472109794617, "learning_rate": 1.4791098330582613e-05, "loss": 0.0067, "step": 9386 }, { "epoch": 9.025961538461539, "grad_norm": 0.3857116103172302, "learning_rate": 1.4790004895899465e-05, "loss": 0.0026, "step": 9387 }, { "epoch": 9.026923076923078, "grad_norm": 2.095715045928955, "learning_rate": 1.4788911386889497e-05, "loss": 0.0186, "step": 9388 }, { "epoch": 9.027884615384615, "grad_norm": 0.5993233323097229, "learning_rate": 1.4787817803569682e-05, "loss": 0.0024, "step": 9389 }, { "epoch": 9.028846153846153, "grad_norm": 0.26257792115211487, "learning_rate": 1.4786724145956988e-05, "loss": 0.0017, "step": 9390 }, { "epoch": 9.029807692307692, "grad_norm": 0.3875627815723419, "learning_rate": 1.4785630414068382e-05, "loss": 0.002, "step": 9391 }, { "epoch": 9.03076923076923, "grad_norm": 1.5378540754318237, "learning_rate": 1.4784536607920836e-05, "loss": 0.0177, "step": 9392 }, { "epoch": 9.03173076923077, "grad_norm": 0.4140704572200775, "learning_rate": 1.4783442727531328e-05, "loss": 0.0025, "step": 9393 }, { "epoch": 9.032692307692308, "grad_norm": 0.48654451966285706, "learning_rate": 1.4782348772916826e-05, "loss": 0.0031, "step": 9394 }, { "epoch": 9.033653846153847, "grad_norm": 3.382046937942505, "learning_rate": 1.4781254744094307e-05, "loss": 0.0275, "step": 9395 }, { "epoch": 9.034615384615385, "grad_norm": 3.764692544937134, "learning_rate": 1.4780160641080747e-05, "loss": 0.0473, "step": 9396 }, { "epoch": 9.035576923076922, "grad_norm": 1.1799767017364502, "learning_rate": 1.4779066463893124e-05, "loss": 0.0057, "step": 9397 }, { "epoch": 9.036538461538461, "grad_norm": 5.858527660369873, "learning_rate": 1.4777972212548417e-05, "loss": 0.1113, "step": 9398 }, { "epoch": 9.0375, "grad_norm": 4.338933944702148, "learning_rate": 1.4776877887063604e-05, "loss": 0.0767, "step": 9399 }, { "epoch": 9.038461538461538, "grad_norm": 1.4787328243255615, "learning_rate": 1.4775783487455664e-05, "loss": 0.0117, "step": 9400 }, { "epoch": 9.039423076923077, "grad_norm": 3.120476245880127, "learning_rate": 1.4774689013741582e-05, "loss": 0.0923, "step": 9401 }, { "epoch": 9.040384615384616, "grad_norm": 2.7205586433410645, "learning_rate": 1.4773594465938344e-05, "loss": 0.0201, "step": 9402 }, { "epoch": 9.041346153846154, "grad_norm": 0.26762932538986206, "learning_rate": 1.4772499844062925e-05, "loss": 0.0021, "step": 9403 }, { "epoch": 9.042307692307693, "grad_norm": 0.9682478308677673, "learning_rate": 1.4771405148132316e-05, "loss": 0.0055, "step": 9404 }, { "epoch": 9.04326923076923, "grad_norm": 1.4140278100967407, "learning_rate": 1.4770310378163506e-05, "loss": 0.0092, "step": 9405 }, { "epoch": 9.044230769230769, "grad_norm": 0.7080929279327393, "learning_rate": 1.4769215534173476e-05, "loss": 0.0047, "step": 9406 }, { "epoch": 9.045192307692307, "grad_norm": 0.09957423061132431, "learning_rate": 1.4768120616179222e-05, "loss": 0.0008, "step": 9407 }, { "epoch": 9.046153846153846, "grad_norm": 1.851370930671692, "learning_rate": 1.4767025624197728e-05, "loss": 0.0266, "step": 9408 }, { "epoch": 9.047115384615385, "grad_norm": 2.801494598388672, "learning_rate": 1.4765930558245989e-05, "loss": 0.0354, "step": 9409 }, { "epoch": 9.048076923076923, "grad_norm": 3.8999032974243164, "learning_rate": 1.4764835418340993e-05, "loss": 0.0646, "step": 9410 }, { "epoch": 9.049038461538462, "grad_norm": 1.0776349306106567, "learning_rate": 1.4763740204499739e-05, "loss": 0.0076, "step": 9411 }, { "epoch": 9.05, "grad_norm": 2.898566484451294, "learning_rate": 1.4762644916739217e-05, "loss": 0.0244, "step": 9412 }, { "epoch": 9.050961538461538, "grad_norm": 1.2480225563049316, "learning_rate": 1.4761549555076425e-05, "loss": 0.0135, "step": 9413 }, { "epoch": 9.051923076923076, "grad_norm": 1.5990562438964844, "learning_rate": 1.4760454119528356e-05, "loss": 0.014, "step": 9414 }, { "epoch": 9.052884615384615, "grad_norm": 1.599552035331726, "learning_rate": 1.4759358610112014e-05, "loss": 0.0293, "step": 9415 }, { "epoch": 9.053846153846154, "grad_norm": 0.0956694632768631, "learning_rate": 1.4758263026844393e-05, "loss": 0.0005, "step": 9416 }, { "epoch": 9.054807692307692, "grad_norm": 0.40720081329345703, "learning_rate": 1.4757167369742495e-05, "loss": 0.0026, "step": 9417 }, { "epoch": 9.055769230769231, "grad_norm": 2.0543715953826904, "learning_rate": 1.4756071638823324e-05, "loss": 0.0151, "step": 9418 }, { "epoch": 9.05673076923077, "grad_norm": 0.19524411857128143, "learning_rate": 1.4754975834103877e-05, "loss": 0.0011, "step": 9419 }, { "epoch": 9.057692307692308, "grad_norm": 0.14377205073833466, "learning_rate": 1.4753879955601162e-05, "loss": 0.0009, "step": 9420 }, { "epoch": 9.058653846153845, "grad_norm": 2.030444622039795, "learning_rate": 1.4752784003332184e-05, "loss": 0.0291, "step": 9421 }, { "epoch": 9.059615384615384, "grad_norm": 0.6192730665206909, "learning_rate": 1.4751687977313947e-05, "loss": 0.0027, "step": 9422 }, { "epoch": 9.060576923076923, "grad_norm": 2.224522352218628, "learning_rate": 1.4750591877563456e-05, "loss": 0.0232, "step": 9423 }, { "epoch": 9.061538461538461, "grad_norm": 3.778351306915283, "learning_rate": 1.4749495704097724e-05, "loss": 0.0492, "step": 9424 }, { "epoch": 9.0625, "grad_norm": 2.730429172515869, "learning_rate": 1.474839945693376e-05, "loss": 0.0417, "step": 9425 }, { "epoch": 9.063461538461539, "grad_norm": 1.998964786529541, "learning_rate": 1.4747303136088569e-05, "loss": 0.0088, "step": 9426 }, { "epoch": 9.064423076923077, "grad_norm": 0.1005902960896492, "learning_rate": 1.4746206741579169e-05, "loss": 0.0009, "step": 9427 }, { "epoch": 9.065384615384616, "grad_norm": 0.35688334703445435, "learning_rate": 1.4745110273422569e-05, "loss": 0.0026, "step": 9428 }, { "epoch": 9.066346153846155, "grad_norm": 0.9583032131195068, "learning_rate": 1.4744013731635786e-05, "loss": 0.0044, "step": 9429 }, { "epoch": 9.067307692307692, "grad_norm": 0.4377642869949341, "learning_rate": 1.4742917116235833e-05, "loss": 0.0048, "step": 9430 }, { "epoch": 9.06826923076923, "grad_norm": 0.6403375864028931, "learning_rate": 1.4741820427239725e-05, "loss": 0.0038, "step": 9431 }, { "epoch": 9.069230769230769, "grad_norm": 3.5015265941619873, "learning_rate": 1.4740723664664483e-05, "loss": 0.0665, "step": 9432 }, { "epoch": 9.070192307692308, "grad_norm": 0.8354874849319458, "learning_rate": 1.4739626828527122e-05, "loss": 0.0058, "step": 9433 }, { "epoch": 9.071153846153846, "grad_norm": 4.002193450927734, "learning_rate": 1.4738529918844664e-05, "loss": 0.1154, "step": 9434 }, { "epoch": 9.072115384615385, "grad_norm": 2.1207942962646484, "learning_rate": 1.4737432935634128e-05, "loss": 0.0069, "step": 9435 }, { "epoch": 9.073076923076924, "grad_norm": 0.21318045258522034, "learning_rate": 1.4736335878912537e-05, "loss": 0.0014, "step": 9436 }, { "epoch": 9.074038461538462, "grad_norm": 1.862382173538208, "learning_rate": 1.4735238748696916e-05, "loss": 0.0162, "step": 9437 }, { "epoch": 9.075, "grad_norm": 0.5830557346343994, "learning_rate": 1.4734141545004287e-05, "loss": 0.0037, "step": 9438 }, { "epoch": 9.075961538461538, "grad_norm": 1.8693093061447144, "learning_rate": 1.4733044267851676e-05, "loss": 0.0125, "step": 9439 }, { "epoch": 9.076923076923077, "grad_norm": 1.655672311782837, "learning_rate": 1.4731946917256108e-05, "loss": 0.0243, "step": 9440 }, { "epoch": 9.077884615384615, "grad_norm": 1.4659364223480225, "learning_rate": 1.4730849493234614e-05, "loss": 0.0048, "step": 9441 }, { "epoch": 9.078846153846154, "grad_norm": 1.0198286771774292, "learning_rate": 1.472975199580422e-05, "loss": 0.0062, "step": 9442 }, { "epoch": 9.079807692307693, "grad_norm": 3.7950439453125, "learning_rate": 1.4728654424981953e-05, "loss": 0.0915, "step": 9443 }, { "epoch": 9.080769230769231, "grad_norm": 0.17448849976062775, "learning_rate": 1.4727556780784853e-05, "loss": 0.0011, "step": 9444 }, { "epoch": 9.08173076923077, "grad_norm": 2.552589178085327, "learning_rate": 1.4726459063229946e-05, "loss": 0.0214, "step": 9445 }, { "epoch": 9.082692307692307, "grad_norm": 2.5648305416107178, "learning_rate": 1.4725361272334262e-05, "loss": 0.0298, "step": 9446 }, { "epoch": 9.083653846153846, "grad_norm": 0.36650392413139343, "learning_rate": 1.4724263408114842e-05, "loss": 0.0018, "step": 9447 }, { "epoch": 9.084615384615384, "grad_norm": 0.6562551259994507, "learning_rate": 1.4723165470588723e-05, "loss": 0.0037, "step": 9448 }, { "epoch": 9.085576923076923, "grad_norm": 0.35194817185401917, "learning_rate": 1.4722067459772935e-05, "loss": 0.0016, "step": 9449 }, { "epoch": 9.086538461538462, "grad_norm": 0.48781540989875793, "learning_rate": 1.472096937568452e-05, "loss": 0.0037, "step": 9450 }, { "epoch": 9.0875, "grad_norm": 0.9235445261001587, "learning_rate": 1.4719871218340516e-05, "loss": 0.0039, "step": 9451 }, { "epoch": 9.088461538461539, "grad_norm": 0.5396634340286255, "learning_rate": 1.4718772987757963e-05, "loss": 0.003, "step": 9452 }, { "epoch": 9.089423076923078, "grad_norm": 2.3046505451202393, "learning_rate": 1.4717674683953904e-05, "loss": 0.0104, "step": 9453 }, { "epoch": 9.090384615384615, "grad_norm": 1.45980966091156, "learning_rate": 1.4716576306945375e-05, "loss": 0.0089, "step": 9454 }, { "epoch": 9.091346153846153, "grad_norm": 3.301421642303467, "learning_rate": 1.4715477856749432e-05, "loss": 0.0576, "step": 9455 }, { "epoch": 9.092307692307692, "grad_norm": 1.6749027967453003, "learning_rate": 1.4714379333383108e-05, "loss": 0.008, "step": 9456 }, { "epoch": 9.09326923076923, "grad_norm": 2.323896884918213, "learning_rate": 1.4713280736863453e-05, "loss": 0.0215, "step": 9457 }, { "epoch": 9.09423076923077, "grad_norm": 3.9714205265045166, "learning_rate": 1.4712182067207516e-05, "loss": 0.0513, "step": 9458 }, { "epoch": 9.095192307692308, "grad_norm": 3.5973875522613525, "learning_rate": 1.4711083324432344e-05, "loss": 0.1157, "step": 9459 }, { "epoch": 9.096153846153847, "grad_norm": 0.6957831382751465, "learning_rate": 1.4709984508554986e-05, "loss": 0.0134, "step": 9460 }, { "epoch": 9.097115384615385, "grad_norm": 1.7253386974334717, "learning_rate": 1.4708885619592488e-05, "loss": 0.0113, "step": 9461 }, { "epoch": 9.098076923076922, "grad_norm": 2.379859447479248, "learning_rate": 1.470778665756191e-05, "loss": 0.0495, "step": 9462 }, { "epoch": 9.099038461538461, "grad_norm": 2.102945327758789, "learning_rate": 1.47066876224803e-05, "loss": 0.036, "step": 9463 }, { "epoch": 9.1, "grad_norm": 0.3412228226661682, "learning_rate": 1.470558851436471e-05, "loss": 0.002, "step": 9464 }, { "epoch": 9.100961538461538, "grad_norm": 5.074624061584473, "learning_rate": 1.4704489333232197e-05, "loss": 0.023, "step": 9465 }, { "epoch": 9.101923076923077, "grad_norm": 0.14259052276611328, "learning_rate": 1.4703390079099818e-05, "loss": 0.0011, "step": 9466 }, { "epoch": 9.102884615384616, "grad_norm": 2.1773056983947754, "learning_rate": 1.470229075198463e-05, "loss": 0.0129, "step": 9467 }, { "epoch": 9.103846153846154, "grad_norm": 0.9013712406158447, "learning_rate": 1.4701191351903688e-05, "loss": 0.0049, "step": 9468 }, { "epoch": 9.104807692307693, "grad_norm": 1.6642335653305054, "learning_rate": 1.4700091878874055e-05, "loss": 0.0261, "step": 9469 }, { "epoch": 9.10576923076923, "grad_norm": 1.4452556371688843, "learning_rate": 1.4698992332912792e-05, "loss": 0.0095, "step": 9470 }, { "epoch": 9.106730769230769, "grad_norm": 1.193832278251648, "learning_rate": 1.4697892714036959e-05, "loss": 0.0055, "step": 9471 }, { "epoch": 9.107692307692307, "grad_norm": 2.651583433151245, "learning_rate": 1.4696793022263618e-05, "loss": 0.102, "step": 9472 }, { "epoch": 9.108653846153846, "grad_norm": 3.570950746536255, "learning_rate": 1.4695693257609836e-05, "loss": 0.1072, "step": 9473 }, { "epoch": 9.109615384615385, "grad_norm": 0.3419105112552643, "learning_rate": 1.4694593420092676e-05, "loss": 0.0027, "step": 9474 }, { "epoch": 9.110576923076923, "grad_norm": 0.03774857521057129, "learning_rate": 1.4693493509729205e-05, "loss": 0.0005, "step": 9475 }, { "epoch": 9.111538461538462, "grad_norm": 3.2241499423980713, "learning_rate": 1.4692393526536486e-05, "loss": 0.0537, "step": 9476 }, { "epoch": 9.1125, "grad_norm": 0.7071313261985779, "learning_rate": 1.4691293470531595e-05, "loss": 0.003, "step": 9477 }, { "epoch": 9.113461538461538, "grad_norm": 0.8594933152198792, "learning_rate": 1.4690193341731598e-05, "loss": 0.004, "step": 9478 }, { "epoch": 9.114423076923076, "grad_norm": 1.16347074508667, "learning_rate": 1.4689093140153565e-05, "loss": 0.0054, "step": 9479 }, { "epoch": 9.115384615384615, "grad_norm": 1.6350210905075073, "learning_rate": 1.4687992865814572e-05, "loss": 0.0093, "step": 9480 }, { "epoch": 9.116346153846154, "grad_norm": 0.8792174458503723, "learning_rate": 1.4686892518731684e-05, "loss": 0.0035, "step": 9481 }, { "epoch": 9.117307692307692, "grad_norm": 0.3859904110431671, "learning_rate": 1.4685792098921983e-05, "loss": 0.0024, "step": 9482 }, { "epoch": 9.118269230769231, "grad_norm": 1.5493910312652588, "learning_rate": 1.468469160640254e-05, "loss": 0.0131, "step": 9483 }, { "epoch": 9.11923076923077, "grad_norm": 0.5071985125541687, "learning_rate": 1.4683591041190433e-05, "loss": 0.0033, "step": 9484 }, { "epoch": 9.120192307692308, "grad_norm": 2.0983972549438477, "learning_rate": 1.468249040330274e-05, "loss": 0.0492, "step": 9485 }, { "epoch": 9.121153846153845, "grad_norm": 2.5143327713012695, "learning_rate": 1.4681389692756536e-05, "loss": 0.0153, "step": 9486 }, { "epoch": 9.122115384615384, "grad_norm": 2.1294455528259277, "learning_rate": 1.4680288909568905e-05, "loss": 0.0081, "step": 9487 }, { "epoch": 9.123076923076923, "grad_norm": 4.363734245300293, "learning_rate": 1.467918805375693e-05, "loss": 0.0953, "step": 9488 }, { "epoch": 9.124038461538461, "grad_norm": 1.720737099647522, "learning_rate": 1.4678087125337688e-05, "loss": 0.006, "step": 9489 }, { "epoch": 9.125, "grad_norm": 1.440602421760559, "learning_rate": 1.467698612432826e-05, "loss": 0.0104, "step": 9490 }, { "epoch": 9.125961538461539, "grad_norm": 2.0742380619049072, "learning_rate": 1.4675885050745737e-05, "loss": 0.0385, "step": 9491 }, { "epoch": 9.126923076923077, "grad_norm": 1.7127842903137207, "learning_rate": 1.4674783904607202e-05, "loss": 0.0425, "step": 9492 }, { "epoch": 9.127884615384616, "grad_norm": 0.3076956868171692, "learning_rate": 1.4673682685929743e-05, "loss": 0.0024, "step": 9493 }, { "epoch": 9.128846153846155, "grad_norm": 0.17766684293746948, "learning_rate": 1.467258139473044e-05, "loss": 0.0015, "step": 9494 }, { "epoch": 9.129807692307692, "grad_norm": 2.276919364929199, "learning_rate": 1.4671480031026392e-05, "loss": 0.0188, "step": 9495 }, { "epoch": 9.13076923076923, "grad_norm": 3.4936859607696533, "learning_rate": 1.4670378594834685e-05, "loss": 0.0624, "step": 9496 }, { "epoch": 9.131730769230769, "grad_norm": 0.5932638049125671, "learning_rate": 1.4669277086172406e-05, "loss": 0.0024, "step": 9497 }, { "epoch": 9.132692307692308, "grad_norm": 0.2247646152973175, "learning_rate": 1.4668175505056654e-05, "loss": 0.0015, "step": 9498 }, { "epoch": 9.133653846153846, "grad_norm": 0.11359360069036484, "learning_rate": 1.4667073851504519e-05, "loss": 0.0017, "step": 9499 }, { "epoch": 9.134615384615385, "grad_norm": 3.8466575145721436, "learning_rate": 1.4665972125533095e-05, "loss": 0.0929, "step": 9500 }, { "epoch": 9.135576923076924, "grad_norm": 1.8213717937469482, "learning_rate": 1.4664870327159476e-05, "loss": 0.0127, "step": 9501 }, { "epoch": 9.136538461538462, "grad_norm": 1.8081845045089722, "learning_rate": 1.4663768456400764e-05, "loss": 0.0229, "step": 9502 }, { "epoch": 9.1375, "grad_norm": 0.710587203502655, "learning_rate": 1.4662666513274053e-05, "loss": 0.0039, "step": 9503 }, { "epoch": 9.138461538461538, "grad_norm": 0.04985417425632477, "learning_rate": 1.466156449779644e-05, "loss": 0.0006, "step": 9504 }, { "epoch": 9.139423076923077, "grad_norm": 1.6507594585418701, "learning_rate": 1.466046240998503e-05, "loss": 0.0144, "step": 9505 }, { "epoch": 9.140384615384615, "grad_norm": 3.317044734954834, "learning_rate": 1.465936024985692e-05, "loss": 0.0203, "step": 9506 }, { "epoch": 9.141346153846154, "grad_norm": 0.09853105992078781, "learning_rate": 1.4658258017429217e-05, "loss": 0.0006, "step": 9507 }, { "epoch": 9.142307692307693, "grad_norm": 0.9233127236366272, "learning_rate": 1.4657155712719019e-05, "loss": 0.0049, "step": 9508 }, { "epoch": 9.143269230769231, "grad_norm": 0.4987460970878601, "learning_rate": 1.465605333574343e-05, "loss": 0.0024, "step": 9509 }, { "epoch": 9.14423076923077, "grad_norm": 2.1506450176239014, "learning_rate": 1.4654950886519563e-05, "loss": 0.0432, "step": 9510 }, { "epoch": 9.145192307692307, "grad_norm": 1.7012674808502197, "learning_rate": 1.465384836506452e-05, "loss": 0.04, "step": 9511 }, { "epoch": 9.146153846153846, "grad_norm": 1.6107386350631714, "learning_rate": 1.4652745771395406e-05, "loss": 0.0071, "step": 9512 }, { "epoch": 9.147115384615384, "grad_norm": 3.275827169418335, "learning_rate": 1.4651643105529336e-05, "loss": 0.032, "step": 9513 }, { "epoch": 9.148076923076923, "grad_norm": 2.096511125564575, "learning_rate": 1.4650540367483419e-05, "loss": 0.0163, "step": 9514 }, { "epoch": 9.149038461538462, "grad_norm": 1.544677972793579, "learning_rate": 1.4649437557274762e-05, "loss": 0.0294, "step": 9515 }, { "epoch": 9.15, "grad_norm": 6.048210144042969, "learning_rate": 1.4648334674920479e-05, "loss": 0.15, "step": 9516 }, { "epoch": 9.150961538461539, "grad_norm": 0.24653121829032898, "learning_rate": 1.4647231720437687e-05, "loss": 0.0012, "step": 9517 }, { "epoch": 9.151923076923078, "grad_norm": 3.310344696044922, "learning_rate": 1.4646128693843498e-05, "loss": 0.0502, "step": 9518 }, { "epoch": 9.152884615384615, "grad_norm": 2.014146327972412, "learning_rate": 1.4645025595155027e-05, "loss": 0.0223, "step": 9519 }, { "epoch": 9.153846153846153, "grad_norm": 3.9377894401550293, "learning_rate": 1.4643922424389393e-05, "loss": 0.1147, "step": 9520 }, { "epoch": 9.154807692307692, "grad_norm": 0.9666778445243835, "learning_rate": 1.4642819181563713e-05, "loss": 0.0064, "step": 9521 }, { "epoch": 9.15576923076923, "grad_norm": 4.427590370178223, "learning_rate": 1.4641715866695103e-05, "loss": 0.0545, "step": 9522 }, { "epoch": 9.15673076923077, "grad_norm": 0.5331135392189026, "learning_rate": 1.4640612479800686e-05, "loss": 0.0032, "step": 9523 }, { "epoch": 9.157692307692308, "grad_norm": 4.875743865966797, "learning_rate": 1.4639509020897586e-05, "loss": 0.0423, "step": 9524 }, { "epoch": 9.158653846153847, "grad_norm": 0.2106577605009079, "learning_rate": 1.4638405490002923e-05, "loss": 0.0017, "step": 9525 }, { "epoch": 9.159615384615385, "grad_norm": 1.6972737312316895, "learning_rate": 1.4637301887133817e-05, "loss": 0.0091, "step": 9526 }, { "epoch": 9.160576923076922, "grad_norm": 0.8145224452018738, "learning_rate": 1.4636198212307397e-05, "loss": 0.0027, "step": 9527 }, { "epoch": 9.161538461538461, "grad_norm": 0.9943292737007141, "learning_rate": 1.4635094465540792e-05, "loss": 0.0109, "step": 9528 }, { "epoch": 9.1625, "grad_norm": 2.6157283782958984, "learning_rate": 1.463399064685112e-05, "loss": 0.0809, "step": 9529 }, { "epoch": 9.163461538461538, "grad_norm": 0.06337975710630417, "learning_rate": 1.4632886756255515e-05, "loss": 0.0007, "step": 9530 }, { "epoch": 9.164423076923077, "grad_norm": 0.3047170639038086, "learning_rate": 1.4631782793771106e-05, "loss": 0.0019, "step": 9531 }, { "epoch": 9.165384615384616, "grad_norm": 0.7481186985969543, "learning_rate": 1.4630678759415023e-05, "loss": 0.0057, "step": 9532 }, { "epoch": 9.166346153846154, "grad_norm": 2.2245168685913086, "learning_rate": 1.4629574653204394e-05, "loss": 0.0167, "step": 9533 }, { "epoch": 9.167307692307693, "grad_norm": 2.2031073570251465, "learning_rate": 1.4628470475156352e-05, "loss": 0.02, "step": 9534 }, { "epoch": 9.16826923076923, "grad_norm": 3.3679234981536865, "learning_rate": 1.4627366225288038e-05, "loss": 0.0358, "step": 9535 }, { "epoch": 9.169230769230769, "grad_norm": 0.7043970227241516, "learning_rate": 1.4626261903616579e-05, "loss": 0.0021, "step": 9536 }, { "epoch": 9.170192307692307, "grad_norm": 1.0590038299560547, "learning_rate": 1.4625157510159112e-05, "loss": 0.0138, "step": 9537 }, { "epoch": 9.171153846153846, "grad_norm": 0.06618845462799072, "learning_rate": 1.4624053044932778e-05, "loss": 0.0006, "step": 9538 }, { "epoch": 9.172115384615385, "grad_norm": 2.5972654819488525, "learning_rate": 1.4622948507954711e-05, "loss": 0.0178, "step": 9539 }, { "epoch": 9.173076923076923, "grad_norm": 1.6778512001037598, "learning_rate": 1.462184389924205e-05, "loss": 0.0224, "step": 9540 }, { "epoch": 9.174038461538462, "grad_norm": 1.7695260047912598, "learning_rate": 1.4620739218811936e-05, "loss": 0.0186, "step": 9541 }, { "epoch": 9.175, "grad_norm": 1.6134463548660278, "learning_rate": 1.4619634466681514e-05, "loss": 0.0096, "step": 9542 }, { "epoch": 9.175961538461538, "grad_norm": 3.7115161418914795, "learning_rate": 1.4618529642867922e-05, "loss": 0.0369, "step": 9543 }, { "epoch": 9.176923076923076, "grad_norm": 2.488593816757202, "learning_rate": 1.4617424747388307e-05, "loss": 0.0211, "step": 9544 }, { "epoch": 9.177884615384615, "grad_norm": 1.2582758665084839, "learning_rate": 1.4616319780259806e-05, "loss": 0.0075, "step": 9545 }, { "epoch": 9.178846153846154, "grad_norm": 0.6304722428321838, "learning_rate": 1.4615214741499578e-05, "loss": 0.0026, "step": 9546 }, { "epoch": 9.179807692307692, "grad_norm": 2.775067090988159, "learning_rate": 1.461410963112476e-05, "loss": 0.0894, "step": 9547 }, { "epoch": 9.180769230769231, "grad_norm": 0.42921221256256104, "learning_rate": 1.46130044491525e-05, "loss": 0.0033, "step": 9548 }, { "epoch": 9.18173076923077, "grad_norm": 2.090602397918701, "learning_rate": 1.4611899195599952e-05, "loss": 0.014, "step": 9549 }, { "epoch": 9.182692307692308, "grad_norm": 0.4874969720840454, "learning_rate": 1.4610793870484266e-05, "loss": 0.0035, "step": 9550 }, { "epoch": 9.183653846153845, "grad_norm": 3.195570945739746, "learning_rate": 1.460968847382259e-05, "loss": 0.0417, "step": 9551 }, { "epoch": 9.184615384615384, "grad_norm": 0.8005932569503784, "learning_rate": 1.4608583005632077e-05, "loss": 0.005, "step": 9552 }, { "epoch": 9.185576923076923, "grad_norm": 0.2551571726799011, "learning_rate": 1.4607477465929884e-05, "loss": 0.0012, "step": 9553 }, { "epoch": 9.186538461538461, "grad_norm": 1.7100526094436646, "learning_rate": 1.4606371854733161e-05, "loss": 0.0136, "step": 9554 }, { "epoch": 9.1875, "grad_norm": 1.0143606662750244, "learning_rate": 1.4605266172059069e-05, "loss": 0.0088, "step": 9555 }, { "epoch": 9.188461538461539, "grad_norm": 5.976651668548584, "learning_rate": 1.4604160417924762e-05, "loss": 0.127, "step": 9556 }, { "epoch": 9.189423076923077, "grad_norm": 0.2901856303215027, "learning_rate": 1.4603054592347396e-05, "loss": 0.0022, "step": 9557 }, { "epoch": 9.190384615384616, "grad_norm": 2.17260479927063, "learning_rate": 1.4601948695344135e-05, "loss": 0.0146, "step": 9558 }, { "epoch": 9.191346153846155, "grad_norm": 0.3841210603713989, "learning_rate": 1.4600842726932136e-05, "loss": 0.0026, "step": 9559 }, { "epoch": 9.192307692307692, "grad_norm": 2.235042095184326, "learning_rate": 1.459973668712856e-05, "loss": 0.0128, "step": 9560 }, { "epoch": 9.19326923076923, "grad_norm": 3.8489480018615723, "learning_rate": 1.4598630575950572e-05, "loss": 0.1406, "step": 9561 }, { "epoch": 9.194230769230769, "grad_norm": 0.5393878221511841, "learning_rate": 1.4597524393415336e-05, "loss": 0.0016, "step": 9562 }, { "epoch": 9.195192307692308, "grad_norm": 1.1450976133346558, "learning_rate": 1.4596418139540012e-05, "loss": 0.006, "step": 9563 }, { "epoch": 9.196153846153846, "grad_norm": 2.318732500076294, "learning_rate": 1.459531181434177e-05, "loss": 0.0393, "step": 9564 }, { "epoch": 9.197115384615385, "grad_norm": 1.7129648923873901, "learning_rate": 1.4594205417837777e-05, "loss": 0.0109, "step": 9565 }, { "epoch": 9.198076923076924, "grad_norm": 2.160935878753662, "learning_rate": 1.4593098950045197e-05, "loss": 0.0935, "step": 9566 }, { "epoch": 9.199038461538462, "grad_norm": 0.3155428469181061, "learning_rate": 1.4591992410981201e-05, "loss": 0.0026, "step": 9567 }, { "epoch": 9.2, "grad_norm": 2.2742483615875244, "learning_rate": 1.4590885800662964e-05, "loss": 0.0233, "step": 9568 }, { "epoch": 9.200961538461538, "grad_norm": 2.3514981269836426, "learning_rate": 1.4589779119107652e-05, "loss": 0.0097, "step": 9569 }, { "epoch": 9.201923076923077, "grad_norm": 1.9627424478530884, "learning_rate": 1.4588672366332439e-05, "loss": 0.0244, "step": 9570 }, { "epoch": 9.202884615384615, "grad_norm": 1.1357924938201904, "learning_rate": 1.4587565542354498e-05, "loss": 0.0045, "step": 9571 }, { "epoch": 9.203846153846154, "grad_norm": 3.563897132873535, "learning_rate": 1.4586458647191006e-05, "loss": 0.0271, "step": 9572 }, { "epoch": 9.204807692307693, "grad_norm": 0.12679007649421692, "learning_rate": 1.4585351680859134e-05, "loss": 0.0015, "step": 9573 }, { "epoch": 9.205769230769231, "grad_norm": 0.22341644763946533, "learning_rate": 1.4584244643376063e-05, "loss": 0.0017, "step": 9574 }, { "epoch": 9.20673076923077, "grad_norm": 4.738874435424805, "learning_rate": 1.4583137534758968e-05, "loss": 0.0903, "step": 9575 }, { "epoch": 9.207692307692307, "grad_norm": 0.09749652445316315, "learning_rate": 1.4582030355025034e-05, "loss": 0.0007, "step": 9576 }, { "epoch": 9.208653846153846, "grad_norm": 0.657829999923706, "learning_rate": 1.4580923104191433e-05, "loss": 0.0043, "step": 9577 }, { "epoch": 9.209615384615384, "grad_norm": 2.3583552837371826, "learning_rate": 1.4579815782275352e-05, "loss": 0.0259, "step": 9578 }, { "epoch": 9.210576923076923, "grad_norm": 1.4478338956832886, "learning_rate": 1.4578708389293973e-05, "loss": 0.0093, "step": 9579 }, { "epoch": 9.211538461538462, "grad_norm": 3.294267177581787, "learning_rate": 1.4577600925264477e-05, "loss": 0.0338, "step": 9580 }, { "epoch": 9.2125, "grad_norm": 3.540255546569824, "learning_rate": 1.4576493390204048e-05, "loss": 0.0426, "step": 9581 }, { "epoch": 9.213461538461539, "grad_norm": 2.747627019882202, "learning_rate": 1.4575385784129873e-05, "loss": 0.0147, "step": 9582 }, { "epoch": 9.214423076923078, "grad_norm": 1.2026896476745605, "learning_rate": 1.4574278107059144e-05, "loss": 0.028, "step": 9583 }, { "epoch": 9.215384615384615, "grad_norm": 0.6230792999267578, "learning_rate": 1.4573170359009039e-05, "loss": 0.0058, "step": 9584 }, { "epoch": 9.216346153846153, "grad_norm": 1.5324316024780273, "learning_rate": 1.4572062539996755e-05, "loss": 0.0126, "step": 9585 }, { "epoch": 9.217307692307692, "grad_norm": 3.7092978954315186, "learning_rate": 1.4570954650039476e-05, "loss": 0.1104, "step": 9586 }, { "epoch": 9.21826923076923, "grad_norm": 1.7452316284179688, "learning_rate": 1.4569846689154401e-05, "loss": 0.0097, "step": 9587 }, { "epoch": 9.21923076923077, "grad_norm": 3.810964584350586, "learning_rate": 1.4568738657358715e-05, "loss": 0.0455, "step": 9588 }, { "epoch": 9.220192307692308, "grad_norm": 0.4350408911705017, "learning_rate": 1.4567630554669613e-05, "loss": 0.0033, "step": 9589 }, { "epoch": 9.221153846153847, "grad_norm": 1.292743444442749, "learning_rate": 1.4566522381104296e-05, "loss": 0.0045, "step": 9590 }, { "epoch": 9.222115384615385, "grad_norm": 0.8665007948875427, "learning_rate": 1.456541413667995e-05, "loss": 0.0056, "step": 9591 }, { "epoch": 9.223076923076922, "grad_norm": 0.18079593777656555, "learning_rate": 1.4564305821413776e-05, "loss": 0.0023, "step": 9592 }, { "epoch": 9.224038461538461, "grad_norm": 2.0173187255859375, "learning_rate": 1.4563197435322971e-05, "loss": 0.0263, "step": 9593 }, { "epoch": 9.225, "grad_norm": 0.3884493112564087, "learning_rate": 1.4562088978424738e-05, "loss": 0.0027, "step": 9594 }, { "epoch": 9.225961538461538, "grad_norm": 2.4638779163360596, "learning_rate": 1.456098045073627e-05, "loss": 0.015, "step": 9595 }, { "epoch": 9.226923076923077, "grad_norm": 0.5868209004402161, "learning_rate": 1.4559871852274773e-05, "loss": 0.0018, "step": 9596 }, { "epoch": 9.227884615384616, "grad_norm": 1.9380438327789307, "learning_rate": 1.455876318305745e-05, "loss": 0.0244, "step": 9597 }, { "epoch": 9.228846153846154, "grad_norm": 0.4369082748889923, "learning_rate": 1.4557654443101499e-05, "loss": 0.0027, "step": 9598 }, { "epoch": 9.229807692307693, "grad_norm": 1.3452821969985962, "learning_rate": 1.4556545632424128e-05, "loss": 0.0054, "step": 9599 }, { "epoch": 9.23076923076923, "grad_norm": 0.05280643329024315, "learning_rate": 1.4555436751042544e-05, "loss": 0.0004, "step": 9600 }, { "epoch": 9.231730769230769, "grad_norm": 0.15430834889411926, "learning_rate": 1.455432779897395e-05, "loss": 0.0011, "step": 9601 }, { "epoch": 9.232692307692307, "grad_norm": 0.2971585690975189, "learning_rate": 1.4553218776235558e-05, "loss": 0.0024, "step": 9602 }, { "epoch": 9.233653846153846, "grad_norm": 1.7391672134399414, "learning_rate": 1.4552109682844569e-05, "loss": 0.0664, "step": 9603 }, { "epoch": 9.234615384615385, "grad_norm": 5.726481914520264, "learning_rate": 1.4551000518818202e-05, "loss": 0.0877, "step": 9604 }, { "epoch": 9.235576923076923, "grad_norm": 2.650177240371704, "learning_rate": 1.4549891284173665e-05, "loss": 0.1646, "step": 9605 }, { "epoch": 9.236538461538462, "grad_norm": 0.070560522377491, "learning_rate": 1.4548781978928168e-05, "loss": 0.0007, "step": 9606 }, { "epoch": 9.2375, "grad_norm": 3.039350986480713, "learning_rate": 1.4547672603098922e-05, "loss": 0.012, "step": 9607 }, { "epoch": 9.238461538461538, "grad_norm": 2.065539598464966, "learning_rate": 1.4546563156703148e-05, "loss": 0.0317, "step": 9608 }, { "epoch": 9.239423076923076, "grad_norm": 1.135921835899353, "learning_rate": 1.4545453639758055e-05, "loss": 0.0088, "step": 9609 }, { "epoch": 9.240384615384615, "grad_norm": 1.8216451406478882, "learning_rate": 1.4544344052280863e-05, "loss": 0.0077, "step": 9610 }, { "epoch": 9.241346153846154, "grad_norm": 2.991258382797241, "learning_rate": 1.4543234394288792e-05, "loss": 0.0096, "step": 9611 }, { "epoch": 9.242307692307692, "grad_norm": 2.39970326423645, "learning_rate": 1.4542124665799055e-05, "loss": 0.0264, "step": 9612 }, { "epoch": 9.243269230769231, "grad_norm": 4.048579692840576, "learning_rate": 1.4541014866828875e-05, "loss": 0.036, "step": 9613 }, { "epoch": 9.24423076923077, "grad_norm": 0.47105762362480164, "learning_rate": 1.4539904997395468e-05, "loss": 0.0019, "step": 9614 }, { "epoch": 9.245192307692308, "grad_norm": 1.3271207809448242, "learning_rate": 1.4538795057516066e-05, "loss": 0.0086, "step": 9615 }, { "epoch": 9.246153846153845, "grad_norm": 1.1789863109588623, "learning_rate": 1.4537685047207884e-05, "loss": 0.0104, "step": 9616 }, { "epoch": 9.247115384615384, "grad_norm": 3.268521308898926, "learning_rate": 1.4536574966488143e-05, "loss": 0.0281, "step": 9617 }, { "epoch": 9.248076923076923, "grad_norm": 1.9423563480377197, "learning_rate": 1.4535464815374081e-05, "loss": 0.007, "step": 9618 }, { "epoch": 9.249038461538461, "grad_norm": 1.6121034622192383, "learning_rate": 1.453435459388291e-05, "loss": 0.0138, "step": 9619 }, { "epoch": 9.25, "grad_norm": 1.8638521432876587, "learning_rate": 1.4533244302031868e-05, "loss": 0.0102, "step": 9620 }, { "epoch": 9.250961538461539, "grad_norm": 2.0967562198638916, "learning_rate": 1.4532133939838178e-05, "loss": 0.018, "step": 9621 }, { "epoch": 9.251923076923077, "grad_norm": 0.029666634276509285, "learning_rate": 1.453102350731907e-05, "loss": 0.0003, "step": 9622 }, { "epoch": 9.252884615384616, "grad_norm": 1.1873242855072021, "learning_rate": 1.4529913004491779e-05, "loss": 0.0061, "step": 9623 }, { "epoch": 9.253846153846155, "grad_norm": 0.7948338985443115, "learning_rate": 1.452880243137353e-05, "loss": 0.0162, "step": 9624 }, { "epoch": 9.254807692307692, "grad_norm": 2.2177419662475586, "learning_rate": 1.452769178798156e-05, "loss": 0.0142, "step": 9625 }, { "epoch": 9.25576923076923, "grad_norm": 4.916788578033447, "learning_rate": 1.4526581074333101e-05, "loss": 0.0817, "step": 9626 }, { "epoch": 9.256730769230769, "grad_norm": 0.8869819641113281, "learning_rate": 1.4525470290445392e-05, "loss": 0.0054, "step": 9627 }, { "epoch": 9.257692307692308, "grad_norm": 0.2156190723180771, "learning_rate": 1.4524359436335662e-05, "loss": 0.001, "step": 9628 }, { "epoch": 9.258653846153846, "grad_norm": 3.0081467628479004, "learning_rate": 1.4523248512021155e-05, "loss": 0.0461, "step": 9629 }, { "epoch": 9.259615384615385, "grad_norm": 1.775615930557251, "learning_rate": 1.4522137517519107e-05, "loss": 0.0081, "step": 9630 }, { "epoch": 9.260576923076924, "grad_norm": 0.3901073932647705, "learning_rate": 1.4521026452846756e-05, "loss": 0.0016, "step": 9631 }, { "epoch": 9.261538461538462, "grad_norm": 1.456566572189331, "learning_rate": 1.4519915318021343e-05, "loss": 0.0186, "step": 9632 }, { "epoch": 9.2625, "grad_norm": 2.6334288120269775, "learning_rate": 1.4518804113060113e-05, "loss": 0.0274, "step": 9633 }, { "epoch": 9.263461538461538, "grad_norm": 3.010000467300415, "learning_rate": 1.4517692837980301e-05, "loss": 0.0758, "step": 9634 }, { "epoch": 9.264423076923077, "grad_norm": 0.21230866014957428, "learning_rate": 1.451658149279916e-05, "loss": 0.002, "step": 9635 }, { "epoch": 9.265384615384615, "grad_norm": 2.391254425048828, "learning_rate": 1.4515470077533928e-05, "loss": 0.0147, "step": 9636 }, { "epoch": 9.266346153846154, "grad_norm": 1.3014177083969116, "learning_rate": 1.4514358592201853e-05, "loss": 0.0071, "step": 9637 }, { "epoch": 9.267307692307693, "grad_norm": 3.6106812953948975, "learning_rate": 1.4513247036820184e-05, "loss": 0.059, "step": 9638 }, { "epoch": 9.268269230769231, "grad_norm": 1.6612064838409424, "learning_rate": 1.4512135411406166e-05, "loss": 0.0117, "step": 9639 }, { "epoch": 9.26923076923077, "grad_norm": 0.34125077724456787, "learning_rate": 1.4511023715977048e-05, "loss": 0.0023, "step": 9640 }, { "epoch": 9.270192307692307, "grad_norm": 2.0227956771850586, "learning_rate": 1.4509911950550084e-05, "loss": 0.0415, "step": 9641 }, { "epoch": 9.271153846153846, "grad_norm": 0.27443209290504456, "learning_rate": 1.450880011514252e-05, "loss": 0.0017, "step": 9642 }, { "epoch": 9.272115384615384, "grad_norm": 1.671933650970459, "learning_rate": 1.4507688209771613e-05, "loss": 0.0182, "step": 9643 }, { "epoch": 9.273076923076923, "grad_norm": 0.10810772329568863, "learning_rate": 1.4506576234454617e-05, "loss": 0.0008, "step": 9644 }, { "epoch": 9.274038461538462, "grad_norm": 1.0039681196212769, "learning_rate": 1.4505464189208782e-05, "loss": 0.0063, "step": 9645 }, { "epoch": 9.275, "grad_norm": 0.16694629192352295, "learning_rate": 1.4504352074051366e-05, "loss": 0.0016, "step": 9646 }, { "epoch": 9.275961538461539, "grad_norm": 0.4777624011039734, "learning_rate": 1.4503239888999625e-05, "loss": 0.0023, "step": 9647 }, { "epoch": 9.276923076923078, "grad_norm": 2.7198777198791504, "learning_rate": 1.4502127634070821e-05, "loss": 0.022, "step": 9648 }, { "epoch": 9.277884615384615, "grad_norm": 2.081690549850464, "learning_rate": 1.4501015309282209e-05, "loss": 0.0231, "step": 9649 }, { "epoch": 9.278846153846153, "grad_norm": 0.9537684917449951, "learning_rate": 1.4499902914651047e-05, "loss": 0.0058, "step": 9650 }, { "epoch": 9.279807692307692, "grad_norm": 1.3738722801208496, "learning_rate": 1.44987904501946e-05, "loss": 0.0089, "step": 9651 }, { "epoch": 9.28076923076923, "grad_norm": 1.8872315883636475, "learning_rate": 1.449767791593013e-05, "loss": 0.0536, "step": 9652 }, { "epoch": 9.28173076923077, "grad_norm": 3.43322491645813, "learning_rate": 1.4496565311874902e-05, "loss": 0.0328, "step": 9653 }, { "epoch": 9.282692307692308, "grad_norm": 1.5649317502975464, "learning_rate": 1.449545263804617e-05, "loss": 0.0071, "step": 9654 }, { "epoch": 9.283653846153847, "grad_norm": 2.481928825378418, "learning_rate": 1.4494339894461212e-05, "loss": 0.0279, "step": 9655 }, { "epoch": 9.284615384615385, "grad_norm": 3.3779566287994385, "learning_rate": 1.4493227081137289e-05, "loss": 0.0354, "step": 9656 }, { "epoch": 9.285576923076922, "grad_norm": 0.3016846776008606, "learning_rate": 1.4492114198091672e-05, "loss": 0.0034, "step": 9657 }, { "epoch": 9.286538461538461, "grad_norm": 2.489574670791626, "learning_rate": 1.4491001245341621e-05, "loss": 0.0178, "step": 9658 }, { "epoch": 9.2875, "grad_norm": 0.4122263789176941, "learning_rate": 1.4489888222904415e-05, "loss": 0.0027, "step": 9659 }, { "epoch": 9.288461538461538, "grad_norm": 0.4483107328414917, "learning_rate": 1.4488775130797321e-05, "loss": 0.0027, "step": 9660 }, { "epoch": 9.289423076923077, "grad_norm": 2.117947816848755, "learning_rate": 1.4487661969037615e-05, "loss": 0.0528, "step": 9661 }, { "epoch": 9.290384615384616, "grad_norm": 0.7908726930618286, "learning_rate": 1.4486548737642561e-05, "loss": 0.0033, "step": 9662 }, { "epoch": 9.291346153846154, "grad_norm": 1.6155202388763428, "learning_rate": 1.448543543662944e-05, "loss": 0.0083, "step": 9663 }, { "epoch": 9.292307692307693, "grad_norm": 0.8977464437484741, "learning_rate": 1.4484322066015527e-05, "loss": 0.0046, "step": 9664 }, { "epoch": 9.29326923076923, "grad_norm": 10.76927375793457, "learning_rate": 1.4483208625818095e-05, "loss": 0.0104, "step": 9665 }, { "epoch": 9.294230769230769, "grad_norm": 2.5119619369506836, "learning_rate": 1.4482095116054421e-05, "loss": 0.0311, "step": 9666 }, { "epoch": 9.295192307692307, "grad_norm": 1.5794841051101685, "learning_rate": 1.4480981536741792e-05, "loss": 0.0111, "step": 9667 }, { "epoch": 9.296153846153846, "grad_norm": 1.844712734222412, "learning_rate": 1.4479867887897476e-05, "loss": 0.0189, "step": 9668 }, { "epoch": 9.297115384615385, "grad_norm": 2.250725507736206, "learning_rate": 1.447875416953876e-05, "loss": 0.017, "step": 9669 }, { "epoch": 9.298076923076923, "grad_norm": 1.6275883913040161, "learning_rate": 1.4477640381682925e-05, "loss": 0.0147, "step": 9670 }, { "epoch": 9.299038461538462, "grad_norm": 2.0526018142700195, "learning_rate": 1.4476526524347254e-05, "loss": 0.0327, "step": 9671 }, { "epoch": 9.3, "grad_norm": 0.1372869312763214, "learning_rate": 1.4475412597549028e-05, "loss": 0.0007, "step": 9672 }, { "epoch": 9.300961538461538, "grad_norm": 2.690459966659546, "learning_rate": 1.4474298601305532e-05, "loss": 0.0323, "step": 9673 }, { "epoch": 9.301923076923076, "grad_norm": 0.43745431303977966, "learning_rate": 1.4473184535634058e-05, "loss": 0.002, "step": 9674 }, { "epoch": 9.302884615384615, "grad_norm": 0.7211487293243408, "learning_rate": 1.4472070400551887e-05, "loss": 0.0052, "step": 9675 }, { "epoch": 9.303846153846154, "grad_norm": 0.5083014965057373, "learning_rate": 1.4470956196076307e-05, "loss": 0.0038, "step": 9676 }, { "epoch": 9.304807692307692, "grad_norm": 0.25500890612602234, "learning_rate": 1.446984192222461e-05, "loss": 0.0015, "step": 9677 }, { "epoch": 9.305769230769231, "grad_norm": 2.2157788276672363, "learning_rate": 1.4468727579014086e-05, "loss": 0.0184, "step": 9678 }, { "epoch": 9.30673076923077, "grad_norm": 1.1909149885177612, "learning_rate": 1.4467613166462024e-05, "loss": 0.0062, "step": 9679 }, { "epoch": 9.307692307692308, "grad_norm": 2.7661399841308594, "learning_rate": 1.4466498684585717e-05, "loss": 0.0271, "step": 9680 }, { "epoch": 9.308653846153845, "grad_norm": 5.3309807777404785, "learning_rate": 1.4465384133402463e-05, "loss": 0.0833, "step": 9681 }, { "epoch": 9.309615384615384, "grad_norm": 0.12538976967334747, "learning_rate": 1.446426951292955e-05, "loss": 0.0009, "step": 9682 }, { "epoch": 9.310576923076923, "grad_norm": 0.2564556896686554, "learning_rate": 1.446315482318428e-05, "loss": 0.0016, "step": 9683 }, { "epoch": 9.311538461538461, "grad_norm": 0.6707826256752014, "learning_rate": 1.4462040064183943e-05, "loss": 0.0023, "step": 9684 }, { "epoch": 9.3125, "grad_norm": 1.998818039894104, "learning_rate": 1.446092523594584e-05, "loss": 0.014, "step": 9685 }, { "epoch": 9.313461538461539, "grad_norm": 3.689727544784546, "learning_rate": 1.4459810338487272e-05, "loss": 0.055, "step": 9686 }, { "epoch": 9.314423076923077, "grad_norm": 2.007019519805908, "learning_rate": 1.4458695371825535e-05, "loss": 0.0178, "step": 9687 }, { "epoch": 9.315384615384616, "grad_norm": 0.5670676231384277, "learning_rate": 1.4457580335977935e-05, "loss": 0.0031, "step": 9688 }, { "epoch": 9.316346153846155, "grad_norm": 2.127685308456421, "learning_rate": 1.4456465230961769e-05, "loss": 0.0076, "step": 9689 }, { "epoch": 9.317307692307692, "grad_norm": 0.409994900226593, "learning_rate": 1.4455350056794342e-05, "loss": 0.0025, "step": 9690 }, { "epoch": 9.31826923076923, "grad_norm": 0.10566926747560501, "learning_rate": 1.4454234813492958e-05, "loss": 0.0009, "step": 9691 }, { "epoch": 9.319230769230769, "grad_norm": 1.7518779039382935, "learning_rate": 1.4453119501074924e-05, "loss": 0.0089, "step": 9692 }, { "epoch": 9.320192307692308, "grad_norm": 4.121770858764648, "learning_rate": 1.4452004119557543e-05, "loss": 0.13, "step": 9693 }, { "epoch": 9.321153846153846, "grad_norm": 2.5493392944335938, "learning_rate": 1.4450888668958127e-05, "loss": 0.0746, "step": 9694 }, { "epoch": 9.322115384615385, "grad_norm": 3.1599624156951904, "learning_rate": 1.444977314929398e-05, "loss": 0.0243, "step": 9695 }, { "epoch": 9.323076923076924, "grad_norm": 2.2434730529785156, "learning_rate": 1.4448657560582415e-05, "loss": 0.055, "step": 9696 }, { "epoch": 9.324038461538462, "grad_norm": 0.7062729001045227, "learning_rate": 1.4447541902840742e-05, "loss": 0.0038, "step": 9697 }, { "epoch": 9.325, "grad_norm": 0.11022811383008957, "learning_rate": 1.444642617608627e-05, "loss": 0.0005, "step": 9698 }, { "epoch": 9.325961538461538, "grad_norm": 1.101283073425293, "learning_rate": 1.4445310380336316e-05, "loss": 0.0061, "step": 9699 }, { "epoch": 9.326923076923077, "grad_norm": 2.0628674030303955, "learning_rate": 1.4444194515608193e-05, "loss": 0.0119, "step": 9700 }, { "epoch": 9.327884615384615, "grad_norm": 1.9222774505615234, "learning_rate": 1.4443078581919213e-05, "loss": 0.0291, "step": 9701 }, { "epoch": 9.328846153846154, "grad_norm": 1.1407470703125, "learning_rate": 1.4441962579286691e-05, "loss": 0.0059, "step": 9702 }, { "epoch": 9.329807692307693, "grad_norm": 0.803971529006958, "learning_rate": 1.4440846507727949e-05, "loss": 0.0041, "step": 9703 }, { "epoch": 9.330769230769231, "grad_norm": 0.8554639220237732, "learning_rate": 1.4439730367260303e-05, "loss": 0.0105, "step": 9704 }, { "epoch": 9.33173076923077, "grad_norm": 2.3491106033325195, "learning_rate": 1.4438614157901073e-05, "loss": 0.0171, "step": 9705 }, { "epoch": 9.332692307692307, "grad_norm": 0.35749027132987976, "learning_rate": 1.4437497879667575e-05, "loss": 0.0021, "step": 9706 }, { "epoch": 9.333653846153846, "grad_norm": 3.542217254638672, "learning_rate": 1.4436381532577137e-05, "loss": 0.0988, "step": 9707 }, { "epoch": 9.334615384615384, "grad_norm": 0.09947758167982101, "learning_rate": 1.4435265116647079e-05, "loss": 0.0008, "step": 9708 }, { "epoch": 9.335576923076923, "grad_norm": 2.6526694297790527, "learning_rate": 1.443414863189472e-05, "loss": 0.0132, "step": 9709 }, { "epoch": 9.336538461538462, "grad_norm": 0.07566782087087631, "learning_rate": 1.443303207833739e-05, "loss": 0.0003, "step": 9710 }, { "epoch": 9.3375, "grad_norm": 5.027685642242432, "learning_rate": 1.4431915455992416e-05, "loss": 0.0405, "step": 9711 }, { "epoch": 9.338461538461539, "grad_norm": 10.898300170898438, "learning_rate": 1.4430798764877118e-05, "loss": 0.0988, "step": 9712 }, { "epoch": 9.339423076923078, "grad_norm": 2.698707103729248, "learning_rate": 1.4429682005008827e-05, "loss": 0.0402, "step": 9713 }, { "epoch": 9.340384615384615, "grad_norm": 2.8966898918151855, "learning_rate": 1.4428565176404874e-05, "loss": 0.0214, "step": 9714 }, { "epoch": 9.341346153846153, "grad_norm": 0.8580484390258789, "learning_rate": 1.4427448279082587e-05, "loss": 0.0105, "step": 9715 }, { "epoch": 9.342307692307692, "grad_norm": 1.0450623035430908, "learning_rate": 1.4426331313059299e-05, "loss": 0.0033, "step": 9716 }, { "epoch": 9.34326923076923, "grad_norm": 3.7865805625915527, "learning_rate": 1.4425214278352335e-05, "loss": 0.0614, "step": 9717 }, { "epoch": 9.34423076923077, "grad_norm": 3.0472941398620605, "learning_rate": 1.4424097174979038e-05, "loss": 0.0472, "step": 9718 }, { "epoch": 9.345192307692308, "grad_norm": 0.945318877696991, "learning_rate": 1.4422980002956737e-05, "loss": 0.0051, "step": 9719 }, { "epoch": 9.346153846153847, "grad_norm": 1.063297152519226, "learning_rate": 1.4421862762302765e-05, "loss": 0.0077, "step": 9720 }, { "epoch": 9.347115384615385, "grad_norm": 1.7420432567596436, "learning_rate": 1.4420745453034461e-05, "loss": 0.0146, "step": 9721 }, { "epoch": 9.348076923076922, "grad_norm": 4.534862041473389, "learning_rate": 1.4419628075169165e-05, "loss": 0.0322, "step": 9722 }, { "epoch": 9.349038461538461, "grad_norm": 0.3531563878059387, "learning_rate": 1.4418510628724212e-05, "loss": 0.0013, "step": 9723 }, { "epoch": 9.35, "grad_norm": 1.5429092645645142, "learning_rate": 1.441739311371694e-05, "loss": 0.0188, "step": 9724 }, { "epoch": 9.350961538461538, "grad_norm": 2.3303236961364746, "learning_rate": 1.4416275530164694e-05, "loss": 0.0604, "step": 9725 }, { "epoch": 9.351923076923077, "grad_norm": 3.1761953830718994, "learning_rate": 1.4415157878084814e-05, "loss": 0.0446, "step": 9726 }, { "epoch": 9.352884615384616, "grad_norm": 2.330573081970215, "learning_rate": 1.441404015749464e-05, "loss": 0.0502, "step": 9727 }, { "epoch": 9.353846153846154, "grad_norm": 3.2408440113067627, "learning_rate": 1.4412922368411518e-05, "loss": 0.0888, "step": 9728 }, { "epoch": 9.354807692307693, "grad_norm": 0.08157851547002792, "learning_rate": 1.4411804510852793e-05, "loss": 0.0006, "step": 9729 }, { "epoch": 9.35576923076923, "grad_norm": 2.5976152420043945, "learning_rate": 1.4410686584835812e-05, "loss": 0.0441, "step": 9730 }, { "epoch": 9.356730769230769, "grad_norm": 3.2229604721069336, "learning_rate": 1.4409568590377918e-05, "loss": 0.1014, "step": 9731 }, { "epoch": 9.357692307692307, "grad_norm": 2.2586047649383545, "learning_rate": 1.4408450527496462e-05, "loss": 0.1159, "step": 9732 }, { "epoch": 9.358653846153846, "grad_norm": 1.433572769165039, "learning_rate": 1.4407332396208795e-05, "loss": 0.0119, "step": 9733 }, { "epoch": 9.359615384615385, "grad_norm": 1.4721447229385376, "learning_rate": 1.4406214196532262e-05, "loss": 0.0108, "step": 9734 }, { "epoch": 9.360576923076923, "grad_norm": 0.8040663599967957, "learning_rate": 1.4405095928484217e-05, "loss": 0.0052, "step": 9735 }, { "epoch": 9.361538461538462, "grad_norm": 1.7173651456832886, "learning_rate": 1.4403977592082012e-05, "loss": 0.0548, "step": 9736 }, { "epoch": 9.3625, "grad_norm": 4.155104160308838, "learning_rate": 1.4402859187343002e-05, "loss": 0.0252, "step": 9737 }, { "epoch": 9.363461538461538, "grad_norm": 1.9300333261489868, "learning_rate": 1.4401740714284535e-05, "loss": 0.023, "step": 9738 }, { "epoch": 9.364423076923076, "grad_norm": 0.9459841847419739, "learning_rate": 1.4400622172923978e-05, "loss": 0.0064, "step": 9739 }, { "epoch": 9.365384615384615, "grad_norm": 0.7167165875434875, "learning_rate": 1.4399503563278673e-05, "loss": 0.0049, "step": 9740 }, { "epoch": 9.366346153846154, "grad_norm": 0.3498237729072571, "learning_rate": 1.4398384885365993e-05, "loss": 0.0023, "step": 9741 }, { "epoch": 9.367307692307692, "grad_norm": 3.754148006439209, "learning_rate": 1.4397266139203282e-05, "loss": 0.1018, "step": 9742 }, { "epoch": 9.368269230769231, "grad_norm": 1.7761551141738892, "learning_rate": 1.439614732480791e-05, "loss": 0.0184, "step": 9743 }, { "epoch": 9.36923076923077, "grad_norm": 3.9191782474517822, "learning_rate": 1.4395028442197231e-05, "loss": 0.0175, "step": 9744 }, { "epoch": 9.370192307692308, "grad_norm": 3.4026989936828613, "learning_rate": 1.4393909491388613e-05, "loss": 0.0513, "step": 9745 }, { "epoch": 9.371153846153845, "grad_norm": 1.2240586280822754, "learning_rate": 1.4392790472399414e-05, "loss": 0.0066, "step": 9746 }, { "epoch": 9.372115384615384, "grad_norm": 3.088376998901367, "learning_rate": 1.4391671385247002e-05, "loss": 0.0474, "step": 9747 }, { "epoch": 9.373076923076923, "grad_norm": 2.2657907009124756, "learning_rate": 1.4390552229948738e-05, "loss": 0.0239, "step": 9748 }, { "epoch": 9.374038461538461, "grad_norm": 2.686936378479004, "learning_rate": 1.4389433006521986e-05, "loss": 0.0373, "step": 9749 }, { "epoch": 9.375, "grad_norm": 1.1583261489868164, "learning_rate": 1.438831371498412e-05, "loss": 0.0058, "step": 9750 }, { "epoch": 9.375961538461539, "grad_norm": 2.0428481101989746, "learning_rate": 1.4387194355352506e-05, "loss": 0.037, "step": 9751 }, { "epoch": 9.376923076923077, "grad_norm": 2.980980634689331, "learning_rate": 1.438607492764451e-05, "loss": 0.0595, "step": 9752 }, { "epoch": 9.377884615384616, "grad_norm": 1.2891000509262085, "learning_rate": 1.4384955431877503e-05, "loss": 0.0298, "step": 9753 }, { "epoch": 9.378846153846155, "grad_norm": 4.036936283111572, "learning_rate": 1.438383586806886e-05, "loss": 0.06, "step": 9754 }, { "epoch": 9.379807692307692, "grad_norm": 3.9548757076263428, "learning_rate": 1.438271623623595e-05, "loss": 0.1326, "step": 9755 }, { "epoch": 9.38076923076923, "grad_norm": 1.2870041131973267, "learning_rate": 1.4381596536396147e-05, "loss": 0.0174, "step": 9756 }, { "epoch": 9.381730769230769, "grad_norm": 2.4495482444763184, "learning_rate": 1.4380476768566825e-05, "loss": 0.0429, "step": 9757 }, { "epoch": 9.382692307692308, "grad_norm": 2.8869969844818115, "learning_rate": 1.4379356932765361e-05, "loss": 0.0501, "step": 9758 }, { "epoch": 9.383653846153846, "grad_norm": 2.903040885925293, "learning_rate": 1.437823702900913e-05, "loss": 0.0182, "step": 9759 }, { "epoch": 9.384615384615385, "grad_norm": 3.9159133434295654, "learning_rate": 1.4377117057315509e-05, "loss": 0.0562, "step": 9760 }, { "epoch": 9.385576923076924, "grad_norm": 1.6715607643127441, "learning_rate": 1.437599701770188e-05, "loss": 0.0177, "step": 9761 }, { "epoch": 9.386538461538462, "grad_norm": 1.3241645097732544, "learning_rate": 1.437487691018562e-05, "loss": 0.0112, "step": 9762 }, { "epoch": 9.3875, "grad_norm": 0.08564791828393936, "learning_rate": 1.437375673478411e-05, "loss": 0.0006, "step": 9763 }, { "epoch": 9.388461538461538, "grad_norm": 1.584805965423584, "learning_rate": 1.437263649151473e-05, "loss": 0.0139, "step": 9764 }, { "epoch": 9.389423076923077, "grad_norm": 8.209786415100098, "learning_rate": 1.4371516180394868e-05, "loss": 0.1273, "step": 9765 }, { "epoch": 9.390384615384615, "grad_norm": 2.9692492485046387, "learning_rate": 1.4370395801441906e-05, "loss": 0.0425, "step": 9766 }, { "epoch": 9.391346153846154, "grad_norm": 2.958601713180542, "learning_rate": 1.4369275354673227e-05, "loss": 0.04, "step": 9767 }, { "epoch": 9.392307692307693, "grad_norm": 6.6933135986328125, "learning_rate": 1.4368154840106218e-05, "loss": 0.0156, "step": 9768 }, { "epoch": 9.393269230769231, "grad_norm": 2.121518611907959, "learning_rate": 1.4367034257758268e-05, "loss": 0.0263, "step": 9769 }, { "epoch": 9.39423076923077, "grad_norm": 2.4711825847625732, "learning_rate": 1.4365913607646762e-05, "loss": 0.0396, "step": 9770 }, { "epoch": 9.395192307692307, "grad_norm": 0.4686044454574585, "learning_rate": 1.4364792889789088e-05, "loss": 0.0029, "step": 9771 }, { "epoch": 9.396153846153846, "grad_norm": 0.7717816233634949, "learning_rate": 1.4363672104202641e-05, "loss": 0.0052, "step": 9772 }, { "epoch": 9.397115384615384, "grad_norm": 0.5855304002761841, "learning_rate": 1.4362551250904813e-05, "loss": 0.0023, "step": 9773 }, { "epoch": 9.398076923076923, "grad_norm": 5.646136283874512, "learning_rate": 1.436143032991299e-05, "loss": 0.0115, "step": 9774 }, { "epoch": 9.399038461538462, "grad_norm": 0.5753064155578613, "learning_rate": 1.436030934124457e-05, "loss": 0.0047, "step": 9775 }, { "epoch": 9.4, "grad_norm": 0.8347327709197998, "learning_rate": 1.4359188284916945e-05, "loss": 0.005, "step": 9776 }, { "epoch": 9.400961538461539, "grad_norm": 2.0588901042938232, "learning_rate": 1.4358067160947516e-05, "loss": 0.0155, "step": 9777 }, { "epoch": 9.401923076923078, "grad_norm": 0.6094497442245483, "learning_rate": 1.4356945969353671e-05, "loss": 0.0028, "step": 9778 }, { "epoch": 9.402884615384615, "grad_norm": 0.5777886509895325, "learning_rate": 1.4355824710152812e-05, "loss": 0.0043, "step": 9779 }, { "epoch": 9.403846153846153, "grad_norm": 3.187150716781616, "learning_rate": 1.4354703383362343e-05, "loss": 0.0385, "step": 9780 }, { "epoch": 9.404807692307692, "grad_norm": 1.9804221391677856, "learning_rate": 1.4353581988999654e-05, "loss": 0.0176, "step": 9781 }, { "epoch": 9.40576923076923, "grad_norm": 4.4598388671875, "learning_rate": 1.4352460527082151e-05, "loss": 0.0756, "step": 9782 }, { "epoch": 9.40673076923077, "grad_norm": 2.8459911346435547, "learning_rate": 1.4351338997627233e-05, "loss": 0.017, "step": 9783 }, { "epoch": 9.407692307692308, "grad_norm": 3.9532077312469482, "learning_rate": 1.4350217400652308e-05, "loss": 0.0626, "step": 9784 }, { "epoch": 9.408653846153847, "grad_norm": 7.1901774406433105, "learning_rate": 1.4349095736174777e-05, "loss": 0.1947, "step": 9785 }, { "epoch": 9.409615384615385, "grad_norm": 1.237699031829834, "learning_rate": 1.434797400421204e-05, "loss": 0.0084, "step": 9786 }, { "epoch": 9.410576923076922, "grad_norm": 1.3901591300964355, "learning_rate": 1.4346852204781511e-05, "loss": 0.019, "step": 9787 }, { "epoch": 9.411538461538461, "grad_norm": 0.5068207383155823, "learning_rate": 1.4345730337900593e-05, "loss": 0.004, "step": 9788 }, { "epoch": 9.4125, "grad_norm": 1.5683444738388062, "learning_rate": 1.4344608403586695e-05, "loss": 0.0044, "step": 9789 }, { "epoch": 9.413461538461538, "grad_norm": 2.9544458389282227, "learning_rate": 1.4343486401857225e-05, "loss": 0.0163, "step": 9790 }, { "epoch": 9.414423076923077, "grad_norm": 0.8716723322868347, "learning_rate": 1.4342364332729597e-05, "loss": 0.0068, "step": 9791 }, { "epoch": 9.415384615384616, "grad_norm": 0.2460877150297165, "learning_rate": 1.4341242196221217e-05, "loss": 0.0016, "step": 9792 }, { "epoch": 9.416346153846154, "grad_norm": 0.6318602561950684, "learning_rate": 1.4340119992349497e-05, "loss": 0.0035, "step": 9793 }, { "epoch": 9.417307692307693, "grad_norm": 0.8911585211753845, "learning_rate": 1.4338997721131858e-05, "loss": 0.0037, "step": 9794 }, { "epoch": 9.41826923076923, "grad_norm": 1.0708093643188477, "learning_rate": 1.4337875382585706e-05, "loss": 0.0062, "step": 9795 }, { "epoch": 9.419230769230769, "grad_norm": 4.7510223388671875, "learning_rate": 1.433675297672846e-05, "loss": 0.1292, "step": 9796 }, { "epoch": 9.420192307692307, "grad_norm": 4.048461437225342, "learning_rate": 1.4335630503577536e-05, "loss": 0.0324, "step": 9797 }, { "epoch": 9.421153846153846, "grad_norm": 3.4164140224456787, "learning_rate": 1.433450796315035e-05, "loss": 0.0303, "step": 9798 }, { "epoch": 9.422115384615385, "grad_norm": 3.003577470779419, "learning_rate": 1.4333385355464327e-05, "loss": 0.0268, "step": 9799 }, { "epoch": 9.423076923076923, "grad_norm": 3.4375648498535156, "learning_rate": 1.4332262680536874e-05, "loss": 0.0922, "step": 9800 }, { "epoch": 9.424038461538462, "grad_norm": 1.9168223142623901, "learning_rate": 1.4331139938385426e-05, "loss": 0.0174, "step": 9801 }, { "epoch": 9.425, "grad_norm": 1.867607831954956, "learning_rate": 1.4330017129027395e-05, "loss": 0.0311, "step": 9802 }, { "epoch": 9.425961538461538, "grad_norm": 4.012792587280273, "learning_rate": 1.4328894252480208e-05, "loss": 0.0557, "step": 9803 }, { "epoch": 9.426923076923076, "grad_norm": 2.4251155853271484, "learning_rate": 1.4327771308761284e-05, "loss": 0.0256, "step": 9804 }, { "epoch": 9.427884615384615, "grad_norm": 1.8536051511764526, "learning_rate": 1.4326648297888054e-05, "loss": 0.0654, "step": 9805 }, { "epoch": 9.428846153846154, "grad_norm": 1.5947723388671875, "learning_rate": 1.4325525219877942e-05, "loss": 0.0196, "step": 9806 }, { "epoch": 9.429807692307692, "grad_norm": 0.29367005825042725, "learning_rate": 1.432440207474837e-05, "loss": 0.0018, "step": 9807 }, { "epoch": 9.430769230769231, "grad_norm": 3.0466997623443604, "learning_rate": 1.4323278862516774e-05, "loss": 0.0417, "step": 9808 }, { "epoch": 9.43173076923077, "grad_norm": 1.531313419342041, "learning_rate": 1.4322155583200577e-05, "loss": 0.0099, "step": 9809 }, { "epoch": 9.432692307692308, "grad_norm": 1.7208001613616943, "learning_rate": 1.432103223681721e-05, "loss": 0.0162, "step": 9810 }, { "epoch": 9.433653846153845, "grad_norm": 2.9678685665130615, "learning_rate": 1.4319908823384103e-05, "loss": 0.0717, "step": 9811 }, { "epoch": 9.434615384615384, "grad_norm": 2.0967257022857666, "learning_rate": 1.4318785342918693e-05, "loss": 0.0236, "step": 9812 }, { "epoch": 9.435576923076923, "grad_norm": 3.2336769104003906, "learning_rate": 1.431766179543841e-05, "loss": 0.0341, "step": 9813 }, { "epoch": 9.436538461538461, "grad_norm": 4.338350296020508, "learning_rate": 1.4316538180960688e-05, "loss": 0.0913, "step": 9814 }, { "epoch": 9.4375, "grad_norm": 5.951183319091797, "learning_rate": 1.431541449950296e-05, "loss": 0.0537, "step": 9815 }, { "epoch": 9.438461538461539, "grad_norm": 1.184281349182129, "learning_rate": 1.4314290751082667e-05, "loss": 0.014, "step": 9816 }, { "epoch": 9.439423076923077, "grad_norm": 0.5364010334014893, "learning_rate": 1.4313166935717243e-05, "loss": 0.0068, "step": 9817 }, { "epoch": 9.440384615384616, "grad_norm": 1.5147229433059692, "learning_rate": 1.4312043053424127e-05, "loss": 0.0234, "step": 9818 }, { "epoch": 9.441346153846155, "grad_norm": 1.4450876712799072, "learning_rate": 1.4310919104220759e-05, "loss": 0.0129, "step": 9819 }, { "epoch": 9.442307692307692, "grad_norm": 1.0042210817337036, "learning_rate": 1.4309795088124579e-05, "loss": 0.0059, "step": 9820 }, { "epoch": 9.44326923076923, "grad_norm": 0.8993890285491943, "learning_rate": 1.4308671005153028e-05, "loss": 0.0104, "step": 9821 }, { "epoch": 9.444230769230769, "grad_norm": 0.2839755117893219, "learning_rate": 1.4307546855323549e-05, "loss": 0.0011, "step": 9822 }, { "epoch": 9.445192307692308, "grad_norm": 2.4219484329223633, "learning_rate": 1.4306422638653583e-05, "loss": 0.0179, "step": 9823 }, { "epoch": 9.446153846153846, "grad_norm": 2.618136167526245, "learning_rate": 1.4305298355160579e-05, "loss": 0.0065, "step": 9824 }, { "epoch": 9.447115384615385, "grad_norm": 0.7702177166938782, "learning_rate": 1.4304174004861981e-05, "loss": 0.006, "step": 9825 }, { "epoch": 9.448076923076924, "grad_norm": 2.7727131843566895, "learning_rate": 1.4303049587775233e-05, "loss": 0.0564, "step": 9826 }, { "epoch": 9.449038461538462, "grad_norm": 2.6505863666534424, "learning_rate": 1.4301925103917786e-05, "loss": 0.0614, "step": 9827 }, { "epoch": 9.45, "grad_norm": 1.6203128099441528, "learning_rate": 1.4300800553307089e-05, "loss": 0.018, "step": 9828 }, { "epoch": 9.450961538461538, "grad_norm": 1.4432238340377808, "learning_rate": 1.4299675935960586e-05, "loss": 0.0152, "step": 9829 }, { "epoch": 9.451923076923077, "grad_norm": 4.083882808685303, "learning_rate": 1.4298551251895737e-05, "loss": 0.0446, "step": 9830 }, { "epoch": 9.452884615384615, "grad_norm": 3.081073760986328, "learning_rate": 1.4297426501129985e-05, "loss": 0.0678, "step": 9831 }, { "epoch": 9.453846153846154, "grad_norm": 0.9375067949295044, "learning_rate": 1.4296301683680788e-05, "loss": 0.0099, "step": 9832 }, { "epoch": 9.454807692307693, "grad_norm": 0.2378683239221573, "learning_rate": 1.4295176799565598e-05, "loss": 0.0022, "step": 9833 }, { "epoch": 9.455769230769231, "grad_norm": 3.359987258911133, "learning_rate": 1.4294051848801869e-05, "loss": 0.0758, "step": 9834 }, { "epoch": 9.45673076923077, "grad_norm": 1.5867524147033691, "learning_rate": 1.429292683140706e-05, "loss": 0.0139, "step": 9835 }, { "epoch": 9.457692307692307, "grad_norm": 1.0725772380828857, "learning_rate": 1.4291801747398625e-05, "loss": 0.0081, "step": 9836 }, { "epoch": 9.458653846153846, "grad_norm": 2.1467087268829346, "learning_rate": 1.4290676596794024e-05, "loss": 0.0289, "step": 9837 }, { "epoch": 9.459615384615384, "grad_norm": 0.26538363099098206, "learning_rate": 1.4289551379610716e-05, "loss": 0.0019, "step": 9838 }, { "epoch": 9.460576923076923, "grad_norm": 2.6602256298065186, "learning_rate": 1.4288426095866159e-05, "loss": 0.0298, "step": 9839 }, { "epoch": 9.461538461538462, "grad_norm": 1.3796306848526, "learning_rate": 1.4287300745577812e-05, "loss": 0.0241, "step": 9840 }, { "epoch": 9.4625, "grad_norm": 1.6201649904251099, "learning_rate": 1.4286175328763145e-05, "loss": 0.0106, "step": 9841 }, { "epoch": 9.463461538461539, "grad_norm": 2.129194736480713, "learning_rate": 1.4285049845439618e-05, "loss": 0.0301, "step": 9842 }, { "epoch": 9.464423076923078, "grad_norm": 1.764377236366272, "learning_rate": 1.4283924295624691e-05, "loss": 0.0163, "step": 9843 }, { "epoch": 9.465384615384615, "grad_norm": 1.6272799968719482, "learning_rate": 1.4282798679335832e-05, "loss": 0.007, "step": 9844 }, { "epoch": 9.466346153846153, "grad_norm": 0.3902212679386139, "learning_rate": 1.4281672996590508e-05, "loss": 0.0018, "step": 9845 }, { "epoch": 9.467307692307692, "grad_norm": 2.0138070583343506, "learning_rate": 1.4280547247406186e-05, "loss": 0.0234, "step": 9846 }, { "epoch": 9.46826923076923, "grad_norm": 4.733522891998291, "learning_rate": 1.4279421431800336e-05, "loss": 0.1051, "step": 9847 }, { "epoch": 9.46923076923077, "grad_norm": 0.7691282033920288, "learning_rate": 1.4278295549790419e-05, "loss": 0.0054, "step": 9848 }, { "epoch": 9.470192307692308, "grad_norm": 4.059843063354492, "learning_rate": 1.4277169601393916e-05, "loss": 0.0626, "step": 9849 }, { "epoch": 9.471153846153847, "grad_norm": 2.823333501815796, "learning_rate": 1.4276043586628295e-05, "loss": 0.0655, "step": 9850 }, { "epoch": 9.472115384615385, "grad_norm": 0.5633649826049805, "learning_rate": 1.4274917505511026e-05, "loss": 0.0041, "step": 9851 }, { "epoch": 9.473076923076922, "grad_norm": 2.237539768218994, "learning_rate": 1.4273791358059583e-05, "loss": 0.0247, "step": 9852 }, { "epoch": 9.474038461538461, "grad_norm": 1.992992877960205, "learning_rate": 1.4272665144291445e-05, "loss": 0.0259, "step": 9853 }, { "epoch": 9.475, "grad_norm": 0.8864151239395142, "learning_rate": 1.4271538864224082e-05, "loss": 0.0066, "step": 9854 }, { "epoch": 9.475961538461538, "grad_norm": 0.9363168478012085, "learning_rate": 1.4270412517874973e-05, "loss": 0.0073, "step": 9855 }, { "epoch": 9.476923076923077, "grad_norm": 1.848113775253296, "learning_rate": 1.4269286105261595e-05, "loss": 0.0175, "step": 9856 }, { "epoch": 9.477884615384616, "grad_norm": 2.0273356437683105, "learning_rate": 1.4268159626401428e-05, "loss": 0.0181, "step": 9857 }, { "epoch": 9.478846153846154, "grad_norm": 3.277421474456787, "learning_rate": 1.4267033081311947e-05, "loss": 0.055, "step": 9858 }, { "epoch": 9.479807692307693, "grad_norm": 0.5584831237792969, "learning_rate": 1.426590647001064e-05, "loss": 0.0043, "step": 9859 }, { "epoch": 9.48076923076923, "grad_norm": 2.4886434078216553, "learning_rate": 1.4264779792514983e-05, "loss": 0.037, "step": 9860 }, { "epoch": 9.481730769230769, "grad_norm": 1.4137848615646362, "learning_rate": 1.4263653048842461e-05, "loss": 0.0101, "step": 9861 }, { "epoch": 9.482692307692307, "grad_norm": 0.200957790017128, "learning_rate": 1.4262526239010557e-05, "loss": 0.0012, "step": 9862 }, { "epoch": 9.483653846153846, "grad_norm": 2.885495662689209, "learning_rate": 1.4261399363036757e-05, "loss": 0.0174, "step": 9863 }, { "epoch": 9.484615384615385, "grad_norm": 1.4766839742660522, "learning_rate": 1.4260272420938544e-05, "loss": 0.0202, "step": 9864 }, { "epoch": 9.485576923076923, "grad_norm": 2.813776731491089, "learning_rate": 1.4259145412733409e-05, "loss": 0.0234, "step": 9865 }, { "epoch": 9.486538461538462, "grad_norm": 0.43526750802993774, "learning_rate": 1.4258018338438835e-05, "loss": 0.0038, "step": 9866 }, { "epoch": 9.4875, "grad_norm": 1.0331060886383057, "learning_rate": 1.4256891198072315e-05, "loss": 0.0082, "step": 9867 }, { "epoch": 9.488461538461538, "grad_norm": 0.40126582980155945, "learning_rate": 1.425576399165134e-05, "loss": 0.003, "step": 9868 }, { "epoch": 9.489423076923076, "grad_norm": 2.653940439224243, "learning_rate": 1.4254636719193396e-05, "loss": 0.0133, "step": 9869 }, { "epoch": 9.490384615384615, "grad_norm": 1.1384592056274414, "learning_rate": 1.4253509380715975e-05, "loss": 0.0224, "step": 9870 }, { "epoch": 9.491346153846154, "grad_norm": 3.956634044647217, "learning_rate": 1.4252381976236575e-05, "loss": 0.0484, "step": 9871 }, { "epoch": 9.492307692307692, "grad_norm": 0.43600285053253174, "learning_rate": 1.425125450577269e-05, "loss": 0.0021, "step": 9872 }, { "epoch": 9.493269230769231, "grad_norm": 1.738047480583191, "learning_rate": 1.4250126969341807e-05, "loss": 0.0238, "step": 9873 }, { "epoch": 9.49423076923077, "grad_norm": 3.3092076778411865, "learning_rate": 1.424899936696143e-05, "loss": 0.0257, "step": 9874 }, { "epoch": 9.495192307692308, "grad_norm": 2.8429348468780518, "learning_rate": 1.4247871698649053e-05, "loss": 0.0186, "step": 9875 }, { "epoch": 9.496153846153845, "grad_norm": 2.853339195251465, "learning_rate": 1.4246743964422176e-05, "loss": 0.0222, "step": 9876 }, { "epoch": 9.497115384615384, "grad_norm": 1.876594066619873, "learning_rate": 1.4245616164298295e-05, "loss": 0.0121, "step": 9877 }, { "epoch": 9.498076923076923, "grad_norm": 0.28542715311050415, "learning_rate": 1.4244488298294912e-05, "loss": 0.0017, "step": 9878 }, { "epoch": 9.499038461538461, "grad_norm": 1.3856555223464966, "learning_rate": 1.424336036642953e-05, "loss": 0.0125, "step": 9879 }, { "epoch": 9.5, "grad_norm": 1.1982197761535645, "learning_rate": 1.4242232368719645e-05, "loss": 0.0193, "step": 9880 }, { "epoch": 9.500961538461539, "grad_norm": 6.68096399307251, "learning_rate": 1.4241104305182768e-05, "loss": 0.0703, "step": 9881 }, { "epoch": 9.501923076923077, "grad_norm": 1.9089704751968384, "learning_rate": 1.4239976175836398e-05, "loss": 0.0419, "step": 9882 }, { "epoch": 9.502884615384616, "grad_norm": 2.015385627746582, "learning_rate": 1.4238847980698045e-05, "loss": 0.0124, "step": 9883 }, { "epoch": 9.503846153846155, "grad_norm": 1.6212873458862305, "learning_rate": 1.4237719719785208e-05, "loss": 0.014, "step": 9884 }, { "epoch": 9.504807692307692, "grad_norm": 1.8793747425079346, "learning_rate": 1.4236591393115401e-05, "loss": 0.0157, "step": 9885 }, { "epoch": 9.50576923076923, "grad_norm": 1.3274638652801514, "learning_rate": 1.423546300070613e-05, "loss": 0.0189, "step": 9886 }, { "epoch": 9.506730769230769, "grad_norm": 0.06252597272396088, "learning_rate": 1.4234334542574906e-05, "loss": 0.0004, "step": 9887 }, { "epoch": 9.507692307692308, "grad_norm": 3.532764434814453, "learning_rate": 1.4233206018739231e-05, "loss": 0.1302, "step": 9888 }, { "epoch": 9.508653846153846, "grad_norm": 1.4406696557998657, "learning_rate": 1.4232077429216629e-05, "loss": 0.0104, "step": 9889 }, { "epoch": 9.509615384615385, "grad_norm": 2.3927197456359863, "learning_rate": 1.4230948774024605e-05, "loss": 0.011, "step": 9890 }, { "epoch": 9.510576923076924, "grad_norm": 3.181326150894165, "learning_rate": 1.4229820053180672e-05, "loss": 0.0374, "step": 9891 }, { "epoch": 9.51153846153846, "grad_norm": 2.644585609436035, "learning_rate": 1.4228691266702345e-05, "loss": 0.0225, "step": 9892 }, { "epoch": 9.5125, "grad_norm": 1.1094216108322144, "learning_rate": 1.4227562414607144e-05, "loss": 0.0049, "step": 9893 }, { "epoch": 9.513461538461538, "grad_norm": 0.3123157024383545, "learning_rate": 1.4226433496912582e-05, "loss": 0.0023, "step": 9894 }, { "epoch": 9.514423076923077, "grad_norm": 0.17583827674388885, "learning_rate": 1.4225304513636173e-05, "loss": 0.0016, "step": 9895 }, { "epoch": 9.515384615384615, "grad_norm": 2.207259178161621, "learning_rate": 1.4224175464795442e-05, "loss": 0.0267, "step": 9896 }, { "epoch": 9.516346153846154, "grad_norm": 2.1609103679656982, "learning_rate": 1.4223046350407905e-05, "loss": 0.0306, "step": 9897 }, { "epoch": 9.517307692307693, "grad_norm": 1.2443410158157349, "learning_rate": 1.4221917170491081e-05, "loss": 0.0068, "step": 9898 }, { "epoch": 9.518269230769231, "grad_norm": 5.225073337554932, "learning_rate": 1.4220787925062499e-05, "loss": 0.0572, "step": 9899 }, { "epoch": 9.51923076923077, "grad_norm": 3.478205919265747, "learning_rate": 1.4219658614139674e-05, "loss": 0.04, "step": 9900 }, { "epoch": 9.520192307692307, "grad_norm": 2.4727017879486084, "learning_rate": 1.4218529237740131e-05, "loss": 0.0526, "step": 9901 }, { "epoch": 9.521153846153846, "grad_norm": 3.285275936126709, "learning_rate": 1.4217399795881393e-05, "loss": 0.0327, "step": 9902 }, { "epoch": 9.522115384615384, "grad_norm": 1.114716649055481, "learning_rate": 1.421627028858099e-05, "loss": 0.0092, "step": 9903 }, { "epoch": 9.523076923076923, "grad_norm": 1.8488337993621826, "learning_rate": 1.4215140715856448e-05, "loss": 0.0275, "step": 9904 }, { "epoch": 9.524038461538462, "grad_norm": 4.159041404724121, "learning_rate": 1.4214011077725293e-05, "loss": 0.0261, "step": 9905 }, { "epoch": 9.525, "grad_norm": 4.198831081390381, "learning_rate": 1.4212881374205053e-05, "loss": 0.0895, "step": 9906 }, { "epoch": 9.525961538461539, "grad_norm": 0.28749576210975647, "learning_rate": 1.421175160531326e-05, "loss": 0.0022, "step": 9907 }, { "epoch": 9.526923076923078, "grad_norm": 0.5284762978553772, "learning_rate": 1.4210621771067443e-05, "loss": 0.003, "step": 9908 }, { "epoch": 9.527884615384615, "grad_norm": 0.13095273077487946, "learning_rate": 1.4209491871485132e-05, "loss": 0.0012, "step": 9909 }, { "epoch": 9.528846153846153, "grad_norm": 1.4563121795654297, "learning_rate": 1.4208361906583866e-05, "loss": 0.0075, "step": 9910 }, { "epoch": 9.529807692307692, "grad_norm": 2.0436692237854004, "learning_rate": 1.4207231876381172e-05, "loss": 0.0228, "step": 9911 }, { "epoch": 9.53076923076923, "grad_norm": 2.430947780609131, "learning_rate": 1.4206101780894587e-05, "loss": 0.0306, "step": 9912 }, { "epoch": 9.53173076923077, "grad_norm": 1.3532497882843018, "learning_rate": 1.4204971620141648e-05, "loss": 0.0091, "step": 9913 }, { "epoch": 9.532692307692308, "grad_norm": 3.0253348350524902, "learning_rate": 1.420384139413989e-05, "loss": 0.0315, "step": 9914 }, { "epoch": 9.533653846153847, "grad_norm": 1.1869810819625854, "learning_rate": 1.4202711102906855e-05, "loss": 0.02, "step": 9915 }, { "epoch": 9.534615384615385, "grad_norm": 2.4124295711517334, "learning_rate": 1.4201580746460075e-05, "loss": 0.0656, "step": 9916 }, { "epoch": 9.535576923076922, "grad_norm": 2.0480048656463623, "learning_rate": 1.4200450324817092e-05, "loss": 0.0681, "step": 9917 }, { "epoch": 9.536538461538461, "grad_norm": 2.1137914657592773, "learning_rate": 1.4199319837995451e-05, "loss": 0.0201, "step": 9918 }, { "epoch": 9.5375, "grad_norm": 1.8571373224258423, "learning_rate": 1.4198189286012691e-05, "loss": 0.0173, "step": 9919 }, { "epoch": 9.538461538461538, "grad_norm": 0.8104669451713562, "learning_rate": 1.4197058668886352e-05, "loss": 0.0034, "step": 9920 }, { "epoch": 9.539423076923077, "grad_norm": 1.3077316284179688, "learning_rate": 1.4195927986633983e-05, "loss": 0.0097, "step": 9921 }, { "epoch": 9.540384615384616, "grad_norm": 4.3097710609436035, "learning_rate": 1.4194797239273128e-05, "loss": 0.0351, "step": 9922 }, { "epoch": 9.541346153846154, "grad_norm": 2.628305196762085, "learning_rate": 1.4193666426821332e-05, "loss": 0.081, "step": 9923 }, { "epoch": 9.542307692307693, "grad_norm": 3.9769930839538574, "learning_rate": 1.4192535549296137e-05, "loss": 0.0812, "step": 9924 }, { "epoch": 9.54326923076923, "grad_norm": 1.302015781402588, "learning_rate": 1.4191404606715098e-05, "loss": 0.0087, "step": 9925 }, { "epoch": 9.544230769230769, "grad_norm": 1.7441846132278442, "learning_rate": 1.4190273599095761e-05, "loss": 0.0195, "step": 9926 }, { "epoch": 9.545192307692307, "grad_norm": 0.899958074092865, "learning_rate": 1.4189142526455677e-05, "loss": 0.0106, "step": 9927 }, { "epoch": 9.546153846153846, "grad_norm": 2.363159656524658, "learning_rate": 1.4188011388812392e-05, "loss": 0.0143, "step": 9928 }, { "epoch": 9.547115384615385, "grad_norm": 0.1732359379529953, "learning_rate": 1.4186880186183464e-05, "loss": 0.0022, "step": 9929 }, { "epoch": 9.548076923076923, "grad_norm": 2.1118693351745605, "learning_rate": 1.4185748918586447e-05, "loss": 0.0227, "step": 9930 }, { "epoch": 9.549038461538462, "grad_norm": 1.8147268295288086, "learning_rate": 1.4184617586038887e-05, "loss": 0.0185, "step": 9931 }, { "epoch": 9.55, "grad_norm": 1.4909484386444092, "learning_rate": 1.4183486188558346e-05, "loss": 0.0145, "step": 9932 }, { "epoch": 9.55096153846154, "grad_norm": 1.8030369281768799, "learning_rate": 1.4182354726162376e-05, "loss": 0.014, "step": 9933 }, { "epoch": 9.551923076923076, "grad_norm": 1.716698169708252, "learning_rate": 1.4181223198868539e-05, "loss": 0.0453, "step": 9934 }, { "epoch": 9.552884615384615, "grad_norm": 1.2663954496383667, "learning_rate": 1.4180091606694388e-05, "loss": 0.0132, "step": 9935 }, { "epoch": 9.553846153846154, "grad_norm": 1.61640202999115, "learning_rate": 1.4178959949657483e-05, "loss": 0.0085, "step": 9936 }, { "epoch": 9.554807692307692, "grad_norm": 2.8800809383392334, "learning_rate": 1.4177828227775386e-05, "loss": 0.0176, "step": 9937 }, { "epoch": 9.555769230769231, "grad_norm": 3.1609222888946533, "learning_rate": 1.4176696441065656e-05, "loss": 0.0269, "step": 9938 }, { "epoch": 9.55673076923077, "grad_norm": 3.1576077938079834, "learning_rate": 1.4175564589545853e-05, "loss": 0.043, "step": 9939 }, { "epoch": 9.557692307692308, "grad_norm": 0.3319249153137207, "learning_rate": 1.4174432673233547e-05, "loss": 0.002, "step": 9940 }, { "epoch": 9.558653846153845, "grad_norm": 0.23558570444583893, "learning_rate": 1.4173300692146299e-05, "loss": 0.0023, "step": 9941 }, { "epoch": 9.559615384615384, "grad_norm": 1.9366410970687866, "learning_rate": 1.4172168646301667e-05, "loss": 0.025, "step": 9942 }, { "epoch": 9.560576923076923, "grad_norm": 2.897336721420288, "learning_rate": 1.4171036535717226e-05, "loss": 0.0331, "step": 9943 }, { "epoch": 9.561538461538461, "grad_norm": 3.2686116695404053, "learning_rate": 1.4169904360410541e-05, "loss": 0.0329, "step": 9944 }, { "epoch": 9.5625, "grad_norm": 1.8380228281021118, "learning_rate": 1.4168772120399176e-05, "loss": 0.0233, "step": 9945 }, { "epoch": 9.563461538461539, "grad_norm": 2.8705015182495117, "learning_rate": 1.4167639815700705e-05, "loss": 0.0297, "step": 9946 }, { "epoch": 9.564423076923077, "grad_norm": 4.270308494567871, "learning_rate": 1.4166507446332695e-05, "loss": 0.0535, "step": 9947 }, { "epoch": 9.565384615384616, "grad_norm": 3.213653326034546, "learning_rate": 1.4165375012312718e-05, "loss": 0.0343, "step": 9948 }, { "epoch": 9.566346153846155, "grad_norm": 1.5719114542007446, "learning_rate": 1.4164242513658346e-05, "loss": 0.0137, "step": 9949 }, { "epoch": 9.567307692307692, "grad_norm": 1.024006962776184, "learning_rate": 1.4163109950387153e-05, "loss": 0.0065, "step": 9950 }, { "epoch": 9.56826923076923, "grad_norm": 3.4918456077575684, "learning_rate": 1.4161977322516712e-05, "loss": 0.0185, "step": 9951 }, { "epoch": 9.569230769230769, "grad_norm": 2.7795372009277344, "learning_rate": 1.4160844630064596e-05, "loss": 0.0417, "step": 9952 }, { "epoch": 9.570192307692308, "grad_norm": 1.9453846216201782, "learning_rate": 1.4159711873048385e-05, "loss": 0.049, "step": 9953 }, { "epoch": 9.571153846153846, "grad_norm": 0.8249236345291138, "learning_rate": 1.4158579051485655e-05, "loss": 0.0046, "step": 9954 }, { "epoch": 9.572115384615385, "grad_norm": 0.15722884237766266, "learning_rate": 1.4157446165393984e-05, "loss": 0.0013, "step": 9955 }, { "epoch": 9.573076923076924, "grad_norm": 2.8671889305114746, "learning_rate": 1.415631321479095e-05, "loss": 0.0545, "step": 9956 }, { "epoch": 9.57403846153846, "grad_norm": 0.7796509265899658, "learning_rate": 1.415518019969413e-05, "loss": 0.004, "step": 9957 }, { "epoch": 9.575, "grad_norm": 0.37023207545280457, "learning_rate": 1.4154047120121112e-05, "loss": 0.0023, "step": 9958 }, { "epoch": 9.575961538461538, "grad_norm": 0.23699119687080383, "learning_rate": 1.4152913976089476e-05, "loss": 0.0029, "step": 9959 }, { "epoch": 9.576923076923077, "grad_norm": 0.7703957557678223, "learning_rate": 1.41517807676168e-05, "loss": 0.0074, "step": 9960 }, { "epoch": 9.577884615384615, "grad_norm": 0.2579350173473358, "learning_rate": 1.4150647494720674e-05, "loss": 0.0024, "step": 9961 }, { "epoch": 9.578846153846154, "grad_norm": 1.5179111957550049, "learning_rate": 1.414951415741868e-05, "loss": 0.0255, "step": 9962 }, { "epoch": 9.579807692307693, "grad_norm": 3.046661853790283, "learning_rate": 1.4148380755728407e-05, "loss": 0.0166, "step": 9963 }, { "epoch": 9.580769230769231, "grad_norm": 3.461256742477417, "learning_rate": 1.4147247289667435e-05, "loss": 0.0132, "step": 9964 }, { "epoch": 9.58173076923077, "grad_norm": 0.5609598159790039, "learning_rate": 1.4146113759253362e-05, "loss": 0.0039, "step": 9965 }, { "epoch": 9.582692307692307, "grad_norm": 0.26032501459121704, "learning_rate": 1.4144980164503773e-05, "loss": 0.0026, "step": 9966 }, { "epoch": 9.583653846153846, "grad_norm": 1.3923627138137817, "learning_rate": 1.4143846505436255e-05, "loss": 0.0086, "step": 9967 }, { "epoch": 9.584615384615384, "grad_norm": 1.331404685974121, "learning_rate": 1.4142712782068397e-05, "loss": 0.0075, "step": 9968 }, { "epoch": 9.585576923076923, "grad_norm": 2.73268461227417, "learning_rate": 1.4141578994417802e-05, "loss": 0.0205, "step": 9969 }, { "epoch": 9.586538461538462, "grad_norm": 1.0438722372055054, "learning_rate": 1.4140445142502052e-05, "loss": 0.007, "step": 9970 }, { "epoch": 9.5875, "grad_norm": 1.203324794769287, "learning_rate": 1.4139311226338747e-05, "loss": 0.008, "step": 9971 }, { "epoch": 9.588461538461539, "grad_norm": 0.4626598656177521, "learning_rate": 1.4138177245945481e-05, "loss": 0.002, "step": 9972 }, { "epoch": 9.589423076923078, "grad_norm": 1.2428828477859497, "learning_rate": 1.4137043201339851e-05, "loss": 0.0042, "step": 9973 }, { "epoch": 9.590384615384615, "grad_norm": 2.253479242324829, "learning_rate": 1.413590909253945e-05, "loss": 0.026, "step": 9974 }, { "epoch": 9.591346153846153, "grad_norm": 0.36443063616752625, "learning_rate": 1.413477491956188e-05, "loss": 0.0019, "step": 9975 }, { "epoch": 9.592307692307692, "grad_norm": 0.08887375891208649, "learning_rate": 1.413364068242474e-05, "loss": 0.0009, "step": 9976 }, { "epoch": 9.59326923076923, "grad_norm": 0.5822814702987671, "learning_rate": 1.4132506381145626e-05, "loss": 0.0024, "step": 9977 }, { "epoch": 9.59423076923077, "grad_norm": 4.657005786895752, "learning_rate": 1.4131372015742141e-05, "loss": 0.0785, "step": 9978 }, { "epoch": 9.595192307692308, "grad_norm": 6.576656341552734, "learning_rate": 1.413023758623189e-05, "loss": 0.0536, "step": 9979 }, { "epoch": 9.596153846153847, "grad_norm": 1.1958742141723633, "learning_rate": 1.4129103092632472e-05, "loss": 0.0281, "step": 9980 }, { "epoch": 9.597115384615385, "grad_norm": 0.4510535001754761, "learning_rate": 1.4127968534961496e-05, "loss": 0.0026, "step": 9981 }, { "epoch": 9.598076923076922, "grad_norm": 0.516505241394043, "learning_rate": 1.4126833913236562e-05, "loss": 0.0034, "step": 9982 }, { "epoch": 9.599038461538461, "grad_norm": 1.42633855342865, "learning_rate": 1.4125699227475275e-05, "loss": 0.0139, "step": 9983 }, { "epoch": 9.6, "grad_norm": 1.6800005435943604, "learning_rate": 1.4124564477695249e-05, "loss": 0.0095, "step": 9984 }, { "epoch": 9.600961538461538, "grad_norm": 2.08249568939209, "learning_rate": 1.4123429663914087e-05, "loss": 0.0481, "step": 9985 }, { "epoch": 9.601923076923077, "grad_norm": 2.4114952087402344, "learning_rate": 1.4122294786149397e-05, "loss": 0.0235, "step": 9986 }, { "epoch": 9.602884615384616, "grad_norm": 1.8476868867874146, "learning_rate": 1.4121159844418793e-05, "loss": 0.0175, "step": 9987 }, { "epoch": 9.603846153846154, "grad_norm": 0.8011091947555542, "learning_rate": 1.412002483873988e-05, "loss": 0.0054, "step": 9988 }, { "epoch": 9.604807692307693, "grad_norm": 3.2086172103881836, "learning_rate": 1.4118889769130275e-05, "loss": 0.0239, "step": 9989 }, { "epoch": 9.60576923076923, "grad_norm": 3.9204952716827393, "learning_rate": 1.4117754635607592e-05, "loss": 0.0149, "step": 9990 }, { "epoch": 9.606730769230769, "grad_norm": 0.8972702026367188, "learning_rate": 1.411661943818944e-05, "loss": 0.0044, "step": 9991 }, { "epoch": 9.607692307692307, "grad_norm": 3.273705244064331, "learning_rate": 1.411548417689344e-05, "loss": 0.0561, "step": 9992 }, { "epoch": 9.608653846153846, "grad_norm": 2.081547737121582, "learning_rate": 1.4114348851737198e-05, "loss": 0.0116, "step": 9993 }, { "epoch": 9.609615384615385, "grad_norm": 0.8503103852272034, "learning_rate": 1.4113213462738342e-05, "loss": 0.0044, "step": 9994 }, { "epoch": 9.610576923076923, "grad_norm": 0.5858983397483826, "learning_rate": 1.4112078009914485e-05, "loss": 0.0028, "step": 9995 }, { "epoch": 9.611538461538462, "grad_norm": 3.1389479637145996, "learning_rate": 1.4110942493283246e-05, "loss": 0.0598, "step": 9996 }, { "epoch": 9.6125, "grad_norm": 0.34662505984306335, "learning_rate": 1.4109806912862244e-05, "loss": 0.0016, "step": 9997 }, { "epoch": 9.61346153846154, "grad_norm": 1.3571596145629883, "learning_rate": 1.4108671268669101e-05, "loss": 0.0075, "step": 9998 }, { "epoch": 9.614423076923076, "grad_norm": 1.0006253719329834, "learning_rate": 1.410753556072144e-05, "loss": 0.0039, "step": 9999 }, { "epoch": 9.615384615384615, "grad_norm": 0.0832895040512085, "learning_rate": 1.4106399789036879e-05, "loss": 0.0007, "step": 10000 }, { "epoch": 9.616346153846154, "grad_norm": 3.011711359024048, "learning_rate": 1.4105263953633049e-05, "loss": 0.0416, "step": 10001 }, { "epoch": 9.617307692307692, "grad_norm": 0.3990190923213959, "learning_rate": 1.410412805452757e-05, "loss": 0.0025, "step": 10002 }, { "epoch": 9.618269230769231, "grad_norm": 1.4101864099502563, "learning_rate": 1.410299209173807e-05, "loss": 0.0061, "step": 10003 }, { "epoch": 9.61923076923077, "grad_norm": 0.17470009624958038, "learning_rate": 1.4101856065282174e-05, "loss": 0.0009, "step": 10004 }, { "epoch": 9.620192307692308, "grad_norm": 2.8687894344329834, "learning_rate": 1.410071997517751e-05, "loss": 0.0386, "step": 10005 }, { "epoch": 9.621153846153845, "grad_norm": 0.9241604208946228, "learning_rate": 1.4099583821441708e-05, "loss": 0.0048, "step": 10006 }, { "epoch": 9.622115384615384, "grad_norm": 0.24459674954414368, "learning_rate": 1.4098447604092398e-05, "loss": 0.0015, "step": 10007 }, { "epoch": 9.623076923076923, "grad_norm": 2.62764310836792, "learning_rate": 1.409731132314721e-05, "loss": 0.0304, "step": 10008 }, { "epoch": 9.624038461538461, "grad_norm": 2.1533141136169434, "learning_rate": 1.4096174978623774e-05, "loss": 0.0127, "step": 10009 }, { "epoch": 9.625, "grad_norm": 1.2736042737960815, "learning_rate": 1.4095038570539728e-05, "loss": 0.0076, "step": 10010 }, { "epoch": 9.625961538461539, "grad_norm": 2.2556920051574707, "learning_rate": 1.4093902098912695e-05, "loss": 0.041, "step": 10011 }, { "epoch": 9.626923076923077, "grad_norm": 2.5511062145233154, "learning_rate": 1.4092765563760323e-05, "loss": 0.0224, "step": 10012 }, { "epoch": 9.627884615384616, "grad_norm": 1.4088059663772583, "learning_rate": 1.4091628965100242e-05, "loss": 0.0082, "step": 10013 }, { "epoch": 9.628846153846155, "grad_norm": 0.6173691749572754, "learning_rate": 1.4090492302950085e-05, "loss": 0.0036, "step": 10014 }, { "epoch": 9.629807692307692, "grad_norm": 0.1852216124534607, "learning_rate": 1.4089355577327495e-05, "loss": 0.0013, "step": 10015 }, { "epoch": 9.63076923076923, "grad_norm": 3.715027332305908, "learning_rate": 1.4088218788250107e-05, "loss": 0.1181, "step": 10016 }, { "epoch": 9.631730769230769, "grad_norm": 2.8942368030548096, "learning_rate": 1.4087081935735565e-05, "loss": 0.0405, "step": 10017 }, { "epoch": 9.632692307692308, "grad_norm": 0.1821412891149521, "learning_rate": 1.4085945019801504e-05, "loss": 0.0014, "step": 10018 }, { "epoch": 9.633653846153846, "grad_norm": 3.4984774589538574, "learning_rate": 1.4084808040465569e-05, "loss": 0.0548, "step": 10019 }, { "epoch": 9.634615384615385, "grad_norm": 0.826257586479187, "learning_rate": 1.4083670997745403e-05, "loss": 0.0039, "step": 10020 }, { "epoch": 9.635576923076924, "grad_norm": 1.490722894668579, "learning_rate": 1.4082533891658647e-05, "loss": 0.0102, "step": 10021 }, { "epoch": 9.63653846153846, "grad_norm": 4.913949966430664, "learning_rate": 1.4081396722222947e-05, "loss": 0.0837, "step": 10022 }, { "epoch": 9.6375, "grad_norm": 4.83450984954834, "learning_rate": 1.408025948945595e-05, "loss": 0.0393, "step": 10023 }, { "epoch": 9.638461538461538, "grad_norm": 2.944411039352417, "learning_rate": 1.4079122193375302e-05, "loss": 0.0212, "step": 10024 }, { "epoch": 9.639423076923077, "grad_norm": 0.342539519071579, "learning_rate": 1.4077984833998648e-05, "loss": 0.0024, "step": 10025 }, { "epoch": 9.640384615384615, "grad_norm": 1.319622278213501, "learning_rate": 1.4076847411343637e-05, "loss": 0.0721, "step": 10026 }, { "epoch": 9.641346153846154, "grad_norm": 0.2720148265361786, "learning_rate": 1.4075709925427923e-05, "loss": 0.0015, "step": 10027 }, { "epoch": 9.642307692307693, "grad_norm": 1.4469796419143677, "learning_rate": 1.4074572376269151e-05, "loss": 0.0335, "step": 10028 }, { "epoch": 9.643269230769231, "grad_norm": 3.559929609298706, "learning_rate": 1.4073434763884972e-05, "loss": 0.1429, "step": 10029 }, { "epoch": 9.64423076923077, "grad_norm": 2.5482373237609863, "learning_rate": 1.4072297088293043e-05, "loss": 0.0288, "step": 10030 }, { "epoch": 9.645192307692307, "grad_norm": 1.8511629104614258, "learning_rate": 1.4071159349511017e-05, "loss": 0.0235, "step": 10031 }, { "epoch": 9.646153846153846, "grad_norm": 2.0329246520996094, "learning_rate": 1.4070021547556544e-05, "loss": 0.0261, "step": 10032 }, { "epoch": 9.647115384615384, "grad_norm": 3.622429609298706, "learning_rate": 1.4068883682447283e-05, "loss": 0.0407, "step": 10033 }, { "epoch": 9.648076923076923, "grad_norm": 0.9192362427711487, "learning_rate": 1.406774575420089e-05, "loss": 0.0063, "step": 10034 }, { "epoch": 9.649038461538462, "grad_norm": 1.9453762769699097, "learning_rate": 1.4066607762835022e-05, "loss": 0.0727, "step": 10035 }, { "epoch": 9.65, "grad_norm": 2.8601298332214355, "learning_rate": 1.4065469708367336e-05, "loss": 0.0334, "step": 10036 }, { "epoch": 9.650961538461539, "grad_norm": 1.91093111038208, "learning_rate": 1.4064331590815493e-05, "loss": 0.0156, "step": 10037 }, { "epoch": 9.651923076923078, "grad_norm": 2.0932106971740723, "learning_rate": 1.4063193410197152e-05, "loss": 0.0094, "step": 10038 }, { "epoch": 9.652884615384615, "grad_norm": 1.6996794939041138, "learning_rate": 1.4062055166529973e-05, "loss": 0.0267, "step": 10039 }, { "epoch": 9.653846153846153, "grad_norm": 4.513079643249512, "learning_rate": 1.4060916859831618e-05, "loss": 0.1106, "step": 10040 }, { "epoch": 9.654807692307692, "grad_norm": 4.079282283782959, "learning_rate": 1.4059778490119755e-05, "loss": 0.0778, "step": 10041 }, { "epoch": 9.65576923076923, "grad_norm": 1.1919572353363037, "learning_rate": 1.4058640057412048e-05, "loss": 0.0055, "step": 10042 }, { "epoch": 9.65673076923077, "grad_norm": 0.515059232711792, "learning_rate": 1.4057501561726157e-05, "loss": 0.0024, "step": 10043 }, { "epoch": 9.657692307692308, "grad_norm": 1.6898058652877808, "learning_rate": 1.4056363003079748e-05, "loss": 0.0145, "step": 10044 }, { "epoch": 9.658653846153847, "grad_norm": 2.531022787094116, "learning_rate": 1.4055224381490491e-05, "loss": 0.0426, "step": 10045 }, { "epoch": 9.659615384615385, "grad_norm": 0.12963610887527466, "learning_rate": 1.4054085696976057e-05, "loss": 0.0014, "step": 10046 }, { "epoch": 9.660576923076922, "grad_norm": 3.0626742839813232, "learning_rate": 1.4052946949554107e-05, "loss": 0.0158, "step": 10047 }, { "epoch": 9.661538461538461, "grad_norm": 0.3237701654434204, "learning_rate": 1.4051808139242315e-05, "loss": 0.0016, "step": 10048 }, { "epoch": 9.6625, "grad_norm": 3.8618698120117188, "learning_rate": 1.4050669266058357e-05, "loss": 0.0376, "step": 10049 }, { "epoch": 9.663461538461538, "grad_norm": 0.7624026536941528, "learning_rate": 1.4049530330019898e-05, "loss": 0.0034, "step": 10050 }, { "epoch": 9.664423076923077, "grad_norm": 3.2435479164123535, "learning_rate": 1.4048391331144615e-05, "loss": 0.0455, "step": 10051 }, { "epoch": 9.665384615384616, "grad_norm": 5.091781139373779, "learning_rate": 1.404725226945018e-05, "loss": 0.0286, "step": 10052 }, { "epoch": 9.666346153846154, "grad_norm": 1.0524331331253052, "learning_rate": 1.4046113144954268e-05, "loss": 0.0057, "step": 10053 }, { "epoch": 9.667307692307693, "grad_norm": 2.607386827468872, "learning_rate": 1.4044973957674556e-05, "loss": 0.0158, "step": 10054 }, { "epoch": 9.66826923076923, "grad_norm": 2.540663242340088, "learning_rate": 1.4043834707628721e-05, "loss": 0.0328, "step": 10055 }, { "epoch": 9.669230769230769, "grad_norm": 1.0879703760147095, "learning_rate": 1.4042695394834435e-05, "loss": 0.0088, "step": 10056 }, { "epoch": 9.670192307692307, "grad_norm": 1.6664807796478271, "learning_rate": 1.4041556019309386e-05, "loss": 0.0142, "step": 10057 }, { "epoch": 9.671153846153846, "grad_norm": 3.2616353034973145, "learning_rate": 1.4040416581071248e-05, "loss": 0.0266, "step": 10058 }, { "epoch": 9.672115384615385, "grad_norm": 0.08641599863767624, "learning_rate": 1.4039277080137702e-05, "loss": 0.0008, "step": 10059 }, { "epoch": 9.673076923076923, "grad_norm": 2.309948205947876, "learning_rate": 1.4038137516526434e-05, "loss": 0.0355, "step": 10060 }, { "epoch": 9.674038461538462, "grad_norm": 1.5481897592544556, "learning_rate": 1.4036997890255122e-05, "loss": 0.0074, "step": 10061 }, { "epoch": 9.675, "grad_norm": 2.1886558532714844, "learning_rate": 1.4035858201341452e-05, "loss": 0.0163, "step": 10062 }, { "epoch": 9.67596153846154, "grad_norm": 2.0971219539642334, "learning_rate": 1.4034718449803105e-05, "loss": 0.0143, "step": 10063 }, { "epoch": 9.676923076923076, "grad_norm": 0.8049708008766174, "learning_rate": 1.4033578635657771e-05, "loss": 0.0042, "step": 10064 }, { "epoch": 9.677884615384615, "grad_norm": 0.7067060470581055, "learning_rate": 1.4032438758923137e-05, "loss": 0.0054, "step": 10065 }, { "epoch": 9.678846153846154, "grad_norm": 3.508267641067505, "learning_rate": 1.4031298819616888e-05, "loss": 0.0238, "step": 10066 }, { "epoch": 9.679807692307692, "grad_norm": 4.117551326751709, "learning_rate": 1.4030158817756709e-05, "loss": 0.0554, "step": 10067 }, { "epoch": 9.680769230769231, "grad_norm": 1.7918380498886108, "learning_rate": 1.4029018753360297e-05, "loss": 0.0371, "step": 10068 }, { "epoch": 9.68173076923077, "grad_norm": 3.102515459060669, "learning_rate": 1.4027878626445339e-05, "loss": 0.0164, "step": 10069 }, { "epoch": 9.682692307692308, "grad_norm": 3.48359751701355, "learning_rate": 1.4026738437029524e-05, "loss": 0.02, "step": 10070 }, { "epoch": 9.683653846153845, "grad_norm": 3.086559295654297, "learning_rate": 1.402559818513055e-05, "loss": 0.1206, "step": 10071 }, { "epoch": 9.684615384615384, "grad_norm": 2.960315227508545, "learning_rate": 1.4024457870766106e-05, "loss": 0.0359, "step": 10072 }, { "epoch": 9.685576923076923, "grad_norm": 2.7648823261260986, "learning_rate": 1.4023317493953887e-05, "loss": 0.0264, "step": 10073 }, { "epoch": 9.686538461538461, "grad_norm": 2.8029096126556396, "learning_rate": 1.4022177054711587e-05, "loss": 0.0215, "step": 10074 }, { "epoch": 9.6875, "grad_norm": 2.1526896953582764, "learning_rate": 1.4021036553056905e-05, "loss": 0.026, "step": 10075 }, { "epoch": 9.688461538461539, "grad_norm": 2.6414198875427246, "learning_rate": 1.4019895989007542e-05, "loss": 0.0303, "step": 10076 }, { "epoch": 9.689423076923077, "grad_norm": 2.0835554599761963, "learning_rate": 1.4018755362581187e-05, "loss": 0.0383, "step": 10077 }, { "epoch": 9.690384615384616, "grad_norm": 2.911965847015381, "learning_rate": 1.4017614673795544e-05, "loss": 0.0376, "step": 10078 }, { "epoch": 9.691346153846155, "grad_norm": 1.4323205947875977, "learning_rate": 1.4016473922668312e-05, "loss": 0.0067, "step": 10079 }, { "epoch": 9.692307692307692, "grad_norm": 1.6571186780929565, "learning_rate": 1.4015333109217195e-05, "loss": 0.0241, "step": 10080 }, { "epoch": 9.69326923076923, "grad_norm": 0.24633418023586273, "learning_rate": 1.4014192233459892e-05, "loss": 0.0014, "step": 10081 }, { "epoch": 9.694230769230769, "grad_norm": 4.474287033081055, "learning_rate": 1.4013051295414108e-05, "loss": 0.0752, "step": 10082 }, { "epoch": 9.695192307692308, "grad_norm": 2.86491060256958, "learning_rate": 1.4011910295097545e-05, "loss": 0.0476, "step": 10083 }, { "epoch": 9.696153846153846, "grad_norm": 1.5958218574523926, "learning_rate": 1.401076923252791e-05, "loss": 0.0178, "step": 10084 }, { "epoch": 9.697115384615385, "grad_norm": 2.8125646114349365, "learning_rate": 1.4009628107722906e-05, "loss": 0.097, "step": 10085 }, { "epoch": 9.698076923076924, "grad_norm": 1.127993106842041, "learning_rate": 1.4008486920700244e-05, "loss": 0.008, "step": 10086 }, { "epoch": 9.69903846153846, "grad_norm": 2.214371681213379, "learning_rate": 1.400734567147763e-05, "loss": 0.0142, "step": 10087 }, { "epoch": 9.7, "grad_norm": 1.2254140377044678, "learning_rate": 1.400620436007277e-05, "loss": 0.0054, "step": 10088 }, { "epoch": 9.700961538461538, "grad_norm": 0.8693193793296814, "learning_rate": 1.400506298650338e-05, "loss": 0.0061, "step": 10089 }, { "epoch": 9.701923076923077, "grad_norm": 4.284818172454834, "learning_rate": 1.4003921550787162e-05, "loss": 0.0759, "step": 10090 }, { "epoch": 9.702884615384615, "grad_norm": 1.9531900882720947, "learning_rate": 1.4002780052941837e-05, "loss": 0.0234, "step": 10091 }, { "epoch": 9.703846153846154, "grad_norm": 0.35548949241638184, "learning_rate": 1.4001638492985112e-05, "loss": 0.003, "step": 10092 }, { "epoch": 9.704807692307693, "grad_norm": 1.5896071195602417, "learning_rate": 1.4000496870934704e-05, "loss": 0.0157, "step": 10093 }, { "epoch": 9.705769230769231, "grad_norm": 2.2862586975097656, "learning_rate": 1.3999355186808326e-05, "loss": 0.0297, "step": 10094 }, { "epoch": 9.70673076923077, "grad_norm": 1.8841811418533325, "learning_rate": 1.3998213440623691e-05, "loss": 0.0237, "step": 10095 }, { "epoch": 9.707692307692307, "grad_norm": 3.347656726837158, "learning_rate": 1.3997071632398518e-05, "loss": 0.0258, "step": 10096 }, { "epoch": 9.708653846153846, "grad_norm": 0.08676869422197342, "learning_rate": 1.3995929762150526e-05, "loss": 0.0005, "step": 10097 }, { "epoch": 9.709615384615384, "grad_norm": 4.550826549530029, "learning_rate": 1.3994787829897433e-05, "loss": 0.0497, "step": 10098 }, { "epoch": 9.710576923076923, "grad_norm": 4.581793308258057, "learning_rate": 1.3993645835656955e-05, "loss": 0.136, "step": 10099 }, { "epoch": 9.711538461538462, "grad_norm": 1.3345712423324585, "learning_rate": 1.3992503779446818e-05, "loss": 0.0142, "step": 10100 }, { "epoch": 9.7125, "grad_norm": 3.092571258544922, "learning_rate": 1.3991361661284737e-05, "loss": 0.0762, "step": 10101 }, { "epoch": 9.713461538461539, "grad_norm": 1.5406742095947266, "learning_rate": 1.399021948118844e-05, "loss": 0.015, "step": 10102 }, { "epoch": 9.714423076923078, "grad_norm": 1.2662460803985596, "learning_rate": 1.3989077239175646e-05, "loss": 0.008, "step": 10103 }, { "epoch": 9.715384615384615, "grad_norm": 0.5401046872138977, "learning_rate": 1.3987934935264082e-05, "loss": 0.0028, "step": 10104 }, { "epoch": 9.716346153846153, "grad_norm": 2.4623959064483643, "learning_rate": 1.3986792569471473e-05, "loss": 0.0214, "step": 10105 }, { "epoch": 9.717307692307692, "grad_norm": 2.421339750289917, "learning_rate": 1.3985650141815544e-05, "loss": 0.0209, "step": 10106 }, { "epoch": 9.71826923076923, "grad_norm": 0.659960150718689, "learning_rate": 1.3984507652314019e-05, "loss": 0.0038, "step": 10107 }, { "epoch": 9.71923076923077, "grad_norm": 1.9090100526809692, "learning_rate": 1.3983365100984633e-05, "loss": 0.0158, "step": 10108 }, { "epoch": 9.720192307692308, "grad_norm": 3.0992867946624756, "learning_rate": 1.3982222487845112e-05, "loss": 0.0242, "step": 10109 }, { "epoch": 9.721153846153847, "grad_norm": 1.071856141090393, "learning_rate": 1.3981079812913183e-05, "loss": 0.0118, "step": 10110 }, { "epoch": 9.722115384615385, "grad_norm": 1.7453356981277466, "learning_rate": 1.3979937076206583e-05, "loss": 0.0208, "step": 10111 }, { "epoch": 9.723076923076922, "grad_norm": 4.110151290893555, "learning_rate": 1.397879427774304e-05, "loss": 0.079, "step": 10112 }, { "epoch": 9.724038461538461, "grad_norm": 1.488394856452942, "learning_rate": 1.3977651417540288e-05, "loss": 0.0091, "step": 10113 }, { "epoch": 9.725, "grad_norm": 0.7073709964752197, "learning_rate": 1.3976508495616056e-05, "loss": 0.0035, "step": 10114 }, { "epoch": 9.725961538461538, "grad_norm": 0.6873065233230591, "learning_rate": 1.3975365511988087e-05, "loss": 0.0053, "step": 10115 }, { "epoch": 9.726923076923077, "grad_norm": 2.742595672607422, "learning_rate": 1.3974222466674113e-05, "loss": 0.0189, "step": 10116 }, { "epoch": 9.727884615384616, "grad_norm": 2.380629301071167, "learning_rate": 1.3973079359691868e-05, "loss": 0.0384, "step": 10117 }, { "epoch": 9.728846153846154, "grad_norm": 5.105806350708008, "learning_rate": 1.3971936191059092e-05, "loss": 0.0852, "step": 10118 }, { "epoch": 9.729807692307693, "grad_norm": 2.165889263153076, "learning_rate": 1.3970792960793524e-05, "loss": 0.0187, "step": 10119 }, { "epoch": 9.73076923076923, "grad_norm": 3.3129372596740723, "learning_rate": 1.3969649668912906e-05, "loss": 0.0398, "step": 10120 }, { "epoch": 9.731730769230769, "grad_norm": 1.5172635316848755, "learning_rate": 1.3968506315434973e-05, "loss": 0.0048, "step": 10121 }, { "epoch": 9.732692307692307, "grad_norm": 1.464531421661377, "learning_rate": 1.3967362900377472e-05, "loss": 0.007, "step": 10122 }, { "epoch": 9.733653846153846, "grad_norm": 3.836336612701416, "learning_rate": 1.3966219423758143e-05, "loss": 0.1383, "step": 10123 }, { "epoch": 9.734615384615385, "grad_norm": 3.0577967166900635, "learning_rate": 1.3965075885594728e-05, "loss": 0.0335, "step": 10124 }, { "epoch": 9.735576923076923, "grad_norm": 2.964125156402588, "learning_rate": 1.3963932285904971e-05, "loss": 0.0333, "step": 10125 }, { "epoch": 9.736538461538462, "grad_norm": 0.5778108239173889, "learning_rate": 1.3962788624706621e-05, "loss": 0.004, "step": 10126 }, { "epoch": 9.7375, "grad_norm": 0.35297513008117676, "learning_rate": 1.396164490201742e-05, "loss": 0.0031, "step": 10127 }, { "epoch": 9.73846153846154, "grad_norm": 1.1088871955871582, "learning_rate": 1.3960501117855121e-05, "loss": 0.0077, "step": 10128 }, { "epoch": 9.739423076923076, "grad_norm": 1.1842819452285767, "learning_rate": 1.3959357272237466e-05, "loss": 0.0102, "step": 10129 }, { "epoch": 9.740384615384615, "grad_norm": 3.2390425205230713, "learning_rate": 1.3958213365182208e-05, "loss": 0.0345, "step": 10130 }, { "epoch": 9.741346153846154, "grad_norm": 0.5049465894699097, "learning_rate": 1.3957069396707094e-05, "loss": 0.0038, "step": 10131 }, { "epoch": 9.742307692307692, "grad_norm": 2.364281415939331, "learning_rate": 1.3955925366829876e-05, "loss": 0.0074, "step": 10132 }, { "epoch": 9.743269230769231, "grad_norm": 2.839246988296509, "learning_rate": 1.3954781275568312e-05, "loss": 0.0201, "step": 10133 }, { "epoch": 9.74423076923077, "grad_norm": 2.518296241760254, "learning_rate": 1.3953637122940147e-05, "loss": 0.0405, "step": 10134 }, { "epoch": 9.745192307692308, "grad_norm": 1.3332439661026, "learning_rate": 1.3952492908963138e-05, "loss": 0.0099, "step": 10135 }, { "epoch": 9.746153846153845, "grad_norm": 0.7745558023452759, "learning_rate": 1.3951348633655037e-05, "loss": 0.0035, "step": 10136 }, { "epoch": 9.747115384615384, "grad_norm": 2.4172756671905518, "learning_rate": 1.3950204297033602e-05, "loss": 0.0387, "step": 10137 }, { "epoch": 9.748076923076923, "grad_norm": 3.410820484161377, "learning_rate": 1.3949059899116594e-05, "loss": 0.0876, "step": 10138 }, { "epoch": 9.749038461538461, "grad_norm": 0.6760756969451904, "learning_rate": 1.3947915439921764e-05, "loss": 0.004, "step": 10139 }, { "epoch": 9.75, "grad_norm": 1.17919921875, "learning_rate": 1.3946770919466874e-05, "loss": 0.0075, "step": 10140 }, { "epoch": 9.750961538461539, "grad_norm": 0.5217161774635315, "learning_rate": 1.3945626337769684e-05, "loss": 0.0027, "step": 10141 }, { "epoch": 9.751923076923077, "grad_norm": 1.2924035787582397, "learning_rate": 1.3944481694847954e-05, "loss": 0.0067, "step": 10142 }, { "epoch": 9.752884615384616, "grad_norm": 1.5951646566390991, "learning_rate": 1.394333699071944e-05, "loss": 0.0096, "step": 10143 }, { "epoch": 9.753846153846155, "grad_norm": 0.2640724778175354, "learning_rate": 1.3942192225401913e-05, "loss": 0.0016, "step": 10144 }, { "epoch": 9.754807692307692, "grad_norm": 3.340916633605957, "learning_rate": 1.3941047398913134e-05, "loss": 0.0178, "step": 10145 }, { "epoch": 9.75576923076923, "grad_norm": 0.3683551847934723, "learning_rate": 1.3939902511270866e-05, "loss": 0.0011, "step": 10146 }, { "epoch": 9.756730769230769, "grad_norm": 2.8555448055267334, "learning_rate": 1.3938757562492873e-05, "loss": 0.0228, "step": 10147 }, { "epoch": 9.757692307692308, "grad_norm": 3.8175010681152344, "learning_rate": 1.3937612552596924e-05, "loss": 0.0162, "step": 10148 }, { "epoch": 9.758653846153846, "grad_norm": 2.967810869216919, "learning_rate": 1.3936467481600783e-05, "loss": 0.0245, "step": 10149 }, { "epoch": 9.759615384615385, "grad_norm": 1.2605098485946655, "learning_rate": 1.3935322349522222e-05, "loss": 0.0113, "step": 10150 }, { "epoch": 9.760576923076924, "grad_norm": 3.3889319896698, "learning_rate": 1.3934177156379008e-05, "loss": 0.053, "step": 10151 }, { "epoch": 9.76153846153846, "grad_norm": 1.192462682723999, "learning_rate": 1.3933031902188909e-05, "loss": 0.0113, "step": 10152 }, { "epoch": 9.7625, "grad_norm": 0.7898922562599182, "learning_rate": 1.3931886586969703e-05, "loss": 0.0041, "step": 10153 }, { "epoch": 9.763461538461538, "grad_norm": 1.7004562616348267, "learning_rate": 1.3930741210739152e-05, "loss": 0.0155, "step": 10154 }, { "epoch": 9.764423076923077, "grad_norm": 2.8883895874023438, "learning_rate": 1.3929595773515035e-05, "loss": 0.0258, "step": 10155 }, { "epoch": 9.765384615384615, "grad_norm": 2.9883651733398438, "learning_rate": 1.3928450275315126e-05, "loss": 0.0121, "step": 10156 }, { "epoch": 9.766346153846154, "grad_norm": 5.0936479568481445, "learning_rate": 1.39273047161572e-05, "loss": 0.1537, "step": 10157 }, { "epoch": 9.767307692307693, "grad_norm": 2.1266465187072754, "learning_rate": 1.3926159096059028e-05, "loss": 0.0305, "step": 10158 }, { "epoch": 9.768269230769231, "grad_norm": 3.1654999256134033, "learning_rate": 1.3925013415038392e-05, "loss": 0.0462, "step": 10159 }, { "epoch": 9.76923076923077, "grad_norm": 0.14590270817279816, "learning_rate": 1.3923867673113067e-05, "loss": 0.0006, "step": 10160 }, { "epoch": 9.770192307692307, "grad_norm": 2.8388595581054688, "learning_rate": 1.3922721870300833e-05, "loss": 0.0172, "step": 10161 }, { "epoch": 9.771153846153846, "grad_norm": 1.516352891921997, "learning_rate": 1.3921576006619468e-05, "loss": 0.0082, "step": 10162 }, { "epoch": 9.772115384615384, "grad_norm": 2.886538028717041, "learning_rate": 1.3920430082086752e-05, "loss": 0.0228, "step": 10163 }, { "epoch": 9.773076923076923, "grad_norm": 2.2873525619506836, "learning_rate": 1.3919284096720472e-05, "loss": 0.017, "step": 10164 }, { "epoch": 9.774038461538462, "grad_norm": 0.3281277120113373, "learning_rate": 1.3918138050538399e-05, "loss": 0.0027, "step": 10165 }, { "epoch": 9.775, "grad_norm": 2.784114122390747, "learning_rate": 1.3916991943558326e-05, "loss": 0.0282, "step": 10166 }, { "epoch": 9.775961538461539, "grad_norm": 1.050592303276062, "learning_rate": 1.3915845775798037e-05, "loss": 0.0066, "step": 10167 }, { "epoch": 9.776923076923078, "grad_norm": 2.033313751220703, "learning_rate": 1.3914699547275312e-05, "loss": 0.0352, "step": 10168 }, { "epoch": 9.777884615384615, "grad_norm": 1.6899641752243042, "learning_rate": 1.391355325800794e-05, "loss": 0.015, "step": 10169 }, { "epoch": 9.778846153846153, "grad_norm": 3.109032154083252, "learning_rate": 1.3912406908013708e-05, "loss": 0.0312, "step": 10170 }, { "epoch": 9.779807692307692, "grad_norm": 2.406879186630249, "learning_rate": 1.3911260497310401e-05, "loss": 0.0107, "step": 10171 }, { "epoch": 9.78076923076923, "grad_norm": 0.13647021353244781, "learning_rate": 1.391011402591581e-05, "loss": 0.0008, "step": 10172 }, { "epoch": 9.78173076923077, "grad_norm": 0.2715209722518921, "learning_rate": 1.390896749384773e-05, "loss": 0.0026, "step": 10173 }, { "epoch": 9.782692307692308, "grad_norm": 2.2229857444763184, "learning_rate": 1.3907820901123945e-05, "loss": 0.0178, "step": 10174 }, { "epoch": 9.783653846153847, "grad_norm": 2.6363861560821533, "learning_rate": 1.3906674247762249e-05, "loss": 0.0221, "step": 10175 }, { "epoch": 9.784615384615385, "grad_norm": 3.18489408493042, "learning_rate": 1.3905527533780432e-05, "loss": 0.0218, "step": 10176 }, { "epoch": 9.785576923076922, "grad_norm": 0.1282864660024643, "learning_rate": 1.3904380759196294e-05, "loss": 0.0009, "step": 10177 }, { "epoch": 9.786538461538461, "grad_norm": 2.96708083152771, "learning_rate": 1.3903233924027623e-05, "loss": 0.0107, "step": 10178 }, { "epoch": 9.7875, "grad_norm": 2.400761365890503, "learning_rate": 1.390208702829222e-05, "loss": 0.0145, "step": 10179 }, { "epoch": 9.788461538461538, "grad_norm": 1.6950632333755493, "learning_rate": 1.3900940072007876e-05, "loss": 0.0105, "step": 10180 }, { "epoch": 9.789423076923077, "grad_norm": 1.357262134552002, "learning_rate": 1.3899793055192394e-05, "loss": 0.0298, "step": 10181 }, { "epoch": 9.790384615384616, "grad_norm": 2.266023635864258, "learning_rate": 1.389864597786357e-05, "loss": 0.0139, "step": 10182 }, { "epoch": 9.791346153846154, "grad_norm": 3.6169700622558594, "learning_rate": 1.38974988400392e-05, "loss": 0.0542, "step": 10183 }, { "epoch": 9.792307692307693, "grad_norm": 1.223996877670288, "learning_rate": 1.3896351641737086e-05, "loss": 0.0122, "step": 10184 }, { "epoch": 9.79326923076923, "grad_norm": 2.5646770000457764, "learning_rate": 1.3895204382975034e-05, "loss": 0.0248, "step": 10185 }, { "epoch": 9.794230769230769, "grad_norm": 1.9771373271942139, "learning_rate": 1.3894057063770841e-05, "loss": 0.0154, "step": 10186 }, { "epoch": 9.795192307692307, "grad_norm": 0.25932350754737854, "learning_rate": 1.3892909684142311e-05, "loss": 0.0018, "step": 10187 }, { "epoch": 9.796153846153846, "grad_norm": 0.6065590381622314, "learning_rate": 1.3891762244107247e-05, "loss": 0.003, "step": 10188 }, { "epoch": 9.797115384615385, "grad_norm": 0.3146449327468872, "learning_rate": 1.3890614743683458e-05, "loss": 0.0025, "step": 10189 }, { "epoch": 9.798076923076923, "grad_norm": 2.4780735969543457, "learning_rate": 1.3889467182888743e-05, "loss": 0.0198, "step": 10190 }, { "epoch": 9.799038461538462, "grad_norm": 0.5874944925308228, "learning_rate": 1.3888319561740916e-05, "loss": 0.0033, "step": 10191 }, { "epoch": 9.8, "grad_norm": 2.3558340072631836, "learning_rate": 1.3887171880257781e-05, "loss": 0.0277, "step": 10192 }, { "epoch": 9.80096153846154, "grad_norm": 0.78575199842453, "learning_rate": 1.3886024138457149e-05, "loss": 0.0329, "step": 10193 }, { "epoch": 9.801923076923076, "grad_norm": 3.241818428039551, "learning_rate": 1.3884876336356823e-05, "loss": 0.0285, "step": 10194 }, { "epoch": 9.802884615384615, "grad_norm": 1.3611538410186768, "learning_rate": 1.388372847397462e-05, "loss": 0.0064, "step": 10195 }, { "epoch": 9.803846153846154, "grad_norm": 3.5268197059631348, "learning_rate": 1.388258055132835e-05, "loss": 0.0284, "step": 10196 }, { "epoch": 9.804807692307692, "grad_norm": 3.058030128479004, "learning_rate": 1.3881432568435827e-05, "loss": 0.0345, "step": 10197 }, { "epoch": 9.805769230769231, "grad_norm": 2.5409862995147705, "learning_rate": 1.388028452531486e-05, "loss": 0.0211, "step": 10198 }, { "epoch": 9.80673076923077, "grad_norm": 2.433213472366333, "learning_rate": 1.3879136421983265e-05, "loss": 0.0193, "step": 10199 }, { "epoch": 9.807692307692308, "grad_norm": 2.1098389625549316, "learning_rate": 1.3877988258458862e-05, "loss": 0.0123, "step": 10200 }, { "epoch": 9.808653846153845, "grad_norm": 0.17396777868270874, "learning_rate": 1.3876840034759457e-05, "loss": 0.0013, "step": 10201 }, { "epoch": 9.809615384615384, "grad_norm": 1.8713963031768799, "learning_rate": 1.3875691750902877e-05, "loss": 0.0222, "step": 10202 }, { "epoch": 9.810576923076923, "grad_norm": 0.192727729678154, "learning_rate": 1.3874543406906937e-05, "loss": 0.0015, "step": 10203 }, { "epoch": 9.811538461538461, "grad_norm": 3.444040060043335, "learning_rate": 1.3873395002789454e-05, "loss": 0.046, "step": 10204 }, { "epoch": 9.8125, "grad_norm": 3.68194317817688, "learning_rate": 1.3872246538568246e-05, "loss": 0.0361, "step": 10205 }, { "epoch": 9.813461538461539, "grad_norm": 1.1033662557601929, "learning_rate": 1.387109801426114e-05, "loss": 0.0066, "step": 10206 }, { "epoch": 9.814423076923077, "grad_norm": 1.6777808666229248, "learning_rate": 1.3869949429885955e-05, "loss": 0.0144, "step": 10207 }, { "epoch": 9.815384615384616, "grad_norm": 0.9740934371948242, "learning_rate": 1.386880078546051e-05, "loss": 0.004, "step": 10208 }, { "epoch": 9.816346153846155, "grad_norm": 1.0426959991455078, "learning_rate": 1.3867652081002631e-05, "loss": 0.0092, "step": 10209 }, { "epoch": 9.817307692307692, "grad_norm": 0.5958360433578491, "learning_rate": 1.3866503316530146e-05, "loss": 0.0023, "step": 10210 }, { "epoch": 9.81826923076923, "grad_norm": 6.085489749908447, "learning_rate": 1.3865354492060879e-05, "loss": 0.0407, "step": 10211 }, { "epoch": 9.819230769230769, "grad_norm": 2.4077446460723877, "learning_rate": 1.3864205607612648e-05, "loss": 0.0154, "step": 10212 }, { "epoch": 9.820192307692308, "grad_norm": 0.8618101477622986, "learning_rate": 1.3863056663203293e-05, "loss": 0.0037, "step": 10213 }, { "epoch": 9.821153846153846, "grad_norm": 5.426600933074951, "learning_rate": 1.3861907658850637e-05, "loss": 0.0662, "step": 10214 }, { "epoch": 9.822115384615385, "grad_norm": 3.6262173652648926, "learning_rate": 1.3860758594572507e-05, "loss": 0.0508, "step": 10215 }, { "epoch": 9.823076923076924, "grad_norm": 3.0355985164642334, "learning_rate": 1.3859609470386735e-05, "loss": 0.0249, "step": 10216 }, { "epoch": 9.82403846153846, "grad_norm": 1.0355088710784912, "learning_rate": 1.3858460286311151e-05, "loss": 0.0049, "step": 10217 }, { "epoch": 9.825, "grad_norm": 3.584315061569214, "learning_rate": 1.3857311042363588e-05, "loss": 0.0486, "step": 10218 }, { "epoch": 9.825961538461538, "grad_norm": 1.985213041305542, "learning_rate": 1.3856161738561879e-05, "loss": 0.015, "step": 10219 }, { "epoch": 9.826923076923077, "grad_norm": 3.0098698139190674, "learning_rate": 1.3855012374923857e-05, "loss": 0.0295, "step": 10220 }, { "epoch": 9.827884615384615, "grad_norm": 3.5874016284942627, "learning_rate": 1.385386295146736e-05, "loss": 0.0572, "step": 10221 }, { "epoch": 9.828846153846154, "grad_norm": 3.3607254028320312, "learning_rate": 1.3852713468210219e-05, "loss": 0.0587, "step": 10222 }, { "epoch": 9.829807692307693, "grad_norm": 3.7925631999969482, "learning_rate": 1.385156392517027e-05, "loss": 0.0897, "step": 10223 }, { "epoch": 9.830769230769231, "grad_norm": 2.356285810470581, "learning_rate": 1.3850414322365358e-05, "loss": 0.053, "step": 10224 }, { "epoch": 9.83173076923077, "grad_norm": 1.3454315662384033, "learning_rate": 1.3849264659813314e-05, "loss": 0.0066, "step": 10225 }, { "epoch": 9.832692307692307, "grad_norm": 1.6222281455993652, "learning_rate": 1.384811493753198e-05, "loss": 0.0103, "step": 10226 }, { "epoch": 9.833653846153846, "grad_norm": 2.0375990867614746, "learning_rate": 1.3846965155539194e-05, "loss": 0.0607, "step": 10227 }, { "epoch": 9.834615384615384, "grad_norm": 0.37236058712005615, "learning_rate": 1.3845815313852802e-05, "loss": 0.0024, "step": 10228 }, { "epoch": 9.835576923076923, "grad_norm": 0.15616002678871155, "learning_rate": 1.3844665412490645e-05, "loss": 0.0006, "step": 10229 }, { "epoch": 9.836538461538462, "grad_norm": 2.5173637866973877, "learning_rate": 1.3843515451470561e-05, "loss": 0.0313, "step": 10230 }, { "epoch": 9.8375, "grad_norm": 0.12406276911497116, "learning_rate": 1.38423654308104e-05, "loss": 0.0007, "step": 10231 }, { "epoch": 9.838461538461539, "grad_norm": 5.381851673126221, "learning_rate": 1.3841215350528005e-05, "loss": 0.0832, "step": 10232 }, { "epoch": 9.839423076923078, "grad_norm": 0.7718460559844971, "learning_rate": 1.3840065210641223e-05, "loss": 0.0091, "step": 10233 }, { "epoch": 9.840384615384615, "grad_norm": 2.747683525085449, "learning_rate": 1.3838915011167897e-05, "loss": 0.0401, "step": 10234 }, { "epoch": 9.841346153846153, "grad_norm": 2.8641483783721924, "learning_rate": 1.3837764752125877e-05, "loss": 0.0147, "step": 10235 }, { "epoch": 9.842307692307692, "grad_norm": 2.9814882278442383, "learning_rate": 1.3836614433533013e-05, "loss": 0.0275, "step": 10236 }, { "epoch": 9.84326923076923, "grad_norm": 0.20604534447193146, "learning_rate": 1.3835464055407152e-05, "loss": 0.0012, "step": 10237 }, { "epoch": 9.84423076923077, "grad_norm": 1.951665997505188, "learning_rate": 1.3834313617766146e-05, "loss": 0.0141, "step": 10238 }, { "epoch": 9.845192307692308, "grad_norm": 2.7594449520111084, "learning_rate": 1.3833163120627848e-05, "loss": 0.0217, "step": 10239 }, { "epoch": 9.846153846153847, "grad_norm": 3.8823025226593018, "learning_rate": 1.3832012564010109e-05, "loss": 0.0353, "step": 10240 }, { "epoch": 9.847115384615385, "grad_norm": 4.111116886138916, "learning_rate": 1.3830861947930778e-05, "loss": 0.0552, "step": 10241 }, { "epoch": 9.848076923076922, "grad_norm": 1.8784974813461304, "learning_rate": 1.3829711272407716e-05, "loss": 0.0104, "step": 10242 }, { "epoch": 9.849038461538461, "grad_norm": 2.1305525302886963, "learning_rate": 1.3828560537458775e-05, "loss": 0.0289, "step": 10243 }, { "epoch": 9.85, "grad_norm": 2.2780778408050537, "learning_rate": 1.3827409743101812e-05, "loss": 0.0286, "step": 10244 }, { "epoch": 9.850961538461538, "grad_norm": 2.5900516510009766, "learning_rate": 1.3826258889354678e-05, "loss": 0.0107, "step": 10245 }, { "epoch": 9.851923076923077, "grad_norm": 1.5988593101501465, "learning_rate": 1.3825107976235242e-05, "loss": 0.0291, "step": 10246 }, { "epoch": 9.852884615384616, "grad_norm": 0.4262489676475525, "learning_rate": 1.3823957003761352e-05, "loss": 0.0026, "step": 10247 }, { "epoch": 9.853846153846154, "grad_norm": 0.22536230087280273, "learning_rate": 1.3822805971950877e-05, "loss": 0.001, "step": 10248 }, { "epoch": 9.854807692307693, "grad_norm": 2.023735761642456, "learning_rate": 1.3821654880821669e-05, "loss": 0.008, "step": 10249 }, { "epoch": 9.85576923076923, "grad_norm": 0.49537748098373413, "learning_rate": 1.3820503730391597e-05, "loss": 0.0031, "step": 10250 }, { "epoch": 9.856730769230769, "grad_norm": 0.9799173474311829, "learning_rate": 1.3819352520678519e-05, "loss": 0.0061, "step": 10251 }, { "epoch": 9.857692307692307, "grad_norm": 2.7666802406311035, "learning_rate": 1.3818201251700298e-05, "loss": 0.0303, "step": 10252 }, { "epoch": 9.858653846153846, "grad_norm": 0.6012463569641113, "learning_rate": 1.3817049923474802e-05, "loss": 0.0046, "step": 10253 }, { "epoch": 9.859615384615385, "grad_norm": 1.7188389301300049, "learning_rate": 1.3815898536019893e-05, "loss": 0.0291, "step": 10254 }, { "epoch": 9.860576923076923, "grad_norm": 1.3561629056930542, "learning_rate": 1.381474708935344e-05, "loss": 0.0067, "step": 10255 }, { "epoch": 9.861538461538462, "grad_norm": 2.5627663135528564, "learning_rate": 1.3813595583493305e-05, "loss": 0.0322, "step": 10256 }, { "epoch": 9.8625, "grad_norm": 2.7962570190429688, "learning_rate": 1.381244401845736e-05, "loss": 0.0865, "step": 10257 }, { "epoch": 9.86346153846154, "grad_norm": 6.388498306274414, "learning_rate": 1.3811292394263475e-05, "loss": 0.0984, "step": 10258 }, { "epoch": 9.864423076923076, "grad_norm": 1.6599276065826416, "learning_rate": 1.3810140710929521e-05, "loss": 0.0159, "step": 10259 }, { "epoch": 9.865384615384615, "grad_norm": 1.149479866027832, "learning_rate": 1.3808988968473358e-05, "loss": 0.011, "step": 10260 }, { "epoch": 9.866346153846154, "grad_norm": 3.4478020668029785, "learning_rate": 1.3807837166912872e-05, "loss": 0.0561, "step": 10261 }, { "epoch": 9.867307692307692, "grad_norm": 1.9339079856872559, "learning_rate": 1.3806685306265926e-05, "loss": 0.0109, "step": 10262 }, { "epoch": 9.868269230769231, "grad_norm": 3.3412461280822754, "learning_rate": 1.3805533386550396e-05, "loss": 0.0234, "step": 10263 }, { "epoch": 9.86923076923077, "grad_norm": 1.7058227062225342, "learning_rate": 1.380438140778416e-05, "loss": 0.0147, "step": 10264 }, { "epoch": 9.870192307692308, "grad_norm": 1.1483474969863892, "learning_rate": 1.3803229369985087e-05, "loss": 0.0124, "step": 10265 }, { "epoch": 9.871153846153845, "grad_norm": 2.4653873443603516, "learning_rate": 1.380207727317106e-05, "loss": 0.0328, "step": 10266 }, { "epoch": 9.872115384615384, "grad_norm": 1.04293954372406, "learning_rate": 1.3800925117359949e-05, "loss": 0.0083, "step": 10267 }, { "epoch": 9.873076923076923, "grad_norm": 2.8989412784576416, "learning_rate": 1.3799772902569636e-05, "loss": 0.1157, "step": 10268 }, { "epoch": 9.874038461538461, "grad_norm": 0.8147398233413696, "learning_rate": 1.3798620628818005e-05, "loss": 0.0057, "step": 10269 }, { "epoch": 9.875, "grad_norm": 1.1910350322723389, "learning_rate": 1.3797468296122924e-05, "loss": 0.0079, "step": 10270 }, { "epoch": 9.875961538461539, "grad_norm": 1.8320741653442383, "learning_rate": 1.3796315904502284e-05, "loss": 0.008, "step": 10271 }, { "epoch": 9.876923076923077, "grad_norm": 2.0481626987457275, "learning_rate": 1.3795163453973962e-05, "loss": 0.0113, "step": 10272 }, { "epoch": 9.877884615384616, "grad_norm": 2.6606132984161377, "learning_rate": 1.3794010944555844e-05, "loss": 0.035, "step": 10273 }, { "epoch": 9.878846153846155, "grad_norm": 2.1711137294769287, "learning_rate": 1.379285837626581e-05, "loss": 0.0427, "step": 10274 }, { "epoch": 9.879807692307692, "grad_norm": 1.247883677482605, "learning_rate": 1.3791705749121745e-05, "loss": 0.0028, "step": 10275 }, { "epoch": 9.88076923076923, "grad_norm": 3.733074188232422, "learning_rate": 1.3790553063141537e-05, "loss": 0.0765, "step": 10276 }, { "epoch": 9.881730769230769, "grad_norm": 1.7758500576019287, "learning_rate": 1.378940031834307e-05, "loss": 0.0135, "step": 10277 }, { "epoch": 9.882692307692308, "grad_norm": 2.6862027645111084, "learning_rate": 1.378824751474423e-05, "loss": 0.0336, "step": 10278 }, { "epoch": 9.883653846153846, "grad_norm": 3.9794363975524902, "learning_rate": 1.378709465236291e-05, "loss": 0.0706, "step": 10279 }, { "epoch": 9.884615384615385, "grad_norm": 2.561919689178467, "learning_rate": 1.3785941731216994e-05, "loss": 0.023, "step": 10280 }, { "epoch": 9.885576923076924, "grad_norm": 0.499165803194046, "learning_rate": 1.3784788751324371e-05, "loss": 0.0022, "step": 10281 }, { "epoch": 9.88653846153846, "grad_norm": 3.03025484085083, "learning_rate": 1.3783635712702941e-05, "loss": 0.031, "step": 10282 }, { "epoch": 9.8875, "grad_norm": 3.187438726425171, "learning_rate": 1.3782482615370584e-05, "loss": 0.0112, "step": 10283 }, { "epoch": 9.888461538461538, "grad_norm": 0.6208659410476685, "learning_rate": 1.3781329459345202e-05, "loss": 0.003, "step": 10284 }, { "epoch": 9.889423076923077, "grad_norm": 0.36085620522499084, "learning_rate": 1.3780176244644686e-05, "loss": 0.0022, "step": 10285 }, { "epoch": 9.890384615384615, "grad_norm": 4.692843437194824, "learning_rate": 1.3779022971286926e-05, "loss": 0.055, "step": 10286 }, { "epoch": 9.891346153846154, "grad_norm": 2.5882506370544434, "learning_rate": 1.3777869639289822e-05, "loss": 0.0481, "step": 10287 }, { "epoch": 9.892307692307693, "grad_norm": 2.0435597896575928, "learning_rate": 1.3776716248671269e-05, "loss": 0.0754, "step": 10288 }, { "epoch": 9.893269230769231, "grad_norm": 3.4343714714050293, "learning_rate": 1.3775562799449164e-05, "loss": 0.0354, "step": 10289 }, { "epoch": 9.89423076923077, "grad_norm": 0.6122985482215881, "learning_rate": 1.3774409291641407e-05, "loss": 0.0041, "step": 10290 }, { "epoch": 9.895192307692307, "grad_norm": 2.530935525894165, "learning_rate": 1.3773255725265896e-05, "loss": 0.0218, "step": 10291 }, { "epoch": 9.896153846153846, "grad_norm": 2.879392147064209, "learning_rate": 1.3772102100340526e-05, "loss": 0.0561, "step": 10292 }, { "epoch": 9.897115384615384, "grad_norm": 5.082653522491455, "learning_rate": 1.3770948416883205e-05, "loss": 0.0613, "step": 10293 }, { "epoch": 9.898076923076923, "grad_norm": 0.10986802726984024, "learning_rate": 1.3769794674911834e-05, "loss": 0.001, "step": 10294 }, { "epoch": 9.899038461538462, "grad_norm": 2.551267147064209, "learning_rate": 1.3768640874444312e-05, "loss": 0.0126, "step": 10295 }, { "epoch": 9.9, "grad_norm": 1.6073248386383057, "learning_rate": 1.3767487015498544e-05, "loss": 0.0096, "step": 10296 }, { "epoch": 9.900961538461539, "grad_norm": 2.2194840908050537, "learning_rate": 1.3766333098092437e-05, "loss": 0.0092, "step": 10297 }, { "epoch": 9.901923076923078, "grad_norm": 1.4285448789596558, "learning_rate": 1.3765179122243892e-05, "loss": 0.0163, "step": 10298 }, { "epoch": 9.902884615384615, "grad_norm": 3.813530445098877, "learning_rate": 1.376402508797082e-05, "loss": 0.0238, "step": 10299 }, { "epoch": 9.903846153846153, "grad_norm": 0.933175265789032, "learning_rate": 1.3762870995291124e-05, "loss": 0.0043, "step": 10300 }, { "epoch": 9.904807692307692, "grad_norm": 0.2978883683681488, "learning_rate": 1.3761716844222717e-05, "loss": 0.0016, "step": 10301 }, { "epoch": 9.90576923076923, "grad_norm": 3.904512643814087, "learning_rate": 1.3760562634783504e-05, "loss": 0.0514, "step": 10302 }, { "epoch": 9.90673076923077, "grad_norm": 2.574030876159668, "learning_rate": 1.3759408366991391e-05, "loss": 0.0165, "step": 10303 }, { "epoch": 9.907692307692308, "grad_norm": 1.8087903261184692, "learning_rate": 1.3758254040864299e-05, "loss": 0.0165, "step": 10304 }, { "epoch": 9.908653846153847, "grad_norm": 2.1095614433288574, "learning_rate": 1.3757099656420134e-05, "loss": 0.0155, "step": 10305 }, { "epoch": 9.909615384615385, "grad_norm": 2.065199136734009, "learning_rate": 1.3755945213676808e-05, "loss": 0.0186, "step": 10306 }, { "epoch": 9.910576923076922, "grad_norm": 3.3710274696350098, "learning_rate": 1.3754790712652238e-05, "loss": 0.0668, "step": 10307 }, { "epoch": 9.911538461538461, "grad_norm": 2.750394582748413, "learning_rate": 1.3753636153364334e-05, "loss": 0.0198, "step": 10308 }, { "epoch": 9.9125, "grad_norm": 0.7826799154281616, "learning_rate": 1.3752481535831015e-05, "loss": 0.0022, "step": 10309 }, { "epoch": 9.913461538461538, "grad_norm": 2.0526909828186035, "learning_rate": 1.3751326860070195e-05, "loss": 0.0121, "step": 10310 }, { "epoch": 9.914423076923077, "grad_norm": 0.5154909491539001, "learning_rate": 1.3750172126099791e-05, "loss": 0.0022, "step": 10311 }, { "epoch": 9.915384615384616, "grad_norm": 0.6699308753013611, "learning_rate": 1.3749017333937727e-05, "loss": 0.0054, "step": 10312 }, { "epoch": 9.916346153846154, "grad_norm": 2.886350393295288, "learning_rate": 1.3747862483601915e-05, "loss": 0.0581, "step": 10313 }, { "epoch": 9.917307692307693, "grad_norm": 2.0335066318511963, "learning_rate": 1.3746707575110274e-05, "loss": 0.025, "step": 10314 }, { "epoch": 9.91826923076923, "grad_norm": 0.02987680956721306, "learning_rate": 1.374555260848073e-05, "loss": 0.0002, "step": 10315 }, { "epoch": 9.919230769230769, "grad_norm": 2.7543375492095947, "learning_rate": 1.3744397583731204e-05, "loss": 0.0137, "step": 10316 }, { "epoch": 9.920192307692307, "grad_norm": 0.08027628064155579, "learning_rate": 1.3743242500879617e-05, "loss": 0.0005, "step": 10317 }, { "epoch": 9.921153846153846, "grad_norm": 0.46444952487945557, "learning_rate": 1.3742087359943889e-05, "loss": 0.0021, "step": 10318 }, { "epoch": 9.922115384615385, "grad_norm": 5.375476360321045, "learning_rate": 1.374093216094195e-05, "loss": 0.0624, "step": 10319 }, { "epoch": 9.923076923076923, "grad_norm": 0.3338691294193268, "learning_rate": 1.3739776903891726e-05, "loss": 0.002, "step": 10320 }, { "epoch": 9.924038461538462, "grad_norm": 0.6212894916534424, "learning_rate": 1.3738621588811137e-05, "loss": 0.0019, "step": 10321 }, { "epoch": 9.925, "grad_norm": 2.9954752922058105, "learning_rate": 1.3737466215718116e-05, "loss": 0.062, "step": 10322 }, { "epoch": 9.92596153846154, "grad_norm": 1.2806328535079956, "learning_rate": 1.3736310784630588e-05, "loss": 0.0067, "step": 10323 }, { "epoch": 9.926923076923076, "grad_norm": 1.4694679975509644, "learning_rate": 1.3735155295566482e-05, "loss": 0.012, "step": 10324 }, { "epoch": 9.927884615384615, "grad_norm": 2.1996443271636963, "learning_rate": 1.3733999748543729e-05, "loss": 0.0327, "step": 10325 }, { "epoch": 9.928846153846154, "grad_norm": 4.059708595275879, "learning_rate": 1.3732844143580259e-05, "loss": 0.0667, "step": 10326 }, { "epoch": 9.929807692307692, "grad_norm": 0.6265741586685181, "learning_rate": 1.3731688480694006e-05, "loss": 0.0027, "step": 10327 }, { "epoch": 9.930769230769231, "grad_norm": 1.302376627922058, "learning_rate": 1.3730532759902897e-05, "loss": 0.007, "step": 10328 }, { "epoch": 9.93173076923077, "grad_norm": 2.2848243713378906, "learning_rate": 1.3729376981224869e-05, "loss": 0.0155, "step": 10329 }, { "epoch": 9.932692307692308, "grad_norm": 5.875232219696045, "learning_rate": 1.3728221144677856e-05, "loss": 0.1079, "step": 10330 }, { "epoch": 9.933653846153845, "grad_norm": 3.18932843208313, "learning_rate": 1.3727065250279795e-05, "loss": 0.0284, "step": 10331 }, { "epoch": 9.934615384615384, "grad_norm": 1.4571646451950073, "learning_rate": 1.3725909298048614e-05, "loss": 0.0167, "step": 10332 }, { "epoch": 9.935576923076923, "grad_norm": 5.46258544921875, "learning_rate": 1.3724753288002262e-05, "loss": 0.121, "step": 10333 }, { "epoch": 9.936538461538461, "grad_norm": 0.22135917842388153, "learning_rate": 1.3723597220158671e-05, "loss": 0.0018, "step": 10334 }, { "epoch": 9.9375, "grad_norm": 0.0535944402217865, "learning_rate": 1.3722441094535777e-05, "loss": 0.0005, "step": 10335 }, { "epoch": 9.938461538461539, "grad_norm": 3.149620532989502, "learning_rate": 1.3721284911151522e-05, "loss": 0.0235, "step": 10336 }, { "epoch": 9.939423076923077, "grad_norm": 1.9179997444152832, "learning_rate": 1.3720128670023848e-05, "loss": 0.0142, "step": 10337 }, { "epoch": 9.940384615384616, "grad_norm": 1.2980294227600098, "learning_rate": 1.3718972371170698e-05, "loss": 0.0081, "step": 10338 }, { "epoch": 9.941346153846155, "grad_norm": 2.569380760192871, "learning_rate": 1.371781601461001e-05, "loss": 0.0276, "step": 10339 }, { "epoch": 9.942307692307692, "grad_norm": 5.05878210067749, "learning_rate": 1.3716659600359728e-05, "loss": 0.0636, "step": 10340 }, { "epoch": 9.94326923076923, "grad_norm": 3.2953431606292725, "learning_rate": 1.3715503128437799e-05, "loss": 0.044, "step": 10341 }, { "epoch": 9.944230769230769, "grad_norm": 0.413082093000412, "learning_rate": 1.3714346598862168e-05, "loss": 0.0016, "step": 10342 }, { "epoch": 9.945192307692308, "grad_norm": 3.1831793785095215, "learning_rate": 1.3713190011650774e-05, "loss": 0.1061, "step": 10343 }, { "epoch": 9.946153846153846, "grad_norm": 2.9177327156066895, "learning_rate": 1.3712033366821574e-05, "loss": 0.0343, "step": 10344 }, { "epoch": 9.947115384615385, "grad_norm": 1.0766524076461792, "learning_rate": 1.3710876664392507e-05, "loss": 0.0081, "step": 10345 }, { "epoch": 9.948076923076924, "grad_norm": 1.5663859844207764, "learning_rate": 1.3709719904381529e-05, "loss": 0.0134, "step": 10346 }, { "epoch": 9.94903846153846, "grad_norm": 3.8489489555358887, "learning_rate": 1.3708563086806583e-05, "loss": 0.0688, "step": 10347 }, { "epoch": 9.95, "grad_norm": 1.1851571798324585, "learning_rate": 1.3707406211685624e-05, "loss": 0.0117, "step": 10348 }, { "epoch": 9.950961538461538, "grad_norm": 2.0178351402282715, "learning_rate": 1.37062492790366e-05, "loss": 0.0241, "step": 10349 }, { "epoch": 9.951923076923077, "grad_norm": 5.532429218292236, "learning_rate": 1.3705092288877468e-05, "loss": 0.0367, "step": 10350 }, { "epoch": 9.952884615384615, "grad_norm": 5.118195533752441, "learning_rate": 1.3703935241226174e-05, "loss": 0.1265, "step": 10351 }, { "epoch": 9.953846153846154, "grad_norm": 2.42004132270813, "learning_rate": 1.370277813610068e-05, "loss": 0.025, "step": 10352 }, { "epoch": 9.954807692307693, "grad_norm": 1.297560691833496, "learning_rate": 1.3701620973518935e-05, "loss": 0.0065, "step": 10353 }, { "epoch": 9.955769230769231, "grad_norm": 3.216228485107422, "learning_rate": 1.3700463753498896e-05, "loss": 0.0133, "step": 10354 }, { "epoch": 9.95673076923077, "grad_norm": 1.7102904319763184, "learning_rate": 1.3699306476058523e-05, "loss": 0.0122, "step": 10355 }, { "epoch": 9.957692307692307, "grad_norm": 3.6230363845825195, "learning_rate": 1.3698149141215767e-05, "loss": 0.1162, "step": 10356 }, { "epoch": 9.958653846153846, "grad_norm": 2.3777990341186523, "learning_rate": 1.3696991748988592e-05, "loss": 0.0205, "step": 10357 }, { "epoch": 9.959615384615384, "grad_norm": 0.9504661560058594, "learning_rate": 1.3695834299394957e-05, "loss": 0.0095, "step": 10358 }, { "epoch": 9.960576923076923, "grad_norm": 3.8782589435577393, "learning_rate": 1.369467679245282e-05, "loss": 0.0279, "step": 10359 }, { "epoch": 9.961538461538462, "grad_norm": 0.3936578929424286, "learning_rate": 1.3693519228180141e-05, "loss": 0.0029, "step": 10360 }, { "epoch": 9.9625, "grad_norm": 1.0194668769836426, "learning_rate": 1.3692361606594884e-05, "loss": 0.0046, "step": 10361 }, { "epoch": 9.963461538461539, "grad_norm": 1.0735645294189453, "learning_rate": 1.3691203927715013e-05, "loss": 0.0087, "step": 10362 }, { "epoch": 9.964423076923078, "grad_norm": 1.212430715560913, "learning_rate": 1.3690046191558492e-05, "loss": 0.0122, "step": 10363 }, { "epoch": 9.965384615384615, "grad_norm": 1.9715111255645752, "learning_rate": 1.3688888398143286e-05, "loss": 0.0291, "step": 10364 }, { "epoch": 9.966346153846153, "grad_norm": 3.20939302444458, "learning_rate": 1.3687730547487352e-05, "loss": 0.0715, "step": 10365 }, { "epoch": 9.967307692307692, "grad_norm": 0.3347283601760864, "learning_rate": 1.3686572639608668e-05, "loss": 0.0018, "step": 10366 }, { "epoch": 9.96826923076923, "grad_norm": 3.784778594970703, "learning_rate": 1.3685414674525197e-05, "loss": 0.049, "step": 10367 }, { "epoch": 9.96923076923077, "grad_norm": 1.5935343503952026, "learning_rate": 1.3684256652254906e-05, "loss": 0.0091, "step": 10368 }, { "epoch": 9.970192307692308, "grad_norm": 0.99251788854599, "learning_rate": 1.3683098572815762e-05, "loss": 0.0043, "step": 10369 }, { "epoch": 9.971153846153847, "grad_norm": 1.2002489566802979, "learning_rate": 1.368194043622574e-05, "loss": 0.0132, "step": 10370 }, { "epoch": 9.972115384615385, "grad_norm": 0.23525778949260712, "learning_rate": 1.368078224250281e-05, "loss": 0.0025, "step": 10371 }, { "epoch": 9.973076923076922, "grad_norm": 1.3025336265563965, "learning_rate": 1.3679623991664941e-05, "loss": 0.0088, "step": 10372 }, { "epoch": 9.974038461538461, "grad_norm": 0.8867038488388062, "learning_rate": 1.3678465683730108e-05, "loss": 0.0039, "step": 10373 }, { "epoch": 9.975, "grad_norm": 1.3405227661132812, "learning_rate": 1.3677307318716285e-05, "loss": 0.0214, "step": 10374 }, { "epoch": 9.975961538461538, "grad_norm": 2.886491298675537, "learning_rate": 1.3676148896641443e-05, "loss": 0.0539, "step": 10375 }, { "epoch": 9.976923076923077, "grad_norm": 1.7641066312789917, "learning_rate": 1.367499041752356e-05, "loss": 0.0149, "step": 10376 }, { "epoch": 9.977884615384616, "grad_norm": 1.2881535291671753, "learning_rate": 1.3673831881380612e-05, "loss": 0.008, "step": 10377 }, { "epoch": 9.978846153846154, "grad_norm": 0.7298580408096313, "learning_rate": 1.3672673288230579e-05, "loss": 0.0058, "step": 10378 }, { "epoch": 9.979807692307693, "grad_norm": 2.7589941024780273, "learning_rate": 1.367151463809143e-05, "loss": 0.0282, "step": 10379 }, { "epoch": 9.98076923076923, "grad_norm": 2.399019718170166, "learning_rate": 1.3670355930981152e-05, "loss": 0.0432, "step": 10380 }, { "epoch": 9.981730769230769, "grad_norm": 5.6566033363342285, "learning_rate": 1.3669197166917723e-05, "loss": 0.0862, "step": 10381 }, { "epoch": 9.982692307692307, "grad_norm": 0.6403483152389526, "learning_rate": 1.3668038345919124e-05, "loss": 0.0046, "step": 10382 }, { "epoch": 9.983653846153846, "grad_norm": 0.6883722543716431, "learning_rate": 1.366687946800333e-05, "loss": 0.0039, "step": 10383 }, { "epoch": 9.984615384615385, "grad_norm": 1.9361852407455444, "learning_rate": 1.3665720533188333e-05, "loss": 0.0423, "step": 10384 }, { "epoch": 9.985576923076923, "grad_norm": 0.320564866065979, "learning_rate": 1.3664561541492113e-05, "loss": 0.0029, "step": 10385 }, { "epoch": 9.986538461538462, "grad_norm": 1.3235557079315186, "learning_rate": 1.3663402492932654e-05, "loss": 0.014, "step": 10386 }, { "epoch": 9.9875, "grad_norm": 1.9731953144073486, "learning_rate": 1.3662243387527936e-05, "loss": 0.0096, "step": 10387 }, { "epoch": 9.98846153846154, "grad_norm": 2.8144986629486084, "learning_rate": 1.3661084225295951e-05, "loss": 0.0218, "step": 10388 }, { "epoch": 9.989423076923076, "grad_norm": 1.9305181503295898, "learning_rate": 1.3659925006254686e-05, "loss": 0.0247, "step": 10389 }, { "epoch": 9.990384615384615, "grad_norm": 1.5507439374923706, "learning_rate": 1.3658765730422126e-05, "loss": 0.0095, "step": 10390 }, { "epoch": 9.991346153846154, "grad_norm": 1.919884204864502, "learning_rate": 1.3657606397816257e-05, "loss": 0.0065, "step": 10391 }, { "epoch": 9.992307692307692, "grad_norm": 1.6118367910385132, "learning_rate": 1.3656447008455073e-05, "loss": 0.0089, "step": 10392 }, { "epoch": 9.993269230769231, "grad_norm": 1.0025190114974976, "learning_rate": 1.3655287562356565e-05, "loss": 0.0071, "step": 10393 }, { "epoch": 9.99423076923077, "grad_norm": 1.2592296600341797, "learning_rate": 1.365412805953872e-05, "loss": 0.0136, "step": 10394 }, { "epoch": 9.995192307692308, "grad_norm": 3.0085511207580566, "learning_rate": 1.3652968500019534e-05, "loss": 0.0684, "step": 10395 }, { "epoch": 9.996153846153845, "grad_norm": 0.222737118601799, "learning_rate": 1.3651808883817e-05, "loss": 0.0019, "step": 10396 }, { "epoch": 9.997115384615384, "grad_norm": 1.8999879360198975, "learning_rate": 1.3650649210949105e-05, "loss": 0.0239, "step": 10397 }, { "epoch": 9.998076923076923, "grad_norm": 2.4935638904571533, "learning_rate": 1.3649489481433851e-05, "loss": 0.0281, "step": 10398 }, { "epoch": 9.999038461538461, "grad_norm": 1.6602909564971924, "learning_rate": 1.3648329695289233e-05, "loss": 0.0121, "step": 10399 }, { "epoch": 10.0, "grad_norm": 0.2728170156478882, "learning_rate": 1.3647169852533244e-05, "loss": 0.0015, "step": 10400 }, { "epoch": 10.000961538461539, "grad_norm": 0.6165457367897034, "learning_rate": 1.3646009953183883e-05, "loss": 0.0039, "step": 10401 }, { "epoch": 10.001923076923077, "grad_norm": 0.6156075596809387, "learning_rate": 1.364484999725915e-05, "loss": 0.0046, "step": 10402 }, { "epoch": 10.002884615384616, "grad_norm": 0.1462361365556717, "learning_rate": 1.3643689984777041e-05, "loss": 0.0009, "step": 10403 }, { "epoch": 10.003846153846155, "grad_norm": 0.11907120794057846, "learning_rate": 1.364252991575556e-05, "loss": 0.0008, "step": 10404 }, { "epoch": 10.004807692307692, "grad_norm": 3.5850634574890137, "learning_rate": 1.3641369790212701e-05, "loss": 0.0405, "step": 10405 }, { "epoch": 10.00576923076923, "grad_norm": 3.392730236053467, "learning_rate": 1.3640209608166475e-05, "loss": 0.0339, "step": 10406 }, { "epoch": 10.006730769230769, "grad_norm": 0.7606573104858398, "learning_rate": 1.3639049369634878e-05, "loss": 0.0058, "step": 10407 }, { "epoch": 10.007692307692308, "grad_norm": 1.4240977764129639, "learning_rate": 1.3637889074635915e-05, "loss": 0.0134, "step": 10408 }, { "epoch": 10.008653846153846, "grad_norm": 0.029674500226974487, "learning_rate": 1.3636728723187589e-05, "loss": 0.0004, "step": 10409 }, { "epoch": 10.009615384615385, "grad_norm": 2.1508219242095947, "learning_rate": 1.363556831530791e-05, "loss": 0.0235, "step": 10410 }, { "epoch": 10.010576923076924, "grad_norm": 1.2370084524154663, "learning_rate": 1.363440785101488e-05, "loss": 0.0047, "step": 10411 }, { "epoch": 10.011538461538462, "grad_norm": 1.6880048513412476, "learning_rate": 1.3633247330326504e-05, "loss": 0.0296, "step": 10412 }, { "epoch": 10.0125, "grad_norm": 1.2994753122329712, "learning_rate": 1.3632086753260796e-05, "loss": 0.0059, "step": 10413 }, { "epoch": 10.013461538461538, "grad_norm": 1.8939086198806763, "learning_rate": 1.3630926119835761e-05, "loss": 0.0272, "step": 10414 }, { "epoch": 10.014423076923077, "grad_norm": 2.166172742843628, "learning_rate": 1.362976543006941e-05, "loss": 0.0334, "step": 10415 }, { "epoch": 10.015384615384615, "grad_norm": 0.7519107460975647, "learning_rate": 1.3628604683979752e-05, "loss": 0.0046, "step": 10416 }, { "epoch": 10.016346153846154, "grad_norm": 1.1990140676498413, "learning_rate": 1.3627443881584799e-05, "loss": 0.007, "step": 10417 }, { "epoch": 10.017307692307693, "grad_norm": 0.43576186895370483, "learning_rate": 1.3626283022902562e-05, "loss": 0.0025, "step": 10418 }, { "epoch": 10.018269230769231, "grad_norm": 2.4472079277038574, "learning_rate": 1.3625122107951059e-05, "loss": 0.0099, "step": 10419 }, { "epoch": 10.01923076923077, "grad_norm": 1.0411306619644165, "learning_rate": 1.3623961136748296e-05, "loss": 0.0117, "step": 10420 }, { "epoch": 10.020192307692307, "grad_norm": 3.1166107654571533, "learning_rate": 1.3622800109312295e-05, "loss": 0.1401, "step": 10421 }, { "epoch": 10.021153846153846, "grad_norm": 0.5911391377449036, "learning_rate": 1.3621639025661072e-05, "loss": 0.0037, "step": 10422 }, { "epoch": 10.022115384615384, "grad_norm": 1.858540415763855, "learning_rate": 1.3620477885812635e-05, "loss": 0.0127, "step": 10423 }, { "epoch": 10.023076923076923, "grad_norm": 2.5614805221557617, "learning_rate": 1.361931668978501e-05, "loss": 0.0315, "step": 10424 }, { "epoch": 10.024038461538462, "grad_norm": 0.3004757761955261, "learning_rate": 1.3618155437596212e-05, "loss": 0.001, "step": 10425 }, { "epoch": 10.025, "grad_norm": 0.1895766705274582, "learning_rate": 1.3616994129264262e-05, "loss": 0.0012, "step": 10426 }, { "epoch": 10.025961538461539, "grad_norm": 2.8134329319000244, "learning_rate": 1.3615832764807178e-05, "loss": 0.0135, "step": 10427 }, { "epoch": 10.026923076923078, "grad_norm": 0.0813920870423317, "learning_rate": 1.3614671344242983e-05, "loss": 0.0004, "step": 10428 }, { "epoch": 10.027884615384615, "grad_norm": 2.78471302986145, "learning_rate": 1.3613509867589696e-05, "loss": 0.0421, "step": 10429 }, { "epoch": 10.028846153846153, "grad_norm": 7.43936014175415, "learning_rate": 1.3612348334865341e-05, "loss": 0.0448, "step": 10430 }, { "epoch": 10.029807692307692, "grad_norm": 2.0382285118103027, "learning_rate": 1.3611186746087941e-05, "loss": 0.0129, "step": 10431 }, { "epoch": 10.03076923076923, "grad_norm": 2.296499729156494, "learning_rate": 1.3610025101275523e-05, "loss": 0.0188, "step": 10432 }, { "epoch": 10.03173076923077, "grad_norm": 2.855323553085327, "learning_rate": 1.3608863400446113e-05, "loss": 0.0135, "step": 10433 }, { "epoch": 10.032692307692308, "grad_norm": 1.6603225469589233, "learning_rate": 1.3607701643617732e-05, "loss": 0.0195, "step": 10434 }, { "epoch": 10.033653846153847, "grad_norm": 1.4756627082824707, "learning_rate": 1.360653983080841e-05, "loss": 0.0262, "step": 10435 }, { "epoch": 10.034615384615385, "grad_norm": 1.1965160369873047, "learning_rate": 1.3605377962036176e-05, "loss": 0.0121, "step": 10436 }, { "epoch": 10.035576923076922, "grad_norm": 0.25041988492012024, "learning_rate": 1.360421603731906e-05, "loss": 0.0014, "step": 10437 }, { "epoch": 10.036538461538461, "grad_norm": 0.10654015094041824, "learning_rate": 1.3603054056675084e-05, "loss": 0.001, "step": 10438 }, { "epoch": 10.0375, "grad_norm": 1.9576505422592163, "learning_rate": 1.3601892020122287e-05, "loss": 0.0203, "step": 10439 }, { "epoch": 10.038461538461538, "grad_norm": 2.779034376144409, "learning_rate": 1.3600729927678698e-05, "loss": 0.0409, "step": 10440 }, { "epoch": 10.039423076923077, "grad_norm": 4.256453514099121, "learning_rate": 1.3599567779362347e-05, "loss": 0.0516, "step": 10441 }, { "epoch": 10.040384615384616, "grad_norm": 1.841712474822998, "learning_rate": 1.3598405575191267e-05, "loss": 0.0062, "step": 10442 }, { "epoch": 10.041346153846154, "grad_norm": 0.038142140954732895, "learning_rate": 1.3597243315183498e-05, "loss": 0.0003, "step": 10443 }, { "epoch": 10.042307692307693, "grad_norm": 0.9551611542701721, "learning_rate": 1.3596080999357066e-05, "loss": 0.0061, "step": 10444 }, { "epoch": 10.04326923076923, "grad_norm": 1.2644222974777222, "learning_rate": 1.3594918627730014e-05, "loss": 0.0118, "step": 10445 }, { "epoch": 10.044230769230769, "grad_norm": 1.8864188194274902, "learning_rate": 1.3593756200320373e-05, "loss": 0.0171, "step": 10446 }, { "epoch": 10.045192307692307, "grad_norm": 3.7519989013671875, "learning_rate": 1.3592593717146188e-05, "loss": 0.0833, "step": 10447 }, { "epoch": 10.046153846153846, "grad_norm": 1.2379423379898071, "learning_rate": 1.3591431178225491e-05, "loss": 0.0047, "step": 10448 }, { "epoch": 10.047115384615385, "grad_norm": 0.894464373588562, "learning_rate": 1.3590268583576322e-05, "loss": 0.0032, "step": 10449 }, { "epoch": 10.048076923076923, "grad_norm": 0.9085459113121033, "learning_rate": 1.3589105933216722e-05, "loss": 0.004, "step": 10450 }, { "epoch": 10.049038461538462, "grad_norm": 0.30037984251976013, "learning_rate": 1.3587943227164733e-05, "loss": 0.0024, "step": 10451 }, { "epoch": 10.05, "grad_norm": 3.940365791320801, "learning_rate": 1.3586780465438394e-05, "loss": 0.0235, "step": 10452 }, { "epoch": 10.050961538461538, "grad_norm": 2.1758055686950684, "learning_rate": 1.3585617648055749e-05, "loss": 0.0444, "step": 10453 }, { "epoch": 10.051923076923076, "grad_norm": 0.569850504398346, "learning_rate": 1.3584454775034843e-05, "loss": 0.0036, "step": 10454 }, { "epoch": 10.052884615384615, "grad_norm": 2.3443384170532227, "learning_rate": 1.3583291846393718e-05, "loss": 0.0291, "step": 10455 }, { "epoch": 10.053846153846154, "grad_norm": 1.336169719696045, "learning_rate": 1.3582128862150424e-05, "loss": 0.0137, "step": 10456 }, { "epoch": 10.054807692307692, "grad_norm": 3.2111053466796875, "learning_rate": 1.3580965822322997e-05, "loss": 0.0272, "step": 10457 }, { "epoch": 10.055769230769231, "grad_norm": 2.966266393661499, "learning_rate": 1.3579802726929497e-05, "loss": 0.0703, "step": 10458 }, { "epoch": 10.05673076923077, "grad_norm": 0.9556804299354553, "learning_rate": 1.357863957598796e-05, "loss": 0.0024, "step": 10459 }, { "epoch": 10.057692307692308, "grad_norm": 4.352133750915527, "learning_rate": 1.3577476369516442e-05, "loss": 0.0689, "step": 10460 }, { "epoch": 10.058653846153845, "grad_norm": 0.5865584015846252, "learning_rate": 1.357631310753299e-05, "loss": 0.0025, "step": 10461 }, { "epoch": 10.059615384615384, "grad_norm": 0.7110732793807983, "learning_rate": 1.3575149790055657e-05, "loss": 0.005, "step": 10462 }, { "epoch": 10.060576923076923, "grad_norm": 3.1266775131225586, "learning_rate": 1.3573986417102492e-05, "loss": 0.0193, "step": 10463 }, { "epoch": 10.061538461538461, "grad_norm": 1.143134593963623, "learning_rate": 1.3572822988691545e-05, "loss": 0.0052, "step": 10464 }, { "epoch": 10.0625, "grad_norm": 0.0695209726691246, "learning_rate": 1.3571659504840873e-05, "loss": 0.0005, "step": 10465 }, { "epoch": 10.063461538461539, "grad_norm": 0.43671396374702454, "learning_rate": 1.3570495965568528e-05, "loss": 0.0017, "step": 10466 }, { "epoch": 10.064423076923077, "grad_norm": 0.18479017913341522, "learning_rate": 1.3569332370892565e-05, "loss": 0.0014, "step": 10467 }, { "epoch": 10.065384615384616, "grad_norm": 2.2804646492004395, "learning_rate": 1.356816872083104e-05, "loss": 0.0081, "step": 10468 }, { "epoch": 10.066346153846155, "grad_norm": 0.3198689818382263, "learning_rate": 1.3567005015402006e-05, "loss": 0.0022, "step": 10469 }, { "epoch": 10.067307692307692, "grad_norm": 6.525513172149658, "learning_rate": 1.3565841254623527e-05, "loss": 0.0889, "step": 10470 }, { "epoch": 10.06826923076923, "grad_norm": 0.484210729598999, "learning_rate": 1.3564677438513654e-05, "loss": 0.0025, "step": 10471 }, { "epoch": 10.069230769230769, "grad_norm": 2.090757131576538, "learning_rate": 1.356351356709045e-05, "loss": 0.0114, "step": 10472 }, { "epoch": 10.070192307692308, "grad_norm": 2.5238587856292725, "learning_rate": 1.3562349640371975e-05, "loss": 0.0265, "step": 10473 }, { "epoch": 10.071153846153846, "grad_norm": 0.865667462348938, "learning_rate": 1.356118565837629e-05, "loss": 0.0044, "step": 10474 }, { "epoch": 10.072115384615385, "grad_norm": 0.340270072221756, "learning_rate": 1.3560021621121454e-05, "loss": 0.0027, "step": 10475 }, { "epoch": 10.073076923076924, "grad_norm": 3.8076207637786865, "learning_rate": 1.3558857528625531e-05, "loss": 0.0456, "step": 10476 }, { "epoch": 10.074038461538462, "grad_norm": 1.9844774007797241, "learning_rate": 1.3557693380906588e-05, "loss": 0.0138, "step": 10477 }, { "epoch": 10.075, "grad_norm": 0.19179148972034454, "learning_rate": 1.355652917798268e-05, "loss": 0.0011, "step": 10478 }, { "epoch": 10.075961538461538, "grad_norm": 0.8860924243927002, "learning_rate": 1.3555364919871879e-05, "loss": 0.0035, "step": 10479 }, { "epoch": 10.076923076923077, "grad_norm": 2.0048139095306396, "learning_rate": 1.355420060659225e-05, "loss": 0.0652, "step": 10480 }, { "epoch": 10.077884615384615, "grad_norm": 0.9475020170211792, "learning_rate": 1.355303623816186e-05, "loss": 0.0061, "step": 10481 }, { "epoch": 10.078846153846154, "grad_norm": 3.303692579269409, "learning_rate": 1.3551871814598774e-05, "loss": 0.071, "step": 10482 }, { "epoch": 10.079807692307693, "grad_norm": 1.4219284057617188, "learning_rate": 1.3550707335921063e-05, "loss": 0.005, "step": 10483 }, { "epoch": 10.080769230769231, "grad_norm": 1.3693408966064453, "learning_rate": 1.3549542802146796e-05, "loss": 0.0097, "step": 10484 }, { "epoch": 10.08173076923077, "grad_norm": 1.915521502494812, "learning_rate": 1.3548378213294042e-05, "loss": 0.0257, "step": 10485 }, { "epoch": 10.082692307692307, "grad_norm": 4.833156108856201, "learning_rate": 1.354721356938087e-05, "loss": 0.1762, "step": 10486 }, { "epoch": 10.083653846153846, "grad_norm": 4.767383575439453, "learning_rate": 1.3546048870425356e-05, "loss": 0.0459, "step": 10487 }, { "epoch": 10.084615384615384, "grad_norm": 0.09910505264997482, "learning_rate": 1.3544884116445574e-05, "loss": 0.0011, "step": 10488 }, { "epoch": 10.085576923076923, "grad_norm": 0.038009241223335266, "learning_rate": 1.3543719307459591e-05, "loss": 0.0004, "step": 10489 }, { "epoch": 10.086538461538462, "grad_norm": 2.3994054794311523, "learning_rate": 1.3542554443485484e-05, "loss": 0.0097, "step": 10490 }, { "epoch": 10.0875, "grad_norm": 0.7769547700881958, "learning_rate": 1.3541389524541333e-05, "loss": 0.0038, "step": 10491 }, { "epoch": 10.088461538461539, "grad_norm": 0.07530523836612701, "learning_rate": 1.354022455064521e-05, "loss": 0.0008, "step": 10492 }, { "epoch": 10.089423076923078, "grad_norm": 1.3866525888442993, "learning_rate": 1.353905952181519e-05, "loss": 0.0062, "step": 10493 }, { "epoch": 10.090384615384615, "grad_norm": 1.0790842771530151, "learning_rate": 1.3537894438069355e-05, "loss": 0.0088, "step": 10494 }, { "epoch": 10.091346153846153, "grad_norm": 0.38854649662971497, "learning_rate": 1.3536729299425782e-05, "loss": 0.0014, "step": 10495 }, { "epoch": 10.092307692307692, "grad_norm": 1.9856302738189697, "learning_rate": 1.353556410590255e-05, "loss": 0.0153, "step": 10496 }, { "epoch": 10.09326923076923, "grad_norm": 0.8972845673561096, "learning_rate": 1.353439885751774e-05, "loss": 0.0033, "step": 10497 }, { "epoch": 10.09423076923077, "grad_norm": 1.5520631074905396, "learning_rate": 1.3533233554289433e-05, "loss": 0.0125, "step": 10498 }, { "epoch": 10.095192307692308, "grad_norm": 0.18643908202648163, "learning_rate": 1.353206819623571e-05, "loss": 0.002, "step": 10499 }, { "epoch": 10.096153846153847, "grad_norm": 0.2756754755973816, "learning_rate": 1.3530902783374657e-05, "loss": 0.0018, "step": 10500 }, { "epoch": 10.097115384615385, "grad_norm": 0.9666686058044434, "learning_rate": 1.3529737315724353e-05, "loss": 0.0261, "step": 10501 }, { "epoch": 10.098076923076922, "grad_norm": 0.2555798590183258, "learning_rate": 1.352857179330289e-05, "loss": 0.0016, "step": 10502 }, { "epoch": 10.099038461538461, "grad_norm": 0.41470131278038025, "learning_rate": 1.3527406216128344e-05, "loss": 0.003, "step": 10503 }, { "epoch": 10.1, "grad_norm": 1.2466380596160889, "learning_rate": 1.3526240584218809e-05, "loss": 0.0044, "step": 10504 }, { "epoch": 10.100961538461538, "grad_norm": 0.09032531082630157, "learning_rate": 1.352507489759237e-05, "loss": 0.001, "step": 10505 }, { "epoch": 10.101923076923077, "grad_norm": 2.7861905097961426, "learning_rate": 1.3523909156267114e-05, "loss": 0.0332, "step": 10506 }, { "epoch": 10.102884615384616, "grad_norm": 2.3072550296783447, "learning_rate": 1.352274336026113e-05, "loss": 0.0129, "step": 10507 }, { "epoch": 10.103846153846154, "grad_norm": 0.04958955571055412, "learning_rate": 1.3521577509592507e-05, "loss": 0.0005, "step": 10508 }, { "epoch": 10.104807692307693, "grad_norm": 0.49483221769332886, "learning_rate": 1.3520411604279339e-05, "loss": 0.004, "step": 10509 }, { "epoch": 10.10576923076923, "grad_norm": 0.026659883558750153, "learning_rate": 1.3519245644339715e-05, "loss": 0.0003, "step": 10510 }, { "epoch": 10.106730769230769, "grad_norm": 3.2261574268341064, "learning_rate": 1.3518079629791725e-05, "loss": 0.1462, "step": 10511 }, { "epoch": 10.107692307692307, "grad_norm": 3.1942551136016846, "learning_rate": 1.351691356065347e-05, "loss": 0.0471, "step": 10512 }, { "epoch": 10.108653846153846, "grad_norm": 1.8162496089935303, "learning_rate": 1.3515747436943034e-05, "loss": 0.0146, "step": 10513 }, { "epoch": 10.109615384615385, "grad_norm": 2.845942497253418, "learning_rate": 1.3514581258678518e-05, "loss": 0.0156, "step": 10514 }, { "epoch": 10.110576923076923, "grad_norm": 1.417120337486267, "learning_rate": 1.3513415025878015e-05, "loss": 0.0074, "step": 10515 }, { "epoch": 10.111538461538462, "grad_norm": 2.23523211479187, "learning_rate": 1.3512248738559622e-05, "loss": 0.0412, "step": 10516 }, { "epoch": 10.1125, "grad_norm": 0.14403778314590454, "learning_rate": 1.351108239674144e-05, "loss": 0.0014, "step": 10517 }, { "epoch": 10.113461538461538, "grad_norm": 3.07590389251709, "learning_rate": 1.3509916000441561e-05, "loss": 0.018, "step": 10518 }, { "epoch": 10.114423076923076, "grad_norm": 2.5836539268493652, "learning_rate": 1.3508749549678086e-05, "loss": 0.0259, "step": 10519 }, { "epoch": 10.115384615384615, "grad_norm": 1.3616772890090942, "learning_rate": 1.3507583044469119e-05, "loss": 0.013, "step": 10520 }, { "epoch": 10.116346153846154, "grad_norm": 0.131001815199852, "learning_rate": 1.3506416484832759e-05, "loss": 0.001, "step": 10521 }, { "epoch": 10.117307692307692, "grad_norm": 0.3326416015625, "learning_rate": 1.3505249870787103e-05, "loss": 0.0021, "step": 10522 }, { "epoch": 10.118269230769231, "grad_norm": 0.18070223927497864, "learning_rate": 1.3504083202350257e-05, "loss": 0.0009, "step": 10523 }, { "epoch": 10.11923076923077, "grad_norm": 1.3590569496154785, "learning_rate": 1.3502916479540327e-05, "loss": 0.0191, "step": 10524 }, { "epoch": 10.120192307692308, "grad_norm": 3.3979763984680176, "learning_rate": 1.3501749702375413e-05, "loss": 0.0351, "step": 10525 }, { "epoch": 10.121153846153845, "grad_norm": 0.06415224075317383, "learning_rate": 1.3500582870873618e-05, "loss": 0.0005, "step": 10526 }, { "epoch": 10.122115384615384, "grad_norm": 3.1964223384857178, "learning_rate": 1.3499415985053055e-05, "loss": 0.0141, "step": 10527 }, { "epoch": 10.123076923076923, "grad_norm": 2.310044765472412, "learning_rate": 1.3498249044931826e-05, "loss": 0.0366, "step": 10528 }, { "epoch": 10.124038461538461, "grad_norm": 0.6194199323654175, "learning_rate": 1.3497082050528038e-05, "loss": 0.0027, "step": 10529 }, { "epoch": 10.125, "grad_norm": 2.5855367183685303, "learning_rate": 1.3495915001859798e-05, "loss": 0.0092, "step": 10530 }, { "epoch": 10.125961538461539, "grad_norm": 0.13029542565345764, "learning_rate": 1.3494747898945221e-05, "loss": 0.001, "step": 10531 }, { "epoch": 10.126923076923077, "grad_norm": 1.6370583772659302, "learning_rate": 1.3493580741802411e-05, "loss": 0.0117, "step": 10532 }, { "epoch": 10.127884615384616, "grad_norm": 1.2199233770370483, "learning_rate": 1.3492413530449484e-05, "loss": 0.0099, "step": 10533 }, { "epoch": 10.128846153846155, "grad_norm": 1.9689292907714844, "learning_rate": 1.349124626490455e-05, "loss": 0.0112, "step": 10534 }, { "epoch": 10.129807692307692, "grad_norm": 0.17079564929008484, "learning_rate": 1.3490078945185719e-05, "loss": 0.001, "step": 10535 }, { "epoch": 10.13076923076923, "grad_norm": 0.7929040193557739, "learning_rate": 1.3488911571311104e-05, "loss": 0.0045, "step": 10536 }, { "epoch": 10.131730769230769, "grad_norm": 0.25180819630622864, "learning_rate": 1.3487744143298822e-05, "loss": 0.002, "step": 10537 }, { "epoch": 10.132692307692308, "grad_norm": 0.2615458071231842, "learning_rate": 1.348657666116699e-05, "loss": 0.001, "step": 10538 }, { "epoch": 10.133653846153846, "grad_norm": 0.20254942774772644, "learning_rate": 1.348540912493372e-05, "loss": 0.0015, "step": 10539 }, { "epoch": 10.134615384615385, "grad_norm": 0.15779826045036316, "learning_rate": 1.3484241534617131e-05, "loss": 0.0011, "step": 10540 }, { "epoch": 10.135576923076924, "grad_norm": 1.263832449913025, "learning_rate": 1.3483073890235337e-05, "loss": 0.0085, "step": 10541 }, { "epoch": 10.136538461538462, "grad_norm": 0.3230067789554596, "learning_rate": 1.3481906191806459e-05, "loss": 0.0014, "step": 10542 }, { "epoch": 10.1375, "grad_norm": 1.7707226276397705, "learning_rate": 1.3480738439348617e-05, "loss": 0.0279, "step": 10543 }, { "epoch": 10.138461538461538, "grad_norm": 2.530015707015991, "learning_rate": 1.347957063287993e-05, "loss": 0.0222, "step": 10544 }, { "epoch": 10.139423076923077, "grad_norm": 2.836594581604004, "learning_rate": 1.347840277241852e-05, "loss": 0.0359, "step": 10545 }, { "epoch": 10.140384615384615, "grad_norm": 3.1346118450164795, "learning_rate": 1.3477234857982508e-05, "loss": 0.0906, "step": 10546 }, { "epoch": 10.141346153846154, "grad_norm": 0.05818786844611168, "learning_rate": 1.3476066889590015e-05, "loss": 0.0006, "step": 10547 }, { "epoch": 10.142307692307693, "grad_norm": 0.23560698330402374, "learning_rate": 1.3474898867259167e-05, "loss": 0.0016, "step": 10548 }, { "epoch": 10.143269230769231, "grad_norm": 0.768444836139679, "learning_rate": 1.3473730791008089e-05, "loss": 0.0032, "step": 10549 }, { "epoch": 10.14423076923077, "grad_norm": 1.1141351461410522, "learning_rate": 1.3472562660854902e-05, "loss": 0.0032, "step": 10550 }, { "epoch": 10.145192307692307, "grad_norm": 5.562485218048096, "learning_rate": 1.3471394476817734e-05, "loss": 0.0343, "step": 10551 }, { "epoch": 10.146153846153846, "grad_norm": 2.148952007293701, "learning_rate": 1.3470226238914714e-05, "loss": 0.0222, "step": 10552 }, { "epoch": 10.147115384615384, "grad_norm": 2.656054973602295, "learning_rate": 1.3469057947163968e-05, "loss": 0.1009, "step": 10553 }, { "epoch": 10.148076923076923, "grad_norm": 0.1536068171262741, "learning_rate": 1.3467889601583623e-05, "loss": 0.0012, "step": 10554 }, { "epoch": 10.149038461538462, "grad_norm": 0.039168715476989746, "learning_rate": 1.3466721202191808e-05, "loss": 0.0004, "step": 10555 }, { "epoch": 10.15, "grad_norm": 0.46185773611068726, "learning_rate": 1.3465552749006658e-05, "loss": 0.0018, "step": 10556 }, { "epoch": 10.150961538461539, "grad_norm": 0.3386859595775604, "learning_rate": 1.34643842420463e-05, "loss": 0.0018, "step": 10557 }, { "epoch": 10.151923076923078, "grad_norm": 1.5671544075012207, "learning_rate": 1.3463215681328865e-05, "loss": 0.0055, "step": 10558 }, { "epoch": 10.152884615384615, "grad_norm": 0.12225480377674103, "learning_rate": 1.3462047066872485e-05, "loss": 0.0012, "step": 10559 }, { "epoch": 10.153846153846153, "grad_norm": 1.321330189704895, "learning_rate": 1.3460878398695301e-05, "loss": 0.0101, "step": 10560 }, { "epoch": 10.154807692307692, "grad_norm": 0.3433830142021179, "learning_rate": 1.3459709676815438e-05, "loss": 0.0019, "step": 10561 }, { "epoch": 10.15576923076923, "grad_norm": 2.1688785552978516, "learning_rate": 1.3458540901251036e-05, "loss": 0.0124, "step": 10562 }, { "epoch": 10.15673076923077, "grad_norm": 1.3348643779754639, "learning_rate": 1.345737207202023e-05, "loss": 0.0095, "step": 10563 }, { "epoch": 10.157692307692308, "grad_norm": 1.4254727363586426, "learning_rate": 1.3456203189141154e-05, "loss": 0.0139, "step": 10564 }, { "epoch": 10.158653846153847, "grad_norm": 2.9537503719329834, "learning_rate": 1.3455034252631953e-05, "loss": 0.0227, "step": 10565 }, { "epoch": 10.159615384615385, "grad_norm": 1.8433822393417358, "learning_rate": 1.3453865262510756e-05, "loss": 0.0145, "step": 10566 }, { "epoch": 10.160576923076922, "grad_norm": 1.0583852529525757, "learning_rate": 1.3452696218795712e-05, "loss": 0.006, "step": 10567 }, { "epoch": 10.161538461538461, "grad_norm": 3.624480724334717, "learning_rate": 1.3451527121504952e-05, "loss": 0.0498, "step": 10568 }, { "epoch": 10.1625, "grad_norm": 1.5023388862609863, "learning_rate": 1.3450357970656625e-05, "loss": 0.0152, "step": 10569 }, { "epoch": 10.163461538461538, "grad_norm": 3.320035219192505, "learning_rate": 1.3449188766268866e-05, "loss": 0.0446, "step": 10570 }, { "epoch": 10.164423076923077, "grad_norm": 4.804766654968262, "learning_rate": 1.3448019508359821e-05, "loss": 0.0378, "step": 10571 }, { "epoch": 10.165384615384616, "grad_norm": 0.9525381326675415, "learning_rate": 1.3446850196947633e-05, "loss": 0.0084, "step": 10572 }, { "epoch": 10.166346153846154, "grad_norm": 0.09761880338191986, "learning_rate": 1.3445680832050447e-05, "loss": 0.0009, "step": 10573 }, { "epoch": 10.167307692307693, "grad_norm": 2.8276000022888184, "learning_rate": 1.3444511413686407e-05, "loss": 0.0403, "step": 10574 }, { "epoch": 10.16826923076923, "grad_norm": 2.697850465774536, "learning_rate": 1.3443341941873662e-05, "loss": 0.0205, "step": 10575 }, { "epoch": 10.169230769230769, "grad_norm": 0.445995569229126, "learning_rate": 1.3442172416630355e-05, "loss": 0.0033, "step": 10576 }, { "epoch": 10.170192307692307, "grad_norm": 2.7488598823547363, "learning_rate": 1.344100283797463e-05, "loss": 0.0098, "step": 10577 }, { "epoch": 10.171153846153846, "grad_norm": 2.529858350753784, "learning_rate": 1.3439833205924646e-05, "loss": 0.0116, "step": 10578 }, { "epoch": 10.172115384615385, "grad_norm": 1.6921666860580444, "learning_rate": 1.3438663520498545e-05, "loss": 0.011, "step": 10579 }, { "epoch": 10.173076923076923, "grad_norm": 7.913054943084717, "learning_rate": 1.343749378171448e-05, "loss": 0.0418, "step": 10580 }, { "epoch": 10.174038461538462, "grad_norm": 4.531497478485107, "learning_rate": 1.3436323989590596e-05, "loss": 0.0111, "step": 10581 }, { "epoch": 10.175, "grad_norm": 2.282449722290039, "learning_rate": 1.343515414414505e-05, "loss": 0.0104, "step": 10582 }, { "epoch": 10.175961538461538, "grad_norm": 0.935416579246521, "learning_rate": 1.3433984245395997e-05, "loss": 0.0074, "step": 10583 }, { "epoch": 10.176923076923076, "grad_norm": 1.0883957147598267, "learning_rate": 1.3432814293361585e-05, "loss": 0.0065, "step": 10584 }, { "epoch": 10.177884615384615, "grad_norm": 0.5998119115829468, "learning_rate": 1.3431644288059968e-05, "loss": 0.0042, "step": 10585 }, { "epoch": 10.178846153846154, "grad_norm": 2.887223482131958, "learning_rate": 1.3430474229509307e-05, "loss": 0.0305, "step": 10586 }, { "epoch": 10.179807692307692, "grad_norm": 1.7320232391357422, "learning_rate": 1.3429304117727755e-05, "loss": 0.023, "step": 10587 }, { "epoch": 10.180769230769231, "grad_norm": 3.940688133239746, "learning_rate": 1.3428133952733465e-05, "loss": 0.0304, "step": 10588 }, { "epoch": 10.18173076923077, "grad_norm": 1.0619484186172485, "learning_rate": 1.3426963734544601e-05, "loss": 0.0186, "step": 10589 }, { "epoch": 10.182692307692308, "grad_norm": 0.5057142376899719, "learning_rate": 1.3425793463179315e-05, "loss": 0.0037, "step": 10590 }, { "epoch": 10.183653846153845, "grad_norm": 3.2103323936462402, "learning_rate": 1.342462313865577e-05, "loss": 0.0763, "step": 10591 }, { "epoch": 10.184615384615384, "grad_norm": 1.587591528892517, "learning_rate": 1.3423452760992125e-05, "loss": 0.0206, "step": 10592 }, { "epoch": 10.185576923076923, "grad_norm": 0.08804667741060257, "learning_rate": 1.3422282330206539e-05, "loss": 0.0008, "step": 10593 }, { "epoch": 10.186538461538461, "grad_norm": 1.9542020559310913, "learning_rate": 1.3421111846317178e-05, "loss": 0.0833, "step": 10594 }, { "epoch": 10.1875, "grad_norm": 1.5800211429595947, "learning_rate": 1.34199413093422e-05, "loss": 0.0133, "step": 10595 }, { "epoch": 10.188461538461539, "grad_norm": 0.1655915379524231, "learning_rate": 1.3418770719299771e-05, "loss": 0.0018, "step": 10596 }, { "epoch": 10.189423076923077, "grad_norm": 2.720080852508545, "learning_rate": 1.3417600076208057e-05, "loss": 0.1027, "step": 10597 }, { "epoch": 10.190384615384616, "grad_norm": 1.8409427404403687, "learning_rate": 1.3416429380085219e-05, "loss": 0.0169, "step": 10598 }, { "epoch": 10.191346153846155, "grad_norm": 1.2917206287384033, "learning_rate": 1.3415258630949422e-05, "loss": 0.0024, "step": 10599 }, { "epoch": 10.192307692307692, "grad_norm": 0.39985358715057373, "learning_rate": 1.3414087828818834e-05, "loss": 0.0031, "step": 10600 }, { "epoch": 10.19326923076923, "grad_norm": 1.532073974609375, "learning_rate": 1.3412916973711628e-05, "loss": 0.004, "step": 10601 }, { "epoch": 10.194230769230769, "grad_norm": 2.2042160034179688, "learning_rate": 1.3411746065645961e-05, "loss": 0.0103, "step": 10602 }, { "epoch": 10.195192307692308, "grad_norm": 1.5109158754348755, "learning_rate": 1.3410575104640014e-05, "loss": 0.0067, "step": 10603 }, { "epoch": 10.196153846153846, "grad_norm": 1.94215989112854, "learning_rate": 1.3409404090711948e-05, "loss": 0.0163, "step": 10604 }, { "epoch": 10.197115384615385, "grad_norm": 1.8819643259048462, "learning_rate": 1.340823302387994e-05, "loss": 0.0171, "step": 10605 }, { "epoch": 10.198076923076924, "grad_norm": 4.304013729095459, "learning_rate": 1.3407061904162156e-05, "loss": 0.0435, "step": 10606 }, { "epoch": 10.199038461538462, "grad_norm": 2.693150281906128, "learning_rate": 1.3405890731576772e-05, "loss": 0.0219, "step": 10607 }, { "epoch": 10.2, "grad_norm": 0.18995492160320282, "learning_rate": 1.3404719506141958e-05, "loss": 0.0023, "step": 10608 }, { "epoch": 10.200961538461538, "grad_norm": 1.6334599256515503, "learning_rate": 1.3403548227875892e-05, "loss": 0.0128, "step": 10609 }, { "epoch": 10.201923076923077, "grad_norm": 1.3369574546813965, "learning_rate": 1.3402376896796744e-05, "loss": 0.0079, "step": 10610 }, { "epoch": 10.202884615384615, "grad_norm": 0.6286884546279907, "learning_rate": 1.3401205512922697e-05, "loss": 0.005, "step": 10611 }, { "epoch": 10.203846153846154, "grad_norm": 1.4629930257797241, "learning_rate": 1.3400034076271922e-05, "loss": 0.0188, "step": 10612 }, { "epoch": 10.204807692307693, "grad_norm": 0.6037370562553406, "learning_rate": 1.3398862586862594e-05, "loss": 0.0027, "step": 10613 }, { "epoch": 10.205769230769231, "grad_norm": 0.9295338988304138, "learning_rate": 1.3397691044712895e-05, "loss": 0.0065, "step": 10614 }, { "epoch": 10.20673076923077, "grad_norm": 1.2232561111450195, "learning_rate": 1.3396519449841006e-05, "loss": 0.0101, "step": 10615 }, { "epoch": 10.207692307692307, "grad_norm": 0.7706010937690735, "learning_rate": 1.3395347802265102e-05, "loss": 0.0028, "step": 10616 }, { "epoch": 10.208653846153846, "grad_norm": 1.0735909938812256, "learning_rate": 1.3394176102003364e-05, "loss": 0.0178, "step": 10617 }, { "epoch": 10.209615384615384, "grad_norm": 0.18566346168518066, "learning_rate": 1.3393004349073976e-05, "loss": 0.0022, "step": 10618 }, { "epoch": 10.210576923076923, "grad_norm": 2.1284728050231934, "learning_rate": 1.339183254349512e-05, "loss": 0.0299, "step": 10619 }, { "epoch": 10.211538461538462, "grad_norm": 3.2244367599487305, "learning_rate": 1.3390660685284977e-05, "loss": 0.0185, "step": 10620 }, { "epoch": 10.2125, "grad_norm": 1.5744106769561768, "learning_rate": 1.3389488774461729e-05, "loss": 0.0094, "step": 10621 }, { "epoch": 10.213461538461539, "grad_norm": 0.24421177804470062, "learning_rate": 1.3388316811043567e-05, "loss": 0.0018, "step": 10622 }, { "epoch": 10.214423076923078, "grad_norm": 2.6455726623535156, "learning_rate": 1.338714479504867e-05, "loss": 0.0255, "step": 10623 }, { "epoch": 10.215384615384615, "grad_norm": 2.3087093830108643, "learning_rate": 1.3385972726495226e-05, "loss": 0.038, "step": 10624 }, { "epoch": 10.216346153846153, "grad_norm": 5.540505886077881, "learning_rate": 1.3384800605401426e-05, "loss": 0.0671, "step": 10625 }, { "epoch": 10.217307692307692, "grad_norm": 0.6604533195495605, "learning_rate": 1.3383628431785457e-05, "loss": 0.0057, "step": 10626 }, { "epoch": 10.21826923076923, "grad_norm": 0.7164644598960876, "learning_rate": 1.3382456205665502e-05, "loss": 0.005, "step": 10627 }, { "epoch": 10.21923076923077, "grad_norm": 2.4767870903015137, "learning_rate": 1.3381283927059751e-05, "loss": 0.0217, "step": 10628 }, { "epoch": 10.220192307692308, "grad_norm": 1.141347050666809, "learning_rate": 1.3380111595986403e-05, "loss": 0.0053, "step": 10629 }, { "epoch": 10.221153846153847, "grad_norm": 1.8909481763839722, "learning_rate": 1.3378939212463642e-05, "loss": 0.0173, "step": 10630 }, { "epoch": 10.222115384615385, "grad_norm": 0.4438680112361908, "learning_rate": 1.3377766776509661e-05, "loss": 0.0026, "step": 10631 }, { "epoch": 10.223076923076922, "grad_norm": 1.2778514623641968, "learning_rate": 1.3376594288142652e-05, "loss": 0.0073, "step": 10632 }, { "epoch": 10.224038461538461, "grad_norm": 1.5002800226211548, "learning_rate": 1.3375421747380812e-05, "loss": 0.0069, "step": 10633 }, { "epoch": 10.225, "grad_norm": 3.2817153930664062, "learning_rate": 1.3374249154242334e-05, "loss": 0.0298, "step": 10634 }, { "epoch": 10.225961538461538, "grad_norm": 2.046339273452759, "learning_rate": 1.3373076508745408e-05, "loss": 0.0105, "step": 10635 }, { "epoch": 10.226923076923077, "grad_norm": 1.9963281154632568, "learning_rate": 1.3371903810908239e-05, "loss": 0.0432, "step": 10636 }, { "epoch": 10.227884615384616, "grad_norm": 1.7166564464569092, "learning_rate": 1.3370731060749015e-05, "loss": 0.0076, "step": 10637 }, { "epoch": 10.228846153846154, "grad_norm": 0.2656538188457489, "learning_rate": 1.3369558258285941e-05, "loss": 0.0018, "step": 10638 }, { "epoch": 10.229807692307693, "grad_norm": 0.291738897562027, "learning_rate": 1.3368385403537212e-05, "loss": 0.0026, "step": 10639 }, { "epoch": 10.23076923076923, "grad_norm": 1.4178082942962646, "learning_rate": 1.336721249652103e-05, "loss": 0.0092, "step": 10640 }, { "epoch": 10.231730769230769, "grad_norm": 2.114107131958008, "learning_rate": 1.3366039537255589e-05, "loss": 0.0193, "step": 10641 }, { "epoch": 10.232692307692307, "grad_norm": 0.15809021890163422, "learning_rate": 1.3364866525759093e-05, "loss": 0.0022, "step": 10642 }, { "epoch": 10.233653846153846, "grad_norm": 0.21106603741645813, "learning_rate": 1.3363693462049747e-05, "loss": 0.001, "step": 10643 }, { "epoch": 10.234615384615385, "grad_norm": 3.988917350769043, "learning_rate": 1.336252034614575e-05, "loss": 0.0631, "step": 10644 }, { "epoch": 10.235576923076923, "grad_norm": 0.22731654345989227, "learning_rate": 1.3361347178065306e-05, "loss": 0.0013, "step": 10645 }, { "epoch": 10.236538461538462, "grad_norm": 0.6308984160423279, "learning_rate": 1.336017395782662e-05, "loss": 0.0031, "step": 10646 }, { "epoch": 10.2375, "grad_norm": 1.4808411598205566, "learning_rate": 1.3359000685447894e-05, "loss": 0.0047, "step": 10647 }, { "epoch": 10.238461538461538, "grad_norm": 0.34469538927078247, "learning_rate": 1.3357827360947337e-05, "loss": 0.0014, "step": 10648 }, { "epoch": 10.239423076923076, "grad_norm": 0.5186179876327515, "learning_rate": 1.3356653984343155e-05, "loss": 0.0036, "step": 10649 }, { "epoch": 10.240384615384615, "grad_norm": 1.6631683111190796, "learning_rate": 1.3355480555653552e-05, "loss": 0.0111, "step": 10650 }, { "epoch": 10.241346153846154, "grad_norm": 0.07521021366119385, "learning_rate": 1.3354307074896745e-05, "loss": 0.0009, "step": 10651 }, { "epoch": 10.242307692307692, "grad_norm": 0.40729305148124695, "learning_rate": 1.3353133542090933e-05, "loss": 0.0015, "step": 10652 }, { "epoch": 10.243269230769231, "grad_norm": 0.4505077004432678, "learning_rate": 1.3351959957254327e-05, "loss": 0.0024, "step": 10653 }, { "epoch": 10.24423076923077, "grad_norm": 1.5810624361038208, "learning_rate": 1.3350786320405145e-05, "loss": 0.0102, "step": 10654 }, { "epoch": 10.245192307692308, "grad_norm": 0.93266361951828, "learning_rate": 1.3349612631561592e-05, "loss": 0.0056, "step": 10655 }, { "epoch": 10.246153846153845, "grad_norm": 0.4371345639228821, "learning_rate": 1.3348438890741883e-05, "loss": 0.002, "step": 10656 }, { "epoch": 10.247115384615384, "grad_norm": 0.6879789233207703, "learning_rate": 1.3347265097964228e-05, "loss": 0.0034, "step": 10657 }, { "epoch": 10.248076923076923, "grad_norm": 0.10321629792451859, "learning_rate": 1.3346091253246845e-05, "loss": 0.0005, "step": 10658 }, { "epoch": 10.249038461538461, "grad_norm": 0.3130674660205841, "learning_rate": 1.3344917356607945e-05, "loss": 0.0016, "step": 10659 }, { "epoch": 10.25, "grad_norm": 1.9201781749725342, "learning_rate": 1.3343743408065746e-05, "loss": 0.0069, "step": 10660 }, { "epoch": 10.250961538461539, "grad_norm": 0.8786798715591431, "learning_rate": 1.3342569407638462e-05, "loss": 0.0033, "step": 10661 }, { "epoch": 10.251923076923077, "grad_norm": 1.0097166299819946, "learning_rate": 1.3341395355344311e-05, "loss": 0.0044, "step": 10662 }, { "epoch": 10.252884615384616, "grad_norm": 2.00221586227417, "learning_rate": 1.3340221251201512e-05, "loss": 0.0245, "step": 10663 }, { "epoch": 10.253846153846155, "grad_norm": 0.22134700417518616, "learning_rate": 1.3339047095228281e-05, "loss": 0.0014, "step": 10664 }, { "epoch": 10.254807692307692, "grad_norm": 0.7146108150482178, "learning_rate": 1.333787288744284e-05, "loss": 0.005, "step": 10665 }, { "epoch": 10.25576923076923, "grad_norm": 0.029269840568304062, "learning_rate": 1.333669862786341e-05, "loss": 0.0002, "step": 10666 }, { "epoch": 10.256730769230769, "grad_norm": 1.537784457206726, "learning_rate": 1.3335524316508208e-05, "loss": 0.0205, "step": 10667 }, { "epoch": 10.257692307692308, "grad_norm": 0.2286497801542282, "learning_rate": 1.3334349953395458e-05, "loss": 0.0008, "step": 10668 }, { "epoch": 10.258653846153846, "grad_norm": 2.422475814819336, "learning_rate": 1.3333175538543383e-05, "loss": 0.0527, "step": 10669 }, { "epoch": 10.259615384615385, "grad_norm": 0.1260729730129242, "learning_rate": 1.3332001071970209e-05, "loss": 0.001, "step": 10670 }, { "epoch": 10.260576923076924, "grad_norm": 3.3384275436401367, "learning_rate": 1.3330826553694156e-05, "loss": 0.0683, "step": 10671 }, { "epoch": 10.261538461538462, "grad_norm": 1.7959911823272705, "learning_rate": 1.332965198373345e-05, "loss": 0.0148, "step": 10672 }, { "epoch": 10.2625, "grad_norm": 1.9888554811477661, "learning_rate": 1.3328477362106317e-05, "loss": 0.0248, "step": 10673 }, { "epoch": 10.263461538461538, "grad_norm": 1.686149001121521, "learning_rate": 1.3327302688830988e-05, "loss": 0.0254, "step": 10674 }, { "epoch": 10.264423076923077, "grad_norm": 3.0276613235473633, "learning_rate": 1.3326127963925684e-05, "loss": 0.0378, "step": 10675 }, { "epoch": 10.265384615384615, "grad_norm": 0.08645962923765182, "learning_rate": 1.3324953187408635e-05, "loss": 0.0004, "step": 10676 }, { "epoch": 10.266346153846154, "grad_norm": 0.08419837057590485, "learning_rate": 1.3323778359298074e-05, "loss": 0.0006, "step": 10677 }, { "epoch": 10.267307692307693, "grad_norm": 3.2071173191070557, "learning_rate": 1.3322603479612228e-05, "loss": 0.0046, "step": 10678 }, { "epoch": 10.268269230769231, "grad_norm": 2.7218968868255615, "learning_rate": 1.3321428548369324e-05, "loss": 0.0161, "step": 10679 }, { "epoch": 10.26923076923077, "grad_norm": 0.12955935299396515, "learning_rate": 1.3320253565587602e-05, "loss": 0.0008, "step": 10680 }, { "epoch": 10.270192307692307, "grad_norm": 0.1971791684627533, "learning_rate": 1.3319078531285286e-05, "loss": 0.001, "step": 10681 }, { "epoch": 10.271153846153846, "grad_norm": 2.971324920654297, "learning_rate": 1.3317903445480615e-05, "loss": 0.0594, "step": 10682 }, { "epoch": 10.272115384615384, "grad_norm": 0.10606290400028229, "learning_rate": 1.3316728308191817e-05, "loss": 0.001, "step": 10683 }, { "epoch": 10.273076923076923, "grad_norm": 0.1681804656982422, "learning_rate": 1.3315553119437134e-05, "loss": 0.0013, "step": 10684 }, { "epoch": 10.274038461538462, "grad_norm": 3.7938575744628906, "learning_rate": 1.3314377879234797e-05, "loss": 0.0159, "step": 10685 }, { "epoch": 10.275, "grad_norm": 0.25575387477874756, "learning_rate": 1.3313202587603038e-05, "loss": 0.0015, "step": 10686 }, { "epoch": 10.275961538461539, "grad_norm": 2.1306498050689697, "learning_rate": 1.3312027244560104e-05, "loss": 0.0116, "step": 10687 }, { "epoch": 10.276923076923078, "grad_norm": 0.2786954641342163, "learning_rate": 1.3310851850124227e-05, "loss": 0.0012, "step": 10688 }, { "epoch": 10.277884615384615, "grad_norm": 1.2880854606628418, "learning_rate": 1.3309676404313647e-05, "loss": 0.0054, "step": 10689 }, { "epoch": 10.278846153846153, "grad_norm": 2.040858507156372, "learning_rate": 1.3308500907146602e-05, "loss": 0.0124, "step": 10690 }, { "epoch": 10.279807692307692, "grad_norm": 1.7801446914672852, "learning_rate": 1.3307325358641335e-05, "loss": 0.0107, "step": 10691 }, { "epoch": 10.28076923076923, "grad_norm": 1.295434594154358, "learning_rate": 1.3306149758816082e-05, "loss": 0.0151, "step": 10692 }, { "epoch": 10.28173076923077, "grad_norm": 1.6365058422088623, "learning_rate": 1.3304974107689088e-05, "loss": 0.0169, "step": 10693 }, { "epoch": 10.282692307692308, "grad_norm": 7.488852500915527, "learning_rate": 1.3303798405278598e-05, "loss": 0.1065, "step": 10694 }, { "epoch": 10.283653846153847, "grad_norm": 1.3113540410995483, "learning_rate": 1.3302622651602855e-05, "loss": 0.0128, "step": 10695 }, { "epoch": 10.284615384615385, "grad_norm": 0.17800560593605042, "learning_rate": 1.3301446846680099e-05, "loss": 0.0011, "step": 10696 }, { "epoch": 10.285576923076922, "grad_norm": 1.7361886501312256, "learning_rate": 1.3300270990528577e-05, "loss": 0.0095, "step": 10697 }, { "epoch": 10.286538461538461, "grad_norm": 1.5907516479492188, "learning_rate": 1.3299095083166537e-05, "loss": 0.0155, "step": 10698 }, { "epoch": 10.2875, "grad_norm": 2.3556947708129883, "learning_rate": 1.3297919124612225e-05, "loss": 0.0319, "step": 10699 }, { "epoch": 10.288461538461538, "grad_norm": 2.946232795715332, "learning_rate": 1.3296743114883885e-05, "loss": 0.0494, "step": 10700 }, { "epoch": 10.289423076923077, "grad_norm": 2.087991952896118, "learning_rate": 1.3295567053999767e-05, "loss": 0.0102, "step": 10701 }, { "epoch": 10.290384615384616, "grad_norm": 1.0223530530929565, "learning_rate": 1.3294390941978124e-05, "loss": 0.0047, "step": 10702 }, { "epoch": 10.291346153846154, "grad_norm": 1.5711865425109863, "learning_rate": 1.32932147788372e-05, "loss": 0.0087, "step": 10703 }, { "epoch": 10.292307692307693, "grad_norm": 0.2971797287464142, "learning_rate": 1.3292038564595249e-05, "loss": 0.0026, "step": 10704 }, { "epoch": 10.29326923076923, "grad_norm": 2.5359411239624023, "learning_rate": 1.3290862299270521e-05, "loss": 0.0637, "step": 10705 }, { "epoch": 10.294230769230769, "grad_norm": 2.4618091583251953, "learning_rate": 1.328968598288127e-05, "loss": 0.0254, "step": 10706 }, { "epoch": 10.295192307692307, "grad_norm": 0.428436815738678, "learning_rate": 1.3288509615445748e-05, "loss": 0.0015, "step": 10707 }, { "epoch": 10.296153846153846, "grad_norm": 1.2913721799850464, "learning_rate": 1.3287333196982205e-05, "loss": 0.0548, "step": 10708 }, { "epoch": 10.297115384615385, "grad_norm": 0.1414395570755005, "learning_rate": 1.3286156727508904e-05, "loss": 0.0006, "step": 10709 }, { "epoch": 10.298076923076923, "grad_norm": 1.0992100238800049, "learning_rate": 1.3284980207044091e-05, "loss": 0.0041, "step": 10710 }, { "epoch": 10.299038461538462, "grad_norm": 2.620229959487915, "learning_rate": 1.328380363560603e-05, "loss": 0.0299, "step": 10711 }, { "epoch": 10.3, "grad_norm": 0.0847044587135315, "learning_rate": 1.328262701321297e-05, "loss": 0.0008, "step": 10712 }, { "epoch": 10.300961538461538, "grad_norm": 0.01857154630124569, "learning_rate": 1.3281450339883178e-05, "loss": 0.0001, "step": 10713 }, { "epoch": 10.301923076923076, "grad_norm": 0.06902365386486053, "learning_rate": 1.3280273615634905e-05, "loss": 0.0007, "step": 10714 }, { "epoch": 10.302884615384615, "grad_norm": 4.592703342437744, "learning_rate": 1.3279096840486411e-05, "loss": 0.0621, "step": 10715 }, { "epoch": 10.303846153846154, "grad_norm": 1.7760287523269653, "learning_rate": 1.327792001445596e-05, "loss": 0.0101, "step": 10716 }, { "epoch": 10.304807692307692, "grad_norm": 0.5769391059875488, "learning_rate": 1.3276743137561812e-05, "loss": 0.0031, "step": 10717 }, { "epoch": 10.305769230769231, "grad_norm": 1.5549936294555664, "learning_rate": 1.3275566209822229e-05, "loss": 0.0088, "step": 10718 }, { "epoch": 10.30673076923077, "grad_norm": 2.8246233463287354, "learning_rate": 1.3274389231255466e-05, "loss": 0.0364, "step": 10719 }, { "epoch": 10.307692307692308, "grad_norm": 0.16954964399337769, "learning_rate": 1.32732122018798e-05, "loss": 0.0012, "step": 10720 }, { "epoch": 10.308653846153845, "grad_norm": 2.29606032371521, "learning_rate": 1.3272035121713485e-05, "loss": 0.0193, "step": 10721 }, { "epoch": 10.309615384615384, "grad_norm": 0.1076594740152359, "learning_rate": 1.3270857990774787e-05, "loss": 0.0011, "step": 10722 }, { "epoch": 10.310576923076923, "grad_norm": 2.554081916809082, "learning_rate": 1.3269680809081971e-05, "loss": 0.0264, "step": 10723 }, { "epoch": 10.311538461538461, "grad_norm": 0.49959293007850647, "learning_rate": 1.3268503576653307e-05, "loss": 0.0035, "step": 10724 }, { "epoch": 10.3125, "grad_norm": 1.3961563110351562, "learning_rate": 1.326732629350706e-05, "loss": 0.0129, "step": 10725 }, { "epoch": 10.313461538461539, "grad_norm": 1.0576542615890503, "learning_rate": 1.3266148959661498e-05, "loss": 0.0065, "step": 10726 }, { "epoch": 10.314423076923077, "grad_norm": 0.7848041653633118, "learning_rate": 1.3264971575134893e-05, "loss": 0.0041, "step": 10727 }, { "epoch": 10.315384615384616, "grad_norm": 1.0797556638717651, "learning_rate": 1.326379413994551e-05, "loss": 0.0148, "step": 10728 }, { "epoch": 10.316346153846155, "grad_norm": 1.6141040325164795, "learning_rate": 1.326261665411162e-05, "loss": 0.0301, "step": 10729 }, { "epoch": 10.317307692307692, "grad_norm": 0.7042537927627563, "learning_rate": 1.3261439117651496e-05, "loss": 0.0039, "step": 10730 }, { "epoch": 10.31826923076923, "grad_norm": 1.0768903493881226, "learning_rate": 1.3260261530583409e-05, "loss": 0.0039, "step": 10731 }, { "epoch": 10.319230769230769, "grad_norm": 0.24841472506523132, "learning_rate": 1.3259083892925633e-05, "loss": 0.0011, "step": 10732 }, { "epoch": 10.320192307692308, "grad_norm": 3.191660165786743, "learning_rate": 1.3257906204696439e-05, "loss": 0.0306, "step": 10733 }, { "epoch": 10.321153846153846, "grad_norm": 3.625992774963379, "learning_rate": 1.3256728465914101e-05, "loss": 0.0353, "step": 10734 }, { "epoch": 10.322115384615385, "grad_norm": 3.434401035308838, "learning_rate": 1.32555506765969e-05, "loss": 0.0278, "step": 10735 }, { "epoch": 10.323076923076924, "grad_norm": 0.5499375462532043, "learning_rate": 1.3254372836763105e-05, "loss": 0.0051, "step": 10736 }, { "epoch": 10.324038461538462, "grad_norm": 0.5805371999740601, "learning_rate": 1.3253194946430992e-05, "loss": 0.0032, "step": 10737 }, { "epoch": 10.325, "grad_norm": 1.8364646434783936, "learning_rate": 1.3252017005618845e-05, "loss": 0.0202, "step": 10738 }, { "epoch": 10.325961538461538, "grad_norm": 0.09280870109796524, "learning_rate": 1.3250839014344939e-05, "loss": 0.0011, "step": 10739 }, { "epoch": 10.326923076923077, "grad_norm": 3.137188673019409, "learning_rate": 1.3249660972627552e-05, "loss": 0.0827, "step": 10740 }, { "epoch": 10.327884615384615, "grad_norm": 3.104975461959839, "learning_rate": 1.3248482880484963e-05, "loss": 0.0531, "step": 10741 }, { "epoch": 10.328846153846154, "grad_norm": 2.0830159187316895, "learning_rate": 1.3247304737935456e-05, "loss": 0.0148, "step": 10742 }, { "epoch": 10.329807692307693, "grad_norm": 2.7729501724243164, "learning_rate": 1.3246126544997308e-05, "loss": 0.0134, "step": 10743 }, { "epoch": 10.330769230769231, "grad_norm": 0.19574971497058868, "learning_rate": 1.3244948301688803e-05, "loss": 0.001, "step": 10744 }, { "epoch": 10.33173076923077, "grad_norm": 2.7301418781280518, "learning_rate": 1.3243770008028225e-05, "loss": 0.0411, "step": 10745 }, { "epoch": 10.332692307692307, "grad_norm": 0.48908311128616333, "learning_rate": 1.3242591664033859e-05, "loss": 0.0039, "step": 10746 }, { "epoch": 10.333653846153846, "grad_norm": 0.7459701299667358, "learning_rate": 1.3241413269723987e-05, "loss": 0.0033, "step": 10747 }, { "epoch": 10.334615384615384, "grad_norm": 1.3543083667755127, "learning_rate": 1.3240234825116888e-05, "loss": 0.0049, "step": 10748 }, { "epoch": 10.335576923076923, "grad_norm": 1.7193336486816406, "learning_rate": 1.3239056330230861e-05, "loss": 0.031, "step": 10749 }, { "epoch": 10.336538461538462, "grad_norm": 0.24039313197135925, "learning_rate": 1.3237877785084186e-05, "loss": 0.0012, "step": 10750 }, { "epoch": 10.3375, "grad_norm": 0.039250697940588, "learning_rate": 1.323669918969515e-05, "loss": 0.0005, "step": 10751 }, { "epoch": 10.338461538461539, "grad_norm": 2.1846325397491455, "learning_rate": 1.323552054408204e-05, "loss": 0.0131, "step": 10752 }, { "epoch": 10.339423076923078, "grad_norm": 1.5698955059051514, "learning_rate": 1.3234341848263151e-05, "loss": 0.0225, "step": 10753 }, { "epoch": 10.340384615384615, "grad_norm": 3.972572088241577, "learning_rate": 1.3233163102256768e-05, "loss": 0.0671, "step": 10754 }, { "epoch": 10.341346153846153, "grad_norm": 0.7069756984710693, "learning_rate": 1.323198430608118e-05, "loss": 0.004, "step": 10755 }, { "epoch": 10.342307692307692, "grad_norm": 0.44238704442977905, "learning_rate": 1.3230805459754685e-05, "loss": 0.0039, "step": 10756 }, { "epoch": 10.34326923076923, "grad_norm": 1.28053617477417, "learning_rate": 1.322962656329557e-05, "loss": 0.004, "step": 10757 }, { "epoch": 10.34423076923077, "grad_norm": 1.869775414466858, "learning_rate": 1.3228447616722128e-05, "loss": 0.0038, "step": 10758 }, { "epoch": 10.345192307692308, "grad_norm": 3.172234535217285, "learning_rate": 1.3227268620052656e-05, "loss": 0.0126, "step": 10759 }, { "epoch": 10.346153846153847, "grad_norm": 0.9862876534461975, "learning_rate": 1.3226089573305447e-05, "loss": 0.0072, "step": 10760 }, { "epoch": 10.347115384615385, "grad_norm": 2.497844934463501, "learning_rate": 1.3224910476498798e-05, "loss": 0.0118, "step": 10761 }, { "epoch": 10.348076923076922, "grad_norm": 0.5371953845024109, "learning_rate": 1.3223731329651001e-05, "loss": 0.0035, "step": 10762 }, { "epoch": 10.349038461538461, "grad_norm": 0.4176819324493408, "learning_rate": 1.3222552132780356e-05, "loss": 0.0023, "step": 10763 }, { "epoch": 10.35, "grad_norm": 3.255624532699585, "learning_rate": 1.322137288590516e-05, "loss": 0.0183, "step": 10764 }, { "epoch": 10.350961538461538, "grad_norm": 0.961760401725769, "learning_rate": 1.322019358904371e-05, "loss": 0.005, "step": 10765 }, { "epoch": 10.351923076923077, "grad_norm": 3.8622546195983887, "learning_rate": 1.3219014242214308e-05, "loss": 0.0373, "step": 10766 }, { "epoch": 10.352884615384616, "grad_norm": 0.28611424565315247, "learning_rate": 1.3217834845435254e-05, "loss": 0.0016, "step": 10767 }, { "epoch": 10.353846153846154, "grad_norm": 2.2522759437561035, "learning_rate": 1.3216655398724844e-05, "loss": 0.0397, "step": 10768 }, { "epoch": 10.354807692307693, "grad_norm": 2.9360859394073486, "learning_rate": 1.3215475902101387e-05, "loss": 0.0454, "step": 10769 }, { "epoch": 10.35576923076923, "grad_norm": 5.241725921630859, "learning_rate": 1.3214296355583178e-05, "loss": 0.0607, "step": 10770 }, { "epoch": 10.356730769230769, "grad_norm": 0.7508188486099243, "learning_rate": 1.3213116759188525e-05, "loss": 0.0031, "step": 10771 }, { "epoch": 10.357692307692307, "grad_norm": 1.9900037050247192, "learning_rate": 1.321193711293573e-05, "loss": 0.0397, "step": 10772 }, { "epoch": 10.358653846153846, "grad_norm": 2.0904440879821777, "learning_rate": 1.3210757416843097e-05, "loss": 0.0057, "step": 10773 }, { "epoch": 10.359615384615385, "grad_norm": 1.945468544960022, "learning_rate": 1.3209577670928933e-05, "loss": 0.0357, "step": 10774 }, { "epoch": 10.360576923076923, "grad_norm": 1.0193519592285156, "learning_rate": 1.3208397875211545e-05, "loss": 0.0189, "step": 10775 }, { "epoch": 10.361538461538462, "grad_norm": 0.7018136978149414, "learning_rate": 1.3207218029709239e-05, "loss": 0.0044, "step": 10776 }, { "epoch": 10.3625, "grad_norm": 0.8267654776573181, "learning_rate": 1.3206038134440318e-05, "loss": 0.0045, "step": 10777 }, { "epoch": 10.363461538461538, "grad_norm": 2.8782689571380615, "learning_rate": 1.3204858189423097e-05, "loss": 0.0416, "step": 10778 }, { "epoch": 10.364423076923076, "grad_norm": 5.471968173980713, "learning_rate": 1.3203678194675886e-05, "loss": 0.1541, "step": 10779 }, { "epoch": 10.365384615384615, "grad_norm": 0.04107150435447693, "learning_rate": 1.320249815021699e-05, "loss": 0.0003, "step": 10780 }, { "epoch": 10.366346153846154, "grad_norm": 4.937378406524658, "learning_rate": 1.3201318056064719e-05, "loss": 0.0464, "step": 10781 }, { "epoch": 10.367307692307692, "grad_norm": 0.6887193918228149, "learning_rate": 1.320013791223739e-05, "loss": 0.0038, "step": 10782 }, { "epoch": 10.368269230769231, "grad_norm": 3.1329915523529053, "learning_rate": 1.3198957718753313e-05, "loss": 0.0412, "step": 10783 }, { "epoch": 10.36923076923077, "grad_norm": 4.7172322273254395, "learning_rate": 1.31977774756308e-05, "loss": 0.0558, "step": 10784 }, { "epoch": 10.370192307692308, "grad_norm": 2.3800032138824463, "learning_rate": 1.3196597182888168e-05, "loss": 0.034, "step": 10785 }, { "epoch": 10.371153846153845, "grad_norm": 2.287024974822998, "learning_rate": 1.3195416840543729e-05, "loss": 0.0291, "step": 10786 }, { "epoch": 10.372115384615384, "grad_norm": 1.7354639768600464, "learning_rate": 1.3194236448615798e-05, "loss": 0.0063, "step": 10787 }, { "epoch": 10.373076923076923, "grad_norm": 0.2359955757856369, "learning_rate": 1.3193056007122694e-05, "loss": 0.0013, "step": 10788 }, { "epoch": 10.374038461538461, "grad_norm": 0.3034264147281647, "learning_rate": 1.3191875516082731e-05, "loss": 0.0024, "step": 10789 }, { "epoch": 10.375, "grad_norm": 3.6959707736968994, "learning_rate": 1.3190694975514226e-05, "loss": 0.0825, "step": 10790 }, { "epoch": 10.375961538461539, "grad_norm": 3.7138779163360596, "learning_rate": 1.3189514385435502e-05, "loss": 0.0294, "step": 10791 }, { "epoch": 10.376923076923077, "grad_norm": 0.7039945125579834, "learning_rate": 1.3188333745864876e-05, "loss": 0.0029, "step": 10792 }, { "epoch": 10.377884615384616, "grad_norm": 0.592434287071228, "learning_rate": 1.3187153056820667e-05, "loss": 0.0079, "step": 10793 }, { "epoch": 10.378846153846155, "grad_norm": 0.4169478118419647, "learning_rate": 1.31859723183212e-05, "loss": 0.0031, "step": 10794 }, { "epoch": 10.379807692307692, "grad_norm": 1.0316482782363892, "learning_rate": 1.318479153038479e-05, "loss": 0.0032, "step": 10795 }, { "epoch": 10.38076923076923, "grad_norm": 0.5531983971595764, "learning_rate": 1.3183610693029763e-05, "loss": 0.0037, "step": 10796 }, { "epoch": 10.381730769230769, "grad_norm": 1.0664784908294678, "learning_rate": 1.3182429806274442e-05, "loss": 0.0362, "step": 10797 }, { "epoch": 10.382692307692308, "grad_norm": 0.20604965090751648, "learning_rate": 1.3181248870137151e-05, "loss": 0.0007, "step": 10798 }, { "epoch": 10.383653846153846, "grad_norm": 1.104317307472229, "learning_rate": 1.3180067884636212e-05, "loss": 0.0075, "step": 10799 }, { "epoch": 10.384615384615385, "grad_norm": 2.620180130004883, "learning_rate": 1.3178886849789955e-05, "loss": 0.0183, "step": 10800 }, { "epoch": 10.385576923076924, "grad_norm": 1.2088203430175781, "learning_rate": 1.3177705765616704e-05, "loss": 0.0078, "step": 10801 }, { "epoch": 10.386538461538462, "grad_norm": 1.23771071434021, "learning_rate": 1.3176524632134785e-05, "loss": 0.0356, "step": 10802 }, { "epoch": 10.3875, "grad_norm": 2.8576266765594482, "learning_rate": 1.3175343449362527e-05, "loss": 0.0557, "step": 10803 }, { "epoch": 10.388461538461538, "grad_norm": 0.80802983045578, "learning_rate": 1.317416221731826e-05, "loss": 0.0028, "step": 10804 }, { "epoch": 10.389423076923077, "grad_norm": 1.50509512424469, "learning_rate": 1.3172980936020312e-05, "loss": 0.0089, "step": 10805 }, { "epoch": 10.390384615384615, "grad_norm": 1.490155577659607, "learning_rate": 1.317179960548701e-05, "loss": 0.0104, "step": 10806 }, { "epoch": 10.391346153846154, "grad_norm": 0.6867982149124146, "learning_rate": 1.3170618225736686e-05, "loss": 0.0047, "step": 10807 }, { "epoch": 10.392307692307693, "grad_norm": 1.4290361404418945, "learning_rate": 1.3169436796787678e-05, "loss": 0.0108, "step": 10808 }, { "epoch": 10.393269230769231, "grad_norm": 0.3999587297439575, "learning_rate": 1.316825531865831e-05, "loss": 0.0025, "step": 10809 }, { "epoch": 10.39423076923077, "grad_norm": 0.18253856897354126, "learning_rate": 1.3167073791366915e-05, "loss": 0.0013, "step": 10810 }, { "epoch": 10.395192307692307, "grad_norm": 0.8401700854301453, "learning_rate": 1.3165892214931834e-05, "loss": 0.005, "step": 10811 }, { "epoch": 10.396153846153846, "grad_norm": 1.901555061340332, "learning_rate": 1.3164710589371397e-05, "loss": 0.0126, "step": 10812 }, { "epoch": 10.397115384615384, "grad_norm": 0.3916216790676117, "learning_rate": 1.3163528914703941e-05, "loss": 0.0026, "step": 10813 }, { "epoch": 10.398076923076923, "grad_norm": 0.6074063777923584, "learning_rate": 1.3162347190947799e-05, "loss": 0.0027, "step": 10814 }, { "epoch": 10.399038461538462, "grad_norm": 0.3769114911556244, "learning_rate": 1.316116541812131e-05, "loss": 0.0028, "step": 10815 }, { "epoch": 10.4, "grad_norm": 0.067415252327919, "learning_rate": 1.3159983596242813e-05, "loss": 0.0008, "step": 10816 }, { "epoch": 10.400961538461539, "grad_norm": 3.351886749267578, "learning_rate": 1.3158801725330643e-05, "loss": 0.0435, "step": 10817 }, { "epoch": 10.401923076923078, "grad_norm": 1.185595989227295, "learning_rate": 1.3157619805403144e-05, "loss": 0.0124, "step": 10818 }, { "epoch": 10.402884615384615, "grad_norm": 1.6150586605072021, "learning_rate": 1.315643783647865e-05, "loss": 0.017, "step": 10819 }, { "epoch": 10.403846153846153, "grad_norm": 2.5627405643463135, "learning_rate": 1.3155255818575508e-05, "loss": 0.0351, "step": 10820 }, { "epoch": 10.404807692307692, "grad_norm": 0.3885679543018341, "learning_rate": 1.3154073751712053e-05, "loss": 0.002, "step": 10821 }, { "epoch": 10.40576923076923, "grad_norm": 1.369846224784851, "learning_rate": 1.3152891635906632e-05, "loss": 0.0093, "step": 10822 }, { "epoch": 10.40673076923077, "grad_norm": 1.8916488885879517, "learning_rate": 1.3151709471177589e-05, "loss": 0.0743, "step": 10823 }, { "epoch": 10.407692307692308, "grad_norm": 3.4470415115356445, "learning_rate": 1.3150527257543262e-05, "loss": 0.0694, "step": 10824 }, { "epoch": 10.408653846153847, "grad_norm": 4.119363307952881, "learning_rate": 1.3149344995022e-05, "loss": 0.0422, "step": 10825 }, { "epoch": 10.409615384615385, "grad_norm": 2.147520065307617, "learning_rate": 1.3148162683632147e-05, "loss": 0.0206, "step": 10826 }, { "epoch": 10.410576923076922, "grad_norm": 2.598245620727539, "learning_rate": 1.3146980323392046e-05, "loss": 0.0169, "step": 10827 }, { "epoch": 10.411538461538461, "grad_norm": 0.42743510007858276, "learning_rate": 1.3145797914320049e-05, "loss": 0.0029, "step": 10828 }, { "epoch": 10.4125, "grad_norm": 1.0592588186264038, "learning_rate": 1.3144615456434502e-05, "loss": 0.0061, "step": 10829 }, { "epoch": 10.413461538461538, "grad_norm": 3.417884111404419, "learning_rate": 1.3143432949753752e-05, "loss": 0.0229, "step": 10830 }, { "epoch": 10.414423076923077, "grad_norm": 1.3832433223724365, "learning_rate": 1.3142250394296147e-05, "loss": 0.0367, "step": 10831 }, { "epoch": 10.415384615384616, "grad_norm": 1.9651620388031006, "learning_rate": 1.314106779008004e-05, "loss": 0.012, "step": 10832 }, { "epoch": 10.416346153846154, "grad_norm": 1.3060349225997925, "learning_rate": 1.3139885137123779e-05, "loss": 0.012, "step": 10833 }, { "epoch": 10.417307692307693, "grad_norm": 2.160574436187744, "learning_rate": 1.3138702435445715e-05, "loss": 0.0251, "step": 10834 }, { "epoch": 10.41826923076923, "grad_norm": 0.15373815596103668, "learning_rate": 1.3137519685064204e-05, "loss": 0.0011, "step": 10835 }, { "epoch": 10.419230769230769, "grad_norm": 1.552838683128357, "learning_rate": 1.3136336885997591e-05, "loss": 0.0092, "step": 10836 }, { "epoch": 10.420192307692307, "grad_norm": 0.3731011748313904, "learning_rate": 1.3135154038264239e-05, "loss": 0.0025, "step": 10837 }, { "epoch": 10.421153846153846, "grad_norm": 2.7431883811950684, "learning_rate": 1.3133971141882498e-05, "loss": 0.0549, "step": 10838 }, { "epoch": 10.422115384615385, "grad_norm": 2.026602029800415, "learning_rate": 1.3132788196870723e-05, "loss": 0.0101, "step": 10839 }, { "epoch": 10.423076923076923, "grad_norm": 0.4723884165287018, "learning_rate": 1.3131605203247267e-05, "loss": 0.0047, "step": 10840 }, { "epoch": 10.424038461538462, "grad_norm": 0.5235018730163574, "learning_rate": 1.3130422161030491e-05, "loss": 0.0026, "step": 10841 }, { "epoch": 10.425, "grad_norm": 4.390417575836182, "learning_rate": 1.312923907023875e-05, "loss": 0.0389, "step": 10842 }, { "epoch": 10.425961538461538, "grad_norm": 0.9680396318435669, "learning_rate": 1.3128055930890402e-05, "loss": 0.0038, "step": 10843 }, { "epoch": 10.426923076923076, "grad_norm": 3.015218734741211, "learning_rate": 1.3126872743003811e-05, "loss": 0.0187, "step": 10844 }, { "epoch": 10.427884615384615, "grad_norm": 1.8067642450332642, "learning_rate": 1.312568950659733e-05, "loss": 0.0165, "step": 10845 }, { "epoch": 10.428846153846154, "grad_norm": 0.5535371899604797, "learning_rate": 1.3124506221689321e-05, "loss": 0.0041, "step": 10846 }, { "epoch": 10.429807692307692, "grad_norm": 0.08696377277374268, "learning_rate": 1.3123322888298145e-05, "loss": 0.0006, "step": 10847 }, { "epoch": 10.430769230769231, "grad_norm": 0.1596081703901291, "learning_rate": 1.3122139506442166e-05, "loss": 0.0014, "step": 10848 }, { "epoch": 10.43173076923077, "grad_norm": 1.7490346431732178, "learning_rate": 1.3120956076139746e-05, "loss": 0.0123, "step": 10849 }, { "epoch": 10.432692307692308, "grad_norm": 1.8014631271362305, "learning_rate": 1.3119772597409245e-05, "loss": 0.0414, "step": 10850 }, { "epoch": 10.433653846153845, "grad_norm": 0.24954760074615479, "learning_rate": 1.311858907026903e-05, "loss": 0.0011, "step": 10851 }, { "epoch": 10.434615384615384, "grad_norm": 3.1271979808807373, "learning_rate": 1.3117405494737468e-05, "loss": 0.0763, "step": 10852 }, { "epoch": 10.435576923076923, "grad_norm": 0.2687095105648041, "learning_rate": 1.3116221870832922e-05, "loss": 0.0009, "step": 10853 }, { "epoch": 10.436538461538461, "grad_norm": 2.325305223464966, "learning_rate": 1.3115038198573755e-05, "loss": 0.0177, "step": 10854 }, { "epoch": 10.4375, "grad_norm": 2.557072639465332, "learning_rate": 1.311385447797834e-05, "loss": 0.0266, "step": 10855 }, { "epoch": 10.438461538461539, "grad_norm": 2.2518255710601807, "learning_rate": 1.3112670709065045e-05, "loss": 0.0169, "step": 10856 }, { "epoch": 10.439423076923077, "grad_norm": 1.0648962259292603, "learning_rate": 1.3111486891852233e-05, "loss": 0.0173, "step": 10857 }, { "epoch": 10.440384615384616, "grad_norm": 4.166215896606445, "learning_rate": 1.3110303026358278e-05, "loss": 0.0301, "step": 10858 }, { "epoch": 10.441346153846155, "grad_norm": 2.3545851707458496, "learning_rate": 1.3109119112601548e-05, "loss": 0.0181, "step": 10859 }, { "epoch": 10.442307692307692, "grad_norm": 1.6436649560928345, "learning_rate": 1.3107935150600414e-05, "loss": 0.0078, "step": 10860 }, { "epoch": 10.44326923076923, "grad_norm": 4.623059272766113, "learning_rate": 1.310675114037325e-05, "loss": 0.1033, "step": 10861 }, { "epoch": 10.444230769230769, "grad_norm": 3.6775636672973633, "learning_rate": 1.3105567081938423e-05, "loss": 0.0301, "step": 10862 }, { "epoch": 10.445192307692308, "grad_norm": 1.9374585151672363, "learning_rate": 1.3104382975314314e-05, "loss": 0.0123, "step": 10863 }, { "epoch": 10.446153846153846, "grad_norm": 0.6761426329612732, "learning_rate": 1.3103198820519291e-05, "loss": 0.0043, "step": 10864 }, { "epoch": 10.447115384615385, "grad_norm": 2.6539907455444336, "learning_rate": 1.3102014617571728e-05, "loss": 0.0301, "step": 10865 }, { "epoch": 10.448076923076924, "grad_norm": 0.37066441774368286, "learning_rate": 1.3100830366490005e-05, "loss": 0.0018, "step": 10866 }, { "epoch": 10.449038461538462, "grad_norm": 0.758167564868927, "learning_rate": 1.3099646067292495e-05, "loss": 0.0036, "step": 10867 }, { "epoch": 10.45, "grad_norm": 2.986555576324463, "learning_rate": 1.3098461719997575e-05, "loss": 0.0482, "step": 10868 }, { "epoch": 10.450961538461538, "grad_norm": 1.8562078475952148, "learning_rate": 1.3097277324623622e-05, "loss": 0.1201, "step": 10869 }, { "epoch": 10.451923076923077, "grad_norm": 1.209022879600525, "learning_rate": 1.3096092881189015e-05, "loss": 0.0055, "step": 10870 }, { "epoch": 10.452884615384615, "grad_norm": 2.082362174987793, "learning_rate": 1.3094908389712135e-05, "loss": 0.0167, "step": 10871 }, { "epoch": 10.453846153846154, "grad_norm": 2.532154083251953, "learning_rate": 1.3093723850211361e-05, "loss": 0.0245, "step": 10872 }, { "epoch": 10.454807692307693, "grad_norm": 1.4374568462371826, "learning_rate": 1.309253926270507e-05, "loss": 0.0028, "step": 10873 }, { "epoch": 10.455769230769231, "grad_norm": 2.8289670944213867, "learning_rate": 1.3091354627211647e-05, "loss": 0.0327, "step": 10874 }, { "epoch": 10.45673076923077, "grad_norm": 0.3956315517425537, "learning_rate": 1.3090169943749475e-05, "loss": 0.0026, "step": 10875 }, { "epoch": 10.457692307692307, "grad_norm": 2.2338337898254395, "learning_rate": 1.3088985212336933e-05, "loss": 0.0309, "step": 10876 }, { "epoch": 10.458653846153846, "grad_norm": 1.6442819833755493, "learning_rate": 1.3087800432992408e-05, "loss": 0.0174, "step": 10877 }, { "epoch": 10.459615384615384, "grad_norm": 0.09775403141975403, "learning_rate": 1.3086615605734283e-05, "loss": 0.0006, "step": 10878 }, { "epoch": 10.460576923076923, "grad_norm": 0.5627571940422058, "learning_rate": 1.3085430730580944e-05, "loss": 0.0028, "step": 10879 }, { "epoch": 10.461538461538462, "grad_norm": 1.8888262510299683, "learning_rate": 1.308424580755077e-05, "loss": 0.0119, "step": 10880 }, { "epoch": 10.4625, "grad_norm": 2.8003089427948, "learning_rate": 1.3083060836662158e-05, "loss": 0.0264, "step": 10881 }, { "epoch": 10.463461538461539, "grad_norm": 3.0589704513549805, "learning_rate": 1.3081875817933492e-05, "loss": 0.027, "step": 10882 }, { "epoch": 10.464423076923078, "grad_norm": 0.9908571243286133, "learning_rate": 1.3080690751383154e-05, "loss": 0.0047, "step": 10883 }, { "epoch": 10.465384615384615, "grad_norm": 1.0518972873687744, "learning_rate": 1.3079505637029542e-05, "loss": 0.0044, "step": 10884 }, { "epoch": 10.466346153846153, "grad_norm": 2.260668992996216, "learning_rate": 1.3078320474891037e-05, "loss": 0.0518, "step": 10885 }, { "epoch": 10.467307692307692, "grad_norm": 4.136845111846924, "learning_rate": 1.3077135264986036e-05, "loss": 0.0289, "step": 10886 }, { "epoch": 10.46826923076923, "grad_norm": 0.3816760182380676, "learning_rate": 1.3075950007332923e-05, "loss": 0.0036, "step": 10887 }, { "epoch": 10.46923076923077, "grad_norm": 0.017862234264612198, "learning_rate": 1.3074764701950095e-05, "loss": 0.0001, "step": 10888 }, { "epoch": 10.470192307692308, "grad_norm": 3.109635829925537, "learning_rate": 1.3073579348855944e-05, "loss": 0.0342, "step": 10889 }, { "epoch": 10.471153846153847, "grad_norm": 4.060203552246094, "learning_rate": 1.3072393948068865e-05, "loss": 0.0447, "step": 10890 }, { "epoch": 10.472115384615385, "grad_norm": 0.2975848317146301, "learning_rate": 1.3071208499607244e-05, "loss": 0.0024, "step": 10891 }, { "epoch": 10.473076923076922, "grad_norm": 2.9003777503967285, "learning_rate": 1.3070023003489484e-05, "loss": 0.1184, "step": 10892 }, { "epoch": 10.474038461538461, "grad_norm": 1.05304753780365, "learning_rate": 1.3068837459733978e-05, "loss": 0.0055, "step": 10893 }, { "epoch": 10.475, "grad_norm": 4.517746925354004, "learning_rate": 1.3067651868359119e-05, "loss": 0.0244, "step": 10894 }, { "epoch": 10.475961538461538, "grad_norm": 3.5767760276794434, "learning_rate": 1.306646622938331e-05, "loss": 0.0411, "step": 10895 }, { "epoch": 10.476923076923077, "grad_norm": 3.909553050994873, "learning_rate": 1.3065280542824942e-05, "loss": 0.0544, "step": 10896 }, { "epoch": 10.477884615384616, "grad_norm": 0.46701595187187195, "learning_rate": 1.3064094808702419e-05, "loss": 0.0027, "step": 10897 }, { "epoch": 10.478846153846154, "grad_norm": 2.0483970642089844, "learning_rate": 1.3062909027034133e-05, "loss": 0.0101, "step": 10898 }, { "epoch": 10.479807692307693, "grad_norm": 1.297438621520996, "learning_rate": 1.3061723197838494e-05, "loss": 0.0132, "step": 10899 }, { "epoch": 10.48076923076923, "grad_norm": 3.0852062702178955, "learning_rate": 1.3060537321133895e-05, "loss": 0.0739, "step": 10900 }, { "epoch": 10.481730769230769, "grad_norm": 1.533321738243103, "learning_rate": 1.305935139693874e-05, "loss": 0.0143, "step": 10901 }, { "epoch": 10.482692307692307, "grad_norm": 0.18323500454425812, "learning_rate": 1.3058165425271427e-05, "loss": 0.0016, "step": 10902 }, { "epoch": 10.483653846153846, "grad_norm": 3.3796887397766113, "learning_rate": 1.3056979406150365e-05, "loss": 0.0193, "step": 10903 }, { "epoch": 10.484615384615385, "grad_norm": 4.022772789001465, "learning_rate": 1.3055793339593956e-05, "loss": 0.0662, "step": 10904 }, { "epoch": 10.485576923076923, "grad_norm": 2.005704164505005, "learning_rate": 1.30546072256206e-05, "loss": 0.0141, "step": 10905 }, { "epoch": 10.486538461538462, "grad_norm": 4.130509853363037, "learning_rate": 1.3053421064248706e-05, "loss": 0.0711, "step": 10906 }, { "epoch": 10.4875, "grad_norm": 0.059892237186431885, "learning_rate": 1.305223485549668e-05, "loss": 0.0005, "step": 10907 }, { "epoch": 10.488461538461538, "grad_norm": 0.7290496230125427, "learning_rate": 1.3051048599382927e-05, "loss": 0.0032, "step": 10908 }, { "epoch": 10.489423076923076, "grad_norm": 2.948923349380493, "learning_rate": 1.3049862295925852e-05, "loss": 0.0195, "step": 10909 }, { "epoch": 10.490384615384615, "grad_norm": 0.5246583223342896, "learning_rate": 1.304867594514387e-05, "loss": 0.0078, "step": 10910 }, { "epoch": 10.491346153846154, "grad_norm": 3.0205976963043213, "learning_rate": 1.304748954705538e-05, "loss": 0.043, "step": 10911 }, { "epoch": 10.492307692307692, "grad_norm": 0.26477184891700745, "learning_rate": 1.3046303101678801e-05, "loss": 0.002, "step": 10912 }, { "epoch": 10.493269230769231, "grad_norm": 1.522025227546692, "learning_rate": 1.3045116609032536e-05, "loss": 0.0121, "step": 10913 }, { "epoch": 10.49423076923077, "grad_norm": 1.714272379875183, "learning_rate": 1.3043930069134998e-05, "loss": 0.0145, "step": 10914 }, { "epoch": 10.495192307692308, "grad_norm": 2.860161066055298, "learning_rate": 1.30427434820046e-05, "loss": 0.0304, "step": 10915 }, { "epoch": 10.496153846153845, "grad_norm": 2.0112252235412598, "learning_rate": 1.3041556847659754e-05, "loss": 0.012, "step": 10916 }, { "epoch": 10.497115384615384, "grad_norm": 1.4538990259170532, "learning_rate": 1.3040370166118873e-05, "loss": 0.0144, "step": 10917 }, { "epoch": 10.498076923076923, "grad_norm": 3.365558624267578, "learning_rate": 1.303918343740037e-05, "loss": 0.0245, "step": 10918 }, { "epoch": 10.499038461538461, "grad_norm": 0.7377278804779053, "learning_rate": 1.303799666152266e-05, "loss": 0.005, "step": 10919 }, { "epoch": 10.5, "grad_norm": 3.4248318672180176, "learning_rate": 1.3036809838504157e-05, "loss": 0.0152, "step": 10920 }, { "epoch": 10.500961538461539, "grad_norm": 1.4080896377563477, "learning_rate": 1.303562296836328e-05, "loss": 0.0138, "step": 10921 }, { "epoch": 10.501923076923077, "grad_norm": 2.0908374786376953, "learning_rate": 1.3034436051118442e-05, "loss": 0.0326, "step": 10922 }, { "epoch": 10.502884615384616, "grad_norm": 0.13121554255485535, "learning_rate": 1.3033249086788063e-05, "loss": 0.0009, "step": 10923 }, { "epoch": 10.503846153846155, "grad_norm": 2.209007501602173, "learning_rate": 1.3032062075390562e-05, "loss": 0.0245, "step": 10924 }, { "epoch": 10.504807692307692, "grad_norm": 0.9617965817451477, "learning_rate": 1.3030875016944356e-05, "loss": 0.0072, "step": 10925 }, { "epoch": 10.50576923076923, "grad_norm": 2.5434885025024414, "learning_rate": 1.3029687911467866e-05, "loss": 0.0979, "step": 10926 }, { "epoch": 10.506730769230769, "grad_norm": 1.5074671506881714, "learning_rate": 1.3028500758979507e-05, "loss": 0.029, "step": 10927 }, { "epoch": 10.507692307692308, "grad_norm": 4.696421146392822, "learning_rate": 1.302731355949771e-05, "loss": 0.1767, "step": 10928 }, { "epoch": 10.508653846153846, "grad_norm": 1.887352466583252, "learning_rate": 1.302612631304089e-05, "loss": 0.023, "step": 10929 }, { "epoch": 10.509615384615385, "grad_norm": 1.9123845100402832, "learning_rate": 1.3024939019627472e-05, "loss": 0.0102, "step": 10930 }, { "epoch": 10.510576923076924, "grad_norm": 0.6779114603996277, "learning_rate": 1.3023751679275876e-05, "loss": 0.0031, "step": 10931 }, { "epoch": 10.51153846153846, "grad_norm": 2.5708656311035156, "learning_rate": 1.3022564292004532e-05, "loss": 0.0176, "step": 10932 }, { "epoch": 10.5125, "grad_norm": 1.0624204874038696, "learning_rate": 1.302137685783186e-05, "loss": 0.0076, "step": 10933 }, { "epoch": 10.513461538461538, "grad_norm": 3.782454013824463, "learning_rate": 1.3020189376776284e-05, "loss": 0.0295, "step": 10934 }, { "epoch": 10.514423076923077, "grad_norm": 2.165642499923706, "learning_rate": 1.3019001848856236e-05, "loss": 0.0301, "step": 10935 }, { "epoch": 10.515384615384615, "grad_norm": 2.592745780944824, "learning_rate": 1.3017814274090142e-05, "loss": 0.0563, "step": 10936 }, { "epoch": 10.516346153846154, "grad_norm": 1.630421757698059, "learning_rate": 1.3016626652496425e-05, "loss": 0.0188, "step": 10937 }, { "epoch": 10.517307692307693, "grad_norm": 1.7433936595916748, "learning_rate": 1.3015438984093513e-05, "loss": 0.0162, "step": 10938 }, { "epoch": 10.518269230769231, "grad_norm": 0.765702486038208, "learning_rate": 1.3014251268899843e-05, "loss": 0.0067, "step": 10939 }, { "epoch": 10.51923076923077, "grad_norm": 0.18281777203083038, "learning_rate": 1.3013063506933838e-05, "loss": 0.0011, "step": 10940 }, { "epoch": 10.520192307692307, "grad_norm": 2.55513072013855, "learning_rate": 1.3011875698213932e-05, "loss": 0.0353, "step": 10941 }, { "epoch": 10.521153846153846, "grad_norm": 1.8965046405792236, "learning_rate": 1.3010687842758551e-05, "loss": 0.0193, "step": 10942 }, { "epoch": 10.522115384615384, "grad_norm": 1.333886742591858, "learning_rate": 1.3009499940586134e-05, "loss": 0.0198, "step": 10943 }, { "epoch": 10.523076923076923, "grad_norm": 2.5041298866271973, "learning_rate": 1.3008311991715113e-05, "loss": 0.0245, "step": 10944 }, { "epoch": 10.524038461538462, "grad_norm": 5.930942058563232, "learning_rate": 1.3007123996163915e-05, "loss": 0.0983, "step": 10945 }, { "epoch": 10.525, "grad_norm": 0.38452833890914917, "learning_rate": 1.3005935953950978e-05, "loss": 0.0022, "step": 10946 }, { "epoch": 10.525961538461539, "grad_norm": 2.4667954444885254, "learning_rate": 1.3004747865094742e-05, "loss": 0.0338, "step": 10947 }, { "epoch": 10.526923076923078, "grad_norm": 1.4314274787902832, "learning_rate": 1.3003559729613635e-05, "loss": 0.0087, "step": 10948 }, { "epoch": 10.527884615384615, "grad_norm": 2.334285259246826, "learning_rate": 1.3002371547526095e-05, "loss": 0.0196, "step": 10949 }, { "epoch": 10.528846153846153, "grad_norm": 1.6114214658737183, "learning_rate": 1.3001183318850563e-05, "loss": 0.0465, "step": 10950 }, { "epoch": 10.529807692307692, "grad_norm": 1.0775928497314453, "learning_rate": 1.2999995043605474e-05, "loss": 0.0105, "step": 10951 }, { "epoch": 10.53076923076923, "grad_norm": 2.543393611907959, "learning_rate": 1.2998806721809268e-05, "loss": 0.0867, "step": 10952 }, { "epoch": 10.53173076923077, "grad_norm": 0.31552985310554504, "learning_rate": 1.299761835348038e-05, "loss": 0.0021, "step": 10953 }, { "epoch": 10.532692307692308, "grad_norm": 2.1955034732818604, "learning_rate": 1.2996429938637254e-05, "loss": 0.0152, "step": 10954 }, { "epoch": 10.533653846153847, "grad_norm": 0.08490806072950363, "learning_rate": 1.299524147729833e-05, "loss": 0.0009, "step": 10955 }, { "epoch": 10.534615384615385, "grad_norm": 1.6984986066818237, "learning_rate": 1.2994052969482048e-05, "loss": 0.011, "step": 10956 }, { "epoch": 10.535576923076922, "grad_norm": 0.3293575048446655, "learning_rate": 1.2992864415206852e-05, "loss": 0.0038, "step": 10957 }, { "epoch": 10.536538461538461, "grad_norm": 2.016315460205078, "learning_rate": 1.2991675814491184e-05, "loss": 0.0289, "step": 10958 }, { "epoch": 10.5375, "grad_norm": 0.652686595916748, "learning_rate": 1.2990487167353492e-05, "loss": 0.0054, "step": 10959 }, { "epoch": 10.538461538461538, "grad_norm": 0.05626910924911499, "learning_rate": 1.2989298473812212e-05, "loss": 0.0007, "step": 10960 }, { "epoch": 10.539423076923077, "grad_norm": 2.573742628097534, "learning_rate": 1.2988109733885791e-05, "loss": 0.019, "step": 10961 }, { "epoch": 10.540384615384616, "grad_norm": 3.8380086421966553, "learning_rate": 1.2986920947592682e-05, "loss": 0.0341, "step": 10962 }, { "epoch": 10.541346153846154, "grad_norm": 0.12008857727050781, "learning_rate": 1.2985732114951323e-05, "loss": 0.0016, "step": 10963 }, { "epoch": 10.542307692307693, "grad_norm": 2.4039766788482666, "learning_rate": 1.2984543235980166e-05, "loss": 0.0277, "step": 10964 }, { "epoch": 10.54326923076923, "grad_norm": 0.5164469480514526, "learning_rate": 1.2983354310697658e-05, "loss": 0.0042, "step": 10965 }, { "epoch": 10.544230769230769, "grad_norm": 0.9997539520263672, "learning_rate": 1.2982165339122248e-05, "loss": 0.0095, "step": 10966 }, { "epoch": 10.545192307692307, "grad_norm": 2.9384403228759766, "learning_rate": 1.2980976321272381e-05, "loss": 0.0194, "step": 10967 }, { "epoch": 10.546153846153846, "grad_norm": 2.9827842712402344, "learning_rate": 1.2979787257166514e-05, "loss": 0.1083, "step": 10968 }, { "epoch": 10.547115384615385, "grad_norm": 1.8822851181030273, "learning_rate": 1.2978598146823091e-05, "loss": 0.0072, "step": 10969 }, { "epoch": 10.548076923076923, "grad_norm": 6.507681846618652, "learning_rate": 1.297740899026057e-05, "loss": 0.0709, "step": 10970 }, { "epoch": 10.549038461538462, "grad_norm": 1.7588331699371338, "learning_rate": 1.2976219787497397e-05, "loss": 0.0198, "step": 10971 }, { "epoch": 10.55, "grad_norm": 2.76142954826355, "learning_rate": 1.297503053855203e-05, "loss": 0.0228, "step": 10972 }, { "epoch": 10.55096153846154, "grad_norm": 0.08360084891319275, "learning_rate": 1.2973841243442922e-05, "loss": 0.0007, "step": 10973 }, { "epoch": 10.551923076923076, "grad_norm": 0.13381212949752808, "learning_rate": 1.2972651902188522e-05, "loss": 0.0012, "step": 10974 }, { "epoch": 10.552884615384615, "grad_norm": 2.9448323249816895, "learning_rate": 1.2971462514807293e-05, "loss": 0.1134, "step": 10975 }, { "epoch": 10.553846153846154, "grad_norm": 1.0107626914978027, "learning_rate": 1.2970273081317685e-05, "loss": 0.0084, "step": 10976 }, { "epoch": 10.554807692307692, "grad_norm": 2.5249407291412354, "learning_rate": 1.296908360173816e-05, "loss": 0.0238, "step": 10977 }, { "epoch": 10.555769230769231, "grad_norm": 1.8614405393600464, "learning_rate": 1.2967894076087166e-05, "loss": 0.0139, "step": 10978 }, { "epoch": 10.55673076923077, "grad_norm": 2.6202306747436523, "learning_rate": 1.296670450438317e-05, "loss": 0.0164, "step": 10979 }, { "epoch": 10.557692307692308, "grad_norm": 2.0191497802734375, "learning_rate": 1.2965514886644626e-05, "loss": 0.0148, "step": 10980 }, { "epoch": 10.558653846153845, "grad_norm": 0.3485310971736908, "learning_rate": 1.2964325222889995e-05, "loss": 0.0018, "step": 10981 }, { "epoch": 10.559615384615384, "grad_norm": 4.955264568328857, "learning_rate": 1.2963135513137737e-05, "loss": 0.0516, "step": 10982 }, { "epoch": 10.560576923076923, "grad_norm": 0.3834623098373413, "learning_rate": 1.2961945757406311e-05, "loss": 0.0034, "step": 10983 }, { "epoch": 10.561538461538461, "grad_norm": 3.111576557159424, "learning_rate": 1.2960755955714181e-05, "loss": 0.0449, "step": 10984 }, { "epoch": 10.5625, "grad_norm": 0.10728763788938522, "learning_rate": 1.2959566108079809e-05, "loss": 0.0013, "step": 10985 }, { "epoch": 10.563461538461539, "grad_norm": 2.1149113178253174, "learning_rate": 1.2958376214521657e-05, "loss": 0.0318, "step": 10986 }, { "epoch": 10.564423076923077, "grad_norm": 0.11777324229478836, "learning_rate": 1.2957186275058188e-05, "loss": 0.0012, "step": 10987 }, { "epoch": 10.565384615384616, "grad_norm": 3.869990825653076, "learning_rate": 1.295599628970787e-05, "loss": 0.0391, "step": 10988 }, { "epoch": 10.566346153846155, "grad_norm": 0.9558551907539368, "learning_rate": 1.2954806258489163e-05, "loss": 0.005, "step": 10989 }, { "epoch": 10.567307692307692, "grad_norm": 2.5096323490142822, "learning_rate": 1.2953616181420533e-05, "loss": 0.0757, "step": 10990 }, { "epoch": 10.56826923076923, "grad_norm": 2.525702714920044, "learning_rate": 1.2952426058520454e-05, "loss": 0.0145, "step": 10991 }, { "epoch": 10.569230769230769, "grad_norm": 0.3259618282318115, "learning_rate": 1.2951235889807386e-05, "loss": 0.0026, "step": 10992 }, { "epoch": 10.570192307692308, "grad_norm": 0.663632333278656, "learning_rate": 1.2950045675299797e-05, "loss": 0.0045, "step": 10993 }, { "epoch": 10.571153846153846, "grad_norm": 2.3960318565368652, "learning_rate": 1.2948855415016157e-05, "loss": 0.0587, "step": 10994 }, { "epoch": 10.572115384615385, "grad_norm": 2.2686450481414795, "learning_rate": 1.2947665108974938e-05, "loss": 0.0246, "step": 10995 }, { "epoch": 10.573076923076924, "grad_norm": 3.510908603668213, "learning_rate": 1.2946474757194604e-05, "loss": 0.0249, "step": 10996 }, { "epoch": 10.57403846153846, "grad_norm": 2.5655345916748047, "learning_rate": 1.2945284359693633e-05, "loss": 0.0336, "step": 10997 }, { "epoch": 10.575, "grad_norm": 2.101311445236206, "learning_rate": 1.2944093916490492e-05, "loss": 0.0171, "step": 10998 }, { "epoch": 10.575961538461538, "grad_norm": 0.5980834364891052, "learning_rate": 1.2942903427603658e-05, "loss": 0.0028, "step": 10999 }, { "epoch": 10.576923076923077, "grad_norm": 0.5805820226669312, "learning_rate": 1.2941712893051595e-05, "loss": 0.0033, "step": 11000 }, { "epoch": 10.577884615384615, "grad_norm": 2.031395673751831, "learning_rate": 1.294052231285278e-05, "loss": 0.081, "step": 11001 }, { "epoch": 10.578846153846154, "grad_norm": 1.1588162183761597, "learning_rate": 1.2939331687025696e-05, "loss": 0.0059, "step": 11002 }, { "epoch": 10.579807692307693, "grad_norm": 2.0237808227539062, "learning_rate": 1.2938141015588807e-05, "loss": 0.0434, "step": 11003 }, { "epoch": 10.580769230769231, "grad_norm": 1.6067826747894287, "learning_rate": 1.2936950298560591e-05, "loss": 0.0143, "step": 11004 }, { "epoch": 10.58173076923077, "grad_norm": 0.5862506031990051, "learning_rate": 1.2935759535959528e-05, "loss": 0.0031, "step": 11005 }, { "epoch": 10.582692307692307, "grad_norm": 1.2283248901367188, "learning_rate": 1.2934568727804096e-05, "loss": 0.0065, "step": 11006 }, { "epoch": 10.583653846153846, "grad_norm": 0.820810079574585, "learning_rate": 1.2933377874112768e-05, "loss": 0.0045, "step": 11007 }, { "epoch": 10.584615384615384, "grad_norm": 1.6918447017669678, "learning_rate": 1.2932186974904024e-05, "loss": 0.016, "step": 11008 }, { "epoch": 10.585576923076923, "grad_norm": 2.701327323913574, "learning_rate": 1.2930996030196346e-05, "loss": 0.0332, "step": 11009 }, { "epoch": 10.586538461538462, "grad_norm": 1.4741098880767822, "learning_rate": 1.292980504000821e-05, "loss": 0.023, "step": 11010 }, { "epoch": 10.5875, "grad_norm": 0.22520199418067932, "learning_rate": 1.2928614004358098e-05, "loss": 0.0017, "step": 11011 }, { "epoch": 10.588461538461539, "grad_norm": 0.19345322251319885, "learning_rate": 1.2927422923264494e-05, "loss": 0.0017, "step": 11012 }, { "epoch": 10.589423076923078, "grad_norm": 2.6010782718658447, "learning_rate": 1.2926231796745878e-05, "loss": 0.0258, "step": 11013 }, { "epoch": 10.590384615384615, "grad_norm": 1.010522484779358, "learning_rate": 1.292504062482073e-05, "loss": 0.0037, "step": 11014 }, { "epoch": 10.591346153846153, "grad_norm": 1.9869786500930786, "learning_rate": 1.2923849407507541e-05, "loss": 0.0108, "step": 11015 }, { "epoch": 10.592307692307692, "grad_norm": 0.7670847773551941, "learning_rate": 1.292265814482479e-05, "loss": 0.0039, "step": 11016 }, { "epoch": 10.59326923076923, "grad_norm": 1.0766059160232544, "learning_rate": 1.2921466836790959e-05, "loss": 0.0032, "step": 11017 }, { "epoch": 10.59423076923077, "grad_norm": 1.9333314895629883, "learning_rate": 1.2920275483424538e-05, "loss": 0.0117, "step": 11018 }, { "epoch": 10.595192307692308, "grad_norm": 0.3475037217140198, "learning_rate": 1.2919084084744014e-05, "loss": 0.002, "step": 11019 }, { "epoch": 10.596153846153847, "grad_norm": 2.0291807651519775, "learning_rate": 1.2917892640767873e-05, "loss": 0.0146, "step": 11020 }, { "epoch": 10.597115384615385, "grad_norm": 0.7777220010757446, "learning_rate": 1.2916701151514601e-05, "loss": 0.0039, "step": 11021 }, { "epoch": 10.598076923076922, "grad_norm": 0.6017826795578003, "learning_rate": 1.2915509617002687e-05, "loss": 0.0031, "step": 11022 }, { "epoch": 10.599038461538461, "grad_norm": 0.506989598274231, "learning_rate": 1.2914318037250619e-05, "loss": 0.0029, "step": 11023 }, { "epoch": 10.6, "grad_norm": 3.082853317260742, "learning_rate": 1.2913126412276893e-05, "loss": 0.0371, "step": 11024 }, { "epoch": 10.600961538461538, "grad_norm": 1.5177072286605835, "learning_rate": 1.2911934742099989e-05, "loss": 0.0097, "step": 11025 }, { "epoch": 10.601923076923077, "grad_norm": 1.9334020614624023, "learning_rate": 1.291074302673841e-05, "loss": 0.0139, "step": 11026 }, { "epoch": 10.602884615384616, "grad_norm": 3.8445749282836914, "learning_rate": 1.2909551266210641e-05, "loss": 0.0291, "step": 11027 }, { "epoch": 10.603846153846154, "grad_norm": 0.17761273682117462, "learning_rate": 1.2908359460535174e-05, "loss": 0.0012, "step": 11028 }, { "epoch": 10.604807692307693, "grad_norm": 1.4488505125045776, "learning_rate": 1.2907167609730504e-05, "loss": 0.0075, "step": 11029 }, { "epoch": 10.60576923076923, "grad_norm": 3.034334897994995, "learning_rate": 1.2905975713815127e-05, "loss": 0.0215, "step": 11030 }, { "epoch": 10.606730769230769, "grad_norm": 1.7050145864486694, "learning_rate": 1.2904783772807534e-05, "loss": 0.0164, "step": 11031 }, { "epoch": 10.607692307692307, "grad_norm": 0.5747406482696533, "learning_rate": 1.2903591786726226e-05, "loss": 0.0041, "step": 11032 }, { "epoch": 10.608653846153846, "grad_norm": 0.47204118967056274, "learning_rate": 1.290239975558969e-05, "loss": 0.0034, "step": 11033 }, { "epoch": 10.609615384615385, "grad_norm": 3.1605615615844727, "learning_rate": 1.2901207679416432e-05, "loss": 0.0561, "step": 11034 }, { "epoch": 10.610576923076923, "grad_norm": 2.653682231903076, "learning_rate": 1.2900015558224946e-05, "loss": 0.028, "step": 11035 }, { "epoch": 10.611538461538462, "grad_norm": 0.5617170929908752, "learning_rate": 1.2898823392033727e-05, "loss": 0.0031, "step": 11036 }, { "epoch": 10.6125, "grad_norm": 2.200251340866089, "learning_rate": 1.2897631180861277e-05, "loss": 0.0134, "step": 11037 }, { "epoch": 10.61346153846154, "grad_norm": 2.186715841293335, "learning_rate": 1.2896438924726099e-05, "loss": 0.083, "step": 11038 }, { "epoch": 10.614423076923076, "grad_norm": 3.6250245571136475, "learning_rate": 1.2895246623646687e-05, "loss": 0.0251, "step": 11039 }, { "epoch": 10.615384615384615, "grad_norm": 0.5782026052474976, "learning_rate": 1.2894054277641544e-05, "loss": 0.0057, "step": 11040 }, { "epoch": 10.616346153846154, "grad_norm": 0.10147164762020111, "learning_rate": 1.2892861886729175e-05, "loss": 0.0009, "step": 11041 }, { "epoch": 10.617307692307692, "grad_norm": 0.23606792092323303, "learning_rate": 1.289166945092808e-05, "loss": 0.0016, "step": 11042 }, { "epoch": 10.618269230769231, "grad_norm": 0.5556548237800598, "learning_rate": 1.2890476970256762e-05, "loss": 0.0029, "step": 11043 }, { "epoch": 10.61923076923077, "grad_norm": 1.5256892442703247, "learning_rate": 1.2889284444733722e-05, "loss": 0.0197, "step": 11044 }, { "epoch": 10.620192307692308, "grad_norm": 1.5374963283538818, "learning_rate": 1.2888091874377471e-05, "loss": 0.0073, "step": 11045 }, { "epoch": 10.621153846153845, "grad_norm": 0.2996689975261688, "learning_rate": 1.288689925920651e-05, "loss": 0.0012, "step": 11046 }, { "epoch": 10.622115384615384, "grad_norm": 2.3693008422851562, "learning_rate": 1.2885706599239343e-05, "loss": 0.0481, "step": 11047 }, { "epoch": 10.623076923076923, "grad_norm": 1.270272135734558, "learning_rate": 1.2884513894494479e-05, "loss": 0.0065, "step": 11048 }, { "epoch": 10.624038461538461, "grad_norm": 2.563825845718384, "learning_rate": 1.2883321144990428e-05, "loss": 0.0136, "step": 11049 }, { "epoch": 10.625, "grad_norm": 0.7904440760612488, "learning_rate": 1.2882128350745694e-05, "loss": 0.0043, "step": 11050 }, { "epoch": 10.625961538461539, "grad_norm": 2.172452688217163, "learning_rate": 1.2880935511778785e-05, "loss": 0.038, "step": 11051 }, { "epoch": 10.626923076923077, "grad_norm": 0.33020392060279846, "learning_rate": 1.2879742628108217e-05, "loss": 0.0009, "step": 11052 }, { "epoch": 10.627884615384616, "grad_norm": 1.7844334840774536, "learning_rate": 1.2878549699752491e-05, "loss": 0.0275, "step": 11053 }, { "epoch": 10.628846153846155, "grad_norm": 3.0697033405303955, "learning_rate": 1.2877356726730126e-05, "loss": 0.0391, "step": 11054 }, { "epoch": 10.629807692307692, "grad_norm": 0.602858304977417, "learning_rate": 1.2876163709059626e-05, "loss": 0.0026, "step": 11055 }, { "epoch": 10.63076923076923, "grad_norm": 3.648557186126709, "learning_rate": 1.2874970646759507e-05, "loss": 0.0247, "step": 11056 }, { "epoch": 10.631730769230769, "grad_norm": 0.6636611819267273, "learning_rate": 1.2873777539848284e-05, "loss": 0.0038, "step": 11057 }, { "epoch": 10.632692307692308, "grad_norm": 2.2099030017852783, "learning_rate": 1.2872584388344464e-05, "loss": 0.0175, "step": 11058 }, { "epoch": 10.633653846153846, "grad_norm": 2.6103336811065674, "learning_rate": 1.287139119226657e-05, "loss": 0.0126, "step": 11059 }, { "epoch": 10.634615384615385, "grad_norm": 0.2997899353504181, "learning_rate": 1.287019795163311e-05, "loss": 0.0016, "step": 11060 }, { "epoch": 10.635576923076924, "grad_norm": 2.0712831020355225, "learning_rate": 1.2869004666462602e-05, "loss": 0.0139, "step": 11061 }, { "epoch": 10.63653846153846, "grad_norm": 2.6125686168670654, "learning_rate": 1.286781133677356e-05, "loss": 0.0438, "step": 11062 }, { "epoch": 10.6375, "grad_norm": 0.4109610915184021, "learning_rate": 1.2866617962584504e-05, "loss": 0.0018, "step": 11063 }, { "epoch": 10.638461538461538, "grad_norm": 3.0416057109832764, "learning_rate": 1.2865424543913951e-05, "loss": 0.0953, "step": 11064 }, { "epoch": 10.639423076923077, "grad_norm": 2.7512524127960205, "learning_rate": 1.2864231080780419e-05, "loss": 0.021, "step": 11065 }, { "epoch": 10.640384615384615, "grad_norm": 1.701477289199829, "learning_rate": 1.2863037573202428e-05, "loss": 0.0109, "step": 11066 }, { "epoch": 10.641346153846154, "grad_norm": 2.489777088165283, "learning_rate": 1.2861844021198497e-05, "loss": 0.0412, "step": 11067 }, { "epoch": 10.642307692307693, "grad_norm": 0.16719381511211395, "learning_rate": 1.2860650424787147e-05, "loss": 0.0013, "step": 11068 }, { "epoch": 10.643269230769231, "grad_norm": 3.6882779598236084, "learning_rate": 1.2859456783986892e-05, "loss": 0.0403, "step": 11069 }, { "epoch": 10.64423076923077, "grad_norm": 0.2752093970775604, "learning_rate": 1.2858263098816265e-05, "loss": 0.0023, "step": 11070 }, { "epoch": 10.645192307692307, "grad_norm": 2.2184898853302, "learning_rate": 1.2857069369293784e-05, "loss": 0.0386, "step": 11071 }, { "epoch": 10.646153846153846, "grad_norm": 7.881589889526367, "learning_rate": 1.2855875595437972e-05, "loss": 0.0441, "step": 11072 }, { "epoch": 10.647115384615384, "grad_norm": 1.5689700841903687, "learning_rate": 1.285468177726735e-05, "loss": 0.0145, "step": 11073 }, { "epoch": 10.648076923076923, "grad_norm": 2.0867679119110107, "learning_rate": 1.285348791480045e-05, "loss": 0.0165, "step": 11074 }, { "epoch": 10.649038461538462, "grad_norm": 0.060871366411447525, "learning_rate": 1.2852294008055787e-05, "loss": 0.0005, "step": 11075 }, { "epoch": 10.65, "grad_norm": 4.264911651611328, "learning_rate": 1.2851100057051895e-05, "loss": 0.0356, "step": 11076 }, { "epoch": 10.650961538461539, "grad_norm": 0.9981663823127747, "learning_rate": 1.2849906061807298e-05, "loss": 0.0284, "step": 11077 }, { "epoch": 10.651923076923078, "grad_norm": 0.10196100920438766, "learning_rate": 1.2848712022340523e-05, "loss": 0.0006, "step": 11078 }, { "epoch": 10.652884615384615, "grad_norm": 2.0122549533843994, "learning_rate": 1.28475179386701e-05, "loss": 0.0222, "step": 11079 }, { "epoch": 10.653846153846153, "grad_norm": 2.4266579151153564, "learning_rate": 1.284632381081455e-05, "loss": 0.0724, "step": 11080 }, { "epoch": 10.654807692307692, "grad_norm": 1.3701893091201782, "learning_rate": 1.2845129638792416e-05, "loss": 0.0061, "step": 11081 }, { "epoch": 10.65576923076923, "grad_norm": 1.685465693473816, "learning_rate": 1.2843935422622218e-05, "loss": 0.011, "step": 11082 }, { "epoch": 10.65673076923077, "grad_norm": 6.44041633605957, "learning_rate": 1.2842741162322487e-05, "loss": 0.1638, "step": 11083 }, { "epoch": 10.657692307692308, "grad_norm": 1.0076334476470947, "learning_rate": 1.2841546857911757e-05, "loss": 0.0037, "step": 11084 }, { "epoch": 10.658653846153847, "grad_norm": 2.014455795288086, "learning_rate": 1.284035250940856e-05, "loss": 0.015, "step": 11085 }, { "epoch": 10.659615384615385, "grad_norm": 1.883418321609497, "learning_rate": 1.283915811683143e-05, "loss": 0.0155, "step": 11086 }, { "epoch": 10.660576923076922, "grad_norm": 1.5212132930755615, "learning_rate": 1.2837963680198895e-05, "loss": 0.008, "step": 11087 }, { "epoch": 10.661538461538461, "grad_norm": 2.6685919761657715, "learning_rate": 1.2836769199529497e-05, "loss": 0.0186, "step": 11088 }, { "epoch": 10.6625, "grad_norm": 2.6795902252197266, "learning_rate": 1.2835574674841764e-05, "loss": 0.0164, "step": 11089 }, { "epoch": 10.663461538461538, "grad_norm": 1.6370372772216797, "learning_rate": 1.2834380106154235e-05, "loss": 0.0071, "step": 11090 }, { "epoch": 10.664423076923077, "grad_norm": 4.18134069442749, "learning_rate": 1.2833185493485447e-05, "loss": 0.0333, "step": 11091 }, { "epoch": 10.665384615384616, "grad_norm": 1.3961706161499023, "learning_rate": 1.2831990836853937e-05, "loss": 0.0242, "step": 11092 }, { "epoch": 10.666346153846154, "grad_norm": 2.110422134399414, "learning_rate": 1.2830796136278236e-05, "loss": 0.0508, "step": 11093 }, { "epoch": 10.667307692307693, "grad_norm": 0.18525129556655884, "learning_rate": 1.2829601391776891e-05, "loss": 0.0019, "step": 11094 }, { "epoch": 10.66826923076923, "grad_norm": 0.6162082552909851, "learning_rate": 1.2828406603368435e-05, "loss": 0.0047, "step": 11095 }, { "epoch": 10.669230769230769, "grad_norm": 0.9151756763458252, "learning_rate": 1.282721177107141e-05, "loss": 0.0058, "step": 11096 }, { "epoch": 10.670192307692307, "grad_norm": 0.8276777267456055, "learning_rate": 1.2826016894904359e-05, "loss": 0.0042, "step": 11097 }, { "epoch": 10.671153846153846, "grad_norm": 2.7106077671051025, "learning_rate": 1.2824821974885817e-05, "loss": 0.0743, "step": 11098 }, { "epoch": 10.672115384615385, "grad_norm": 2.806227684020996, "learning_rate": 1.282362701103433e-05, "loss": 0.0413, "step": 11099 }, { "epoch": 10.673076923076923, "grad_norm": 2.408924102783203, "learning_rate": 1.2822432003368439e-05, "loss": 0.0296, "step": 11100 }, { "epoch": 10.674038461538462, "grad_norm": 3.0512523651123047, "learning_rate": 1.2821236951906689e-05, "loss": 0.0359, "step": 11101 }, { "epoch": 10.675, "grad_norm": 3.0187153816223145, "learning_rate": 1.2820041856667615e-05, "loss": 0.0621, "step": 11102 }, { "epoch": 10.67596153846154, "grad_norm": 0.47914421558380127, "learning_rate": 1.2818846717669775e-05, "loss": 0.0027, "step": 11103 }, { "epoch": 10.676923076923076, "grad_norm": 0.2592766582965851, "learning_rate": 1.2817651534931705e-05, "loss": 0.0018, "step": 11104 }, { "epoch": 10.677884615384615, "grad_norm": 0.2203872948884964, "learning_rate": 1.2816456308471952e-05, "loss": 0.0013, "step": 11105 }, { "epoch": 10.678846153846154, "grad_norm": 0.9229957461357117, "learning_rate": 1.2815261038309063e-05, "loss": 0.0074, "step": 11106 }, { "epoch": 10.679807692307692, "grad_norm": 0.05861637368798256, "learning_rate": 1.2814065724461587e-05, "loss": 0.0004, "step": 11107 }, { "epoch": 10.680769230769231, "grad_norm": 0.8695030212402344, "learning_rate": 1.2812870366948071e-05, "loss": 0.0065, "step": 11108 }, { "epoch": 10.68173076923077, "grad_norm": 1.2641932964324951, "learning_rate": 1.2811674965787058e-05, "loss": 0.0045, "step": 11109 }, { "epoch": 10.682692307692308, "grad_norm": 1.46172297000885, "learning_rate": 1.2810479520997106e-05, "loss": 0.0103, "step": 11110 }, { "epoch": 10.683653846153845, "grad_norm": 1.1058146953582764, "learning_rate": 1.2809284032596761e-05, "loss": 0.0076, "step": 11111 }, { "epoch": 10.684615384615384, "grad_norm": 2.3900740146636963, "learning_rate": 1.2808088500604572e-05, "loss": 0.0499, "step": 11112 }, { "epoch": 10.685576923076923, "grad_norm": 3.182762861251831, "learning_rate": 1.2806892925039087e-05, "loss": 0.0284, "step": 11113 }, { "epoch": 10.686538461538461, "grad_norm": 2.1623036861419678, "learning_rate": 1.2805697305918865e-05, "loss": 0.0188, "step": 11114 }, { "epoch": 10.6875, "grad_norm": 0.9173160791397095, "learning_rate": 1.2804501643262457e-05, "loss": 0.0087, "step": 11115 }, { "epoch": 10.688461538461539, "grad_norm": 1.2830616235733032, "learning_rate": 1.2803305937088413e-05, "loss": 0.0079, "step": 11116 }, { "epoch": 10.689423076923077, "grad_norm": 2.239281177520752, "learning_rate": 1.2802110187415289e-05, "loss": 0.0198, "step": 11117 }, { "epoch": 10.690384615384616, "grad_norm": 2.750080108642578, "learning_rate": 1.280091439426164e-05, "loss": 0.0181, "step": 11118 }, { "epoch": 10.691346153846155, "grad_norm": 2.2637367248535156, "learning_rate": 1.2799718557646018e-05, "loss": 0.0204, "step": 11119 }, { "epoch": 10.692307692307692, "grad_norm": 0.7903817892074585, "learning_rate": 1.2798522677586979e-05, "loss": 0.0041, "step": 11120 }, { "epoch": 10.69326923076923, "grad_norm": 0.15040543675422668, "learning_rate": 1.2797326754103087e-05, "loss": 0.0009, "step": 11121 }, { "epoch": 10.694230769230769, "grad_norm": 2.2673463821411133, "learning_rate": 1.279613078721289e-05, "loss": 0.0242, "step": 11122 }, { "epoch": 10.695192307692308, "grad_norm": 2.909635305404663, "learning_rate": 1.2794934776934953e-05, "loss": 0.0246, "step": 11123 }, { "epoch": 10.696153846153846, "grad_norm": 2.2555997371673584, "learning_rate": 1.2793738723287827e-05, "loss": 0.0172, "step": 11124 }, { "epoch": 10.697115384615385, "grad_norm": 0.08495950698852539, "learning_rate": 1.279254262629008e-05, "loss": 0.001, "step": 11125 }, { "epoch": 10.698076923076924, "grad_norm": 2.975748062133789, "learning_rate": 1.2791346485960266e-05, "loss": 0.017, "step": 11126 }, { "epoch": 10.69903846153846, "grad_norm": 0.19043876230716705, "learning_rate": 1.2790150302316943e-05, "loss": 0.0011, "step": 11127 }, { "epoch": 10.7, "grad_norm": 1.4508427381515503, "learning_rate": 1.2788954075378682e-05, "loss": 0.0137, "step": 11128 }, { "epoch": 10.700961538461538, "grad_norm": 2.556641101837158, "learning_rate": 1.2787757805164038e-05, "loss": 0.0164, "step": 11129 }, { "epoch": 10.701923076923077, "grad_norm": 0.7164038419723511, "learning_rate": 1.2786561491691574e-05, "loss": 0.0059, "step": 11130 }, { "epoch": 10.702884615384615, "grad_norm": 1.6417661905288696, "learning_rate": 1.2785365134979852e-05, "loss": 0.0289, "step": 11131 }, { "epoch": 10.703846153846154, "grad_norm": 6.64668607711792, "learning_rate": 1.2784168735047442e-05, "loss": 0.0588, "step": 11132 }, { "epoch": 10.704807692307693, "grad_norm": 0.07686187326908112, "learning_rate": 1.2782972291912903e-05, "loss": 0.0006, "step": 11133 }, { "epoch": 10.705769230769231, "grad_norm": 0.2316724807024002, "learning_rate": 1.27817758055948e-05, "loss": 0.0008, "step": 11134 }, { "epoch": 10.70673076923077, "grad_norm": 1.084996223449707, "learning_rate": 1.2780579276111702e-05, "loss": 0.0126, "step": 11135 }, { "epoch": 10.707692307692307, "grad_norm": 3.1521096229553223, "learning_rate": 1.2779382703482175e-05, "loss": 0.0301, "step": 11136 }, { "epoch": 10.708653846153846, "grad_norm": 1.3516005277633667, "learning_rate": 1.2778186087724786e-05, "loss": 0.0118, "step": 11137 }, { "epoch": 10.709615384615384, "grad_norm": 0.656154990196228, "learning_rate": 1.2776989428858102e-05, "loss": 0.0019, "step": 11138 }, { "epoch": 10.710576923076923, "grad_norm": 4.246893405914307, "learning_rate": 1.2775792726900696e-05, "loss": 0.0788, "step": 11139 }, { "epoch": 10.711538461538462, "grad_norm": 0.26627615094184875, "learning_rate": 1.277459598187113e-05, "loss": 0.0016, "step": 11140 }, { "epoch": 10.7125, "grad_norm": 1.5759656429290771, "learning_rate": 1.2773399193787978e-05, "loss": 0.0106, "step": 11141 }, { "epoch": 10.713461538461539, "grad_norm": 3.4554450511932373, "learning_rate": 1.277220236266981e-05, "loss": 0.1159, "step": 11142 }, { "epoch": 10.714423076923078, "grad_norm": 3.2877848148345947, "learning_rate": 1.27710054885352e-05, "loss": 0.0757, "step": 11143 }, { "epoch": 10.715384615384615, "grad_norm": 2.5474624633789062, "learning_rate": 1.2769808571402715e-05, "loss": 0.0409, "step": 11144 }, { "epoch": 10.716346153846153, "grad_norm": 1.8424475193023682, "learning_rate": 1.276861161129093e-05, "loss": 0.0095, "step": 11145 }, { "epoch": 10.717307692307692, "grad_norm": 0.23563233017921448, "learning_rate": 1.276741460821842e-05, "loss": 0.0019, "step": 11146 }, { "epoch": 10.71826923076923, "grad_norm": 2.2642886638641357, "learning_rate": 1.276621756220376e-05, "loss": 0.0193, "step": 11147 }, { "epoch": 10.71923076923077, "grad_norm": 0.31391647458076477, "learning_rate": 1.276502047326552e-05, "loss": 0.0021, "step": 11148 }, { "epoch": 10.720192307692308, "grad_norm": 0.05771420896053314, "learning_rate": 1.2763823341422277e-05, "loss": 0.0006, "step": 11149 }, { "epoch": 10.721153846153847, "grad_norm": 0.7901147603988647, "learning_rate": 1.2762626166692606e-05, "loss": 0.0048, "step": 11150 }, { "epoch": 10.722115384615385, "grad_norm": 1.3679628372192383, "learning_rate": 1.2761428949095089e-05, "loss": 0.0052, "step": 11151 }, { "epoch": 10.723076923076922, "grad_norm": 2.0999152660369873, "learning_rate": 1.2760231688648296e-05, "loss": 0.0105, "step": 11152 }, { "epoch": 10.724038461538461, "grad_norm": 1.9616785049438477, "learning_rate": 1.275903438537081e-05, "loss": 0.0138, "step": 11153 }, { "epoch": 10.725, "grad_norm": 3.1882102489471436, "learning_rate": 1.2757837039281207e-05, "loss": 0.0265, "step": 11154 }, { "epoch": 10.725961538461538, "grad_norm": 2.694986581802368, "learning_rate": 1.2756639650398068e-05, "loss": 0.0328, "step": 11155 }, { "epoch": 10.726923076923077, "grad_norm": 2.0214216709136963, "learning_rate": 1.275544221873997e-05, "loss": 0.0114, "step": 11156 }, { "epoch": 10.727884615384616, "grad_norm": 2.4792792797088623, "learning_rate": 1.2754244744325499e-05, "loss": 0.0584, "step": 11157 }, { "epoch": 10.728846153846154, "grad_norm": 3.564631223678589, "learning_rate": 1.2753047227173234e-05, "loss": 0.038, "step": 11158 }, { "epoch": 10.729807692307693, "grad_norm": 1.5009821653366089, "learning_rate": 1.2751849667301754e-05, "loss": 0.0175, "step": 11159 }, { "epoch": 10.73076923076923, "grad_norm": 2.2519075870513916, "learning_rate": 1.2750652064729643e-05, "loss": 0.0312, "step": 11160 }, { "epoch": 10.731730769230769, "grad_norm": 1.8310588598251343, "learning_rate": 1.2749454419475486e-05, "loss": 0.067, "step": 11161 }, { "epoch": 10.732692307692307, "grad_norm": 1.7116739749908447, "learning_rate": 1.2748256731557866e-05, "loss": 0.0158, "step": 11162 }, { "epoch": 10.733653846153846, "grad_norm": 2.1595373153686523, "learning_rate": 1.274705900099537e-05, "loss": 0.0519, "step": 11163 }, { "epoch": 10.734615384615385, "grad_norm": 0.1868685781955719, "learning_rate": 1.2745861227806578e-05, "loss": 0.0011, "step": 11164 }, { "epoch": 10.735576923076923, "grad_norm": 2.6080081462860107, "learning_rate": 1.2744663412010079e-05, "loss": 0.0198, "step": 11165 }, { "epoch": 10.736538461538462, "grad_norm": 0.13658590614795685, "learning_rate": 1.274346555362446e-05, "loss": 0.001, "step": 11166 }, { "epoch": 10.7375, "grad_norm": 0.2723459303379059, "learning_rate": 1.2742267652668305e-05, "loss": 0.0018, "step": 11167 }, { "epoch": 10.73846153846154, "grad_norm": 2.1904842853546143, "learning_rate": 1.2741069709160208e-05, "loss": 0.0326, "step": 11168 }, { "epoch": 10.739423076923076, "grad_norm": 2.9075982570648193, "learning_rate": 1.2739871723118753e-05, "loss": 0.0132, "step": 11169 }, { "epoch": 10.740384615384615, "grad_norm": 3.979736804962158, "learning_rate": 1.2738673694562531e-05, "loss": 0.1079, "step": 11170 }, { "epoch": 10.741346153846154, "grad_norm": 0.2736016511917114, "learning_rate": 1.2737475623510127e-05, "loss": 0.0026, "step": 11171 }, { "epoch": 10.742307692307692, "grad_norm": 1.0360645055770874, "learning_rate": 1.2736277509980139e-05, "loss": 0.0045, "step": 11172 }, { "epoch": 10.743269230769231, "grad_norm": 2.3381996154785156, "learning_rate": 1.2735079353991156e-05, "loss": 0.012, "step": 11173 }, { "epoch": 10.74423076923077, "grad_norm": 1.7649016380310059, "learning_rate": 1.273388115556177e-05, "loss": 0.0344, "step": 11174 }, { "epoch": 10.745192307692308, "grad_norm": 0.9364100694656372, "learning_rate": 1.2732682914710568e-05, "loss": 0.0061, "step": 11175 }, { "epoch": 10.746153846153845, "grad_norm": 0.04882754012942314, "learning_rate": 1.2731484631456148e-05, "loss": 0.0003, "step": 11176 }, { "epoch": 10.747115384615384, "grad_norm": 0.3673798739910126, "learning_rate": 1.2730286305817106e-05, "loss": 0.0036, "step": 11177 }, { "epoch": 10.748076923076923, "grad_norm": 1.6312143802642822, "learning_rate": 1.2729087937812032e-05, "loss": 0.0137, "step": 11178 }, { "epoch": 10.749038461538461, "grad_norm": 1.746514081954956, "learning_rate": 1.2727889527459523e-05, "loss": 0.0102, "step": 11179 }, { "epoch": 10.75, "grad_norm": 0.04578319936990738, "learning_rate": 1.2726691074778178e-05, "loss": 0.0003, "step": 11180 }, { "epoch": 10.750961538461539, "grad_norm": 0.22317562997341156, "learning_rate": 1.2725492579786587e-05, "loss": 0.0017, "step": 11181 }, { "epoch": 10.751923076923077, "grad_norm": 0.5315492153167725, "learning_rate": 1.2724294042503349e-05, "loss": 0.0047, "step": 11182 }, { "epoch": 10.752884615384616, "grad_norm": 1.9921783208847046, "learning_rate": 1.2723095462947067e-05, "loss": 0.0211, "step": 11183 }, { "epoch": 10.753846153846155, "grad_norm": 1.7521631717681885, "learning_rate": 1.2721896841136334e-05, "loss": 0.0116, "step": 11184 }, { "epoch": 10.754807692307692, "grad_norm": 1.510375738143921, "learning_rate": 1.272069817708975e-05, "loss": 0.0126, "step": 11185 }, { "epoch": 10.75576923076923, "grad_norm": 4.680807113647461, "learning_rate": 1.2719499470825913e-05, "loss": 0.1459, "step": 11186 }, { "epoch": 10.756730769230769, "grad_norm": 3.100196361541748, "learning_rate": 1.2718300722363431e-05, "loss": 0.0312, "step": 11187 }, { "epoch": 10.757692307692308, "grad_norm": 2.510211229324341, "learning_rate": 1.2717101931720897e-05, "loss": 0.03, "step": 11188 }, { "epoch": 10.758653846153846, "grad_norm": 2.788419485092163, "learning_rate": 1.2715903098916915e-05, "loss": 0.1229, "step": 11189 }, { "epoch": 10.759615384615385, "grad_norm": 0.6956012845039368, "learning_rate": 1.2714704223970088e-05, "loss": 0.0034, "step": 11190 }, { "epoch": 10.760576923076924, "grad_norm": 0.38163048028945923, "learning_rate": 1.2713505306899024e-05, "loss": 0.0028, "step": 11191 }, { "epoch": 10.76153846153846, "grad_norm": 2.8027944564819336, "learning_rate": 1.2712306347722319e-05, "loss": 0.0328, "step": 11192 }, { "epoch": 10.7625, "grad_norm": 3.601519823074341, "learning_rate": 1.2711107346458575e-05, "loss": 0.0236, "step": 11193 }, { "epoch": 10.763461538461538, "grad_norm": 4.593261241912842, "learning_rate": 1.2709908303126409e-05, "loss": 0.0386, "step": 11194 }, { "epoch": 10.764423076923077, "grad_norm": 0.8449654579162598, "learning_rate": 1.2708709217744416e-05, "loss": 0.0052, "step": 11195 }, { "epoch": 10.765384615384615, "grad_norm": 0.7400532364845276, "learning_rate": 1.2707510090331202e-05, "loss": 0.004, "step": 11196 }, { "epoch": 10.766346153846154, "grad_norm": 0.8434100151062012, "learning_rate": 1.2706310920905385e-05, "loss": 0.0035, "step": 11197 }, { "epoch": 10.767307692307693, "grad_norm": 2.70249605178833, "learning_rate": 1.2705111709485562e-05, "loss": 0.0241, "step": 11198 }, { "epoch": 10.768269230769231, "grad_norm": 3.341008424758911, "learning_rate": 1.2703912456090349e-05, "loss": 0.0206, "step": 11199 }, { "epoch": 10.76923076923077, "grad_norm": 0.10062888264656067, "learning_rate": 1.2702713160738344e-05, "loss": 0.0005, "step": 11200 }, { "epoch": 10.770192307692307, "grad_norm": 0.184703066945076, "learning_rate": 1.2701513823448166e-05, "loss": 0.0013, "step": 11201 }, { "epoch": 10.771153846153846, "grad_norm": 1.398309350013733, "learning_rate": 1.2700314444238424e-05, "loss": 0.0085, "step": 11202 }, { "epoch": 10.772115384615384, "grad_norm": 1.9125734567642212, "learning_rate": 1.2699115023127726e-05, "loss": 0.0116, "step": 11203 }, { "epoch": 10.773076923076923, "grad_norm": 0.42075619101524353, "learning_rate": 1.2697915560134684e-05, "loss": 0.0025, "step": 11204 }, { "epoch": 10.774038461538462, "grad_norm": 0.3625514805316925, "learning_rate": 1.269671605527791e-05, "loss": 0.0023, "step": 11205 }, { "epoch": 10.775, "grad_norm": 0.5350033044815063, "learning_rate": 1.269551650857602e-05, "loss": 0.0029, "step": 11206 }, { "epoch": 10.775961538461539, "grad_norm": 0.7286801934242249, "learning_rate": 1.269431692004762e-05, "loss": 0.0032, "step": 11207 }, { "epoch": 10.776923076923078, "grad_norm": 0.16788682341575623, "learning_rate": 1.2693117289711335e-05, "loss": 0.0012, "step": 11208 }, { "epoch": 10.777884615384615, "grad_norm": 1.890906572341919, "learning_rate": 1.2691917617585772e-05, "loss": 0.0283, "step": 11209 }, { "epoch": 10.778846153846153, "grad_norm": 2.7918100357055664, "learning_rate": 1.269071790368955e-05, "loss": 0.0152, "step": 11210 }, { "epoch": 10.779807692307692, "grad_norm": 1.0097761154174805, "learning_rate": 1.2689518148041278e-05, "loss": 0.005, "step": 11211 }, { "epoch": 10.78076923076923, "grad_norm": 2.9463655948638916, "learning_rate": 1.2688318350659581e-05, "loss": 0.0265, "step": 11212 }, { "epoch": 10.78173076923077, "grad_norm": 1.1161208152770996, "learning_rate": 1.2687118511563075e-05, "loss": 0.007, "step": 11213 }, { "epoch": 10.782692307692308, "grad_norm": 0.33515939116477966, "learning_rate": 1.2685918630770375e-05, "loss": 0.0032, "step": 11214 }, { "epoch": 10.783653846153847, "grad_norm": 2.6343817710876465, "learning_rate": 1.2684718708300099e-05, "loss": 0.1244, "step": 11215 }, { "epoch": 10.784615384615385, "grad_norm": 0.6850845813751221, "learning_rate": 1.2683518744170871e-05, "loss": 0.0026, "step": 11216 }, { "epoch": 10.785576923076922, "grad_norm": 1.5905447006225586, "learning_rate": 1.2682318738401307e-05, "loss": 0.0219, "step": 11217 }, { "epoch": 10.786538461538461, "grad_norm": 0.45855167508125305, "learning_rate": 1.2681118691010026e-05, "loss": 0.0029, "step": 11218 }, { "epoch": 10.7875, "grad_norm": 0.3431086242198944, "learning_rate": 1.2679918602015655e-05, "loss": 0.0021, "step": 11219 }, { "epoch": 10.788461538461538, "grad_norm": 4.396215915679932, "learning_rate": 1.2678718471436816e-05, "loss": 0.2366, "step": 11220 }, { "epoch": 10.789423076923077, "grad_norm": 2.4743571281433105, "learning_rate": 1.2677518299292124e-05, "loss": 0.0845, "step": 11221 }, { "epoch": 10.790384615384616, "grad_norm": 2.547882556915283, "learning_rate": 1.2676318085600206e-05, "loss": 0.0272, "step": 11222 }, { "epoch": 10.791346153846154, "grad_norm": 1.5715904235839844, "learning_rate": 1.2675117830379686e-05, "loss": 0.0163, "step": 11223 }, { "epoch": 10.792307692307693, "grad_norm": 1.2992618083953857, "learning_rate": 1.2673917533649192e-05, "loss": 0.0092, "step": 11224 }, { "epoch": 10.79326923076923, "grad_norm": 1.2184425592422485, "learning_rate": 1.2672717195427345e-05, "loss": 0.0067, "step": 11225 }, { "epoch": 10.794230769230769, "grad_norm": 3.965451717376709, "learning_rate": 1.2671516815732767e-05, "loss": 0.1618, "step": 11226 }, { "epoch": 10.795192307692307, "grad_norm": 1.3143678903579712, "learning_rate": 1.2670316394584093e-05, "loss": 0.0141, "step": 11227 }, { "epoch": 10.796153846153846, "grad_norm": 0.9473877549171448, "learning_rate": 1.2669115931999945e-05, "loss": 0.0049, "step": 11228 }, { "epoch": 10.797115384615385, "grad_norm": 1.132025122642517, "learning_rate": 1.2667915427998949e-05, "loss": 0.0119, "step": 11229 }, { "epoch": 10.798076923076923, "grad_norm": 2.2496187686920166, "learning_rate": 1.2666714882599737e-05, "loss": 0.0464, "step": 11230 }, { "epoch": 10.799038461538462, "grad_norm": 1.7293553352355957, "learning_rate": 1.2665514295820938e-05, "loss": 0.0171, "step": 11231 }, { "epoch": 10.8, "grad_norm": 1.3068774938583374, "learning_rate": 1.266431366768118e-05, "loss": 0.006, "step": 11232 }, { "epoch": 10.80096153846154, "grad_norm": 1.7269693613052368, "learning_rate": 1.2663112998199096e-05, "loss": 0.0078, "step": 11233 }, { "epoch": 10.801923076923076, "grad_norm": 0.8731273412704468, "learning_rate": 1.266191228739331e-05, "loss": 0.0068, "step": 11234 }, { "epoch": 10.802884615384615, "grad_norm": 2.9598937034606934, "learning_rate": 1.266071153528246e-05, "loss": 0.0328, "step": 11235 }, { "epoch": 10.803846153846154, "grad_norm": 2.9978697299957275, "learning_rate": 1.2659510741885178e-05, "loss": 0.0327, "step": 11236 }, { "epoch": 10.804807692307692, "grad_norm": 2.5782620906829834, "learning_rate": 1.265830990722009e-05, "loss": 0.021, "step": 11237 }, { "epoch": 10.805769230769231, "grad_norm": 2.1416282653808594, "learning_rate": 1.2657109031305839e-05, "loss": 0.0527, "step": 11238 }, { "epoch": 10.80673076923077, "grad_norm": 0.5420299768447876, "learning_rate": 1.2655908114161053e-05, "loss": 0.0036, "step": 11239 }, { "epoch": 10.807692307692308, "grad_norm": 0.10171427577733994, "learning_rate": 1.2654707155804369e-05, "loss": 0.0011, "step": 11240 }, { "epoch": 10.808653846153845, "grad_norm": 2.7857537269592285, "learning_rate": 1.2653506156254416e-05, "loss": 0.0264, "step": 11241 }, { "epoch": 10.809615384615384, "grad_norm": 0.1912146359682083, "learning_rate": 1.2652305115529842e-05, "loss": 0.001, "step": 11242 }, { "epoch": 10.810576923076923, "grad_norm": 3.422642230987549, "learning_rate": 1.2651104033649272e-05, "loss": 0.0703, "step": 11243 }, { "epoch": 10.811538461538461, "grad_norm": 1.1347438097000122, "learning_rate": 1.2649902910631353e-05, "loss": 0.0071, "step": 11244 }, { "epoch": 10.8125, "grad_norm": 1.9975429773330688, "learning_rate": 1.2648701746494716e-05, "loss": 0.0175, "step": 11245 }, { "epoch": 10.813461538461539, "grad_norm": 0.10480228811502457, "learning_rate": 1.2647500541258003e-05, "loss": 0.0013, "step": 11246 }, { "epoch": 10.814423076923077, "grad_norm": 1.8321149349212646, "learning_rate": 1.264629929493985e-05, "loss": 0.0146, "step": 11247 }, { "epoch": 10.815384615384616, "grad_norm": 1.3081272840499878, "learning_rate": 1.2645098007558898e-05, "loss": 0.0313, "step": 11248 }, { "epoch": 10.816346153846155, "grad_norm": 4.137003421783447, "learning_rate": 1.2643896679133789e-05, "loss": 0.0422, "step": 11249 }, { "epoch": 10.817307692307692, "grad_norm": 2.678406000137329, "learning_rate": 1.2642695309683167e-05, "loss": 0.0363, "step": 11250 }, { "epoch": 10.81826923076923, "grad_norm": 1.3675512075424194, "learning_rate": 1.2641493899225666e-05, "loss": 0.016, "step": 11251 }, { "epoch": 10.819230769230769, "grad_norm": 2.1257095336914062, "learning_rate": 1.2640292447779932e-05, "loss": 0.0216, "step": 11252 }, { "epoch": 10.820192307692308, "grad_norm": 0.33603811264038086, "learning_rate": 1.263909095536461e-05, "loss": 0.0012, "step": 11253 }, { "epoch": 10.821153846153846, "grad_norm": 0.7625572681427002, "learning_rate": 1.2637889421998341e-05, "loss": 0.0066, "step": 11254 }, { "epoch": 10.822115384615385, "grad_norm": 2.6188204288482666, "learning_rate": 1.2636687847699769e-05, "loss": 0.0308, "step": 11255 }, { "epoch": 10.823076923076924, "grad_norm": 0.7317509651184082, "learning_rate": 1.2635486232487543e-05, "loss": 0.0061, "step": 11256 }, { "epoch": 10.82403846153846, "grad_norm": 1.5307209491729736, "learning_rate": 1.2634284576380305e-05, "loss": 0.0082, "step": 11257 }, { "epoch": 10.825, "grad_norm": 1.9499149322509766, "learning_rate": 1.2633082879396702e-05, "loss": 0.0432, "step": 11258 }, { "epoch": 10.825961538461538, "grad_norm": 0.9146704077720642, "learning_rate": 1.263188114155538e-05, "loss": 0.0043, "step": 11259 }, { "epoch": 10.826923076923077, "grad_norm": 2.449158191680908, "learning_rate": 1.2630679362874986e-05, "loss": 0.0083, "step": 11260 }, { "epoch": 10.827884615384615, "grad_norm": 0.3485134243965149, "learning_rate": 1.2629477543374174e-05, "loss": 0.0025, "step": 11261 }, { "epoch": 10.828846153846154, "grad_norm": 0.754172682762146, "learning_rate": 1.2628275683071584e-05, "loss": 0.0046, "step": 11262 }, { "epoch": 10.829807692307693, "grad_norm": 1.502260684967041, "learning_rate": 1.262707378198587e-05, "loss": 0.0276, "step": 11263 }, { "epoch": 10.830769230769231, "grad_norm": 1.3671001195907593, "learning_rate": 1.2625871840135681e-05, "loss": 0.0077, "step": 11264 }, { "epoch": 10.83173076923077, "grad_norm": 0.4855985939502716, "learning_rate": 1.2624669857539669e-05, "loss": 0.0052, "step": 11265 }, { "epoch": 10.832692307692307, "grad_norm": 2.5005133152008057, "learning_rate": 1.262346783421648e-05, "loss": 0.0293, "step": 11266 }, { "epoch": 10.833653846153846, "grad_norm": 1.430055856704712, "learning_rate": 1.2622265770184776e-05, "loss": 0.0293, "step": 11267 }, { "epoch": 10.834615384615384, "grad_norm": 0.4402918815612793, "learning_rate": 1.2621063665463202e-05, "loss": 0.0025, "step": 11268 }, { "epoch": 10.835576923076923, "grad_norm": 0.02990964613854885, "learning_rate": 1.261986152007041e-05, "loss": 0.0003, "step": 11269 }, { "epoch": 10.836538461538462, "grad_norm": 0.2547825872898102, "learning_rate": 1.2618659334025059e-05, "loss": 0.0018, "step": 11270 }, { "epoch": 10.8375, "grad_norm": 2.4463391304016113, "learning_rate": 1.2617457107345798e-05, "loss": 0.04, "step": 11271 }, { "epoch": 10.838461538461539, "grad_norm": 2.2845301628112793, "learning_rate": 1.2616254840051289e-05, "loss": 0.0131, "step": 11272 }, { "epoch": 10.839423076923078, "grad_norm": 4.7363386154174805, "learning_rate": 1.2615052532160181e-05, "loss": 0.0572, "step": 11273 }, { "epoch": 10.840384615384615, "grad_norm": 0.21695435047149658, "learning_rate": 1.2613850183691132e-05, "loss": 0.002, "step": 11274 }, { "epoch": 10.841346153846153, "grad_norm": 1.2818152904510498, "learning_rate": 1.26126477946628e-05, "loss": 0.008, "step": 11275 }, { "epoch": 10.842307692307692, "grad_norm": 2.395169258117676, "learning_rate": 1.2611445365093844e-05, "loss": 0.0185, "step": 11276 }, { "epoch": 10.84326923076923, "grad_norm": 0.6689999103546143, "learning_rate": 1.2610242895002918e-05, "loss": 0.0062, "step": 11277 }, { "epoch": 10.84423076923077, "grad_norm": 0.5361427664756775, "learning_rate": 1.2609040384408685e-05, "loss": 0.0044, "step": 11278 }, { "epoch": 10.845192307692308, "grad_norm": 1.067460536956787, "learning_rate": 1.2607837833329803e-05, "loss": 0.0073, "step": 11279 }, { "epoch": 10.846153846153847, "grad_norm": 1.9162817001342773, "learning_rate": 1.2606635241784931e-05, "loss": 0.013, "step": 11280 }, { "epoch": 10.847115384615385, "grad_norm": 9.215737342834473, "learning_rate": 1.260543260979273e-05, "loss": 0.1585, "step": 11281 }, { "epoch": 10.848076923076922, "grad_norm": 1.4362941980361938, "learning_rate": 1.2604229937371862e-05, "loss": 0.0088, "step": 11282 }, { "epoch": 10.849038461538461, "grad_norm": 1.6816240549087524, "learning_rate": 1.2603027224540989e-05, "loss": 0.0089, "step": 11283 }, { "epoch": 10.85, "grad_norm": 4.593515396118164, "learning_rate": 1.2601824471318774e-05, "loss": 0.1036, "step": 11284 }, { "epoch": 10.850961538461538, "grad_norm": 0.7219842076301575, "learning_rate": 1.2600621677723877e-05, "loss": 0.0052, "step": 11285 }, { "epoch": 10.851923076923077, "grad_norm": 1.352731704711914, "learning_rate": 1.2599418843774966e-05, "loss": 0.0064, "step": 11286 }, { "epoch": 10.852884615384616, "grad_norm": 3.282529354095459, "learning_rate": 1.2598215969490703e-05, "loss": 0.0336, "step": 11287 }, { "epoch": 10.853846153846154, "grad_norm": 3.0762903690338135, "learning_rate": 1.2597013054889753e-05, "loss": 0.0734, "step": 11288 }, { "epoch": 10.854807692307693, "grad_norm": 1.9054677486419678, "learning_rate": 1.2595810099990784e-05, "loss": 0.019, "step": 11289 }, { "epoch": 10.85576923076923, "grad_norm": 2.060851812362671, "learning_rate": 1.2594607104812462e-05, "loss": 0.0329, "step": 11290 }, { "epoch": 10.856730769230769, "grad_norm": 2.97202730178833, "learning_rate": 1.2593404069373452e-05, "loss": 0.026, "step": 11291 }, { "epoch": 10.857692307692307, "grad_norm": 0.6694102883338928, "learning_rate": 1.259220099369242e-05, "loss": 0.0035, "step": 11292 }, { "epoch": 10.858653846153846, "grad_norm": 0.3471263647079468, "learning_rate": 1.259099787778804e-05, "loss": 0.0024, "step": 11293 }, { "epoch": 10.859615384615385, "grad_norm": 1.002950668334961, "learning_rate": 1.2589794721678976e-05, "loss": 0.0044, "step": 11294 }, { "epoch": 10.860576923076923, "grad_norm": 0.7574610114097595, "learning_rate": 1.2588591525383897e-05, "loss": 0.0065, "step": 11295 }, { "epoch": 10.861538461538462, "grad_norm": 0.7190188765525818, "learning_rate": 1.2587388288921478e-05, "loss": 0.0046, "step": 11296 }, { "epoch": 10.8625, "grad_norm": 2.509105920791626, "learning_rate": 1.2586185012310388e-05, "loss": 0.0988, "step": 11297 }, { "epoch": 10.86346153846154, "grad_norm": 0.46911531686782837, "learning_rate": 1.2584981695569294e-05, "loss": 0.0044, "step": 11298 }, { "epoch": 10.864423076923076, "grad_norm": 0.6628646850585938, "learning_rate": 1.258377833871687e-05, "loss": 0.0048, "step": 11299 }, { "epoch": 10.865384615384615, "grad_norm": 0.3316875100135803, "learning_rate": 1.258257494177179e-05, "loss": 0.0023, "step": 11300 }, { "epoch": 10.866346153846154, "grad_norm": 0.6972458958625793, "learning_rate": 1.2581371504752729e-05, "loss": 0.0027, "step": 11301 }, { "epoch": 10.867307692307692, "grad_norm": 0.5786672830581665, "learning_rate": 1.258016802767836e-05, "loss": 0.0033, "step": 11302 }, { "epoch": 10.868269230769231, "grad_norm": 3.0862839221954346, "learning_rate": 1.2578964510567348e-05, "loss": 0.0403, "step": 11303 }, { "epoch": 10.86923076923077, "grad_norm": 2.475935935974121, "learning_rate": 1.2577760953438382e-05, "loss": 0.0287, "step": 11304 }, { "epoch": 10.870192307692308, "grad_norm": 0.14412462711334229, "learning_rate": 1.2576557356310133e-05, "loss": 0.001, "step": 11305 }, { "epoch": 10.871153846153845, "grad_norm": 2.128614664077759, "learning_rate": 1.257535371920127e-05, "loss": 0.019, "step": 11306 }, { "epoch": 10.872115384615384, "grad_norm": 0.5772162079811096, "learning_rate": 1.2574150042130479e-05, "loss": 0.0047, "step": 11307 }, { "epoch": 10.873076923076923, "grad_norm": 3.7804601192474365, "learning_rate": 1.2572946325116435e-05, "loss": 0.0809, "step": 11308 }, { "epoch": 10.874038461538461, "grad_norm": 3.9096648693084717, "learning_rate": 1.2571742568177819e-05, "loss": 0.0344, "step": 11309 }, { "epoch": 10.875, "grad_norm": 0.08063890039920807, "learning_rate": 1.2570538771333301e-05, "loss": 0.0005, "step": 11310 }, { "epoch": 10.875961538461539, "grad_norm": 0.2463494837284088, "learning_rate": 1.2569334934601566e-05, "loss": 0.002, "step": 11311 }, { "epoch": 10.876923076923077, "grad_norm": 0.5801563262939453, "learning_rate": 1.2568131058001297e-05, "loss": 0.0019, "step": 11312 }, { "epoch": 10.877884615384616, "grad_norm": 0.21605820953845978, "learning_rate": 1.2566927141551168e-05, "loss": 0.0017, "step": 11313 }, { "epoch": 10.878846153846155, "grad_norm": 1.9754339456558228, "learning_rate": 1.2565723185269865e-05, "loss": 0.0188, "step": 11314 }, { "epoch": 10.879807692307692, "grad_norm": 0.6246569752693176, "learning_rate": 1.2564519189176067e-05, "loss": 0.0065, "step": 11315 }, { "epoch": 10.88076923076923, "grad_norm": 0.39316117763519287, "learning_rate": 1.2563315153288462e-05, "loss": 0.0022, "step": 11316 }, { "epoch": 10.881730769230769, "grad_norm": 1.2089203596115112, "learning_rate": 1.2562111077625723e-05, "loss": 0.005, "step": 11317 }, { "epoch": 10.882692307692308, "grad_norm": 4.361876487731934, "learning_rate": 1.2560906962206545e-05, "loss": 0.0364, "step": 11318 }, { "epoch": 10.883653846153846, "grad_norm": 2.431002140045166, "learning_rate": 1.2559702807049606e-05, "loss": 0.0108, "step": 11319 }, { "epoch": 10.884615384615385, "grad_norm": 1.6291447877883911, "learning_rate": 1.255849861217359e-05, "loss": 0.0315, "step": 11320 }, { "epoch": 10.885576923076924, "grad_norm": 1.7560282945632935, "learning_rate": 1.2557294377597187e-05, "loss": 0.0078, "step": 11321 }, { "epoch": 10.88653846153846, "grad_norm": 0.9532543420791626, "learning_rate": 1.2556090103339078e-05, "loss": 0.0081, "step": 11322 }, { "epoch": 10.8875, "grad_norm": 1.1881837844848633, "learning_rate": 1.2554885789417957e-05, "loss": 0.0185, "step": 11323 }, { "epoch": 10.888461538461538, "grad_norm": 3.7080514430999756, "learning_rate": 1.2553681435852505e-05, "loss": 0.0196, "step": 11324 }, { "epoch": 10.889423076923077, "grad_norm": 3.7304954528808594, "learning_rate": 1.2552477042661408e-05, "loss": 0.0519, "step": 11325 }, { "epoch": 10.890384615384615, "grad_norm": 2.520329236984253, "learning_rate": 1.2551272609863366e-05, "loss": 0.0182, "step": 11326 }, { "epoch": 10.891346153846154, "grad_norm": 2.3670542240142822, "learning_rate": 1.2550068137477057e-05, "loss": 0.0228, "step": 11327 }, { "epoch": 10.892307692307693, "grad_norm": 17.41611671447754, "learning_rate": 1.2548863625521173e-05, "loss": 0.0365, "step": 11328 }, { "epoch": 10.893269230769231, "grad_norm": 1.4497463703155518, "learning_rate": 1.254765907401441e-05, "loss": 0.0491, "step": 11329 }, { "epoch": 10.89423076923077, "grad_norm": 1.3761255741119385, "learning_rate": 1.2546454482975454e-05, "loss": 0.0157, "step": 11330 }, { "epoch": 10.895192307692307, "grad_norm": 3.3585238456726074, "learning_rate": 1.2545249852422998e-05, "loss": 0.021, "step": 11331 }, { "epoch": 10.896153846153846, "grad_norm": 3.1179232597351074, "learning_rate": 1.2544045182375735e-05, "loss": 0.0217, "step": 11332 }, { "epoch": 10.897115384615384, "grad_norm": 2.0451080799102783, "learning_rate": 1.2542840472852356e-05, "loss": 0.023, "step": 11333 }, { "epoch": 10.898076923076923, "grad_norm": 3.00484037399292, "learning_rate": 1.254163572387156e-05, "loss": 0.0327, "step": 11334 }, { "epoch": 10.899038461538462, "grad_norm": 0.7824102640151978, "learning_rate": 1.2540430935452035e-05, "loss": 0.0047, "step": 11335 }, { "epoch": 10.9, "grad_norm": 3.5836706161499023, "learning_rate": 1.2539226107612474e-05, "loss": 0.0447, "step": 11336 }, { "epoch": 10.900961538461539, "grad_norm": 2.906961441040039, "learning_rate": 1.253802124037158e-05, "loss": 0.0487, "step": 11337 }, { "epoch": 10.901923076923078, "grad_norm": 1.6340051889419556, "learning_rate": 1.2536816333748046e-05, "loss": 0.0096, "step": 11338 }, { "epoch": 10.902884615384615, "grad_norm": 1.1986182928085327, "learning_rate": 1.2535611387760564e-05, "loss": 0.0088, "step": 11339 }, { "epoch": 10.903846153846153, "grad_norm": 0.1939409077167511, "learning_rate": 1.2534406402427841e-05, "loss": 0.0014, "step": 11340 }, { "epoch": 10.904807692307692, "grad_norm": 3.01435923576355, "learning_rate": 1.2533201377768566e-05, "loss": 0.0327, "step": 11341 }, { "epoch": 10.90576923076923, "grad_norm": 1.0353671312332153, "learning_rate": 1.2531996313801442e-05, "loss": 0.0092, "step": 11342 }, { "epoch": 10.90673076923077, "grad_norm": 4.117336750030518, "learning_rate": 1.2530791210545163e-05, "loss": 0.0497, "step": 11343 }, { "epoch": 10.907692307692308, "grad_norm": 0.5152198672294617, "learning_rate": 1.2529586068018436e-05, "loss": 0.0017, "step": 11344 }, { "epoch": 10.908653846153847, "grad_norm": 1.896185040473938, "learning_rate": 1.2528380886239955e-05, "loss": 0.0288, "step": 11345 }, { "epoch": 10.909615384615385, "grad_norm": 1.4145362377166748, "learning_rate": 1.2527175665228424e-05, "loss": 0.0154, "step": 11346 }, { "epoch": 10.910576923076922, "grad_norm": 2.188326835632324, "learning_rate": 1.2525970405002544e-05, "loss": 0.0163, "step": 11347 }, { "epoch": 10.911538461538461, "grad_norm": 1.8261489868164062, "learning_rate": 1.2524765105581019e-05, "loss": 0.0102, "step": 11348 }, { "epoch": 10.9125, "grad_norm": 4.754673004150391, "learning_rate": 1.2523559766982548e-05, "loss": 0.0461, "step": 11349 }, { "epoch": 10.913461538461538, "grad_norm": 0.9841150045394897, "learning_rate": 1.2522354389225832e-05, "loss": 0.0046, "step": 11350 }, { "epoch": 10.914423076923077, "grad_norm": 0.9603809118270874, "learning_rate": 1.2521148972329584e-05, "loss": 0.0105, "step": 11351 }, { "epoch": 10.915384615384616, "grad_norm": 2.3920633792877197, "learning_rate": 1.2519943516312502e-05, "loss": 0.0179, "step": 11352 }, { "epoch": 10.916346153846154, "grad_norm": 0.33578094840049744, "learning_rate": 1.2518738021193293e-05, "loss": 0.0016, "step": 11353 }, { "epoch": 10.917307692307693, "grad_norm": 2.9896793365478516, "learning_rate": 1.2517532486990662e-05, "loss": 0.0267, "step": 11354 }, { "epoch": 10.91826923076923, "grad_norm": 2.1471400260925293, "learning_rate": 1.2516326913723316e-05, "loss": 0.0326, "step": 11355 }, { "epoch": 10.919230769230769, "grad_norm": 1.4273402690887451, "learning_rate": 1.251512130140996e-05, "loss": 0.0102, "step": 11356 }, { "epoch": 10.920192307692307, "grad_norm": 1.4321955442428589, "learning_rate": 1.2513915650069303e-05, "loss": 0.005, "step": 11357 }, { "epoch": 10.921153846153846, "grad_norm": 2.805591583251953, "learning_rate": 1.2512709959720055e-05, "loss": 0.0643, "step": 11358 }, { "epoch": 10.922115384615385, "grad_norm": 0.20359516143798828, "learning_rate": 1.2511504230380925e-05, "loss": 0.001, "step": 11359 }, { "epoch": 10.923076923076923, "grad_norm": 0.09384998679161072, "learning_rate": 1.2510298462070619e-05, "loss": 0.0005, "step": 11360 }, { "epoch": 10.924038461538462, "grad_norm": 0.33631545305252075, "learning_rate": 1.2509092654807846e-05, "loss": 0.0027, "step": 11361 }, { "epoch": 10.925, "grad_norm": 0.1732696145772934, "learning_rate": 1.250788680861132e-05, "loss": 0.001, "step": 11362 }, { "epoch": 10.92596153846154, "grad_norm": 0.6360626816749573, "learning_rate": 1.2506680923499755e-05, "loss": 0.0033, "step": 11363 }, { "epoch": 10.926923076923076, "grad_norm": 2.8540165424346924, "learning_rate": 1.2505474999491856e-05, "loss": 0.0231, "step": 11364 }, { "epoch": 10.927884615384615, "grad_norm": 1.394339919090271, "learning_rate": 1.2504269036606338e-05, "loss": 0.0065, "step": 11365 }, { "epoch": 10.928846153846154, "grad_norm": 2.3309412002563477, "learning_rate": 1.2503063034861915e-05, "loss": 0.0127, "step": 11366 }, { "epoch": 10.929807692307692, "grad_norm": 0.5106244683265686, "learning_rate": 1.2501856994277302e-05, "loss": 0.0024, "step": 11367 }, { "epoch": 10.930769230769231, "grad_norm": 2.3185412883758545, "learning_rate": 1.2500650914871208e-05, "loss": 0.0204, "step": 11368 }, { "epoch": 10.93173076923077, "grad_norm": 1.4413666725158691, "learning_rate": 1.2499444796662354e-05, "loss": 0.0142, "step": 11369 }, { "epoch": 10.932692307692308, "grad_norm": 1.0409600734710693, "learning_rate": 1.2498238639669452e-05, "loss": 0.016, "step": 11370 }, { "epoch": 10.933653846153845, "grad_norm": 3.842658758163452, "learning_rate": 1.249703244391122e-05, "loss": 0.0203, "step": 11371 }, { "epoch": 10.934615384615384, "grad_norm": 1.8541430234909058, "learning_rate": 1.2495826209406368e-05, "loss": 0.0257, "step": 11372 }, { "epoch": 10.935576923076923, "grad_norm": 0.8442811965942383, "learning_rate": 1.2494619936173622e-05, "loss": 0.0233, "step": 11373 }, { "epoch": 10.936538461538461, "grad_norm": 1.5956802368164062, "learning_rate": 1.2493413624231697e-05, "loss": 0.0051, "step": 11374 }, { "epoch": 10.9375, "grad_norm": 2.0634238719940186, "learning_rate": 1.2492207273599309e-05, "loss": 0.0144, "step": 11375 }, { "epoch": 10.938461538461539, "grad_norm": 1.1433948278427124, "learning_rate": 1.2491000884295176e-05, "loss": 0.0084, "step": 11376 }, { "epoch": 10.939423076923077, "grad_norm": 0.4166378378868103, "learning_rate": 1.2489794456338025e-05, "loss": 0.0016, "step": 11377 }, { "epoch": 10.940384615384616, "grad_norm": 0.2176072597503662, "learning_rate": 1.248858798974657e-05, "loss": 0.0015, "step": 11378 }, { "epoch": 10.941346153846155, "grad_norm": 3.5377986431121826, "learning_rate": 1.248738148453953e-05, "loss": 0.0325, "step": 11379 }, { "epoch": 10.942307692307692, "grad_norm": 1.4940840005874634, "learning_rate": 1.2486174940735636e-05, "loss": 0.0092, "step": 11380 }, { "epoch": 10.94326923076923, "grad_norm": 1.4251347780227661, "learning_rate": 1.2484968358353601e-05, "loss": 0.008, "step": 11381 }, { "epoch": 10.944230769230769, "grad_norm": 1.3346190452575684, "learning_rate": 1.248376173741215e-05, "loss": 0.0051, "step": 11382 }, { "epoch": 10.945192307692308, "grad_norm": 4.76052713394165, "learning_rate": 1.2482555077930006e-05, "loss": 0.1039, "step": 11383 }, { "epoch": 10.946153846153846, "grad_norm": 3.4705305099487305, "learning_rate": 1.2481348379925895e-05, "loss": 0.073, "step": 11384 }, { "epoch": 10.947115384615385, "grad_norm": 2.6304802894592285, "learning_rate": 1.2480141643418539e-05, "loss": 0.0244, "step": 11385 }, { "epoch": 10.948076923076924, "grad_norm": 3.987004041671753, "learning_rate": 1.2478934868426661e-05, "loss": 0.1123, "step": 11386 }, { "epoch": 10.94903846153846, "grad_norm": 4.290165424346924, "learning_rate": 1.2477728054968993e-05, "loss": 0.0473, "step": 11387 }, { "epoch": 10.95, "grad_norm": 2.2531421184539795, "learning_rate": 1.2476521203064257e-05, "loss": 0.0351, "step": 11388 }, { "epoch": 10.950961538461538, "grad_norm": 0.9983391165733337, "learning_rate": 1.2475314312731182e-05, "loss": 0.0054, "step": 11389 }, { "epoch": 10.951923076923077, "grad_norm": 0.951998233795166, "learning_rate": 1.2474107383988489e-05, "loss": 0.0085, "step": 11390 }, { "epoch": 10.952884615384615, "grad_norm": 1.2874045372009277, "learning_rate": 1.2472900416854916e-05, "loss": 0.0079, "step": 11391 }, { "epoch": 10.953846153846154, "grad_norm": 1.0979927778244019, "learning_rate": 1.2471693411349185e-05, "loss": 0.0055, "step": 11392 }, { "epoch": 10.954807692307693, "grad_norm": 2.3425869941711426, "learning_rate": 1.2470486367490025e-05, "loss": 0.0433, "step": 11393 }, { "epoch": 10.955769230769231, "grad_norm": 2.4433271884918213, "learning_rate": 1.2469279285296167e-05, "loss": 0.0321, "step": 11394 }, { "epoch": 10.95673076923077, "grad_norm": 2.573646068572998, "learning_rate": 1.2468072164786342e-05, "loss": 0.0231, "step": 11395 }, { "epoch": 10.957692307692307, "grad_norm": 1.474865436553955, "learning_rate": 1.2466865005979284e-05, "loss": 0.0167, "step": 11396 }, { "epoch": 10.958653846153846, "grad_norm": 0.7228723764419556, "learning_rate": 1.2465657808893716e-05, "loss": 0.0017, "step": 11397 }, { "epoch": 10.959615384615384, "grad_norm": 3.440498113632202, "learning_rate": 1.246445057354838e-05, "loss": 0.0477, "step": 11398 }, { "epoch": 10.960576923076923, "grad_norm": 2.554015636444092, "learning_rate": 1.2463243299962002e-05, "loss": 0.039, "step": 11399 }, { "epoch": 10.961538461538462, "grad_norm": 0.4927259087562561, "learning_rate": 1.246203598815332e-05, "loss": 0.0034, "step": 11400 }, { "epoch": 10.9625, "grad_norm": 1.4001500606536865, "learning_rate": 1.246082863814106e-05, "loss": 0.0069, "step": 11401 }, { "epoch": 10.963461538461539, "grad_norm": 0.23786921799182892, "learning_rate": 1.2459621249943967e-05, "loss": 0.0014, "step": 11402 }, { "epoch": 10.964423076923078, "grad_norm": 0.40934616327285767, "learning_rate": 1.2458413823580771e-05, "loss": 0.0029, "step": 11403 }, { "epoch": 10.965384615384615, "grad_norm": 1.1915901899337769, "learning_rate": 1.2457206359070207e-05, "loss": 0.0077, "step": 11404 }, { "epoch": 10.966346153846153, "grad_norm": 2.1273672580718994, "learning_rate": 1.2455998856431008e-05, "loss": 0.0139, "step": 11405 }, { "epoch": 10.967307692307692, "grad_norm": 0.15179789066314697, "learning_rate": 1.245479131568192e-05, "loss": 0.0011, "step": 11406 }, { "epoch": 10.96826923076923, "grad_norm": 0.21139393746852875, "learning_rate": 1.2453583736841675e-05, "loss": 0.0012, "step": 11407 }, { "epoch": 10.96923076923077, "grad_norm": 2.9648609161376953, "learning_rate": 1.2452376119929009e-05, "loss": 0.0622, "step": 11408 }, { "epoch": 10.970192307692308, "grad_norm": 1.7863699197769165, "learning_rate": 1.2451168464962664e-05, "loss": 0.0079, "step": 11409 }, { "epoch": 10.971153846153847, "grad_norm": 0.7585959434509277, "learning_rate": 1.2449960771961381e-05, "loss": 0.0052, "step": 11410 }, { "epoch": 10.972115384615385, "grad_norm": 0.8709383010864258, "learning_rate": 1.2448753040943897e-05, "loss": 0.0103, "step": 11411 }, { "epoch": 10.973076923076922, "grad_norm": 2.2304913997650146, "learning_rate": 1.2447545271928949e-05, "loss": 0.0275, "step": 11412 }, { "epoch": 10.974038461538461, "grad_norm": 0.8278045654296875, "learning_rate": 1.2446337464935285e-05, "loss": 0.0322, "step": 11413 }, { "epoch": 10.975, "grad_norm": 1.366593837738037, "learning_rate": 1.2445129619981644e-05, "loss": 0.009, "step": 11414 }, { "epoch": 10.975961538461538, "grad_norm": 2.6283366680145264, "learning_rate": 1.2443921737086767e-05, "loss": 0.0491, "step": 11415 }, { "epoch": 10.976923076923077, "grad_norm": 3.371060609817505, "learning_rate": 1.2442713816269394e-05, "loss": 0.0684, "step": 11416 }, { "epoch": 10.977884615384616, "grad_norm": 0.07226276397705078, "learning_rate": 1.2441505857548277e-05, "loss": 0.0004, "step": 11417 }, { "epoch": 10.978846153846154, "grad_norm": 1.728248119354248, "learning_rate": 1.2440297860942152e-05, "loss": 0.0186, "step": 11418 }, { "epoch": 10.979807692307693, "grad_norm": 1.4571263790130615, "learning_rate": 1.2439089826469766e-05, "loss": 0.0095, "step": 11419 }, { "epoch": 10.98076923076923, "grad_norm": 0.8733936548233032, "learning_rate": 1.2437881754149865e-05, "loss": 0.0059, "step": 11420 }, { "epoch": 10.981730769230769, "grad_norm": 1.2318083047866821, "learning_rate": 1.2436673644001196e-05, "loss": 0.0079, "step": 11421 }, { "epoch": 10.982692307692307, "grad_norm": 1.1339153051376343, "learning_rate": 1.2435465496042505e-05, "loss": 0.0071, "step": 11422 }, { "epoch": 10.983653846153846, "grad_norm": 2.605860710144043, "learning_rate": 1.2434257310292532e-05, "loss": 0.0304, "step": 11423 }, { "epoch": 10.984615384615385, "grad_norm": 0.9074492454528809, "learning_rate": 1.2433049086770033e-05, "loss": 0.0054, "step": 11424 }, { "epoch": 10.985576923076923, "grad_norm": 3.2325778007507324, "learning_rate": 1.2431840825493753e-05, "loss": 0.0455, "step": 11425 }, { "epoch": 10.986538461538462, "grad_norm": 0.6087067723274231, "learning_rate": 1.2430632526482443e-05, "loss": 0.0032, "step": 11426 }, { "epoch": 10.9875, "grad_norm": 3.501009464263916, "learning_rate": 1.2429424189754846e-05, "loss": 0.0243, "step": 11427 }, { "epoch": 10.98846153846154, "grad_norm": 1.0227218866348267, "learning_rate": 1.2428215815329719e-05, "loss": 0.0065, "step": 11428 }, { "epoch": 10.989423076923076, "grad_norm": 1.8448044061660767, "learning_rate": 1.2427007403225808e-05, "loss": 0.0088, "step": 11429 }, { "epoch": 10.990384615384615, "grad_norm": 2.378048896789551, "learning_rate": 1.2425798953461863e-05, "loss": 0.0227, "step": 11430 }, { "epoch": 10.991346153846154, "grad_norm": 0.7687617540359497, "learning_rate": 1.242459046605664e-05, "loss": 0.0077, "step": 11431 }, { "epoch": 10.992307692307692, "grad_norm": 2.384737014770508, "learning_rate": 1.2423381941028891e-05, "loss": 0.0206, "step": 11432 }, { "epoch": 10.993269230769231, "grad_norm": 1.6432145833969116, "learning_rate": 1.2422173378397364e-05, "loss": 0.0136, "step": 11433 }, { "epoch": 10.99423076923077, "grad_norm": 3.8703322410583496, "learning_rate": 1.2420964778180815e-05, "loss": 0.0458, "step": 11434 }, { "epoch": 10.995192307692308, "grad_norm": 2.272400140762329, "learning_rate": 1.2419756140398e-05, "loss": 0.0181, "step": 11435 }, { "epoch": 10.996153846153845, "grad_norm": 2.1145012378692627, "learning_rate": 1.2418547465067671e-05, "loss": 0.039, "step": 11436 }, { "epoch": 10.997115384615384, "grad_norm": 2.037134885787964, "learning_rate": 1.2417338752208582e-05, "loss": 0.0135, "step": 11437 }, { "epoch": 10.998076923076923, "grad_norm": 0.25322550535202026, "learning_rate": 1.2416130001839493e-05, "loss": 0.0012, "step": 11438 }, { "epoch": 10.999038461538461, "grad_norm": 2.112422466278076, "learning_rate": 1.2414921213979157e-05, "loss": 0.0213, "step": 11439 }, { "epoch": 11.0, "grad_norm": 0.9576374888420105, "learning_rate": 1.241371238864633e-05, "loss": 0.0063, "step": 11440 }, { "epoch": 11.000961538461539, "grad_norm": 0.10703200846910477, "learning_rate": 1.2412503525859772e-05, "loss": 0.0008, "step": 11441 }, { "epoch": 11.001923076923077, "grad_norm": 0.06609085202217102, "learning_rate": 1.241129462563824e-05, "loss": 0.0005, "step": 11442 }, { "epoch": 11.002884615384616, "grad_norm": 1.1411195993423462, "learning_rate": 1.2410085688000493e-05, "loss": 0.0079, "step": 11443 }, { "epoch": 11.003846153846155, "grad_norm": 0.44050654768943787, "learning_rate": 1.2408876712965289e-05, "loss": 0.0018, "step": 11444 }, { "epoch": 11.004807692307692, "grad_norm": 1.3947700262069702, "learning_rate": 1.2407667700551387e-05, "loss": 0.0029, "step": 11445 }, { "epoch": 11.00576923076923, "grad_norm": 0.9900282621383667, "learning_rate": 1.240645865077755e-05, "loss": 0.0055, "step": 11446 }, { "epoch": 11.006730769230769, "grad_norm": 0.11255120486021042, "learning_rate": 1.2405249563662539e-05, "loss": 0.0011, "step": 11447 }, { "epoch": 11.007692307692308, "grad_norm": 1.4778698682785034, "learning_rate": 1.2404040439225111e-05, "loss": 0.0135, "step": 11448 }, { "epoch": 11.008653846153846, "grad_norm": 0.1555507332086563, "learning_rate": 1.2402831277484034e-05, "loss": 0.0009, "step": 11449 }, { "epoch": 11.009615384615385, "grad_norm": 0.28753000497817993, "learning_rate": 1.2401622078458067e-05, "loss": 0.0016, "step": 11450 }, { "epoch": 11.010576923076924, "grad_norm": 0.011804654262959957, "learning_rate": 1.2400412842165972e-05, "loss": 0.0001, "step": 11451 }, { "epoch": 11.011538461538462, "grad_norm": 0.5617654323577881, "learning_rate": 1.2399203568626517e-05, "loss": 0.0034, "step": 11452 }, { "epoch": 11.0125, "grad_norm": 2.1781668663024902, "learning_rate": 1.2397994257858463e-05, "loss": 0.0307, "step": 11453 }, { "epoch": 11.013461538461538, "grad_norm": 2.695761203765869, "learning_rate": 1.2396784909880577e-05, "loss": 0.0198, "step": 11454 }, { "epoch": 11.014423076923077, "grad_norm": 0.680515706539154, "learning_rate": 1.2395575524711626e-05, "loss": 0.0049, "step": 11455 }, { "epoch": 11.015384615384615, "grad_norm": 1.8089319467544556, "learning_rate": 1.2394366102370367e-05, "loss": 0.0173, "step": 11456 }, { "epoch": 11.016346153846154, "grad_norm": 0.20794683694839478, "learning_rate": 1.2393156642875579e-05, "loss": 0.0006, "step": 11457 }, { "epoch": 11.017307692307693, "grad_norm": 0.9603610038757324, "learning_rate": 1.2391947146246025e-05, "loss": 0.0086, "step": 11458 }, { "epoch": 11.018269230769231, "grad_norm": 0.4969803988933563, "learning_rate": 1.2390737612500465e-05, "loss": 0.0029, "step": 11459 }, { "epoch": 11.01923076923077, "grad_norm": 0.03717717155814171, "learning_rate": 1.2389528041657679e-05, "loss": 0.0003, "step": 11460 }, { "epoch": 11.020192307692307, "grad_norm": 0.025483056902885437, "learning_rate": 1.238831843373643e-05, "loss": 0.0003, "step": 11461 }, { "epoch": 11.021153846153846, "grad_norm": 0.5622068643569946, "learning_rate": 1.238710878875549e-05, "loss": 0.003, "step": 11462 }, { "epoch": 11.022115384615384, "grad_norm": 1.1198643445968628, "learning_rate": 1.2385899106733626e-05, "loss": 0.005, "step": 11463 }, { "epoch": 11.023076923076923, "grad_norm": 1.3633289337158203, "learning_rate": 1.2384689387689611e-05, "loss": 0.0677, "step": 11464 }, { "epoch": 11.024038461538462, "grad_norm": 1.9177155494689941, "learning_rate": 1.2383479631642215e-05, "loss": 0.012, "step": 11465 }, { "epoch": 11.025, "grad_norm": 2.0556681156158447, "learning_rate": 1.2382269838610211e-05, "loss": 0.0422, "step": 11466 }, { "epoch": 11.025961538461539, "grad_norm": 1.6472527980804443, "learning_rate": 1.2381060008612368e-05, "loss": 0.0097, "step": 11467 }, { "epoch": 11.026923076923078, "grad_norm": 2.5553793907165527, "learning_rate": 1.2379850141667466e-05, "loss": 0.0311, "step": 11468 }, { "epoch": 11.027884615384615, "grad_norm": 0.4776434600353241, "learning_rate": 1.2378640237794274e-05, "loss": 0.0031, "step": 11469 }, { "epoch": 11.028846153846153, "grad_norm": 0.3522699177265167, "learning_rate": 1.2377430297011566e-05, "loss": 0.0019, "step": 11470 }, { "epoch": 11.029807692307692, "grad_norm": 0.18325306475162506, "learning_rate": 1.2376220319338118e-05, "loss": 0.001, "step": 11471 }, { "epoch": 11.03076923076923, "grad_norm": 0.07303318381309509, "learning_rate": 1.2375010304792704e-05, "loss": 0.0006, "step": 11472 }, { "epoch": 11.03173076923077, "grad_norm": 0.29215481877326965, "learning_rate": 1.23738002533941e-05, "loss": 0.0016, "step": 11473 }, { "epoch": 11.032692307692308, "grad_norm": 1.557008147239685, "learning_rate": 1.2372590165161084e-05, "loss": 0.0074, "step": 11474 }, { "epoch": 11.033653846153847, "grad_norm": 1.6228936910629272, "learning_rate": 1.2371380040112433e-05, "loss": 0.0076, "step": 11475 }, { "epoch": 11.034615384615385, "grad_norm": 0.06481959670782089, "learning_rate": 1.2370169878266922e-05, "loss": 0.0006, "step": 11476 }, { "epoch": 11.035576923076922, "grad_norm": 1.655745506286621, "learning_rate": 1.236895967964333e-05, "loss": 0.0443, "step": 11477 }, { "epoch": 11.036538461538461, "grad_norm": 0.17952202260494232, "learning_rate": 1.236774944426044e-05, "loss": 0.0016, "step": 11478 }, { "epoch": 11.0375, "grad_norm": 0.06297075748443604, "learning_rate": 1.2366539172137027e-05, "loss": 0.0007, "step": 11479 }, { "epoch": 11.038461538461538, "grad_norm": 1.2707043886184692, "learning_rate": 1.2365328863291872e-05, "loss": 0.0201, "step": 11480 }, { "epoch": 11.039423076923077, "grad_norm": 0.10292903333902359, "learning_rate": 1.2364118517743753e-05, "loss": 0.0009, "step": 11481 }, { "epoch": 11.040384615384616, "grad_norm": 0.7417581081390381, "learning_rate": 1.2362908135511453e-05, "loss": 0.0011, "step": 11482 }, { "epoch": 11.041346153846154, "grad_norm": 1.4143428802490234, "learning_rate": 1.2361697716613757e-05, "loss": 0.0075, "step": 11483 }, { "epoch": 11.042307692307693, "grad_norm": 0.23846890032291412, "learning_rate": 1.2360487261069443e-05, "loss": 0.0015, "step": 11484 }, { "epoch": 11.04326923076923, "grad_norm": 0.3159378468990326, "learning_rate": 1.235927676889729e-05, "loss": 0.0019, "step": 11485 }, { "epoch": 11.044230769230769, "grad_norm": 0.06443437933921814, "learning_rate": 1.2358066240116092e-05, "loss": 0.0005, "step": 11486 }, { "epoch": 11.045192307692307, "grad_norm": 1.3784962892532349, "learning_rate": 1.2356855674744625e-05, "loss": 0.0078, "step": 11487 }, { "epoch": 11.046153846153846, "grad_norm": 0.711418092250824, "learning_rate": 1.2355645072801673e-05, "loss": 0.0023, "step": 11488 }, { "epoch": 11.047115384615385, "grad_norm": 0.462452232837677, "learning_rate": 1.2354434434306026e-05, "loss": 0.0023, "step": 11489 }, { "epoch": 11.048076923076923, "grad_norm": 0.048641808331012726, "learning_rate": 1.2353223759276465e-05, "loss": 0.0005, "step": 11490 }, { "epoch": 11.049038461538462, "grad_norm": 0.06172219663858414, "learning_rate": 1.2352013047731779e-05, "loss": 0.0005, "step": 11491 }, { "epoch": 11.05, "grad_norm": 0.5631320476531982, "learning_rate": 1.2350802299690752e-05, "loss": 0.0065, "step": 11492 }, { "epoch": 11.050961538461538, "grad_norm": 4.375358581542969, "learning_rate": 1.2349591515172174e-05, "loss": 0.0355, "step": 11493 }, { "epoch": 11.051923076923076, "grad_norm": 3.323474168777466, "learning_rate": 1.2348380694194833e-05, "loss": 0.0581, "step": 11494 }, { "epoch": 11.052884615384615, "grad_norm": 0.2742774784564972, "learning_rate": 1.2347169836777516e-05, "loss": 0.0018, "step": 11495 }, { "epoch": 11.053846153846154, "grad_norm": 0.717534601688385, "learning_rate": 1.2345958942939008e-05, "loss": 0.0033, "step": 11496 }, { "epoch": 11.054807692307692, "grad_norm": 1.276625394821167, "learning_rate": 1.2344748012698107e-05, "loss": 0.0316, "step": 11497 }, { "epoch": 11.055769230769231, "grad_norm": 0.15999138355255127, "learning_rate": 1.2343537046073597e-05, "loss": 0.0006, "step": 11498 }, { "epoch": 11.05673076923077, "grad_norm": 0.8829627633094788, "learning_rate": 1.2342326043084268e-05, "loss": 0.011, "step": 11499 }, { "epoch": 11.057692307692308, "grad_norm": 2.6831459999084473, "learning_rate": 1.2341115003748916e-05, "loss": 0.0257, "step": 11500 }, { "epoch": 11.058653846153845, "grad_norm": 2.41475248336792, "learning_rate": 1.233990392808633e-05, "loss": 0.0247, "step": 11501 }, { "epoch": 11.059615384615384, "grad_norm": 0.027470290660858154, "learning_rate": 1.2338692816115304e-05, "loss": 0.0002, "step": 11502 }, { "epoch": 11.060576923076923, "grad_norm": 0.5552831292152405, "learning_rate": 1.2337481667854625e-05, "loss": 0.0026, "step": 11503 }, { "epoch": 11.061538461538461, "grad_norm": 0.22549782693386078, "learning_rate": 1.2336270483323094e-05, "loss": 0.0008, "step": 11504 }, { "epoch": 11.0625, "grad_norm": 1.0597987174987793, "learning_rate": 1.2335059262539501e-05, "loss": 0.0079, "step": 11505 }, { "epoch": 11.063461538461539, "grad_norm": 0.2299574911594391, "learning_rate": 1.2333848005522645e-05, "loss": 0.0019, "step": 11506 }, { "epoch": 11.064423076923077, "grad_norm": 1.8426930904388428, "learning_rate": 1.2332636712291313e-05, "loss": 0.0124, "step": 11507 }, { "epoch": 11.065384615384616, "grad_norm": 0.08188196271657944, "learning_rate": 1.2331425382864307e-05, "loss": 0.0006, "step": 11508 }, { "epoch": 11.066346153846155, "grad_norm": 2.8509325981140137, "learning_rate": 1.2330214017260422e-05, "loss": 0.0755, "step": 11509 }, { "epoch": 11.067307692307692, "grad_norm": 0.2906940281391144, "learning_rate": 1.2329002615498453e-05, "loss": 0.0015, "step": 11510 }, { "epoch": 11.06826923076923, "grad_norm": 0.02386428974568844, "learning_rate": 1.2327791177597201e-05, "loss": 0.0002, "step": 11511 }, { "epoch": 11.069230769230769, "grad_norm": 0.08666502684354782, "learning_rate": 1.2326579703575464e-05, "loss": 0.0006, "step": 11512 }, { "epoch": 11.070192307692308, "grad_norm": 0.14775502681732178, "learning_rate": 1.2325368193452037e-05, "loss": 0.0006, "step": 11513 }, { "epoch": 11.071153846153846, "grad_norm": 2.4321706295013428, "learning_rate": 1.2324156647245715e-05, "loss": 0.0255, "step": 11514 }, { "epoch": 11.072115384615385, "grad_norm": 0.6409963965415955, "learning_rate": 1.232294506497531e-05, "loss": 0.0035, "step": 11515 }, { "epoch": 11.073076923076924, "grad_norm": 0.10070192068815231, "learning_rate": 1.2321733446659612e-05, "loss": 0.0007, "step": 11516 }, { "epoch": 11.074038461538462, "grad_norm": 0.2562251091003418, "learning_rate": 1.2320521792317425e-05, "loss": 0.0013, "step": 11517 }, { "epoch": 11.075, "grad_norm": 0.08182583004236221, "learning_rate": 1.2319310101967552e-05, "loss": 0.0007, "step": 11518 }, { "epoch": 11.075961538461538, "grad_norm": 4.019643783569336, "learning_rate": 1.2318098375628793e-05, "loss": 0.0651, "step": 11519 }, { "epoch": 11.076923076923077, "grad_norm": 0.38515737652778625, "learning_rate": 1.231688661331995e-05, "loss": 0.0018, "step": 11520 }, { "epoch": 11.077884615384615, "grad_norm": 0.46488726139068604, "learning_rate": 1.2315674815059829e-05, "loss": 0.002, "step": 11521 }, { "epoch": 11.078846153846154, "grad_norm": 2.997561454772949, "learning_rate": 1.2314462980867227e-05, "loss": 0.0378, "step": 11522 }, { "epoch": 11.079807692307693, "grad_norm": 0.37687113881111145, "learning_rate": 1.2313251110760958e-05, "loss": 0.0023, "step": 11523 }, { "epoch": 11.080769230769231, "grad_norm": 1.2615114450454712, "learning_rate": 1.2312039204759819e-05, "loss": 0.0041, "step": 11524 }, { "epoch": 11.08173076923077, "grad_norm": 0.16105546057224274, "learning_rate": 1.2310827262882614e-05, "loss": 0.0013, "step": 11525 }, { "epoch": 11.082692307692307, "grad_norm": 0.5228608250617981, "learning_rate": 1.2309615285148154e-05, "loss": 0.0021, "step": 11526 }, { "epoch": 11.083653846153846, "grad_norm": 1.5412863492965698, "learning_rate": 1.2308403271575245e-05, "loss": 0.0092, "step": 11527 }, { "epoch": 11.084615384615384, "grad_norm": 0.04846891760826111, "learning_rate": 1.2307191222182688e-05, "loss": 0.0004, "step": 11528 }, { "epoch": 11.085576923076923, "grad_norm": 0.09637171030044556, "learning_rate": 1.2305979136989298e-05, "loss": 0.0005, "step": 11529 }, { "epoch": 11.086538461538462, "grad_norm": 2.7320187091827393, "learning_rate": 1.230476701601388e-05, "loss": 0.0037, "step": 11530 }, { "epoch": 11.0875, "grad_norm": 0.8943040370941162, "learning_rate": 1.2303554859275244e-05, "loss": 0.0034, "step": 11531 }, { "epoch": 11.088461538461539, "grad_norm": 1.6376309394836426, "learning_rate": 1.2302342666792196e-05, "loss": 0.0238, "step": 11532 }, { "epoch": 11.089423076923078, "grad_norm": 0.3358817398548126, "learning_rate": 1.2301130438583547e-05, "loss": 0.0021, "step": 11533 }, { "epoch": 11.090384615384615, "grad_norm": 0.21514587104320526, "learning_rate": 1.2299918174668106e-05, "loss": 0.001, "step": 11534 }, { "epoch": 11.091346153846153, "grad_norm": 0.3351333439350128, "learning_rate": 1.2298705875064688e-05, "loss": 0.0038, "step": 11535 }, { "epoch": 11.092307692307692, "grad_norm": 0.08496702462434769, "learning_rate": 1.2297493539792096e-05, "loss": 0.0003, "step": 11536 }, { "epoch": 11.09326923076923, "grad_norm": 0.15228348970413208, "learning_rate": 1.2296281168869151e-05, "loss": 0.0008, "step": 11537 }, { "epoch": 11.09423076923077, "grad_norm": 1.0489157438278198, "learning_rate": 1.2295068762314661e-05, "loss": 0.0059, "step": 11538 }, { "epoch": 11.095192307692308, "grad_norm": 0.38174524903297424, "learning_rate": 1.229385632014744e-05, "loss": 0.0028, "step": 11539 }, { "epoch": 11.096153846153847, "grad_norm": 0.864358127117157, "learning_rate": 1.2292643842386303e-05, "loss": 0.0085, "step": 11540 }, { "epoch": 11.097115384615385, "grad_norm": 0.03051033616065979, "learning_rate": 1.229143132905006e-05, "loss": 0.0004, "step": 11541 }, { "epoch": 11.098076923076922, "grad_norm": 2.627902030944824, "learning_rate": 1.229021878015753e-05, "loss": 0.0097, "step": 11542 }, { "epoch": 11.099038461538461, "grad_norm": 2.276533365249634, "learning_rate": 1.2289006195727523e-05, "loss": 0.0235, "step": 11543 }, { "epoch": 11.1, "grad_norm": 2.4068820476531982, "learning_rate": 1.228779357577886e-05, "loss": 0.0245, "step": 11544 }, { "epoch": 11.100961538461538, "grad_norm": 0.48878219723701477, "learning_rate": 1.2286580920330358e-05, "loss": 0.0014, "step": 11545 }, { "epoch": 11.101923076923077, "grad_norm": 0.01481728907674551, "learning_rate": 1.2285368229400829e-05, "loss": 0.0002, "step": 11546 }, { "epoch": 11.102884615384616, "grad_norm": 0.8071326017379761, "learning_rate": 1.228415550300909e-05, "loss": 0.0033, "step": 11547 }, { "epoch": 11.103846153846154, "grad_norm": 0.37370288372039795, "learning_rate": 1.2282942741173965e-05, "loss": 0.0016, "step": 11548 }, { "epoch": 11.104807692307693, "grad_norm": 0.0750812441110611, "learning_rate": 1.2281729943914269e-05, "loss": 0.0006, "step": 11549 }, { "epoch": 11.10576923076923, "grad_norm": 0.7707893252372742, "learning_rate": 1.2280517111248818e-05, "loss": 0.0125, "step": 11550 }, { "epoch": 11.106730769230769, "grad_norm": 0.04773842915892601, "learning_rate": 1.2279304243196438e-05, "loss": 0.0003, "step": 11551 }, { "epoch": 11.107692307692307, "grad_norm": 1.5326181650161743, "learning_rate": 1.2278091339775944e-05, "loss": 0.0057, "step": 11552 }, { "epoch": 11.108653846153846, "grad_norm": 0.023807832971215248, "learning_rate": 1.2276878401006159e-05, "loss": 0.0002, "step": 11553 }, { "epoch": 11.109615384615385, "grad_norm": 0.30643409490585327, "learning_rate": 1.22756654269059e-05, "loss": 0.0009, "step": 11554 }, { "epoch": 11.110576923076923, "grad_norm": 0.3879410922527313, "learning_rate": 1.2274452417493998e-05, "loss": 0.0016, "step": 11555 }, { "epoch": 11.111538461538462, "grad_norm": 0.8099364042282104, "learning_rate": 1.227323937278927e-05, "loss": 0.0036, "step": 11556 }, { "epoch": 11.1125, "grad_norm": 0.3730303645133972, "learning_rate": 1.2272026292810536e-05, "loss": 0.002, "step": 11557 }, { "epoch": 11.113461538461538, "grad_norm": 0.8834611773490906, "learning_rate": 1.2270813177576622e-05, "loss": 0.0045, "step": 11558 }, { "epoch": 11.114423076923076, "grad_norm": 0.4602535367012024, "learning_rate": 1.2269600027106354e-05, "loss": 0.0018, "step": 11559 }, { "epoch": 11.115384615384615, "grad_norm": 0.09074404090642929, "learning_rate": 1.2268386841418553e-05, "loss": 0.0005, "step": 11560 }, { "epoch": 11.116346153846154, "grad_norm": 0.7582783102989197, "learning_rate": 1.2267173620532047e-05, "loss": 0.0022, "step": 11561 }, { "epoch": 11.117307692307692, "grad_norm": 2.143808364868164, "learning_rate": 1.2265960364465661e-05, "loss": 0.0191, "step": 11562 }, { "epoch": 11.118269230769231, "grad_norm": 1.0152169466018677, "learning_rate": 1.2264747073238221e-05, "loss": 0.0033, "step": 11563 }, { "epoch": 11.11923076923077, "grad_norm": 1.4080841541290283, "learning_rate": 1.2263533746868552e-05, "loss": 0.0061, "step": 11564 }, { "epoch": 11.120192307692308, "grad_norm": 0.07831277698278427, "learning_rate": 1.2262320385375484e-05, "loss": 0.0003, "step": 11565 }, { "epoch": 11.121153846153845, "grad_norm": 0.16029611229896545, "learning_rate": 1.2261106988777843e-05, "loss": 0.0006, "step": 11566 }, { "epoch": 11.122115384615384, "grad_norm": 3.3378469944000244, "learning_rate": 1.2259893557094457e-05, "loss": 0.0226, "step": 11567 }, { "epoch": 11.123076923076923, "grad_norm": 3.2877628803253174, "learning_rate": 1.2258680090344155e-05, "loss": 0.0375, "step": 11568 }, { "epoch": 11.124038461538461, "grad_norm": 0.11578589677810669, "learning_rate": 1.2257466588545767e-05, "loss": 0.0006, "step": 11569 }, { "epoch": 11.125, "grad_norm": 0.3374175429344177, "learning_rate": 1.2256253051718125e-05, "loss": 0.0015, "step": 11570 }, { "epoch": 11.125961538461539, "grad_norm": 0.4316827058792114, "learning_rate": 1.2255039479880057e-05, "loss": 0.0019, "step": 11571 }, { "epoch": 11.126923076923077, "grad_norm": 0.7452892065048218, "learning_rate": 1.2253825873050396e-05, "loss": 0.0033, "step": 11572 }, { "epoch": 11.127884615384616, "grad_norm": 0.9546921253204346, "learning_rate": 1.2252612231247972e-05, "loss": 0.006, "step": 11573 }, { "epoch": 11.128846153846155, "grad_norm": 4.174757957458496, "learning_rate": 1.2251398554491615e-05, "loss": 0.0537, "step": 11574 }, { "epoch": 11.129807692307692, "grad_norm": 0.19611091911792755, "learning_rate": 1.2250184842800164e-05, "loss": 0.0008, "step": 11575 }, { "epoch": 11.13076923076923, "grad_norm": 2.630589246749878, "learning_rate": 1.2248971096192444e-05, "loss": 0.0828, "step": 11576 }, { "epoch": 11.131730769230769, "grad_norm": 1.2721365690231323, "learning_rate": 1.2247757314687296e-05, "loss": 0.0069, "step": 11577 }, { "epoch": 11.132692307692308, "grad_norm": 0.08930560946464539, "learning_rate": 1.2246543498303552e-05, "loss": 0.0005, "step": 11578 }, { "epoch": 11.133653846153846, "grad_norm": 1.7523473501205444, "learning_rate": 1.2245329647060044e-05, "loss": 0.008, "step": 11579 }, { "epoch": 11.134615384615385, "grad_norm": 0.2748846709728241, "learning_rate": 1.2244115760975612e-05, "loss": 0.0012, "step": 11580 }, { "epoch": 11.135576923076924, "grad_norm": 0.12668460607528687, "learning_rate": 1.224290184006909e-05, "loss": 0.0008, "step": 11581 }, { "epoch": 11.136538461538462, "grad_norm": 0.8141096234321594, "learning_rate": 1.2241687884359315e-05, "loss": 0.0064, "step": 11582 }, { "epoch": 11.1375, "grad_norm": 1.4956315755844116, "learning_rate": 1.224047389386512e-05, "loss": 0.0198, "step": 11583 }, { "epoch": 11.138461538461538, "grad_norm": 0.013575691729784012, "learning_rate": 1.2239259868605346e-05, "loss": 0.0001, "step": 11584 }, { "epoch": 11.139423076923077, "grad_norm": 2.046290636062622, "learning_rate": 1.2238045808598833e-05, "loss": 0.0092, "step": 11585 }, { "epoch": 11.140384615384615, "grad_norm": 0.6328200101852417, "learning_rate": 1.2236831713864419e-05, "loss": 0.0031, "step": 11586 }, { "epoch": 11.141346153846154, "grad_norm": 0.0817628726363182, "learning_rate": 1.2235617584420938e-05, "loss": 0.0005, "step": 11587 }, { "epoch": 11.142307692307693, "grad_norm": 3.3801791667938232, "learning_rate": 1.2234403420287238e-05, "loss": 0.0191, "step": 11588 }, { "epoch": 11.143269230769231, "grad_norm": 1.5784646272659302, "learning_rate": 1.2233189221482151e-05, "loss": 0.0088, "step": 11589 }, { "epoch": 11.14423076923077, "grad_norm": 0.32459989190101624, "learning_rate": 1.2231974988024522e-05, "loss": 0.0011, "step": 11590 }, { "epoch": 11.145192307692307, "grad_norm": 0.9281569123268127, "learning_rate": 1.2230760719933194e-05, "loss": 0.0252, "step": 11591 }, { "epoch": 11.146153846153846, "grad_norm": 0.0129908611997962, "learning_rate": 1.2229546417227005e-05, "loss": 0.0001, "step": 11592 }, { "epoch": 11.147115384615384, "grad_norm": 1.3840172290802002, "learning_rate": 1.2228332079924803e-05, "loss": 0.005, "step": 11593 }, { "epoch": 11.148076923076923, "grad_norm": 0.17933666706085205, "learning_rate": 1.2227117708045422e-05, "loss": 0.0006, "step": 11594 }, { "epoch": 11.149038461538462, "grad_norm": 0.03216007724404335, "learning_rate": 1.2225903301607713e-05, "loss": 0.0003, "step": 11595 }, { "epoch": 11.15, "grad_norm": 1.8706952333450317, "learning_rate": 1.2224688860630518e-05, "loss": 0.0115, "step": 11596 }, { "epoch": 11.150961538461539, "grad_norm": 3.7273104190826416, "learning_rate": 1.222347438513268e-05, "loss": 0.0338, "step": 11597 }, { "epoch": 11.151923076923078, "grad_norm": 0.17445941269397736, "learning_rate": 1.2222259875133046e-05, "loss": 0.0012, "step": 11598 }, { "epoch": 11.152884615384615, "grad_norm": 0.40067169070243835, "learning_rate": 1.2221045330650463e-05, "loss": 0.0019, "step": 11599 }, { "epoch": 11.153846153846153, "grad_norm": 0.04765242710709572, "learning_rate": 1.2219830751703773e-05, "loss": 0.0003, "step": 11600 }, { "epoch": 11.154807692307692, "grad_norm": 1.880595088005066, "learning_rate": 1.2218616138311826e-05, "loss": 0.0159, "step": 11601 }, { "epoch": 11.15576923076923, "grad_norm": 0.43402841687202454, "learning_rate": 1.2217401490493466e-05, "loss": 0.0016, "step": 11602 }, { "epoch": 11.15673076923077, "grad_norm": 2.0880818367004395, "learning_rate": 1.2216186808267544e-05, "loss": 0.0087, "step": 11603 }, { "epoch": 11.157692307692308, "grad_norm": 0.22941765189170837, "learning_rate": 1.2214972091652908e-05, "loss": 0.0011, "step": 11604 }, { "epoch": 11.158653846153847, "grad_norm": 0.3327144384384155, "learning_rate": 1.2213757340668404e-05, "loss": 0.0015, "step": 11605 }, { "epoch": 11.159615384615385, "grad_norm": 1.68733811378479, "learning_rate": 1.2212542555332886e-05, "loss": 0.0132, "step": 11606 }, { "epoch": 11.160576923076922, "grad_norm": 1.1251908540725708, "learning_rate": 1.2211327735665201e-05, "loss": 0.0082, "step": 11607 }, { "epoch": 11.161538461538461, "grad_norm": 0.8751410841941833, "learning_rate": 1.2210112881684197e-05, "loss": 0.004, "step": 11608 }, { "epoch": 11.1625, "grad_norm": 0.06733271479606628, "learning_rate": 1.220889799340873e-05, "loss": 0.0002, "step": 11609 }, { "epoch": 11.163461538461538, "grad_norm": 0.17759969830513, "learning_rate": 1.220768307085765e-05, "loss": 0.0009, "step": 11610 }, { "epoch": 11.164423076923077, "grad_norm": 0.8616593480110168, "learning_rate": 1.2206468114049808e-05, "loss": 0.0022, "step": 11611 }, { "epoch": 11.165384615384616, "grad_norm": 0.38958483934402466, "learning_rate": 1.2205253123004052e-05, "loss": 0.0015, "step": 11612 }, { "epoch": 11.166346153846154, "grad_norm": 1.3505722284317017, "learning_rate": 1.2204038097739246e-05, "loss": 0.0083, "step": 11613 }, { "epoch": 11.167307692307693, "grad_norm": 1.0398216247558594, "learning_rate": 1.2202823038274235e-05, "loss": 0.0059, "step": 11614 }, { "epoch": 11.16826923076923, "grad_norm": 2.367025852203369, "learning_rate": 1.2201607944627876e-05, "loss": 0.0171, "step": 11615 }, { "epoch": 11.169230769230769, "grad_norm": 1.581423044204712, "learning_rate": 1.2200392816819022e-05, "loss": 0.0047, "step": 11616 }, { "epoch": 11.170192307692307, "grad_norm": 3.2435519695281982, "learning_rate": 1.219917765486653e-05, "loss": 0.0639, "step": 11617 }, { "epoch": 11.171153846153846, "grad_norm": 0.05218644440174103, "learning_rate": 1.2197962458789257e-05, "loss": 0.0006, "step": 11618 }, { "epoch": 11.172115384615385, "grad_norm": 0.6785010099411011, "learning_rate": 1.2196747228606056e-05, "loss": 0.0029, "step": 11619 }, { "epoch": 11.173076923076923, "grad_norm": 0.935512125492096, "learning_rate": 1.2195531964335786e-05, "loss": 0.005, "step": 11620 }, { "epoch": 11.174038461538462, "grad_norm": 1.6301039457321167, "learning_rate": 1.2194316665997303e-05, "loss": 0.0061, "step": 11621 }, { "epoch": 11.175, "grad_norm": 0.9730561971664429, "learning_rate": 1.2193101333609467e-05, "loss": 0.0062, "step": 11622 }, { "epoch": 11.175961538461538, "grad_norm": 0.07554812729358673, "learning_rate": 1.2191885967191134e-05, "loss": 0.0004, "step": 11623 }, { "epoch": 11.176923076923076, "grad_norm": 0.13869602978229523, "learning_rate": 1.2190670566761163e-05, "loss": 0.0009, "step": 11624 }, { "epoch": 11.177884615384615, "grad_norm": 0.8035235404968262, "learning_rate": 1.2189455132338414e-05, "loss": 0.0063, "step": 11625 }, { "epoch": 11.178846153846154, "grad_norm": 0.2623737156391144, "learning_rate": 1.218823966394175e-05, "loss": 0.0015, "step": 11626 }, { "epoch": 11.179807692307692, "grad_norm": 0.01473761722445488, "learning_rate": 1.2187024161590026e-05, "loss": 0.0002, "step": 11627 }, { "epoch": 11.180769230769231, "grad_norm": 0.060695208609104156, "learning_rate": 1.2185808625302107e-05, "loss": 0.0004, "step": 11628 }, { "epoch": 11.18173076923077, "grad_norm": 0.00935319159179926, "learning_rate": 1.2184593055096853e-05, "loss": 0.0001, "step": 11629 }, { "epoch": 11.182692307692308, "grad_norm": 1.9171068668365479, "learning_rate": 1.218337745099313e-05, "loss": 0.0259, "step": 11630 }, { "epoch": 11.183653846153845, "grad_norm": 1.6244791746139526, "learning_rate": 1.2182161813009792e-05, "loss": 0.0069, "step": 11631 }, { "epoch": 11.184615384615384, "grad_norm": 0.03980587422847748, "learning_rate": 1.2180946141165711e-05, "loss": 0.0002, "step": 11632 }, { "epoch": 11.185576923076923, "grad_norm": 1.371729850769043, "learning_rate": 1.2179730435479747e-05, "loss": 0.0054, "step": 11633 }, { "epoch": 11.186538461538461, "grad_norm": 0.5559417605400085, "learning_rate": 1.2178514695970762e-05, "loss": 0.003, "step": 11634 }, { "epoch": 11.1875, "grad_norm": 1.152342677116394, "learning_rate": 1.2177298922657622e-05, "loss": 0.0046, "step": 11635 }, { "epoch": 11.188461538461539, "grad_norm": 0.07633619755506516, "learning_rate": 1.2176083115559194e-05, "loss": 0.0003, "step": 11636 }, { "epoch": 11.189423076923077, "grad_norm": 1.411957025527954, "learning_rate": 1.2174867274694344e-05, "loss": 0.0253, "step": 11637 }, { "epoch": 11.190384615384616, "grad_norm": 0.5176448225975037, "learning_rate": 1.2173651400081934e-05, "loss": 0.0023, "step": 11638 }, { "epoch": 11.191346153846155, "grad_norm": 0.019655948504805565, "learning_rate": 1.2172435491740837e-05, "loss": 0.0002, "step": 11639 }, { "epoch": 11.192307692307692, "grad_norm": 0.12316640466451645, "learning_rate": 1.2171219549689917e-05, "loss": 0.0009, "step": 11640 }, { "epoch": 11.19326923076923, "grad_norm": 0.7775833606719971, "learning_rate": 1.2170003573948041e-05, "loss": 0.0032, "step": 11641 }, { "epoch": 11.194230769230769, "grad_norm": 1.6401686668395996, "learning_rate": 1.2168787564534078e-05, "loss": 0.0149, "step": 11642 }, { "epoch": 11.195192307692308, "grad_norm": 1.5051014423370361, "learning_rate": 1.2167571521466899e-05, "loss": 0.0306, "step": 11643 }, { "epoch": 11.196153846153846, "grad_norm": 3.250520706176758, "learning_rate": 1.2166355444765372e-05, "loss": 0.0258, "step": 11644 }, { "epoch": 11.197115384615385, "grad_norm": 0.01231094915419817, "learning_rate": 1.2165139334448365e-05, "loss": 0.0001, "step": 11645 }, { "epoch": 11.198076923076924, "grad_norm": 0.019772080704569817, "learning_rate": 1.216392319053475e-05, "loss": 0.0002, "step": 11646 }, { "epoch": 11.199038461538462, "grad_norm": 1.1306136846542358, "learning_rate": 1.2162707013043398e-05, "loss": 0.0036, "step": 11647 }, { "epoch": 11.2, "grad_norm": 1.244419813156128, "learning_rate": 1.2161490801993183e-05, "loss": 0.0066, "step": 11648 }, { "epoch": 11.200961538461538, "grad_norm": 2.258666515350342, "learning_rate": 1.2160274557402972e-05, "loss": 0.012, "step": 11649 }, { "epoch": 11.201923076923077, "grad_norm": 1.7417101860046387, "learning_rate": 1.215905827929164e-05, "loss": 0.0087, "step": 11650 }, { "epoch": 11.202884615384615, "grad_norm": 0.311895489692688, "learning_rate": 1.2157841967678064e-05, "loss": 0.0019, "step": 11651 }, { "epoch": 11.203846153846154, "grad_norm": 1.1840808391571045, "learning_rate": 1.215662562258111e-05, "loss": 0.0183, "step": 11652 }, { "epoch": 11.204807692307693, "grad_norm": 0.7274869084358215, "learning_rate": 1.2155409244019655e-05, "loss": 0.004, "step": 11653 }, { "epoch": 11.205769230769231, "grad_norm": 3.115049123764038, "learning_rate": 1.2154192832012579e-05, "loss": 0.0091, "step": 11654 }, { "epoch": 11.20673076923077, "grad_norm": 0.1129121258854866, "learning_rate": 1.215297638657875e-05, "loss": 0.0006, "step": 11655 }, { "epoch": 11.207692307692307, "grad_norm": 0.7284823060035706, "learning_rate": 1.2151759907737048e-05, "loss": 0.0044, "step": 11656 }, { "epoch": 11.208653846153846, "grad_norm": 0.2812550365924835, "learning_rate": 1.2150543395506345e-05, "loss": 0.0013, "step": 11657 }, { "epoch": 11.209615384615384, "grad_norm": 5.523149013519287, "learning_rate": 1.2149326849905522e-05, "loss": 0.0732, "step": 11658 }, { "epoch": 11.210576923076923, "grad_norm": 1.802573800086975, "learning_rate": 1.2148110270953455e-05, "loss": 0.0176, "step": 11659 }, { "epoch": 11.211538461538462, "grad_norm": 1.7184611558914185, "learning_rate": 1.2146893658669018e-05, "loss": 0.0154, "step": 11660 }, { "epoch": 11.2125, "grad_norm": 1.782023310661316, "learning_rate": 1.2145677013071096e-05, "loss": 0.0239, "step": 11661 }, { "epoch": 11.213461538461539, "grad_norm": 1.6025310754776, "learning_rate": 1.2144460334178565e-05, "loss": 0.0138, "step": 11662 }, { "epoch": 11.214423076923078, "grad_norm": 0.11470617353916168, "learning_rate": 1.2143243622010302e-05, "loss": 0.0007, "step": 11663 }, { "epoch": 11.215384615384615, "grad_norm": 0.22140638530254364, "learning_rate": 1.2142026876585188e-05, "loss": 0.0009, "step": 11664 }, { "epoch": 11.216346153846153, "grad_norm": 3.890732526779175, "learning_rate": 1.2140810097922102e-05, "loss": 0.0651, "step": 11665 }, { "epoch": 11.217307692307692, "grad_norm": 0.11648968607187271, "learning_rate": 1.213959328603993e-05, "loss": 0.0005, "step": 11666 }, { "epoch": 11.21826923076923, "grad_norm": 1.0379210710525513, "learning_rate": 1.2138376440957544e-05, "loss": 0.0047, "step": 11667 }, { "epoch": 11.21923076923077, "grad_norm": 3.0927023887634277, "learning_rate": 1.2137159562693839e-05, "loss": 0.0409, "step": 11668 }, { "epoch": 11.220192307692308, "grad_norm": 0.12325391173362732, "learning_rate": 1.2135942651267687e-05, "loss": 0.0008, "step": 11669 }, { "epoch": 11.221153846153847, "grad_norm": 0.3483365774154663, "learning_rate": 1.2134725706697975e-05, "loss": 0.0014, "step": 11670 }, { "epoch": 11.222115384615385, "grad_norm": 0.38530609011650085, "learning_rate": 1.213350872900358e-05, "loss": 0.0016, "step": 11671 }, { "epoch": 11.223076923076922, "grad_norm": 2.399492025375366, "learning_rate": 1.2132291718203398e-05, "loss": 0.0334, "step": 11672 }, { "epoch": 11.224038461538461, "grad_norm": 2.5398106575012207, "learning_rate": 1.2131074674316302e-05, "loss": 0.0279, "step": 11673 }, { "epoch": 11.225, "grad_norm": 1.9135522842407227, "learning_rate": 1.2129857597361188e-05, "loss": 0.0076, "step": 11674 }, { "epoch": 11.225961538461538, "grad_norm": 4.848696231842041, "learning_rate": 1.2128640487356929e-05, "loss": 0.081, "step": 11675 }, { "epoch": 11.226923076923077, "grad_norm": 0.043659187853336334, "learning_rate": 1.2127423344322418e-05, "loss": 0.0003, "step": 11676 }, { "epoch": 11.227884615384616, "grad_norm": 0.20277540385723114, "learning_rate": 1.2126206168276541e-05, "loss": 0.0013, "step": 11677 }, { "epoch": 11.228846153846154, "grad_norm": 0.37325993180274963, "learning_rate": 1.2124988959238186e-05, "loss": 0.0014, "step": 11678 }, { "epoch": 11.229807692307693, "grad_norm": 1.2202006578445435, "learning_rate": 1.2123771717226239e-05, "loss": 0.0052, "step": 11679 }, { "epoch": 11.23076923076923, "grad_norm": 0.4243139624595642, "learning_rate": 1.2122554442259589e-05, "loss": 0.0037, "step": 11680 }, { "epoch": 11.231730769230769, "grad_norm": 2.8438048362731934, "learning_rate": 1.2121337134357121e-05, "loss": 0.0274, "step": 11681 }, { "epoch": 11.232692307692307, "grad_norm": 2.267664670944214, "learning_rate": 1.2120119793537727e-05, "loss": 0.0454, "step": 11682 }, { "epoch": 11.233653846153846, "grad_norm": 0.2377847135066986, "learning_rate": 1.2118902419820297e-05, "loss": 0.0013, "step": 11683 }, { "epoch": 11.234615384615385, "grad_norm": 2.184710741043091, "learning_rate": 1.2117685013223724e-05, "loss": 0.0277, "step": 11684 }, { "epoch": 11.235576923076923, "grad_norm": 0.30086788535118103, "learning_rate": 1.2116467573766891e-05, "loss": 0.0019, "step": 11685 }, { "epoch": 11.236538461538462, "grad_norm": 0.04520484805107117, "learning_rate": 1.2115250101468691e-05, "loss": 0.0003, "step": 11686 }, { "epoch": 11.2375, "grad_norm": 0.4867030382156372, "learning_rate": 1.2114032596348023e-05, "loss": 0.0023, "step": 11687 }, { "epoch": 11.238461538461538, "grad_norm": 1.7360436916351318, "learning_rate": 1.211281505842377e-05, "loss": 0.0037, "step": 11688 }, { "epoch": 11.239423076923076, "grad_norm": 1.3895429372787476, "learning_rate": 1.2111597487714828e-05, "loss": 0.0103, "step": 11689 }, { "epoch": 11.240384615384615, "grad_norm": 0.37424224615097046, "learning_rate": 1.211037988424009e-05, "loss": 0.0016, "step": 11690 }, { "epoch": 11.241346153846154, "grad_norm": 0.25844523310661316, "learning_rate": 1.2109162248018454e-05, "loss": 0.0007, "step": 11691 }, { "epoch": 11.242307692307692, "grad_norm": 0.44970056414604187, "learning_rate": 1.210794457906881e-05, "loss": 0.0031, "step": 11692 }, { "epoch": 11.243269230769231, "grad_norm": 3.7176077365875244, "learning_rate": 1.2106726877410049e-05, "loss": 0.05, "step": 11693 }, { "epoch": 11.24423076923077, "grad_norm": 0.7719401121139526, "learning_rate": 1.2105509143061072e-05, "loss": 0.0023, "step": 11694 }, { "epoch": 11.245192307692308, "grad_norm": 0.7552746534347534, "learning_rate": 1.2104291376040774e-05, "loss": 0.0017, "step": 11695 }, { "epoch": 11.246153846153845, "grad_norm": 0.7679647207260132, "learning_rate": 1.2103073576368047e-05, "loss": 0.0046, "step": 11696 }, { "epoch": 11.247115384615384, "grad_norm": 0.6079360842704773, "learning_rate": 1.210185574406179e-05, "loss": 0.0023, "step": 11697 }, { "epoch": 11.248076923076923, "grad_norm": 2.4010443687438965, "learning_rate": 1.2100637879140904e-05, "loss": 0.0221, "step": 11698 }, { "epoch": 11.249038461538461, "grad_norm": 4.213690280914307, "learning_rate": 1.2099419981624284e-05, "loss": 0.1044, "step": 11699 }, { "epoch": 11.25, "grad_norm": 2.541041851043701, "learning_rate": 1.2098202051530822e-05, "loss": 0.0133, "step": 11700 }, { "epoch": 11.250961538461539, "grad_norm": 0.029401816427707672, "learning_rate": 1.2096984088879429e-05, "loss": 0.0003, "step": 11701 }, { "epoch": 11.251923076923077, "grad_norm": 0.28186655044555664, "learning_rate": 1.2095766093688994e-05, "loss": 0.0025, "step": 11702 }, { "epoch": 11.252884615384616, "grad_norm": 1.7906562089920044, "learning_rate": 1.2094548065978422e-05, "loss": 0.0037, "step": 11703 }, { "epoch": 11.253846153846155, "grad_norm": 0.08025895804166794, "learning_rate": 1.2093330005766607e-05, "loss": 0.0006, "step": 11704 }, { "epoch": 11.254807692307692, "grad_norm": 0.14919687807559967, "learning_rate": 1.209211191307246e-05, "loss": 0.0009, "step": 11705 }, { "epoch": 11.25576923076923, "grad_norm": 0.30298659205436707, "learning_rate": 1.2090893787914873e-05, "loss": 0.0019, "step": 11706 }, { "epoch": 11.256730769230769, "grad_norm": 0.018445681780576706, "learning_rate": 1.2089675630312755e-05, "loss": 0.0002, "step": 11707 }, { "epoch": 11.257692307692308, "grad_norm": 1.4989413022994995, "learning_rate": 1.2088457440284997e-05, "loss": 0.0119, "step": 11708 }, { "epoch": 11.258653846153846, "grad_norm": 0.06509692966938019, "learning_rate": 1.2087239217850515e-05, "loss": 0.0006, "step": 11709 }, { "epoch": 11.259615384615385, "grad_norm": 0.25950849056243896, "learning_rate": 1.2086020963028204e-05, "loss": 0.0013, "step": 11710 }, { "epoch": 11.260576923076924, "grad_norm": 1.3144633769989014, "learning_rate": 1.208480267583697e-05, "loss": 0.011, "step": 11711 }, { "epoch": 11.261538461538462, "grad_norm": 0.07333378493785858, "learning_rate": 1.2083584356295719e-05, "loss": 0.0006, "step": 11712 }, { "epoch": 11.2625, "grad_norm": 0.05182206630706787, "learning_rate": 1.2082366004423353e-05, "loss": 0.0004, "step": 11713 }, { "epoch": 11.263461538461538, "grad_norm": 4.543247222900391, "learning_rate": 1.208114762023878e-05, "loss": 0.019, "step": 11714 }, { "epoch": 11.264423076923077, "grad_norm": 1.7697619199752808, "learning_rate": 1.2079929203760901e-05, "loss": 0.0059, "step": 11715 }, { "epoch": 11.265384615384615, "grad_norm": 0.41244691610336304, "learning_rate": 1.2078710755008624e-05, "loss": 0.002, "step": 11716 }, { "epoch": 11.266346153846154, "grad_norm": 3.0482850074768066, "learning_rate": 1.2077492274000861e-05, "loss": 0.0684, "step": 11717 }, { "epoch": 11.267307692307693, "grad_norm": 0.08205536007881165, "learning_rate": 1.2076273760756514e-05, "loss": 0.0005, "step": 11718 }, { "epoch": 11.268269230769231, "grad_norm": 0.3991318345069885, "learning_rate": 1.2075055215294492e-05, "loss": 0.0012, "step": 11719 }, { "epoch": 11.26923076923077, "grad_norm": 1.2012672424316406, "learning_rate": 1.2073836637633705e-05, "loss": 0.008, "step": 11720 }, { "epoch": 11.270192307692307, "grad_norm": 8.046299934387207, "learning_rate": 1.2072618027793056e-05, "loss": 0.1938, "step": 11721 }, { "epoch": 11.271153846153846, "grad_norm": 2.1703572273254395, "learning_rate": 1.207139938579146e-05, "loss": 0.0116, "step": 11722 }, { "epoch": 11.272115384615384, "grad_norm": 0.07868735492229462, "learning_rate": 1.2070180711647828e-05, "loss": 0.0008, "step": 11723 }, { "epoch": 11.273076923076923, "grad_norm": 0.994678258895874, "learning_rate": 1.2068962005381063e-05, "loss": 0.0085, "step": 11724 }, { "epoch": 11.274038461538462, "grad_norm": 3.339146614074707, "learning_rate": 1.2067743267010083e-05, "loss": 0.0328, "step": 11725 }, { "epoch": 11.275, "grad_norm": 0.2175770252943039, "learning_rate": 1.2066524496553793e-05, "loss": 0.0006, "step": 11726 }, { "epoch": 11.275961538461539, "grad_norm": 2.953463315963745, "learning_rate": 1.206530569403111e-05, "loss": 0.0176, "step": 11727 }, { "epoch": 11.276923076923078, "grad_norm": 2.872433662414551, "learning_rate": 1.2064086859460947e-05, "loss": 0.0248, "step": 11728 }, { "epoch": 11.277884615384615, "grad_norm": 4.220402240753174, "learning_rate": 1.2062867992862208e-05, "loss": 0.0606, "step": 11729 }, { "epoch": 11.278846153846153, "grad_norm": 2.0250978469848633, "learning_rate": 1.2061649094253815e-05, "loss": 0.0143, "step": 11730 }, { "epoch": 11.279807692307692, "grad_norm": 2.696678876876831, "learning_rate": 1.2060430163654681e-05, "loss": 0.0223, "step": 11731 }, { "epoch": 11.28076923076923, "grad_norm": 2.570786476135254, "learning_rate": 1.2059211201083717e-05, "loss": 0.0522, "step": 11732 }, { "epoch": 11.28173076923077, "grad_norm": 1.5826524496078491, "learning_rate": 1.2057992206559837e-05, "loss": 0.0448, "step": 11733 }, { "epoch": 11.282692307692308, "grad_norm": 0.06548717617988586, "learning_rate": 1.2056773180101962e-05, "loss": 0.0005, "step": 11734 }, { "epoch": 11.283653846153847, "grad_norm": 0.11217549443244934, "learning_rate": 1.2055554121729e-05, "loss": 0.0006, "step": 11735 }, { "epoch": 11.284615384615385, "grad_norm": 0.6549948453903198, "learning_rate": 1.2054335031459874e-05, "loss": 0.0021, "step": 11736 }, { "epoch": 11.285576923076922, "grad_norm": 0.7398827075958252, "learning_rate": 1.2053115909313494e-05, "loss": 0.0166, "step": 11737 }, { "epoch": 11.286538461538461, "grad_norm": 1.4238693714141846, "learning_rate": 1.2051896755308782e-05, "loss": 0.0079, "step": 11738 }, { "epoch": 11.2875, "grad_norm": 0.6233024001121521, "learning_rate": 1.2050677569464657e-05, "loss": 0.002, "step": 11739 }, { "epoch": 11.288461538461538, "grad_norm": 0.04959364980459213, "learning_rate": 1.2049458351800033e-05, "loss": 0.0005, "step": 11740 }, { "epoch": 11.289423076923077, "grad_norm": 2.585202693939209, "learning_rate": 1.2048239102333829e-05, "loss": 0.0193, "step": 11741 }, { "epoch": 11.290384615384616, "grad_norm": 2.8617091178894043, "learning_rate": 1.2047019821084969e-05, "loss": 0.0687, "step": 11742 }, { "epoch": 11.291346153846154, "grad_norm": 1.4851707220077515, "learning_rate": 1.2045800508072367e-05, "loss": 0.0096, "step": 11743 }, { "epoch": 11.292307692307693, "grad_norm": 1.4326492547988892, "learning_rate": 1.2044581163314945e-05, "loss": 0.0404, "step": 11744 }, { "epoch": 11.29326923076923, "grad_norm": 1.3470441102981567, "learning_rate": 1.2043361786831626e-05, "loss": 0.0145, "step": 11745 }, { "epoch": 11.294230769230769, "grad_norm": 0.3759194016456604, "learning_rate": 1.204214237864133e-05, "loss": 0.0017, "step": 11746 }, { "epoch": 11.295192307692307, "grad_norm": 0.16257381439208984, "learning_rate": 1.2040922938762977e-05, "loss": 0.0009, "step": 11747 }, { "epoch": 11.296153846153846, "grad_norm": 0.13073104619979858, "learning_rate": 1.2039703467215489e-05, "loss": 0.0013, "step": 11748 }, { "epoch": 11.297115384615385, "grad_norm": 1.3661013841629028, "learning_rate": 1.203848396401779e-05, "loss": 0.0139, "step": 11749 }, { "epoch": 11.298076923076923, "grad_norm": 0.19894003868103027, "learning_rate": 1.2037264429188805e-05, "loss": 0.0013, "step": 11750 }, { "epoch": 11.299038461538462, "grad_norm": 1.4451990127563477, "learning_rate": 1.203604486274745e-05, "loss": 0.0088, "step": 11751 }, { "epoch": 11.3, "grad_norm": 0.7456668615341187, "learning_rate": 1.2034825264712657e-05, "loss": 0.0045, "step": 11752 }, { "epoch": 11.300961538461538, "grad_norm": 3.464395523071289, "learning_rate": 1.2033605635103349e-05, "loss": 0.0517, "step": 11753 }, { "epoch": 11.301923076923076, "grad_norm": 0.7496804594993591, "learning_rate": 1.2032385973938452e-05, "loss": 0.0034, "step": 11754 }, { "epoch": 11.302884615384615, "grad_norm": 0.31677955389022827, "learning_rate": 1.2031166281236886e-05, "loss": 0.002, "step": 11755 }, { "epoch": 11.303846153846154, "grad_norm": 0.1791658103466034, "learning_rate": 1.2029946557017583e-05, "loss": 0.0011, "step": 11756 }, { "epoch": 11.304807692307692, "grad_norm": 1.260757327079773, "learning_rate": 1.2028726801299467e-05, "loss": 0.0168, "step": 11757 }, { "epoch": 11.305769230769231, "grad_norm": 1.1530765295028687, "learning_rate": 1.2027507014101463e-05, "loss": 0.0057, "step": 11758 }, { "epoch": 11.30673076923077, "grad_norm": 0.5573974847793579, "learning_rate": 1.2026287195442503e-05, "loss": 0.0022, "step": 11759 }, { "epoch": 11.307692307692308, "grad_norm": 1.9882904291152954, "learning_rate": 1.2025067345341512e-05, "loss": 0.0239, "step": 11760 }, { "epoch": 11.308653846153845, "grad_norm": 1.0442827939987183, "learning_rate": 1.2023847463817422e-05, "loss": 0.0065, "step": 11761 }, { "epoch": 11.309615384615384, "grad_norm": 1.2539451122283936, "learning_rate": 1.2022627550889154e-05, "loss": 0.0088, "step": 11762 }, { "epoch": 11.310576923076923, "grad_norm": 0.12126243859529495, "learning_rate": 1.2021407606575646e-05, "loss": 0.0004, "step": 11763 }, { "epoch": 11.311538461538461, "grad_norm": 1.984208345413208, "learning_rate": 1.2020187630895826e-05, "loss": 0.0471, "step": 11764 }, { "epoch": 11.3125, "grad_norm": 3.076995372772217, "learning_rate": 1.2018967623868623e-05, "loss": 0.0481, "step": 11765 }, { "epoch": 11.313461538461539, "grad_norm": 0.7042245864868164, "learning_rate": 1.2017747585512965e-05, "loss": 0.0025, "step": 11766 }, { "epoch": 11.314423076923077, "grad_norm": 1.2021188735961914, "learning_rate": 1.2016527515847786e-05, "loss": 0.0041, "step": 11767 }, { "epoch": 11.315384615384616, "grad_norm": 1.707544207572937, "learning_rate": 1.201530741489202e-05, "loss": 0.0083, "step": 11768 }, { "epoch": 11.316346153846155, "grad_norm": 3.025538206100464, "learning_rate": 1.20140872826646e-05, "loss": 0.0354, "step": 11769 }, { "epoch": 11.317307692307692, "grad_norm": 0.10177332162857056, "learning_rate": 1.2012867119184455e-05, "loss": 0.0003, "step": 11770 }, { "epoch": 11.31826923076923, "grad_norm": 2.531594753265381, "learning_rate": 1.201164692447052e-05, "loss": 0.0214, "step": 11771 }, { "epoch": 11.319230769230769, "grad_norm": 1.8918863534927368, "learning_rate": 1.2010426698541728e-05, "loss": 0.0128, "step": 11772 }, { "epoch": 11.320192307692308, "grad_norm": 0.3653016984462738, "learning_rate": 1.2009206441417014e-05, "loss": 0.0029, "step": 11773 }, { "epoch": 11.321153846153846, "grad_norm": 1.715917944908142, "learning_rate": 1.2007986153115314e-05, "loss": 0.049, "step": 11774 }, { "epoch": 11.322115384615385, "grad_norm": 0.7147164344787598, "learning_rate": 1.2006765833655565e-05, "loss": 0.0049, "step": 11775 }, { "epoch": 11.323076923076924, "grad_norm": 0.2769842743873596, "learning_rate": 1.2005545483056698e-05, "loss": 0.0016, "step": 11776 }, { "epoch": 11.324038461538462, "grad_norm": 0.2520393431186676, "learning_rate": 1.2004325101337648e-05, "loss": 0.0032, "step": 11777 }, { "epoch": 11.325, "grad_norm": 1.6791013479232788, "learning_rate": 1.2003104688517358e-05, "loss": 0.0103, "step": 11778 }, { "epoch": 11.325961538461538, "grad_norm": 0.24360160529613495, "learning_rate": 1.2001884244614762e-05, "loss": 0.001, "step": 11779 }, { "epoch": 11.326923076923077, "grad_norm": 0.4223204553127289, "learning_rate": 1.2000663769648797e-05, "loss": 0.0047, "step": 11780 }, { "epoch": 11.327884615384615, "grad_norm": 0.681601881980896, "learning_rate": 1.1999443263638403e-05, "loss": 0.0039, "step": 11781 }, { "epoch": 11.328846153846154, "grad_norm": 0.018514884635806084, "learning_rate": 1.199822272660252e-05, "loss": 0.0002, "step": 11782 }, { "epoch": 11.329807692307693, "grad_norm": 1.816483736038208, "learning_rate": 1.1997002158560083e-05, "loss": 0.0131, "step": 11783 }, { "epoch": 11.330769230769231, "grad_norm": 1.1851282119750977, "learning_rate": 1.199578155953003e-05, "loss": 0.0061, "step": 11784 }, { "epoch": 11.33173076923077, "grad_norm": 2.8564817905426025, "learning_rate": 1.199456092953131e-05, "loss": 0.0167, "step": 11785 }, { "epoch": 11.332692307692307, "grad_norm": 0.09594893455505371, "learning_rate": 1.1993340268582859e-05, "loss": 0.0004, "step": 11786 }, { "epoch": 11.333653846153846, "grad_norm": 1.5611472129821777, "learning_rate": 1.1992119576703614e-05, "loss": 0.0365, "step": 11787 }, { "epoch": 11.334615384615384, "grad_norm": 2.169402837753296, "learning_rate": 1.1990898853912519e-05, "loss": 0.0084, "step": 11788 }, { "epoch": 11.335576923076923, "grad_norm": 0.3872586488723755, "learning_rate": 1.1989678100228519e-05, "loss": 0.0031, "step": 11789 }, { "epoch": 11.336538461538462, "grad_norm": 1.7641403675079346, "learning_rate": 1.1988457315670556e-05, "loss": 0.022, "step": 11790 }, { "epoch": 11.3375, "grad_norm": 1.9965004920959473, "learning_rate": 1.1987236500257568e-05, "loss": 0.0048, "step": 11791 }, { "epoch": 11.338461538461539, "grad_norm": 1.5895692110061646, "learning_rate": 1.1986015654008505e-05, "loss": 0.0183, "step": 11792 }, { "epoch": 11.339423076923078, "grad_norm": 4.284836292266846, "learning_rate": 1.1984794776942309e-05, "loss": 0.0654, "step": 11793 }, { "epoch": 11.340384615384615, "grad_norm": 1.595931053161621, "learning_rate": 1.1983573869077921e-05, "loss": 0.0075, "step": 11794 }, { "epoch": 11.341346153846153, "grad_norm": 0.3830154240131378, "learning_rate": 1.1982352930434287e-05, "loss": 0.0022, "step": 11795 }, { "epoch": 11.342307692307692, "grad_norm": 0.04521312192082405, "learning_rate": 1.1981131961030354e-05, "loss": 0.0003, "step": 11796 }, { "epoch": 11.34326923076923, "grad_norm": 2.189424514770508, "learning_rate": 1.1979910960885071e-05, "loss": 0.0169, "step": 11797 }, { "epoch": 11.34423076923077, "grad_norm": 2.17478609085083, "learning_rate": 1.197868993001738e-05, "loss": 0.0335, "step": 11798 }, { "epoch": 11.345192307692308, "grad_norm": 1.2598052024841309, "learning_rate": 1.1977468868446224e-05, "loss": 0.0033, "step": 11799 }, { "epoch": 11.346153846153847, "grad_norm": 4.887673854827881, "learning_rate": 1.197624777619056e-05, "loss": 0.0185, "step": 11800 }, { "epoch": 11.347115384615385, "grad_norm": 0.26730483770370483, "learning_rate": 1.1975026653269328e-05, "loss": 0.0014, "step": 11801 }, { "epoch": 11.348076923076922, "grad_norm": 0.6319655179977417, "learning_rate": 1.1973805499701479e-05, "loss": 0.0026, "step": 11802 }, { "epoch": 11.349038461538461, "grad_norm": 0.036939993500709534, "learning_rate": 1.1972584315505964e-05, "loss": 0.0005, "step": 11803 }, { "epoch": 11.35, "grad_norm": 1.0228710174560547, "learning_rate": 1.1971363100701727e-05, "loss": 0.0095, "step": 11804 }, { "epoch": 11.350961538461538, "grad_norm": 0.040423545986413956, "learning_rate": 1.1970141855307724e-05, "loss": 0.0002, "step": 11805 }, { "epoch": 11.351923076923077, "grad_norm": 0.4954843819141388, "learning_rate": 1.1968920579342897e-05, "loss": 0.0023, "step": 11806 }, { "epoch": 11.352884615384616, "grad_norm": 0.08139639347791672, "learning_rate": 1.1967699272826205e-05, "loss": 0.0007, "step": 11807 }, { "epoch": 11.353846153846154, "grad_norm": 0.6731680035591125, "learning_rate": 1.1966477935776598e-05, "loss": 0.0034, "step": 11808 }, { "epoch": 11.354807692307693, "grad_norm": 0.20006008446216583, "learning_rate": 1.1965256568213017e-05, "loss": 0.0015, "step": 11809 }, { "epoch": 11.35576923076923, "grad_norm": 0.07786369323730469, "learning_rate": 1.1964035170154428e-05, "loss": 0.0007, "step": 11810 }, { "epoch": 11.356730769230769, "grad_norm": 0.2709752321243286, "learning_rate": 1.1962813741619777e-05, "loss": 0.0016, "step": 11811 }, { "epoch": 11.357692307692307, "grad_norm": 0.06859589368104935, "learning_rate": 1.1961592282628017e-05, "loss": 0.0004, "step": 11812 }, { "epoch": 11.358653846153846, "grad_norm": 1.4998915195465088, "learning_rate": 1.1960370793198099e-05, "loss": 0.0258, "step": 11813 }, { "epoch": 11.359615384615385, "grad_norm": 0.009078321978449821, "learning_rate": 1.1959149273348985e-05, "loss": 0.0001, "step": 11814 }, { "epoch": 11.360576923076923, "grad_norm": 2.1091020107269287, "learning_rate": 1.195792772309962e-05, "loss": 0.0337, "step": 11815 }, { "epoch": 11.361538461538462, "grad_norm": 4.70648717880249, "learning_rate": 1.1956706142468968e-05, "loss": 0.1021, "step": 11816 }, { "epoch": 11.3625, "grad_norm": 4.773881912231445, "learning_rate": 1.1955484531475973e-05, "loss": 0.033, "step": 11817 }, { "epoch": 11.363461538461538, "grad_norm": 0.6378446817398071, "learning_rate": 1.1954262890139604e-05, "loss": 0.0052, "step": 11818 }, { "epoch": 11.364423076923076, "grad_norm": 0.17178571224212646, "learning_rate": 1.1953041218478807e-05, "loss": 0.0012, "step": 11819 }, { "epoch": 11.365384615384615, "grad_norm": 0.09940546751022339, "learning_rate": 1.1951819516512541e-05, "loss": 0.0006, "step": 11820 }, { "epoch": 11.366346153846154, "grad_norm": 0.2861263155937195, "learning_rate": 1.1950597784259768e-05, "loss": 0.0019, "step": 11821 }, { "epoch": 11.367307692307692, "grad_norm": 3.679194927215576, "learning_rate": 1.194937602173944e-05, "loss": 0.0944, "step": 11822 }, { "epoch": 11.368269230769231, "grad_norm": 0.5084322094917297, "learning_rate": 1.1948154228970518e-05, "loss": 0.0021, "step": 11823 }, { "epoch": 11.36923076923077, "grad_norm": 0.15313845872879028, "learning_rate": 1.194693240597196e-05, "loss": 0.0013, "step": 11824 }, { "epoch": 11.370192307692308, "grad_norm": 0.8509141206741333, "learning_rate": 1.1945710552762726e-05, "loss": 0.0037, "step": 11825 }, { "epoch": 11.371153846153845, "grad_norm": 1.5803561210632324, "learning_rate": 1.1944488669361775e-05, "loss": 0.0082, "step": 11826 }, { "epoch": 11.372115384615384, "grad_norm": 0.5556684136390686, "learning_rate": 1.1943266755788068e-05, "loss": 0.0038, "step": 11827 }, { "epoch": 11.373076923076923, "grad_norm": 1.2141776084899902, "learning_rate": 1.194204481206056e-05, "loss": 0.0047, "step": 11828 }, { "epoch": 11.374038461538461, "grad_norm": 1.274605393409729, "learning_rate": 1.194082283819822e-05, "loss": 0.0088, "step": 11829 }, { "epoch": 11.375, "grad_norm": 0.1704736202955246, "learning_rate": 1.1939600834220007e-05, "loss": 0.0012, "step": 11830 }, { "epoch": 11.375961538461539, "grad_norm": 1.242430329322815, "learning_rate": 1.1938378800144877e-05, "loss": 0.0219, "step": 11831 }, { "epoch": 11.376923076923077, "grad_norm": 1.6687099933624268, "learning_rate": 1.19371567359918e-05, "loss": 0.0085, "step": 11832 }, { "epoch": 11.377884615384616, "grad_norm": 0.08635354042053223, "learning_rate": 1.1935934641779735e-05, "loss": 0.0006, "step": 11833 }, { "epoch": 11.378846153846155, "grad_norm": 3.9441630840301514, "learning_rate": 1.193471251752765e-05, "loss": 0.0347, "step": 11834 }, { "epoch": 11.379807692307692, "grad_norm": 3.021218776702881, "learning_rate": 1.1933490363254499e-05, "loss": 0.0216, "step": 11835 }, { "epoch": 11.38076923076923, "grad_norm": 0.0284106507897377, "learning_rate": 1.1932268178979256e-05, "loss": 0.0002, "step": 11836 }, { "epoch": 11.381730769230769, "grad_norm": 1.4920235872268677, "learning_rate": 1.1931045964720882e-05, "loss": 0.0761, "step": 11837 }, { "epoch": 11.382692307692308, "grad_norm": 1.4807926416397095, "learning_rate": 1.1929823720498341e-05, "loss": 0.0182, "step": 11838 }, { "epoch": 11.383653846153846, "grad_norm": 4.182583808898926, "learning_rate": 1.1928601446330599e-05, "loss": 0.0394, "step": 11839 }, { "epoch": 11.384615384615385, "grad_norm": 3.1752402782440186, "learning_rate": 1.1927379142236623e-05, "loss": 0.0779, "step": 11840 }, { "epoch": 11.385576923076924, "grad_norm": 1.2727339267730713, "learning_rate": 1.1926156808235383e-05, "loss": 0.0067, "step": 11841 }, { "epoch": 11.386538461538462, "grad_norm": 0.08460153639316559, "learning_rate": 1.1924934444345838e-05, "loss": 0.0005, "step": 11842 }, { "epoch": 11.3875, "grad_norm": 3.4655683040618896, "learning_rate": 1.1923712050586964e-05, "loss": 0.0143, "step": 11843 }, { "epoch": 11.388461538461538, "grad_norm": 1.347795844078064, "learning_rate": 1.1922489626977723e-05, "loss": 0.0039, "step": 11844 }, { "epoch": 11.389423076923077, "grad_norm": 0.02953910082578659, "learning_rate": 1.1921267173537085e-05, "loss": 0.0003, "step": 11845 }, { "epoch": 11.390384615384615, "grad_norm": 1.9755146503448486, "learning_rate": 1.192004469028402e-05, "loss": 0.0202, "step": 11846 }, { "epoch": 11.391346153846154, "grad_norm": 3.351043224334717, "learning_rate": 1.1918822177237497e-05, "loss": 0.0841, "step": 11847 }, { "epoch": 11.392307692307693, "grad_norm": 1.2269706726074219, "learning_rate": 1.1917599634416486e-05, "loss": 0.0061, "step": 11848 }, { "epoch": 11.393269230769231, "grad_norm": 3.7649965286254883, "learning_rate": 1.1916377061839955e-05, "loss": 0.0439, "step": 11849 }, { "epoch": 11.39423076923077, "grad_norm": 3.015404462814331, "learning_rate": 1.1915154459526876e-05, "loss": 0.018, "step": 11850 }, { "epoch": 11.395192307692307, "grad_norm": 0.3071654438972473, "learning_rate": 1.1913931827496223e-05, "loss": 0.0021, "step": 11851 }, { "epoch": 11.396153846153846, "grad_norm": 0.3668300211429596, "learning_rate": 1.1912709165766965e-05, "loss": 0.0019, "step": 11852 }, { "epoch": 11.397115384615384, "grad_norm": 2.0513577461242676, "learning_rate": 1.1911486474358074e-05, "loss": 0.0735, "step": 11853 }, { "epoch": 11.398076923076923, "grad_norm": 0.48729225993156433, "learning_rate": 1.1910263753288522e-05, "loss": 0.0018, "step": 11854 }, { "epoch": 11.399038461538462, "grad_norm": 0.6535379886627197, "learning_rate": 1.1909041002577287e-05, "loss": 0.0222, "step": 11855 }, { "epoch": 11.4, "grad_norm": 1.4945429563522339, "learning_rate": 1.1907818222243335e-05, "loss": 0.0368, "step": 11856 }, { "epoch": 11.400961538461539, "grad_norm": 1.1635253429412842, "learning_rate": 1.1906595412305642e-05, "loss": 0.0096, "step": 11857 }, { "epoch": 11.401923076923078, "grad_norm": 0.7953274846076965, "learning_rate": 1.1905372572783187e-05, "loss": 0.0024, "step": 11858 }, { "epoch": 11.402884615384615, "grad_norm": 0.058305054903030396, "learning_rate": 1.1904149703694943e-05, "loss": 0.0003, "step": 11859 }, { "epoch": 11.403846153846153, "grad_norm": 0.1293456256389618, "learning_rate": 1.190292680505988e-05, "loss": 0.0009, "step": 11860 }, { "epoch": 11.404807692307692, "grad_norm": 0.2982935905456543, "learning_rate": 1.1901703876896979e-05, "loss": 0.0018, "step": 11861 }, { "epoch": 11.40576923076923, "grad_norm": 3.5409491062164307, "learning_rate": 1.1900480919225219e-05, "loss": 0.143, "step": 11862 }, { "epoch": 11.40673076923077, "grad_norm": 0.6013526320457458, "learning_rate": 1.189925793206357e-05, "loss": 0.0031, "step": 11863 }, { "epoch": 11.407692307692308, "grad_norm": 0.1828155219554901, "learning_rate": 1.1898034915431012e-05, "loss": 0.0012, "step": 11864 }, { "epoch": 11.408653846153847, "grad_norm": 4.012914180755615, "learning_rate": 1.1896811869346525e-05, "loss": 0.0505, "step": 11865 }, { "epoch": 11.409615384615385, "grad_norm": 0.9722942113876343, "learning_rate": 1.1895588793829082e-05, "loss": 0.0044, "step": 11866 }, { "epoch": 11.410576923076922, "grad_norm": 1.1920619010925293, "learning_rate": 1.1894365688897666e-05, "loss": 0.0081, "step": 11867 }, { "epoch": 11.411538461538461, "grad_norm": 0.5528777241706848, "learning_rate": 1.1893142554571254e-05, "loss": 0.0032, "step": 11868 }, { "epoch": 11.4125, "grad_norm": 0.48424893617630005, "learning_rate": 1.1891919390868826e-05, "loss": 0.0026, "step": 11869 }, { "epoch": 11.413461538461538, "grad_norm": 0.21882127225399017, "learning_rate": 1.1890696197809365e-05, "loss": 0.001, "step": 11870 }, { "epoch": 11.414423076923077, "grad_norm": 0.035136930644512177, "learning_rate": 1.1889472975411843e-05, "loss": 0.0004, "step": 11871 }, { "epoch": 11.415384615384616, "grad_norm": 2.854832887649536, "learning_rate": 1.1888249723695246e-05, "loss": 0.0197, "step": 11872 }, { "epoch": 11.416346153846154, "grad_norm": 1.3173940181732178, "learning_rate": 1.188702644267856e-05, "loss": 0.0047, "step": 11873 }, { "epoch": 11.417307692307693, "grad_norm": 2.901353120803833, "learning_rate": 1.188580313238076e-05, "loss": 0.0392, "step": 11874 }, { "epoch": 11.41826923076923, "grad_norm": 0.11594639718532562, "learning_rate": 1.1884579792820827e-05, "loss": 0.0008, "step": 11875 }, { "epoch": 11.419230769230769, "grad_norm": 0.056074079126119614, "learning_rate": 1.188335642401775e-05, "loss": 0.0006, "step": 11876 }, { "epoch": 11.420192307692307, "grad_norm": 1.0430837869644165, "learning_rate": 1.1882133025990508e-05, "loss": 0.0055, "step": 11877 }, { "epoch": 11.421153846153846, "grad_norm": 0.47200465202331543, "learning_rate": 1.1880909598758086e-05, "loss": 0.003, "step": 11878 }, { "epoch": 11.422115384615385, "grad_norm": 0.5260982513427734, "learning_rate": 1.1879686142339466e-05, "loss": 0.0039, "step": 11879 }, { "epoch": 11.423076923076923, "grad_norm": 0.26537513732910156, "learning_rate": 1.1878462656753636e-05, "loss": 0.0022, "step": 11880 }, { "epoch": 11.424038461538462, "grad_norm": 1.7761701345443726, "learning_rate": 1.1877239142019577e-05, "loss": 0.0167, "step": 11881 }, { "epoch": 11.425, "grad_norm": 0.32187339663505554, "learning_rate": 1.1876015598156274e-05, "loss": 0.0031, "step": 11882 }, { "epoch": 11.425961538461538, "grad_norm": 0.2679169178009033, "learning_rate": 1.1874792025182718e-05, "loss": 0.0014, "step": 11883 }, { "epoch": 11.426923076923076, "grad_norm": 0.2679169178009033, "learning_rate": 1.1873568423117894e-05, "loss": 0.0182, "step": 11884 }, { "epoch": 11.427884615384615, "grad_norm": 0.4746445417404175, "learning_rate": 1.1873568423117894e-05, "loss": 0.0026, "step": 11885 }, { "epoch": 11.428846153846154, "grad_norm": 1.2986735105514526, "learning_rate": 1.1872344791980783e-05, "loss": 0.0123, "step": 11886 }, { "epoch": 11.429807692307692, "grad_norm": 0.10161647945642471, "learning_rate": 1.1871121131790378e-05, "loss": 0.0008, "step": 11887 }, { "epoch": 11.430769230769231, "grad_norm": 3.2606325149536133, "learning_rate": 1.1869897442565665e-05, "loss": 0.0911, "step": 11888 }, { "epoch": 11.43173076923077, "grad_norm": 1.7359199523925781, "learning_rate": 1.1868673724325632e-05, "loss": 0.0541, "step": 11889 }, { "epoch": 11.432692307692308, "grad_norm": 0.050237663090229034, "learning_rate": 1.1867449977089264e-05, "loss": 0.0005, "step": 11890 }, { "epoch": 11.433653846153845, "grad_norm": 0.13498954474925995, "learning_rate": 1.1866226200875557e-05, "loss": 0.0006, "step": 11891 }, { "epoch": 11.434615384615384, "grad_norm": 1.6633409261703491, "learning_rate": 1.1865002395703496e-05, "loss": 0.0087, "step": 11892 }, { "epoch": 11.435576923076923, "grad_norm": 5.089423179626465, "learning_rate": 1.1863778561592071e-05, "loss": 0.4157, "step": 11893 }, { "epoch": 11.436538461538461, "grad_norm": 3.7460415363311768, "learning_rate": 1.1862554698560272e-05, "loss": 0.0314, "step": 11894 }, { "epoch": 11.4375, "grad_norm": 1.5392893552780151, "learning_rate": 1.1861330806627092e-05, "loss": 0.0147, "step": 11895 }, { "epoch": 11.438461538461539, "grad_norm": 0.91646808385849, "learning_rate": 1.1860106885811522e-05, "loss": 0.0052, "step": 11896 }, { "epoch": 11.439423076923077, "grad_norm": 2.0035336017608643, "learning_rate": 1.185888293613255e-05, "loss": 0.0383, "step": 11897 }, { "epoch": 11.440384615384616, "grad_norm": 1.3976845741271973, "learning_rate": 1.185765895760917e-05, "loss": 0.025, "step": 11898 }, { "epoch": 11.441346153846155, "grad_norm": 2.8232645988464355, "learning_rate": 1.1856434950260378e-05, "loss": 0.0646, "step": 11899 }, { "epoch": 11.442307692307692, "grad_norm": 3.906554937362671, "learning_rate": 1.1855210914105162e-05, "loss": 0.0183, "step": 11900 }, { "epoch": 11.44326923076923, "grad_norm": 1.3315621614456177, "learning_rate": 1.1853986849162517e-05, "loss": 0.0065, "step": 11901 }, { "epoch": 11.444230769230769, "grad_norm": 2.8142812252044678, "learning_rate": 1.185276275545144e-05, "loss": 0.0447, "step": 11902 }, { "epoch": 11.445192307692308, "grad_norm": 0.9811984896659851, "learning_rate": 1.1851538632990922e-05, "loss": 0.006, "step": 11903 }, { "epoch": 11.446153846153846, "grad_norm": 0.1141442358493805, "learning_rate": 1.1850314481799958e-05, "loss": 0.001, "step": 11904 }, { "epoch": 11.447115384615385, "grad_norm": 2.0535829067230225, "learning_rate": 1.1849090301897544e-05, "loss": 0.0367, "step": 11905 }, { "epoch": 11.448076923076924, "grad_norm": 0.23507162928581238, "learning_rate": 1.1847866093302676e-05, "loss": 0.0013, "step": 11906 }, { "epoch": 11.449038461538462, "grad_norm": 0.07966838777065277, "learning_rate": 1.184664185603435e-05, "loss": 0.0009, "step": 11907 }, { "epoch": 11.45, "grad_norm": 1.6193236112594604, "learning_rate": 1.184541759011156e-05, "loss": 0.0153, "step": 11908 }, { "epoch": 11.450961538461538, "grad_norm": 0.9183610081672668, "learning_rate": 1.1844193295553307e-05, "loss": 0.0025, "step": 11909 }, { "epoch": 11.451923076923077, "grad_norm": 3.1389224529266357, "learning_rate": 1.1842968972378586e-05, "loss": 0.0161, "step": 11910 }, { "epoch": 11.452884615384615, "grad_norm": 1.5629584789276123, "learning_rate": 1.1841744620606396e-05, "loss": 0.0155, "step": 11911 }, { "epoch": 11.453846153846154, "grad_norm": 0.1245604157447815, "learning_rate": 1.1840520240255735e-05, "loss": 0.0017, "step": 11912 }, { "epoch": 11.454807692307693, "grad_norm": 1.0009007453918457, "learning_rate": 1.1839295831345602e-05, "loss": 0.0056, "step": 11913 }, { "epoch": 11.455769230769231, "grad_norm": 1.1469624042510986, "learning_rate": 1.1838071393894996e-05, "loss": 0.0051, "step": 11914 }, { "epoch": 11.45673076923077, "grad_norm": 0.7719247341156006, "learning_rate": 1.1836846927922917e-05, "loss": 0.0047, "step": 11915 }, { "epoch": 11.457692307692307, "grad_norm": 0.7572650909423828, "learning_rate": 1.1835622433448361e-05, "loss": 0.0043, "step": 11916 }, { "epoch": 11.458653846153846, "grad_norm": 0.3599148690700531, "learning_rate": 1.1834397910490338e-05, "loss": 0.0023, "step": 11917 }, { "epoch": 11.459615384615384, "grad_norm": 2.8843002319335938, "learning_rate": 1.183317335906784e-05, "loss": 0.0341, "step": 11918 }, { "epoch": 11.460576923076923, "grad_norm": 1.2545222043991089, "learning_rate": 1.1831948779199873e-05, "loss": 0.0054, "step": 11919 }, { "epoch": 11.461538461538462, "grad_norm": 2.133204698562622, "learning_rate": 1.1830724170905437e-05, "loss": 0.0168, "step": 11920 }, { "epoch": 11.4625, "grad_norm": 1.2816728353500366, "learning_rate": 1.1829499534203534e-05, "loss": 0.0079, "step": 11921 }, { "epoch": 11.463461538461539, "grad_norm": 1.1162523031234741, "learning_rate": 1.182827486911317e-05, "loss": 0.007, "step": 11922 }, { "epoch": 11.464423076923078, "grad_norm": 0.1937912106513977, "learning_rate": 1.1827050175653344e-05, "loss": 0.0016, "step": 11923 }, { "epoch": 11.465384615384615, "grad_norm": 0.797467052936554, "learning_rate": 1.1825825453843063e-05, "loss": 0.0038, "step": 11924 }, { "epoch": 11.466346153846153, "grad_norm": 1.5524425506591797, "learning_rate": 1.182460070370133e-05, "loss": 0.0092, "step": 11925 }, { "epoch": 11.467307692307692, "grad_norm": 2.389735460281372, "learning_rate": 1.1823375925247149e-05, "loss": 0.095, "step": 11926 }, { "epoch": 11.46826923076923, "grad_norm": 1.078076958656311, "learning_rate": 1.1822151118499522e-05, "loss": 0.0052, "step": 11927 }, { "epoch": 11.46923076923077, "grad_norm": 3.3504583835601807, "learning_rate": 1.182092628347746e-05, "loss": 0.0608, "step": 11928 }, { "epoch": 11.470192307692308, "grad_norm": 1.8828083276748657, "learning_rate": 1.181970142019997e-05, "loss": 0.0115, "step": 11929 }, { "epoch": 11.471153846153847, "grad_norm": 0.8843384981155396, "learning_rate": 1.181847652868605e-05, "loss": 0.0123, "step": 11930 }, { "epoch": 11.472115384615385, "grad_norm": 0.7294244170188904, "learning_rate": 1.1817251608954712e-05, "loss": 0.0056, "step": 11931 }, { "epoch": 11.473076923076922, "grad_norm": 0.02179783023893833, "learning_rate": 1.1816026661024962e-05, "loss": 0.0002, "step": 11932 }, { "epoch": 11.474038461538461, "grad_norm": 0.08364560455083847, "learning_rate": 1.1814801684915812e-05, "loss": 0.0005, "step": 11933 }, { "epoch": 11.475, "grad_norm": 2.3975892066955566, "learning_rate": 1.1813576680646262e-05, "loss": 0.0151, "step": 11934 }, { "epoch": 11.475961538461538, "grad_norm": 1.557342290878296, "learning_rate": 1.1812351648235327e-05, "loss": 0.0379, "step": 11935 }, { "epoch": 11.476923076923077, "grad_norm": 2.789271354675293, "learning_rate": 1.1811126587702014e-05, "loss": 0.0213, "step": 11936 }, { "epoch": 11.477884615384616, "grad_norm": 1.1178404092788696, "learning_rate": 1.180990149906533e-05, "loss": 0.0055, "step": 11937 }, { "epoch": 11.478846153846154, "grad_norm": 0.04073278233408928, "learning_rate": 1.1808676382344286e-05, "loss": 0.0005, "step": 11938 }, { "epoch": 11.479807692307693, "grad_norm": 0.31613391637802124, "learning_rate": 1.1807451237557896e-05, "loss": 0.0012, "step": 11939 }, { "epoch": 11.48076923076923, "grad_norm": 2.6920464038848877, "learning_rate": 1.1806226064725165e-05, "loss": 0.0359, "step": 11940 }, { "epoch": 11.481730769230769, "grad_norm": 0.0801040306687355, "learning_rate": 1.1805000863865108e-05, "loss": 0.0005, "step": 11941 }, { "epoch": 11.482692307692307, "grad_norm": 0.34871557354927063, "learning_rate": 1.1803775634996735e-05, "loss": 0.0024, "step": 11942 }, { "epoch": 11.483653846153846, "grad_norm": 2.2008180618286133, "learning_rate": 1.180255037813906e-05, "loss": 0.0594, "step": 11943 }, { "epoch": 11.484615384615385, "grad_norm": 1.9371806383132935, "learning_rate": 1.1801325093311091e-05, "loss": 0.0625, "step": 11944 }, { "epoch": 11.485576923076923, "grad_norm": 1.9166990518569946, "learning_rate": 1.1800099780531845e-05, "loss": 0.0191, "step": 11945 }, { "epoch": 11.486538461538462, "grad_norm": 0.20744748413562775, "learning_rate": 1.1798874439820332e-05, "loss": 0.0018, "step": 11946 }, { "epoch": 11.4875, "grad_norm": 2.9006025791168213, "learning_rate": 1.179764907119557e-05, "loss": 0.0331, "step": 11947 }, { "epoch": 11.488461538461538, "grad_norm": 0.9834652543067932, "learning_rate": 1.1796423674676566e-05, "loss": 0.0054, "step": 11948 }, { "epoch": 11.489423076923076, "grad_norm": 2.196347951889038, "learning_rate": 1.1795198250282342e-05, "loss": 0.0104, "step": 11949 }, { "epoch": 11.490384615384615, "grad_norm": 0.35777151584625244, "learning_rate": 1.179397279803191e-05, "loss": 0.0024, "step": 11950 }, { "epoch": 11.491346153846154, "grad_norm": 0.08112925291061401, "learning_rate": 1.1792747317944286e-05, "loss": 0.0005, "step": 11951 }, { "epoch": 11.492307692307692, "grad_norm": 2.5544097423553467, "learning_rate": 1.1791521810038481e-05, "loss": 0.0616, "step": 11952 }, { "epoch": 11.493269230769231, "grad_norm": 1.2827987670898438, "learning_rate": 1.1790296274333521e-05, "loss": 0.0119, "step": 11953 }, { "epoch": 11.49423076923077, "grad_norm": 1.502285361289978, "learning_rate": 1.1789070710848416e-05, "loss": 0.0162, "step": 11954 }, { "epoch": 11.495192307692308, "grad_norm": 2.0238864421844482, "learning_rate": 1.1787845119602184e-05, "loss": 0.0263, "step": 11955 }, { "epoch": 11.496153846153845, "grad_norm": 0.2667442560195923, "learning_rate": 1.1786619500613841e-05, "loss": 0.0015, "step": 11956 }, { "epoch": 11.497115384615384, "grad_norm": 0.7444959282875061, "learning_rate": 1.178539385390241e-05, "loss": 0.005, "step": 11957 }, { "epoch": 11.498076923076923, "grad_norm": 0.4968129098415375, "learning_rate": 1.1784168179486906e-05, "loss": 0.0026, "step": 11958 }, { "epoch": 11.499038461538461, "grad_norm": 1.0606160163879395, "learning_rate": 1.1782942477386348e-05, "loss": 0.0186, "step": 11959 }, { "epoch": 11.5, "grad_norm": 1.0220445394515991, "learning_rate": 1.1781716747619753e-05, "loss": 0.0088, "step": 11960 }, { "epoch": 11.500961538461539, "grad_norm": 1.4771298170089722, "learning_rate": 1.1780490990206146e-05, "loss": 0.0269, "step": 11961 }, { "epoch": 11.501923076923077, "grad_norm": 0.46878132224082947, "learning_rate": 1.1779265205164545e-05, "loss": 0.0025, "step": 11962 }, { "epoch": 11.502884615384616, "grad_norm": 0.9775708913803101, "learning_rate": 1.1778039392513966e-05, "loss": 0.0049, "step": 11963 }, { "epoch": 11.503846153846155, "grad_norm": 0.5548266768455505, "learning_rate": 1.1776813552273438e-05, "loss": 0.0027, "step": 11964 }, { "epoch": 11.504807692307692, "grad_norm": 1.3633688688278198, "learning_rate": 1.1775587684461977e-05, "loss": 0.0191, "step": 11965 }, { "epoch": 11.50576923076923, "grad_norm": 1.6293998956680298, "learning_rate": 1.177436178909861e-05, "loss": 0.0101, "step": 11966 }, { "epoch": 11.506730769230769, "grad_norm": 2.3566226959228516, "learning_rate": 1.177313586620235e-05, "loss": 0.0202, "step": 11967 }, { "epoch": 11.507692307692308, "grad_norm": 1.034687876701355, "learning_rate": 1.177190991579223e-05, "loss": 0.006, "step": 11968 }, { "epoch": 11.508653846153846, "grad_norm": 2.2048990726470947, "learning_rate": 1.1770683937887266e-05, "loss": 0.0603, "step": 11969 }, { "epoch": 11.509615384615385, "grad_norm": 3.1299805641174316, "learning_rate": 1.1769457932506487e-05, "loss": 0.0227, "step": 11970 }, { "epoch": 11.510576923076924, "grad_norm": 3.980588912963867, "learning_rate": 1.176823189966891e-05, "loss": 0.0385, "step": 11971 }, { "epoch": 11.51153846153846, "grad_norm": 0.17287448048591614, "learning_rate": 1.1767005839393565e-05, "loss": 0.001, "step": 11972 }, { "epoch": 11.5125, "grad_norm": 1.1082477569580078, "learning_rate": 1.1765779751699478e-05, "loss": 0.0128, "step": 11973 }, { "epoch": 11.513461538461538, "grad_norm": 1.1854054927825928, "learning_rate": 1.1764553636605669e-05, "loss": 0.0057, "step": 11974 }, { "epoch": 11.514423076923077, "grad_norm": 0.460456520318985, "learning_rate": 1.176332749413117e-05, "loss": 0.0022, "step": 11975 }, { "epoch": 11.515384615384615, "grad_norm": 0.11380220204591751, "learning_rate": 1.1762101324294998e-05, "loss": 0.0008, "step": 11976 }, { "epoch": 11.516346153846154, "grad_norm": 0.040376439690589905, "learning_rate": 1.1760875127116192e-05, "loss": 0.0004, "step": 11977 }, { "epoch": 11.517307692307693, "grad_norm": 0.8547381162643433, "learning_rate": 1.1759648902613765e-05, "loss": 0.0035, "step": 11978 }, { "epoch": 11.518269230769231, "grad_norm": 0.8972680568695068, "learning_rate": 1.1758422650806756e-05, "loss": 0.0037, "step": 11979 }, { "epoch": 11.51923076923077, "grad_norm": 0.29397234320640564, "learning_rate": 1.1757196371714187e-05, "loss": 0.0016, "step": 11980 }, { "epoch": 11.520192307692307, "grad_norm": 1.8195462226867676, "learning_rate": 1.1755970065355087e-05, "loss": 0.0173, "step": 11981 }, { "epoch": 11.521153846153846, "grad_norm": 0.16043265163898468, "learning_rate": 1.1754743731748487e-05, "loss": 0.0009, "step": 11982 }, { "epoch": 11.522115384615384, "grad_norm": 9.689574241638184, "learning_rate": 1.1753517370913414e-05, "loss": 0.0575, "step": 11983 }, { "epoch": 11.523076923076923, "grad_norm": 1.5833946466445923, "learning_rate": 1.1752290982868899e-05, "loss": 0.0096, "step": 11984 }, { "epoch": 11.524038461538462, "grad_norm": 1.2280687093734741, "learning_rate": 1.1751064567633968e-05, "loss": 0.0066, "step": 11985 }, { "epoch": 11.525, "grad_norm": 0.1434255689382553, "learning_rate": 1.1749838125227654e-05, "loss": 0.0013, "step": 11986 }, { "epoch": 11.525961538461539, "grad_norm": 1.9505900144577026, "learning_rate": 1.1748611655668992e-05, "loss": 0.0404, "step": 11987 }, { "epoch": 11.526923076923078, "grad_norm": 0.11924667656421661, "learning_rate": 1.1747385158977008e-05, "loss": 0.001, "step": 11988 }, { "epoch": 11.527884615384615, "grad_norm": 0.8709274530410767, "learning_rate": 1.1746158635170732e-05, "loss": 0.0061, "step": 11989 }, { "epoch": 11.528846153846153, "grad_norm": 0.4350533187389374, "learning_rate": 1.17449320842692e-05, "loss": 0.0023, "step": 11990 }, { "epoch": 11.529807692307692, "grad_norm": 0.9551132917404175, "learning_rate": 1.1743705506291445e-05, "loss": 0.0049, "step": 11991 }, { "epoch": 11.53076923076923, "grad_norm": 2.5233516693115234, "learning_rate": 1.1742478901256496e-05, "loss": 0.0169, "step": 11992 }, { "epoch": 11.53173076923077, "grad_norm": 1.480802297592163, "learning_rate": 1.174125226918339e-05, "loss": 0.0144, "step": 11993 }, { "epoch": 11.532692307692308, "grad_norm": 2.303577423095703, "learning_rate": 1.174002561009116e-05, "loss": 0.0512, "step": 11994 }, { "epoch": 11.533653846153847, "grad_norm": 0.3008725941181183, "learning_rate": 1.173879892399884e-05, "loss": 0.0009, "step": 11995 }, { "epoch": 11.534615384615385, "grad_norm": 1.174712061882019, "learning_rate": 1.1737572210925459e-05, "loss": 0.0233, "step": 11996 }, { "epoch": 11.535576923076922, "grad_norm": 2.3816282749176025, "learning_rate": 1.1736345470890062e-05, "loss": 0.0218, "step": 11997 }, { "epoch": 11.536538461538461, "grad_norm": 0.3761586546897888, "learning_rate": 1.1735118703911679e-05, "loss": 0.002, "step": 11998 }, { "epoch": 11.5375, "grad_norm": 1.1303435564041138, "learning_rate": 1.1733891910009345e-05, "loss": 0.0257, "step": 11999 }, { "epoch": 11.538461538461538, "grad_norm": 0.20808836817741394, "learning_rate": 1.1732665089202095e-05, "loss": 0.0014, "step": 12000 }, { "epoch": 11.539423076923077, "grad_norm": 0.514900267124176, "learning_rate": 1.1731438241508973e-05, "loss": 0.0017, "step": 12001 }, { "epoch": 11.540384615384616, "grad_norm": 0.5857588648796082, "learning_rate": 1.173021136694901e-05, "loss": 0.0037, "step": 12002 }, { "epoch": 11.541346153846154, "grad_norm": 0.21703986823558807, "learning_rate": 1.1728984465541243e-05, "loss": 0.0012, "step": 12003 }, { "epoch": 11.542307692307693, "grad_norm": 0.22087028622627258, "learning_rate": 1.1727757537304712e-05, "loss": 0.0012, "step": 12004 }, { "epoch": 11.54326923076923, "grad_norm": 2.0240767002105713, "learning_rate": 1.1726530582258455e-05, "loss": 0.0262, "step": 12005 }, { "epoch": 11.544230769230769, "grad_norm": 2.515225648880005, "learning_rate": 1.1725303600421511e-05, "loss": 0.0242, "step": 12006 }, { "epoch": 11.545192307692307, "grad_norm": 2.275585174560547, "learning_rate": 1.1724076591812919e-05, "loss": 0.0204, "step": 12007 }, { "epoch": 11.546153846153846, "grad_norm": 0.249825119972229, "learning_rate": 1.1722849556451716e-05, "loss": 0.0017, "step": 12008 }, { "epoch": 11.547115384615385, "grad_norm": 1.3963934183120728, "learning_rate": 1.1721622494356948e-05, "loss": 0.0089, "step": 12009 }, { "epoch": 11.548076923076923, "grad_norm": 1.1726552248001099, "learning_rate": 1.172039540554765e-05, "loss": 0.015, "step": 12010 }, { "epoch": 11.549038461538462, "grad_norm": 0.06616012752056122, "learning_rate": 1.1719168290042864e-05, "loss": 0.0005, "step": 12011 }, { "epoch": 11.55, "grad_norm": 0.6528984904289246, "learning_rate": 1.1717941147861633e-05, "loss": 0.0044, "step": 12012 }, { "epoch": 11.55096153846154, "grad_norm": 0.8865340948104858, "learning_rate": 1.1716713979022998e-05, "loss": 0.0115, "step": 12013 }, { "epoch": 11.551923076923076, "grad_norm": 1.8452974557876587, "learning_rate": 1.1715486783546e-05, "loss": 0.0088, "step": 12014 }, { "epoch": 11.552884615384615, "grad_norm": 1.9206596612930298, "learning_rate": 1.171425956144968e-05, "loss": 0.0093, "step": 12015 }, { "epoch": 11.553846153846154, "grad_norm": 3.1645188331604004, "learning_rate": 1.1713032312753086e-05, "loss": 0.0588, "step": 12016 }, { "epoch": 11.554807692307692, "grad_norm": 0.042517196387052536, "learning_rate": 1.1711805037475257e-05, "loss": 0.0005, "step": 12017 }, { "epoch": 11.555769230769231, "grad_norm": 0.1377817839384079, "learning_rate": 1.1710577735635238e-05, "loss": 0.0009, "step": 12018 }, { "epoch": 11.55673076923077, "grad_norm": 0.10598216205835342, "learning_rate": 1.1709350407252069e-05, "loss": 0.0005, "step": 12019 }, { "epoch": 11.557692307692308, "grad_norm": 0.7536218166351318, "learning_rate": 1.1708123052344803e-05, "loss": 0.0038, "step": 12020 }, { "epoch": 11.558653846153845, "grad_norm": 0.43603256344795227, "learning_rate": 1.1706895670932481e-05, "loss": 0.0015, "step": 12021 }, { "epoch": 11.559615384615384, "grad_norm": 2.988676071166992, "learning_rate": 1.1705668263034147e-05, "loss": 0.0268, "step": 12022 }, { "epoch": 11.560576923076923, "grad_norm": 1.9269400835037231, "learning_rate": 1.1704440828668846e-05, "loss": 0.0125, "step": 12023 }, { "epoch": 11.561538461538461, "grad_norm": 2.95552921295166, "learning_rate": 1.1703213367855629e-05, "loss": 0.0564, "step": 12024 }, { "epoch": 11.5625, "grad_norm": 0.8929288983345032, "learning_rate": 1.1701985880613538e-05, "loss": 0.002, "step": 12025 }, { "epoch": 11.563461538461539, "grad_norm": 1.1472337245941162, "learning_rate": 1.170075836696162e-05, "loss": 0.0027, "step": 12026 }, { "epoch": 11.564423076923077, "grad_norm": 0.17202971875667572, "learning_rate": 1.1699530826918924e-05, "loss": 0.0012, "step": 12027 }, { "epoch": 11.565384615384616, "grad_norm": 2.1027793884277344, "learning_rate": 1.16983032605045e-05, "loss": 0.0091, "step": 12028 }, { "epoch": 11.566346153846155, "grad_norm": 0.1928352564573288, "learning_rate": 1.1697075667737392e-05, "loss": 0.0009, "step": 12029 }, { "epoch": 11.567307692307692, "grad_norm": 2.5261778831481934, "learning_rate": 1.1695848048636649e-05, "loss": 0.0472, "step": 12030 }, { "epoch": 11.56826923076923, "grad_norm": 0.05162942782044411, "learning_rate": 1.1694620403221323e-05, "loss": 0.0004, "step": 12031 }, { "epoch": 11.569230769230769, "grad_norm": 2.561924457550049, "learning_rate": 1.1693392731510464e-05, "loss": 0.0305, "step": 12032 }, { "epoch": 11.570192307692308, "grad_norm": 0.16730858385562897, "learning_rate": 1.1692165033523117e-05, "loss": 0.0012, "step": 12033 }, { "epoch": 11.571153846153846, "grad_norm": 0.25544050335884094, "learning_rate": 1.1690937309278336e-05, "loss": 0.0015, "step": 12034 }, { "epoch": 11.572115384615385, "grad_norm": 0.8376131653785706, "learning_rate": 1.1689709558795173e-05, "loss": 0.0054, "step": 12035 }, { "epoch": 11.573076923076924, "grad_norm": 0.03697555884718895, "learning_rate": 1.1688481782092676e-05, "loss": 0.0004, "step": 12036 }, { "epoch": 11.57403846153846, "grad_norm": 0.33084070682525635, "learning_rate": 1.1687253979189895e-05, "loss": 0.003, "step": 12037 }, { "epoch": 11.575, "grad_norm": 3.5473885536193848, "learning_rate": 1.1686026150105884e-05, "loss": 0.0332, "step": 12038 }, { "epoch": 11.575961538461538, "grad_norm": 0.0645594373345375, "learning_rate": 1.1684798294859699e-05, "loss": 0.0006, "step": 12039 }, { "epoch": 11.576923076923077, "grad_norm": 0.4936906099319458, "learning_rate": 1.1683570413470384e-05, "loss": 0.0021, "step": 12040 }, { "epoch": 11.577884615384615, "grad_norm": 1.179428219795227, "learning_rate": 1.1682342505957003e-05, "loss": 0.0433, "step": 12041 }, { "epoch": 11.578846153846154, "grad_norm": 0.5631898045539856, "learning_rate": 1.1681114572338603e-05, "loss": 0.003, "step": 12042 }, { "epoch": 11.579807692307693, "grad_norm": 3.4153013229370117, "learning_rate": 1.1679886612634236e-05, "loss": 0.1692, "step": 12043 }, { "epoch": 11.580769230769231, "grad_norm": 0.8118518590927124, "learning_rate": 1.1678658626862959e-05, "loss": 0.0051, "step": 12044 }, { "epoch": 11.58173076923077, "grad_norm": 2.6670782566070557, "learning_rate": 1.167743061504383e-05, "loss": 0.0184, "step": 12045 }, { "epoch": 11.582692307692307, "grad_norm": 0.3968663513660431, "learning_rate": 1.1676202577195901e-05, "loss": 0.0018, "step": 12046 }, { "epoch": 11.583653846153846, "grad_norm": 0.7957424521446228, "learning_rate": 1.1674974513338226e-05, "loss": 0.0036, "step": 12047 }, { "epoch": 11.584615384615384, "grad_norm": 0.728478193283081, "learning_rate": 1.1673746423489862e-05, "loss": 0.0057, "step": 12048 }, { "epoch": 11.585576923076923, "grad_norm": 0.6707988977432251, "learning_rate": 1.167251830766987e-05, "loss": 0.004, "step": 12049 }, { "epoch": 11.586538461538462, "grad_norm": 0.8309881687164307, "learning_rate": 1.1671290165897297e-05, "loss": 0.0029, "step": 12050 }, { "epoch": 11.5875, "grad_norm": 0.9704452753067017, "learning_rate": 1.1670061998191207e-05, "loss": 0.0033, "step": 12051 }, { "epoch": 11.588461538461539, "grad_norm": 0.7050037980079651, "learning_rate": 1.1668833804570658e-05, "loss": 0.0051, "step": 12052 }, { "epoch": 11.589423076923078, "grad_norm": 1.5128834247589111, "learning_rate": 1.1667605585054708e-05, "loss": 0.0135, "step": 12053 }, { "epoch": 11.590384615384615, "grad_norm": 2.0233964920043945, "learning_rate": 1.1666377339662412e-05, "loss": 0.0131, "step": 12054 }, { "epoch": 11.591346153846153, "grad_norm": 0.047007735818624496, "learning_rate": 1.1665149068412829e-05, "loss": 0.0007, "step": 12055 }, { "epoch": 11.592307692307692, "grad_norm": 2.232757329940796, "learning_rate": 1.1663920771325022e-05, "loss": 0.0482, "step": 12056 }, { "epoch": 11.59326923076923, "grad_norm": 0.2287406623363495, "learning_rate": 1.1662692448418047e-05, "loss": 0.0009, "step": 12057 }, { "epoch": 11.59423076923077, "grad_norm": 1.617445945739746, "learning_rate": 1.1661464099710963e-05, "loss": 0.0135, "step": 12058 }, { "epoch": 11.595192307692308, "grad_norm": 0.7961991429328918, "learning_rate": 1.1660235725222835e-05, "loss": 0.0049, "step": 12059 }, { "epoch": 11.596153846153847, "grad_norm": 2.541308879852295, "learning_rate": 1.1659007324972722e-05, "loss": 0.0281, "step": 12060 }, { "epoch": 11.597115384615385, "grad_norm": 2.9515154361724854, "learning_rate": 1.1657778898979684e-05, "loss": 0.0707, "step": 12061 }, { "epoch": 11.598076923076922, "grad_norm": 0.5971046686172485, "learning_rate": 1.165655044726278e-05, "loss": 0.0027, "step": 12062 }, { "epoch": 11.599038461538461, "grad_norm": 6.32280969619751, "learning_rate": 1.1655321969841079e-05, "loss": 0.0995, "step": 12063 }, { "epoch": 11.6, "grad_norm": 1.851729393005371, "learning_rate": 1.1654093466733636e-05, "loss": 0.0128, "step": 12064 }, { "epoch": 11.600961538461538, "grad_norm": 0.3643939197063446, "learning_rate": 1.165286493795952e-05, "loss": 0.0024, "step": 12065 }, { "epoch": 11.601923076923077, "grad_norm": 1.6941550970077515, "learning_rate": 1.1651636383537788e-05, "loss": 0.0193, "step": 12066 }, { "epoch": 11.602884615384616, "grad_norm": 0.12830506265163422, "learning_rate": 1.1650407803487509e-05, "loss": 0.0015, "step": 12067 }, { "epoch": 11.603846153846154, "grad_norm": 0.7312121391296387, "learning_rate": 1.1649179197827744e-05, "loss": 0.0038, "step": 12068 }, { "epoch": 11.604807692307693, "grad_norm": 1.0625624656677246, "learning_rate": 1.1647950566577561e-05, "loss": 0.0053, "step": 12069 }, { "epoch": 11.60576923076923, "grad_norm": 0.9283000230789185, "learning_rate": 1.1646721909756019e-05, "loss": 0.0106, "step": 12070 }, { "epoch": 11.606730769230769, "grad_norm": 6.566211223602295, "learning_rate": 1.1645493227382184e-05, "loss": 0.0416, "step": 12071 }, { "epoch": 11.607692307692307, "grad_norm": 1.9449883699417114, "learning_rate": 1.164426451947513e-05, "loss": 0.0093, "step": 12072 }, { "epoch": 11.608653846153846, "grad_norm": 0.8683651089668274, "learning_rate": 1.1643035786053911e-05, "loss": 0.0098, "step": 12073 }, { "epoch": 11.609615384615385, "grad_norm": 0.1719404011964798, "learning_rate": 1.1641807027137599e-05, "loss": 0.0013, "step": 12074 }, { "epoch": 11.610576923076923, "grad_norm": 0.3999759554862976, "learning_rate": 1.1640578242745263e-05, "loss": 0.0024, "step": 12075 }, { "epoch": 11.611538461538462, "grad_norm": 1.683087706565857, "learning_rate": 1.1639349432895967e-05, "loss": 0.0458, "step": 12076 }, { "epoch": 11.6125, "grad_norm": 1.662100076675415, "learning_rate": 1.163812059760878e-05, "loss": 0.0079, "step": 12077 }, { "epoch": 11.61346153846154, "grad_norm": 2.3124935626983643, "learning_rate": 1.1636891736902766e-05, "loss": 0.0161, "step": 12078 }, { "epoch": 11.614423076923076, "grad_norm": 2.021066665649414, "learning_rate": 1.1635662850797001e-05, "loss": 0.0334, "step": 12079 }, { "epoch": 11.615384615384615, "grad_norm": 3.432248830795288, "learning_rate": 1.1634433939310548e-05, "loss": 0.0401, "step": 12080 }, { "epoch": 11.616346153846154, "grad_norm": 4.24747371673584, "learning_rate": 1.1633205002462473e-05, "loss": 0.1215, "step": 12081 }, { "epoch": 11.617307692307692, "grad_norm": 1.638861894607544, "learning_rate": 1.1631976040271855e-05, "loss": 0.0636, "step": 12082 }, { "epoch": 11.618269230769231, "grad_norm": 0.12812912464141846, "learning_rate": 1.1630747052757756e-05, "loss": 0.0007, "step": 12083 }, { "epoch": 11.61923076923077, "grad_norm": 1.0779263973236084, "learning_rate": 1.1629518039939249e-05, "loss": 0.0041, "step": 12084 }, { "epoch": 11.620192307692308, "grad_norm": 2.1317861080169678, "learning_rate": 1.1628289001835405e-05, "loss": 0.0451, "step": 12085 }, { "epoch": 11.621153846153845, "grad_norm": 0.10518977046012878, "learning_rate": 1.1627059938465296e-05, "loss": 0.0006, "step": 12086 }, { "epoch": 11.622115384615384, "grad_norm": 2.769683361053467, "learning_rate": 1.162583084984799e-05, "loss": 0.0202, "step": 12087 }, { "epoch": 11.623076923076923, "grad_norm": 0.49134042859077454, "learning_rate": 1.162460173600256e-05, "loss": 0.0026, "step": 12088 }, { "epoch": 11.624038461538461, "grad_norm": 1.6182091236114502, "learning_rate": 1.1623372596948084e-05, "loss": 0.0439, "step": 12089 }, { "epoch": 11.625, "grad_norm": 2.6123127937316895, "learning_rate": 1.1622143432703626e-05, "loss": 0.0334, "step": 12090 }, { "epoch": 11.625961538461539, "grad_norm": 0.14911460876464844, "learning_rate": 1.1620914243288261e-05, "loss": 0.0011, "step": 12091 }, { "epoch": 11.626923076923077, "grad_norm": 1.8569309711456299, "learning_rate": 1.1619685028721068e-05, "loss": 0.0108, "step": 12092 }, { "epoch": 11.627884615384616, "grad_norm": 0.08450841158628464, "learning_rate": 1.1618455789021117e-05, "loss": 0.0006, "step": 12093 }, { "epoch": 11.628846153846155, "grad_norm": 1.235069751739502, "learning_rate": 1.1617226524207484e-05, "loss": 0.0107, "step": 12094 }, { "epoch": 11.629807692307692, "grad_norm": 1.196071743965149, "learning_rate": 1.1615997234299237e-05, "loss": 0.0071, "step": 12095 }, { "epoch": 11.63076923076923, "grad_norm": 1.1177600622177124, "learning_rate": 1.1614767919315457e-05, "loss": 0.0078, "step": 12096 }, { "epoch": 11.631730769230769, "grad_norm": 0.0588502511382103, "learning_rate": 1.1613538579275221e-05, "loss": 0.0005, "step": 12097 }, { "epoch": 11.632692307692308, "grad_norm": 1.9124013185501099, "learning_rate": 1.1612309214197599e-05, "loss": 0.0717, "step": 12098 }, { "epoch": 11.633653846153846, "grad_norm": 0.05669961869716644, "learning_rate": 1.161107982410167e-05, "loss": 0.0007, "step": 12099 }, { "epoch": 11.634615384615385, "grad_norm": 0.6350682377815247, "learning_rate": 1.1609850409006513e-05, "loss": 0.0029, "step": 12100 }, { "epoch": 11.635576923076924, "grad_norm": 1.9158209562301636, "learning_rate": 1.1608620968931201e-05, "loss": 0.0478, "step": 12101 }, { "epoch": 11.63653846153846, "grad_norm": 0.3122759759426117, "learning_rate": 1.1607391503894812e-05, "loss": 0.0024, "step": 12102 }, { "epoch": 11.6375, "grad_norm": 1.3180357217788696, "learning_rate": 1.1606162013916426e-05, "loss": 0.01, "step": 12103 }, { "epoch": 11.638461538461538, "grad_norm": 0.29579445719718933, "learning_rate": 1.160493249901512e-05, "loss": 0.0021, "step": 12104 }, { "epoch": 11.639423076923077, "grad_norm": 1.5771082639694214, "learning_rate": 1.1603702959209973e-05, "loss": 0.007, "step": 12105 }, { "epoch": 11.640384615384615, "grad_norm": 1.6106817722320557, "learning_rate": 1.1602473394520058e-05, "loss": 0.0067, "step": 12106 }, { "epoch": 11.641346153846154, "grad_norm": 4.715485095977783, "learning_rate": 1.1601243804964464e-05, "loss": 0.0482, "step": 12107 }, { "epoch": 11.642307692307693, "grad_norm": 0.052562057971954346, "learning_rate": 1.1600014190562263e-05, "loss": 0.0004, "step": 12108 }, { "epoch": 11.643269230769231, "grad_norm": 1.6504769325256348, "learning_rate": 1.159878455133254e-05, "loss": 0.0095, "step": 12109 }, { "epoch": 11.64423076923077, "grad_norm": 0.13765369355678558, "learning_rate": 1.1597554887294371e-05, "loss": 0.001, "step": 12110 }, { "epoch": 11.645192307692307, "grad_norm": 2.684690475463867, "learning_rate": 1.1596325198466841e-05, "loss": 0.0175, "step": 12111 }, { "epoch": 11.646153846153846, "grad_norm": 0.27035269141197205, "learning_rate": 1.1595095484869028e-05, "loss": 0.0025, "step": 12112 }, { "epoch": 11.647115384615384, "grad_norm": 0.20539449155330658, "learning_rate": 1.1593865746520011e-05, "loss": 0.0015, "step": 12113 }, { "epoch": 11.648076923076923, "grad_norm": 4.868832588195801, "learning_rate": 1.159263598343888e-05, "loss": 0.0467, "step": 12114 }, { "epoch": 11.649038461538462, "grad_norm": 3.7488186359405518, "learning_rate": 1.1591406195644713e-05, "loss": 0.0485, "step": 12115 }, { "epoch": 11.65, "grad_norm": 0.6325000524520874, "learning_rate": 1.1590176383156593e-05, "loss": 0.0032, "step": 12116 }, { "epoch": 11.650961538461539, "grad_norm": 0.6304261684417725, "learning_rate": 1.1588946545993598e-05, "loss": 0.0058, "step": 12117 }, { "epoch": 11.651923076923078, "grad_norm": 1.667549729347229, "learning_rate": 1.1587716684174822e-05, "loss": 0.0091, "step": 12118 }, { "epoch": 11.652884615384615, "grad_norm": 1.2395447492599487, "learning_rate": 1.1586486797719338e-05, "loss": 0.0163, "step": 12119 }, { "epoch": 11.653846153846153, "grad_norm": 2.056043863296509, "learning_rate": 1.1585256886646239e-05, "loss": 0.0192, "step": 12120 }, { "epoch": 11.654807692307692, "grad_norm": 0.12178602814674377, "learning_rate": 1.1584026950974602e-05, "loss": 0.001, "step": 12121 }, { "epoch": 11.65576923076923, "grad_norm": 0.06923345476388931, "learning_rate": 1.158279699072352e-05, "loss": 0.0009, "step": 12122 }, { "epoch": 11.65673076923077, "grad_norm": 0.933881938457489, "learning_rate": 1.158156700591207e-05, "loss": 0.0075, "step": 12123 }, { "epoch": 11.657692307692308, "grad_norm": 0.2783129811286926, "learning_rate": 1.1580336996559343e-05, "loss": 0.002, "step": 12124 }, { "epoch": 11.658653846153847, "grad_norm": 0.15655753016471863, "learning_rate": 1.1579106962684426e-05, "loss": 0.0016, "step": 12125 }, { "epoch": 11.659615384615385, "grad_norm": 1.6351885795593262, "learning_rate": 1.1577876904306404e-05, "loss": 0.0235, "step": 12126 }, { "epoch": 11.660576923076922, "grad_norm": 1.3603417873382568, "learning_rate": 1.1576646821444361e-05, "loss": 0.0055, "step": 12127 }, { "epoch": 11.661538461538461, "grad_norm": 0.04973830282688141, "learning_rate": 1.1575416714117387e-05, "loss": 0.0004, "step": 12128 }, { "epoch": 11.6625, "grad_norm": 2.191542863845825, "learning_rate": 1.157418658234457e-05, "loss": 0.0211, "step": 12129 }, { "epoch": 11.663461538461538, "grad_norm": 4.1865620613098145, "learning_rate": 1.1572956426144999e-05, "loss": 0.0397, "step": 12130 }, { "epoch": 11.664423076923077, "grad_norm": 0.5171579122543335, "learning_rate": 1.1571726245537757e-05, "loss": 0.0027, "step": 12131 }, { "epoch": 11.665384615384616, "grad_norm": 0.0997408926486969, "learning_rate": 1.157049604054194e-05, "loss": 0.0006, "step": 12132 }, { "epoch": 11.666346153846154, "grad_norm": 2.3357913494110107, "learning_rate": 1.1569265811176634e-05, "loss": 0.0444, "step": 12133 }, { "epoch": 11.667307692307693, "grad_norm": 4.021185398101807, "learning_rate": 1.1568035557460928e-05, "loss": 0.0353, "step": 12134 }, { "epoch": 11.66826923076923, "grad_norm": 1.6206153631210327, "learning_rate": 1.1566805279413909e-05, "loss": 0.0251, "step": 12135 }, { "epoch": 11.669230769230769, "grad_norm": 0.5296703577041626, "learning_rate": 1.1565574977054675e-05, "loss": 0.0021, "step": 12136 }, { "epoch": 11.670192307692307, "grad_norm": 0.04334020987153053, "learning_rate": 1.156434465040231e-05, "loss": 0.0003, "step": 12137 }, { "epoch": 11.671153846153846, "grad_norm": 2.7645130157470703, "learning_rate": 1.1563114299475908e-05, "loss": 0.0266, "step": 12138 }, { "epoch": 11.672115384615385, "grad_norm": 1.0968601703643799, "learning_rate": 1.156188392429456e-05, "loss": 0.0079, "step": 12139 }, { "epoch": 11.673076923076923, "grad_norm": 0.25073477625846863, "learning_rate": 1.1560653524877356e-05, "loss": 0.0024, "step": 12140 }, { "epoch": 11.674038461538462, "grad_norm": 1.6729847192764282, "learning_rate": 1.1559423101243393e-05, "loss": 0.0163, "step": 12141 }, { "epoch": 11.675, "grad_norm": 0.22301480174064636, "learning_rate": 1.1558192653411755e-05, "loss": 0.0019, "step": 12142 }, { "epoch": 11.67596153846154, "grad_norm": 1.5234382152557373, "learning_rate": 1.1556962181401542e-05, "loss": 0.0282, "step": 12143 }, { "epoch": 11.676923076923076, "grad_norm": 2.1938862800598145, "learning_rate": 1.1555731685231848e-05, "loss": 0.0141, "step": 12144 }, { "epoch": 11.677884615384615, "grad_norm": 2.2735722064971924, "learning_rate": 1.1554501164921765e-05, "loss": 0.0481, "step": 12145 }, { "epoch": 11.678846153846154, "grad_norm": 0.6240689158439636, "learning_rate": 1.1553270620490382e-05, "loss": 0.0026, "step": 12146 }, { "epoch": 11.679807692307692, "grad_norm": 3.348621368408203, "learning_rate": 1.1552040051956801e-05, "loss": 0.0224, "step": 12147 }, { "epoch": 11.680769230769231, "grad_norm": 3.4970178604125977, "learning_rate": 1.1550809459340114e-05, "loss": 0.1085, "step": 12148 }, { "epoch": 11.68173076923077, "grad_norm": 1.1463433504104614, "learning_rate": 1.1549578842659415e-05, "loss": 0.0067, "step": 12149 }, { "epoch": 11.682692307692308, "grad_norm": 2.682800531387329, "learning_rate": 1.1548348201933799e-05, "loss": 0.025, "step": 12150 }, { "epoch": 11.683653846153845, "grad_norm": 0.08808385580778122, "learning_rate": 1.1547117537182364e-05, "loss": 0.0008, "step": 12151 }, { "epoch": 11.684615384615384, "grad_norm": 2.1917831897735596, "learning_rate": 1.1545886848424206e-05, "loss": 0.0277, "step": 12152 }, { "epoch": 11.685576923076923, "grad_norm": 2.1772730350494385, "learning_rate": 1.154465613567842e-05, "loss": 0.0283, "step": 12153 }, { "epoch": 11.686538461538461, "grad_norm": 1.5476444959640503, "learning_rate": 1.1543425398964106e-05, "loss": 0.0245, "step": 12154 }, { "epoch": 11.6875, "grad_norm": 1.7354029417037964, "learning_rate": 1.154219463830036e-05, "loss": 0.009, "step": 12155 }, { "epoch": 11.688461538461539, "grad_norm": 0.6820545792579651, "learning_rate": 1.1540963853706277e-05, "loss": 0.0012, "step": 12156 }, { "epoch": 11.689423076923077, "grad_norm": 0.5317140221595764, "learning_rate": 1.1539733045200959e-05, "loss": 0.0047, "step": 12157 }, { "epoch": 11.690384615384616, "grad_norm": 0.9291486740112305, "learning_rate": 1.1538502212803504e-05, "loss": 0.0042, "step": 12158 }, { "epoch": 11.691346153846155, "grad_norm": 0.12528640031814575, "learning_rate": 1.153727135653301e-05, "loss": 0.0012, "step": 12159 }, { "epoch": 11.692307692307692, "grad_norm": 0.7526221871376038, "learning_rate": 1.1536040476408576e-05, "loss": 0.0037, "step": 12160 }, { "epoch": 11.69326923076923, "grad_norm": 0.6652765274047852, "learning_rate": 1.1534809572449299e-05, "loss": 0.0032, "step": 12161 }, { "epoch": 11.694230769230769, "grad_norm": 0.5851998329162598, "learning_rate": 1.1533578644674287e-05, "loss": 0.0039, "step": 12162 }, { "epoch": 11.695192307692308, "grad_norm": 0.9917886257171631, "learning_rate": 1.1532347693102632e-05, "loss": 0.0043, "step": 12163 }, { "epoch": 11.696153846153846, "grad_norm": 1.2763359546661377, "learning_rate": 1.1531116717753438e-05, "loss": 0.0051, "step": 12164 }, { "epoch": 11.697115384615385, "grad_norm": 0.442485511302948, "learning_rate": 1.152988571864581e-05, "loss": 0.002, "step": 12165 }, { "epoch": 11.698076923076924, "grad_norm": 0.9502205848693848, "learning_rate": 1.1528654695798844e-05, "loss": 0.003, "step": 12166 }, { "epoch": 11.69903846153846, "grad_norm": 0.18962141871452332, "learning_rate": 1.1527423649231644e-05, "loss": 0.0018, "step": 12167 }, { "epoch": 11.7, "grad_norm": 0.24350173771381378, "learning_rate": 1.152619257896331e-05, "loss": 0.001, "step": 12168 }, { "epoch": 11.700961538461538, "grad_norm": 1.2907668352127075, "learning_rate": 1.1524961485012947e-05, "loss": 0.0079, "step": 12169 }, { "epoch": 11.701923076923077, "grad_norm": 2.1311140060424805, "learning_rate": 1.152373036739966e-05, "loss": 0.0508, "step": 12170 }, { "epoch": 11.702884615384615, "grad_norm": 1.3961619138717651, "learning_rate": 1.1522499226142548e-05, "loss": 0.0097, "step": 12171 }, { "epoch": 11.703846153846154, "grad_norm": 1.857836127281189, "learning_rate": 1.1521268061260716e-05, "loss": 0.0409, "step": 12172 }, { "epoch": 11.704807692307693, "grad_norm": 1.6801443099975586, "learning_rate": 1.1520036872773269e-05, "loss": 0.0164, "step": 12173 }, { "epoch": 11.705769230769231, "grad_norm": 0.18751521408557892, "learning_rate": 1.151880566069931e-05, "loss": 0.0017, "step": 12174 }, { "epoch": 11.70673076923077, "grad_norm": 3.2902255058288574, "learning_rate": 1.1517574425057945e-05, "loss": 0.0256, "step": 12175 }, { "epoch": 11.707692307692307, "grad_norm": 2.256413459777832, "learning_rate": 1.151634316586828e-05, "loss": 0.1049, "step": 12176 }, { "epoch": 11.708653846153846, "grad_norm": 1.376017451286316, "learning_rate": 1.1515111883149418e-05, "loss": 0.048, "step": 12177 }, { "epoch": 11.709615384615384, "grad_norm": 0.11459296941757202, "learning_rate": 1.1513880576920471e-05, "loss": 0.0008, "step": 12178 }, { "epoch": 11.710576923076923, "grad_norm": 0.11921392381191254, "learning_rate": 1.1512649247200534e-05, "loss": 0.0007, "step": 12179 }, { "epoch": 11.711538461538462, "grad_norm": 4.190530776977539, "learning_rate": 1.1511417894008725e-05, "loss": 0.0704, "step": 12180 }, { "epoch": 11.7125, "grad_norm": 1.8237138986587524, "learning_rate": 1.1510186517364143e-05, "loss": 0.0204, "step": 12181 }, { "epoch": 11.713461538461539, "grad_norm": 1.1720458269119263, "learning_rate": 1.15089551172859e-05, "loss": 0.0049, "step": 12182 }, { "epoch": 11.714423076923078, "grad_norm": 0.3472869396209717, "learning_rate": 1.1507723693793103e-05, "loss": 0.002, "step": 12183 }, { "epoch": 11.715384615384615, "grad_norm": 0.37669384479522705, "learning_rate": 1.150649224690486e-05, "loss": 0.0028, "step": 12184 }, { "epoch": 11.716346153846153, "grad_norm": 3.417907476425171, "learning_rate": 1.150526077664028e-05, "loss": 0.0281, "step": 12185 }, { "epoch": 11.717307692307692, "grad_norm": 0.979127049446106, "learning_rate": 1.1504029283018464e-05, "loss": 0.0077, "step": 12186 }, { "epoch": 11.71826923076923, "grad_norm": 0.029350565746426582, "learning_rate": 1.1502797766058533e-05, "loss": 0.0004, "step": 12187 }, { "epoch": 11.71923076923077, "grad_norm": 2.705709218978882, "learning_rate": 1.1501566225779592e-05, "loss": 0.0133, "step": 12188 }, { "epoch": 11.720192307692308, "grad_norm": 0.0792480856180191, "learning_rate": 1.150033466220075e-05, "loss": 0.0005, "step": 12189 }, { "epoch": 11.721153846153847, "grad_norm": 1.0781172513961792, "learning_rate": 1.1499103075341114e-05, "loss": 0.007, "step": 12190 }, { "epoch": 11.722115384615385, "grad_norm": 5.481060981750488, "learning_rate": 1.1497871465219801e-05, "loss": 0.1307, "step": 12191 }, { "epoch": 11.723076923076922, "grad_norm": 1.5397759675979614, "learning_rate": 1.149663983185592e-05, "loss": 0.0084, "step": 12192 }, { "epoch": 11.724038461538461, "grad_norm": 0.5033973455429077, "learning_rate": 1.1495408175268578e-05, "loss": 0.0021, "step": 12193 }, { "epoch": 11.725, "grad_norm": 0.8370183706283569, "learning_rate": 1.1494176495476891e-05, "loss": 0.0044, "step": 12194 }, { "epoch": 11.725961538461538, "grad_norm": 0.21324566006660461, "learning_rate": 1.1492944792499973e-05, "loss": 0.002, "step": 12195 }, { "epoch": 11.726923076923077, "grad_norm": 0.7904151082038879, "learning_rate": 1.1491713066356933e-05, "loss": 0.0046, "step": 12196 }, { "epoch": 11.727884615384616, "grad_norm": 0.0503234826028347, "learning_rate": 1.149048131706688e-05, "loss": 0.0004, "step": 12197 }, { "epoch": 11.728846153846154, "grad_norm": 3.116096258163452, "learning_rate": 1.1489249544648935e-05, "loss": 0.0267, "step": 12198 }, { "epoch": 11.729807692307693, "grad_norm": 1.8063478469848633, "learning_rate": 1.1488017749122209e-05, "loss": 0.0054, "step": 12199 }, { "epoch": 11.73076923076923, "grad_norm": 0.23573459684848785, "learning_rate": 1.1486785930505812e-05, "loss": 0.0015, "step": 12200 }, { "epoch": 11.731730769230769, "grad_norm": 0.11000070720911026, "learning_rate": 1.1485554088818862e-05, "loss": 0.001, "step": 12201 }, { "epoch": 11.732692307692307, "grad_norm": 2.0493886470794678, "learning_rate": 1.1484322224080474e-05, "loss": 0.0139, "step": 12202 }, { "epoch": 11.733653846153846, "grad_norm": 3.7824394702911377, "learning_rate": 1.148309033630976e-05, "loss": 0.1009, "step": 12203 }, { "epoch": 11.734615384615385, "grad_norm": 0.7016332745552063, "learning_rate": 1.1481858425525835e-05, "loss": 0.0046, "step": 12204 }, { "epoch": 11.735576923076923, "grad_norm": 2.5312042236328125, "learning_rate": 1.148062649174782e-05, "loss": 0.0251, "step": 12205 }, { "epoch": 11.736538461538462, "grad_norm": 5.017472267150879, "learning_rate": 1.1479394534994826e-05, "loss": 0.1371, "step": 12206 }, { "epoch": 11.7375, "grad_norm": 0.4697524905204773, "learning_rate": 1.1478162555285971e-05, "loss": 0.0042, "step": 12207 }, { "epoch": 11.73846153846154, "grad_norm": 2.343048572540283, "learning_rate": 1.147693055264037e-05, "loss": 0.0218, "step": 12208 }, { "epoch": 11.739423076923076, "grad_norm": 1.4881470203399658, "learning_rate": 1.1475698527077143e-05, "loss": 0.0197, "step": 12209 }, { "epoch": 11.740384615384615, "grad_norm": 0.028748750686645508, "learning_rate": 1.1474466478615406e-05, "loss": 0.0003, "step": 12210 }, { "epoch": 11.741346153846154, "grad_norm": 8.165172576904297, "learning_rate": 1.1473234407274278e-05, "loss": 0.0305, "step": 12211 }, { "epoch": 11.742307692307692, "grad_norm": 1.0017790794372559, "learning_rate": 1.1472002313072874e-05, "loss": 0.0053, "step": 12212 }, { "epoch": 11.743269230769231, "grad_norm": 0.3865683972835541, "learning_rate": 1.1470770196030314e-05, "loss": 0.002, "step": 12213 }, { "epoch": 11.74423076923077, "grad_norm": 0.11042308807373047, "learning_rate": 1.146953805616572e-05, "loss": 0.0009, "step": 12214 }, { "epoch": 11.745192307692308, "grad_norm": 0.476330429315567, "learning_rate": 1.1468305893498204e-05, "loss": 0.0021, "step": 12215 }, { "epoch": 11.746153846153845, "grad_norm": 0.4239274263381958, "learning_rate": 1.1467073708046894e-05, "loss": 0.0013, "step": 12216 }, { "epoch": 11.747115384615384, "grad_norm": 0.8877801895141602, "learning_rate": 1.1465841499830903e-05, "loss": 0.0039, "step": 12217 }, { "epoch": 11.748076923076923, "grad_norm": 1.397276520729065, "learning_rate": 1.1464609268869357e-05, "loss": 0.0094, "step": 12218 }, { "epoch": 11.749038461538461, "grad_norm": 4.7319464683532715, "learning_rate": 1.146337701518137e-05, "loss": 0.0673, "step": 12219 }, { "epoch": 11.75, "grad_norm": 1.4072916507720947, "learning_rate": 1.1462144738786069e-05, "loss": 0.0085, "step": 12220 }, { "epoch": 11.750961538461539, "grad_norm": 3.5181267261505127, "learning_rate": 1.1460912439702574e-05, "loss": 0.0256, "step": 12221 }, { "epoch": 11.751923076923077, "grad_norm": 3.7153680324554443, "learning_rate": 1.1459680117950002e-05, "loss": 0.0226, "step": 12222 }, { "epoch": 11.752884615384616, "grad_norm": 1.3794282674789429, "learning_rate": 1.1458447773547483e-05, "loss": 0.0119, "step": 12223 }, { "epoch": 11.753846153846155, "grad_norm": 0.18992702662944794, "learning_rate": 1.1457215406514132e-05, "loss": 0.0014, "step": 12224 }, { "epoch": 11.754807692307692, "grad_norm": 0.4423820972442627, "learning_rate": 1.1455983016869078e-05, "loss": 0.0033, "step": 12225 }, { "epoch": 11.75576923076923, "grad_norm": 1.1284990310668945, "learning_rate": 1.1454750604631438e-05, "loss": 0.0114, "step": 12226 }, { "epoch": 11.756730769230769, "grad_norm": 1.7468615770339966, "learning_rate": 1.1453518169820339e-05, "loss": 0.0643, "step": 12227 }, { "epoch": 11.757692307692308, "grad_norm": 1.094515085220337, "learning_rate": 1.1452285712454905e-05, "loss": 0.0049, "step": 12228 }, { "epoch": 11.758653846153846, "grad_norm": 2.04939341545105, "learning_rate": 1.1451053232554259e-05, "loss": 0.0422, "step": 12229 }, { "epoch": 11.759615384615385, "grad_norm": 3.8723161220550537, "learning_rate": 1.1449820730137521e-05, "loss": 0.0429, "step": 12230 }, { "epoch": 11.760576923076924, "grad_norm": 0.09187125414609909, "learning_rate": 1.1448588205223828e-05, "loss": 0.0006, "step": 12231 }, { "epoch": 11.76153846153846, "grad_norm": 1.5860154628753662, "learning_rate": 1.1447355657832292e-05, "loss": 0.0391, "step": 12232 }, { "epoch": 11.7625, "grad_norm": 2.099275827407837, "learning_rate": 1.1446123087982046e-05, "loss": 0.0262, "step": 12233 }, { "epoch": 11.763461538461538, "grad_norm": 2.7989978790283203, "learning_rate": 1.1444890495692214e-05, "loss": 0.0142, "step": 12234 }, { "epoch": 11.764423076923077, "grad_norm": 1.1307899951934814, "learning_rate": 1.1443657880981922e-05, "loss": 0.0038, "step": 12235 }, { "epoch": 11.765384615384615, "grad_norm": 0.12502071261405945, "learning_rate": 1.1442425243870299e-05, "loss": 0.0006, "step": 12236 }, { "epoch": 11.766346153846154, "grad_norm": 1.4366528987884521, "learning_rate": 1.1441192584376466e-05, "loss": 0.0077, "step": 12237 }, { "epoch": 11.767307692307693, "grad_norm": 1.9903403520584106, "learning_rate": 1.1439959902519555e-05, "loss": 0.0379, "step": 12238 }, { "epoch": 11.768269230769231, "grad_norm": 1.2802834510803223, "learning_rate": 1.1438727198318695e-05, "loss": 0.0074, "step": 12239 }, { "epoch": 11.76923076923077, "grad_norm": 2.4020755290985107, "learning_rate": 1.1437494471793008e-05, "loss": 0.0487, "step": 12240 }, { "epoch": 11.770192307692307, "grad_norm": 0.6828036308288574, "learning_rate": 1.1436261722961627e-05, "loss": 0.0033, "step": 12241 }, { "epoch": 11.771153846153846, "grad_norm": 0.5173073410987854, "learning_rate": 1.143502895184368e-05, "loss": 0.0027, "step": 12242 }, { "epoch": 11.772115384615384, "grad_norm": 0.3919115960597992, "learning_rate": 1.1433796158458297e-05, "loss": 0.0022, "step": 12243 }, { "epoch": 11.773076923076923, "grad_norm": 4.42440938949585, "learning_rate": 1.1432563342824603e-05, "loss": 0.1047, "step": 12244 }, { "epoch": 11.774038461538462, "grad_norm": 3.102856397628784, "learning_rate": 1.1431330504961733e-05, "loss": 0.0737, "step": 12245 }, { "epoch": 11.775, "grad_norm": 1.8181830644607544, "learning_rate": 1.1430097644888813e-05, "loss": 0.0139, "step": 12246 }, { "epoch": 11.775961538461539, "grad_norm": 2.4725501537323, "learning_rate": 1.1428864762624975e-05, "loss": 0.0317, "step": 12247 }, { "epoch": 11.776923076923078, "grad_norm": 0.10683537274599075, "learning_rate": 1.1427631858189347e-05, "loss": 0.0012, "step": 12248 }, { "epoch": 11.777884615384615, "grad_norm": 0.3064311444759369, "learning_rate": 1.1426398931601066e-05, "loss": 0.0019, "step": 12249 }, { "epoch": 11.778846153846153, "grad_norm": 0.4924425780773163, "learning_rate": 1.142516598287926e-05, "loss": 0.0032, "step": 12250 }, { "epoch": 11.779807692307692, "grad_norm": 1.1254957914352417, "learning_rate": 1.142393301204306e-05, "loss": 0.0069, "step": 12251 }, { "epoch": 11.78076923076923, "grad_norm": 1.194256067276001, "learning_rate": 1.1422700019111596e-05, "loss": 0.0063, "step": 12252 }, { "epoch": 11.78173076923077, "grad_norm": 2.7343578338623047, "learning_rate": 1.1421467004104007e-05, "loss": 0.0132, "step": 12253 }, { "epoch": 11.782692307692308, "grad_norm": 1.702536702156067, "learning_rate": 1.1420233967039423e-05, "loss": 0.0264, "step": 12254 }, { "epoch": 11.783653846153847, "grad_norm": 2.072559118270874, "learning_rate": 1.1419000907936973e-05, "loss": 0.0156, "step": 12255 }, { "epoch": 11.784615384615385, "grad_norm": 2.2939391136169434, "learning_rate": 1.1417767826815793e-05, "loss": 0.0209, "step": 12256 }, { "epoch": 11.785576923076922, "grad_norm": 0.28812137246131897, "learning_rate": 1.141653472369502e-05, "loss": 0.0011, "step": 12257 }, { "epoch": 11.786538461538461, "grad_norm": 2.4590249061584473, "learning_rate": 1.1415301598593786e-05, "loss": 0.0169, "step": 12258 }, { "epoch": 11.7875, "grad_norm": 4.002727031707764, "learning_rate": 1.1414068451531222e-05, "loss": 0.0632, "step": 12259 }, { "epoch": 11.788461538461538, "grad_norm": 1.062517762184143, "learning_rate": 1.1412835282526468e-05, "loss": 0.0081, "step": 12260 }, { "epoch": 11.789423076923077, "grad_norm": 0.6337510347366333, "learning_rate": 1.141160209159866e-05, "loss": 0.0026, "step": 12261 }, { "epoch": 11.790384615384616, "grad_norm": 0.5105716586112976, "learning_rate": 1.1410368878766924e-05, "loss": 0.0023, "step": 12262 }, { "epoch": 11.791346153846154, "grad_norm": 0.11755590885877609, "learning_rate": 1.1409135644050407e-05, "loss": 0.0011, "step": 12263 }, { "epoch": 11.792307692307693, "grad_norm": 0.1882469803094864, "learning_rate": 1.1407902387468239e-05, "loss": 0.0017, "step": 12264 }, { "epoch": 11.79326923076923, "grad_norm": 0.7893863320350647, "learning_rate": 1.1406669109039558e-05, "loss": 0.0025, "step": 12265 }, { "epoch": 11.794230769230769, "grad_norm": 1.5201739072799683, "learning_rate": 1.14054358087835e-05, "loss": 0.0226, "step": 12266 }, { "epoch": 11.795192307692307, "grad_norm": 0.49211832880973816, "learning_rate": 1.1404202486719205e-05, "loss": 0.0019, "step": 12267 }, { "epoch": 11.796153846153846, "grad_norm": 1.9685453176498413, "learning_rate": 1.1402969142865809e-05, "loss": 0.008, "step": 12268 }, { "epoch": 11.797115384615385, "grad_norm": 3.0633363723754883, "learning_rate": 1.1401735777242449e-05, "loss": 0.0231, "step": 12269 }, { "epoch": 11.798076923076923, "grad_norm": 1.6719027757644653, "learning_rate": 1.1400502389868262e-05, "loss": 0.0166, "step": 12270 }, { "epoch": 11.799038461538462, "grad_norm": 0.6324142217636108, "learning_rate": 1.139926898076239e-05, "loss": 0.0018, "step": 12271 }, { "epoch": 11.8, "grad_norm": 1.148796558380127, "learning_rate": 1.1398035549943972e-05, "loss": 0.0081, "step": 12272 }, { "epoch": 11.80096153846154, "grad_norm": 0.07566878944635391, "learning_rate": 1.139680209743214e-05, "loss": 0.0006, "step": 12273 }, { "epoch": 11.801923076923076, "grad_norm": 2.2306249141693115, "learning_rate": 1.1395568623246045e-05, "loss": 0.026, "step": 12274 }, { "epoch": 11.802884615384615, "grad_norm": 3.4527342319488525, "learning_rate": 1.1394335127404819e-05, "loss": 0.1161, "step": 12275 }, { "epoch": 11.803846153846154, "grad_norm": 2.5276448726654053, "learning_rate": 1.1393101609927604e-05, "loss": 0.011, "step": 12276 }, { "epoch": 11.804807692307692, "grad_norm": 0.03817298263311386, "learning_rate": 1.1391868070833538e-05, "loss": 0.0004, "step": 12277 }, { "epoch": 11.805769230769231, "grad_norm": 2.3265042304992676, "learning_rate": 1.1390634510141768e-05, "loss": 0.0073, "step": 12278 }, { "epoch": 11.80673076923077, "grad_norm": 0.8130820989608765, "learning_rate": 1.138940092787143e-05, "loss": 0.004, "step": 12279 }, { "epoch": 11.807692307692308, "grad_norm": 0.568030834197998, "learning_rate": 1.138816732404167e-05, "loss": 0.0057, "step": 12280 }, { "epoch": 11.808653846153845, "grad_norm": 0.5382899641990662, "learning_rate": 1.1386933698671623e-05, "loss": 0.0033, "step": 12281 }, { "epoch": 11.809615384615384, "grad_norm": 0.3751159906387329, "learning_rate": 1.1385700051780439e-05, "loss": 0.0024, "step": 12282 }, { "epoch": 11.810576923076923, "grad_norm": 2.9086852073669434, "learning_rate": 1.1384466383387256e-05, "loss": 0.0361, "step": 12283 }, { "epoch": 11.811538461538461, "grad_norm": 1.150599718093872, "learning_rate": 1.1383232693511216e-05, "loss": 0.0071, "step": 12284 }, { "epoch": 11.8125, "grad_norm": 2.9714198112487793, "learning_rate": 1.1381998982171469e-05, "loss": 0.0336, "step": 12285 }, { "epoch": 11.813461538461539, "grad_norm": 1.2588340044021606, "learning_rate": 1.138076524938715e-05, "loss": 0.0055, "step": 12286 }, { "epoch": 11.814423076923077, "grad_norm": 0.09230682253837585, "learning_rate": 1.1379531495177412e-05, "loss": 0.0007, "step": 12287 }, { "epoch": 11.815384615384616, "grad_norm": 2.4294137954711914, "learning_rate": 1.1378297719561388e-05, "loss": 0.0386, "step": 12288 }, { "epoch": 11.816346153846155, "grad_norm": 2.666524887084961, "learning_rate": 1.1377063922558232e-05, "loss": 0.0494, "step": 12289 }, { "epoch": 11.817307692307692, "grad_norm": 3.162680149078369, "learning_rate": 1.1375830104187085e-05, "loss": 0.1073, "step": 12290 }, { "epoch": 11.81826923076923, "grad_norm": 1.3670485019683838, "learning_rate": 1.1374596264467093e-05, "loss": 0.0636, "step": 12291 }, { "epoch": 11.819230769230769, "grad_norm": 3.187577724456787, "learning_rate": 1.1373362403417398e-05, "loss": 0.028, "step": 12292 }, { "epoch": 11.820192307692308, "grad_norm": 1.768544316291809, "learning_rate": 1.1372128521057155e-05, "loss": 0.0377, "step": 12293 }, { "epoch": 11.821153846153846, "grad_norm": 1.559454321861267, "learning_rate": 1.1370894617405502e-05, "loss": 0.0067, "step": 12294 }, { "epoch": 11.822115384615385, "grad_norm": 1.2620995044708252, "learning_rate": 1.1369660692481586e-05, "loss": 0.0081, "step": 12295 }, { "epoch": 11.823076923076924, "grad_norm": 1.7739944458007812, "learning_rate": 1.1368426746304557e-05, "loss": 0.0116, "step": 12296 }, { "epoch": 11.82403846153846, "grad_norm": 3.2845356464385986, "learning_rate": 1.1367192778893563e-05, "loss": 0.1095, "step": 12297 }, { "epoch": 11.825, "grad_norm": 2.154172897338867, "learning_rate": 1.136595879026775e-05, "loss": 0.005, "step": 12298 }, { "epoch": 11.825961538461538, "grad_norm": 0.07540469616651535, "learning_rate": 1.1364724780446261e-05, "loss": 0.0006, "step": 12299 }, { "epoch": 11.826923076923077, "grad_norm": 1.6734927892684937, "learning_rate": 1.1363490749448253e-05, "loss": 0.0062, "step": 12300 }, { "epoch": 11.827884615384615, "grad_norm": 1.542966604232788, "learning_rate": 1.136225669729287e-05, "loss": 0.0111, "step": 12301 }, { "epoch": 11.828846153846154, "grad_norm": 0.2546592056751251, "learning_rate": 1.1361022623999262e-05, "loss": 0.0017, "step": 12302 }, { "epoch": 11.829807692307693, "grad_norm": 0.21342921257019043, "learning_rate": 1.1359788529586576e-05, "loss": 0.0008, "step": 12303 }, { "epoch": 11.830769230769231, "grad_norm": 1.042030692100525, "learning_rate": 1.1358554414073963e-05, "loss": 0.0103, "step": 12304 }, { "epoch": 11.83173076923077, "grad_norm": 0.939439594745636, "learning_rate": 1.1357320277480572e-05, "loss": 0.0043, "step": 12305 }, { "epoch": 11.832692307692307, "grad_norm": 1.6378742456436157, "learning_rate": 1.1356086119825553e-05, "loss": 0.0105, "step": 12306 }, { "epoch": 11.833653846153846, "grad_norm": 3.3597323894500732, "learning_rate": 1.135485194112806e-05, "loss": 0.0316, "step": 12307 }, { "epoch": 11.834615384615384, "grad_norm": 1.9194461107254028, "learning_rate": 1.1353617741407241e-05, "loss": 0.0628, "step": 12308 }, { "epoch": 11.835576923076923, "grad_norm": 1.3941161632537842, "learning_rate": 1.1352383520682249e-05, "loss": 0.0112, "step": 12309 }, { "epoch": 11.836538461538462, "grad_norm": 0.34549620747566223, "learning_rate": 1.1351149278972232e-05, "loss": 0.0019, "step": 12310 }, { "epoch": 11.8375, "grad_norm": 0.1684727817773819, "learning_rate": 1.1349915016296342e-05, "loss": 0.0017, "step": 12311 }, { "epoch": 11.838461538461539, "grad_norm": 0.1271795779466629, "learning_rate": 1.1348680732673735e-05, "loss": 0.0011, "step": 12312 }, { "epoch": 11.839423076923078, "grad_norm": 0.19417867064476013, "learning_rate": 1.134744642812356e-05, "loss": 0.0015, "step": 12313 }, { "epoch": 11.840384615384615, "grad_norm": 1.6258282661437988, "learning_rate": 1.1346212102664973e-05, "loss": 0.0106, "step": 12314 }, { "epoch": 11.841346153846153, "grad_norm": 3.520535945892334, "learning_rate": 1.1344977756317126e-05, "loss": 0.0492, "step": 12315 }, { "epoch": 11.842307692307692, "grad_norm": 1.4926408529281616, "learning_rate": 1.1343743389099169e-05, "loss": 0.0312, "step": 12316 }, { "epoch": 11.84326923076923, "grad_norm": 1.7521892786026, "learning_rate": 1.134250900103026e-05, "loss": 0.0074, "step": 12317 }, { "epoch": 11.84423076923077, "grad_norm": 1.4329084157943726, "learning_rate": 1.1341274592129552e-05, "loss": 0.0068, "step": 12318 }, { "epoch": 11.845192307692308, "grad_norm": 1.109743356704712, "learning_rate": 1.1340040162416197e-05, "loss": 0.0043, "step": 12319 }, { "epoch": 11.846153846153847, "grad_norm": 0.24201853573322296, "learning_rate": 1.1338805711909353e-05, "loss": 0.0018, "step": 12320 }, { "epoch": 11.847115384615385, "grad_norm": 0.09951010346412659, "learning_rate": 1.1337571240628171e-05, "loss": 0.0008, "step": 12321 }, { "epoch": 11.848076923076922, "grad_norm": 0.9270000457763672, "learning_rate": 1.1336336748591814e-05, "loss": 0.005, "step": 12322 }, { "epoch": 11.849038461538461, "grad_norm": 0.1629483848810196, "learning_rate": 1.133510223581943e-05, "loss": 0.001, "step": 12323 }, { "epoch": 11.85, "grad_norm": 1.066652774810791, "learning_rate": 1.1333867702330177e-05, "loss": 0.0061, "step": 12324 }, { "epoch": 11.850961538461538, "grad_norm": 0.9895073771476746, "learning_rate": 1.1332633148143212e-05, "loss": 0.0041, "step": 12325 }, { "epoch": 11.851923076923077, "grad_norm": 2.5944020748138428, "learning_rate": 1.1331398573277695e-05, "loss": 0.0221, "step": 12326 }, { "epoch": 11.852884615384616, "grad_norm": 1.5319708585739136, "learning_rate": 1.1330163977752776e-05, "loss": 0.034, "step": 12327 }, { "epoch": 11.853846153846154, "grad_norm": 2.222386121749878, "learning_rate": 1.1328929361587617e-05, "loss": 0.0386, "step": 12328 }, { "epoch": 11.854807692307693, "grad_norm": 0.23904536664485931, "learning_rate": 1.1327694724801377e-05, "loss": 0.0014, "step": 12329 }, { "epoch": 11.85576923076923, "grad_norm": 5.728724002838135, "learning_rate": 1.1326460067413212e-05, "loss": 0.0915, "step": 12330 }, { "epoch": 11.856730769230769, "grad_norm": 3.3465189933776855, "learning_rate": 1.1325225389442278e-05, "loss": 0.0126, "step": 12331 }, { "epoch": 11.857692307692307, "grad_norm": 3.5468530654907227, "learning_rate": 1.1323990690907734e-05, "loss": 0.0192, "step": 12332 }, { "epoch": 11.858653846153846, "grad_norm": 2.4452600479125977, "learning_rate": 1.132275597182874e-05, "loss": 0.0089, "step": 12333 }, { "epoch": 11.859615384615385, "grad_norm": 3.4015936851501465, "learning_rate": 1.132152123222446e-05, "loss": 0.0994, "step": 12334 }, { "epoch": 11.860576923076923, "grad_norm": 2.841076612472534, "learning_rate": 1.1320286472114044e-05, "loss": 0.0777, "step": 12335 }, { "epoch": 11.861538461538462, "grad_norm": 1.5032296180725098, "learning_rate": 1.1319051691516659e-05, "loss": 0.0124, "step": 12336 }, { "epoch": 11.8625, "grad_norm": 1.8018440008163452, "learning_rate": 1.1317816890451466e-05, "loss": 0.0131, "step": 12337 }, { "epoch": 11.86346153846154, "grad_norm": 1.7636399269104004, "learning_rate": 1.1316582068937618e-05, "loss": 0.0177, "step": 12338 }, { "epoch": 11.864423076923076, "grad_norm": 1.1757844686508179, "learning_rate": 1.1315347226994283e-05, "loss": 0.0132, "step": 12339 }, { "epoch": 11.865384615384615, "grad_norm": 2.7755331993103027, "learning_rate": 1.1314112364640616e-05, "loss": 0.0233, "step": 12340 }, { "epoch": 11.866346153846154, "grad_norm": 0.39013299345970154, "learning_rate": 1.1312877481895787e-05, "loss": 0.0033, "step": 12341 }, { "epoch": 11.867307692307692, "grad_norm": 0.42428621649742126, "learning_rate": 1.1311642578778951e-05, "loss": 0.0021, "step": 12342 }, { "epoch": 11.868269230769231, "grad_norm": 1.5776269435882568, "learning_rate": 1.1310407655309271e-05, "loss": 0.0119, "step": 12343 }, { "epoch": 11.86923076923077, "grad_norm": 2.508105754852295, "learning_rate": 1.130917271150591e-05, "loss": 0.017, "step": 12344 }, { "epoch": 11.870192307692308, "grad_norm": 0.18520107865333557, "learning_rate": 1.1307937747388034e-05, "loss": 0.0013, "step": 12345 }, { "epoch": 11.871153846153845, "grad_norm": 0.1112503781914711, "learning_rate": 1.1306702762974797e-05, "loss": 0.0009, "step": 12346 }, { "epoch": 11.872115384615384, "grad_norm": 0.2986941933631897, "learning_rate": 1.1305467758285374e-05, "loss": 0.0017, "step": 12347 }, { "epoch": 11.873076923076923, "grad_norm": 0.2194576859474182, "learning_rate": 1.130423273333892e-05, "loss": 0.001, "step": 12348 }, { "epoch": 11.874038461538461, "grad_norm": 0.7656297087669373, "learning_rate": 1.1302997688154601e-05, "loss": 0.0034, "step": 12349 }, { "epoch": 11.875, "grad_norm": 0.09090650081634521, "learning_rate": 1.1301762622751584e-05, "loss": 0.001, "step": 12350 }, { "epoch": 11.875961538461539, "grad_norm": 1.6824387311935425, "learning_rate": 1.130052753714903e-05, "loss": 0.0106, "step": 12351 }, { "epoch": 11.876923076923077, "grad_norm": 1.2176772356033325, "learning_rate": 1.129929243136611e-05, "loss": 0.0156, "step": 12352 }, { "epoch": 11.877884615384616, "grad_norm": 0.06507167965173721, "learning_rate": 1.1298057305421978e-05, "loss": 0.0006, "step": 12353 }, { "epoch": 11.878846153846155, "grad_norm": 0.30855876207351685, "learning_rate": 1.1296822159335812e-05, "loss": 0.0016, "step": 12354 }, { "epoch": 11.879807692307692, "grad_norm": 2.212456464767456, "learning_rate": 1.129558699312677e-05, "loss": 0.015, "step": 12355 }, { "epoch": 11.88076923076923, "grad_norm": 0.05358124151825905, "learning_rate": 1.1294351806814021e-05, "loss": 0.0003, "step": 12356 }, { "epoch": 11.881730769230769, "grad_norm": 0.260581374168396, "learning_rate": 1.1293116600416729e-05, "loss": 0.0016, "step": 12357 }, { "epoch": 11.882692307692308, "grad_norm": 0.1974126100540161, "learning_rate": 1.1291881373954066e-05, "loss": 0.0018, "step": 12358 }, { "epoch": 11.883653846153846, "grad_norm": 0.5369440317153931, "learning_rate": 1.1290646127445193e-05, "loss": 0.0031, "step": 12359 }, { "epoch": 11.884615384615385, "grad_norm": 3.2062554359436035, "learning_rate": 1.1289410860909283e-05, "loss": 0.0358, "step": 12360 }, { "epoch": 11.885576923076924, "grad_norm": 2.197775363922119, "learning_rate": 1.1288175574365496e-05, "loss": 0.0439, "step": 12361 }, { "epoch": 11.88653846153846, "grad_norm": 2.360482692718506, "learning_rate": 1.1286940267833009e-05, "loss": 0.0645, "step": 12362 }, { "epoch": 11.8875, "grad_norm": 2.445638418197632, "learning_rate": 1.1285704941330986e-05, "loss": 0.0354, "step": 12363 }, { "epoch": 11.888461538461538, "grad_norm": 2.404012441635132, "learning_rate": 1.1284469594878593e-05, "loss": 0.0423, "step": 12364 }, { "epoch": 11.889423076923077, "grad_norm": 0.3104546070098877, "learning_rate": 1.1283234228495003e-05, "loss": 0.0014, "step": 12365 }, { "epoch": 11.890384615384615, "grad_norm": 1.691554307937622, "learning_rate": 1.1281998842199383e-05, "loss": 0.0079, "step": 12366 }, { "epoch": 11.891346153846154, "grad_norm": 1.7080278396606445, "learning_rate": 1.1280763436010908e-05, "loss": 0.0034, "step": 12367 }, { "epoch": 11.892307692307693, "grad_norm": 1.6855778694152832, "learning_rate": 1.127952800994874e-05, "loss": 0.0153, "step": 12368 }, { "epoch": 11.893269230769231, "grad_norm": 1.5673024654388428, "learning_rate": 1.1278292564032052e-05, "loss": 0.009, "step": 12369 }, { "epoch": 11.89423076923077, "grad_norm": 0.8229565024375916, "learning_rate": 1.1277057098280018e-05, "loss": 0.0055, "step": 12370 }, { "epoch": 11.895192307692307, "grad_norm": 0.10219477862119675, "learning_rate": 1.1275821612711803e-05, "loss": 0.0009, "step": 12371 }, { "epoch": 11.896153846153846, "grad_norm": 0.68161541223526, "learning_rate": 1.1274586107346581e-05, "loss": 0.0037, "step": 12372 }, { "epoch": 11.897115384615384, "grad_norm": 0.03744477033615112, "learning_rate": 1.1273350582203527e-05, "loss": 0.0003, "step": 12373 }, { "epoch": 11.898076923076923, "grad_norm": 0.4080539345741272, "learning_rate": 1.1272115037301806e-05, "loss": 0.0017, "step": 12374 }, { "epoch": 11.899038461538462, "grad_norm": 6.138733386993408, "learning_rate": 1.1270879472660592e-05, "loss": 0.0739, "step": 12375 }, { "epoch": 11.9, "grad_norm": 1.9050878286361694, "learning_rate": 1.1269643888299062e-05, "loss": 0.0445, "step": 12376 }, { "epoch": 11.900961538461539, "grad_norm": 0.8570379614830017, "learning_rate": 1.1268408284236384e-05, "loss": 0.0026, "step": 12377 }, { "epoch": 11.901923076923078, "grad_norm": 2.5019354820251465, "learning_rate": 1.1267172660491731e-05, "loss": 0.0138, "step": 12378 }, { "epoch": 11.902884615384615, "grad_norm": 0.11505740880966187, "learning_rate": 1.1265937017084278e-05, "loss": 0.0011, "step": 12379 }, { "epoch": 11.903846153846153, "grad_norm": 3.5419199466705322, "learning_rate": 1.12647013540332e-05, "loss": 0.0203, "step": 12380 }, { "epoch": 11.904807692307692, "grad_norm": 3.501039505004883, "learning_rate": 1.1263465671357668e-05, "loss": 0.0201, "step": 12381 }, { "epoch": 11.90576923076923, "grad_norm": 1.7197072505950928, "learning_rate": 1.1262229969076857e-05, "loss": 0.0177, "step": 12382 }, { "epoch": 11.90673076923077, "grad_norm": 0.25640422105789185, "learning_rate": 1.126099424720994e-05, "loss": 0.0012, "step": 12383 }, { "epoch": 11.907692307692308, "grad_norm": 2.305593490600586, "learning_rate": 1.1259758505776092e-05, "loss": 0.0114, "step": 12384 }, { "epoch": 11.908653846153847, "grad_norm": 1.4593197107315063, "learning_rate": 1.1258522744794493e-05, "loss": 0.0294, "step": 12385 }, { "epoch": 11.909615384615385, "grad_norm": 1.4353916645050049, "learning_rate": 1.1257286964284311e-05, "loss": 0.0118, "step": 12386 }, { "epoch": 11.910576923076922, "grad_norm": 0.9957876205444336, "learning_rate": 1.125605116426473e-05, "loss": 0.0044, "step": 12387 }, { "epoch": 11.911538461538461, "grad_norm": 0.8338533043861389, "learning_rate": 1.1254815344754919e-05, "loss": 0.0048, "step": 12388 }, { "epoch": 11.9125, "grad_norm": 1.5933635234832764, "learning_rate": 1.1253579505774057e-05, "loss": 0.0085, "step": 12389 }, { "epoch": 11.913461538461538, "grad_norm": 1.6012821197509766, "learning_rate": 1.1252343647341316e-05, "loss": 0.0251, "step": 12390 }, { "epoch": 11.914423076923077, "grad_norm": 1.1338142156600952, "learning_rate": 1.1251107769475884e-05, "loss": 0.0099, "step": 12391 }, { "epoch": 11.915384615384616, "grad_norm": 0.19948960840702057, "learning_rate": 1.1249871872196928e-05, "loss": 0.0012, "step": 12392 }, { "epoch": 11.916346153846154, "grad_norm": 3.387626886367798, "learning_rate": 1.1248635955523631e-05, "loss": 0.0201, "step": 12393 }, { "epoch": 11.917307692307693, "grad_norm": 0.07241646200418472, "learning_rate": 1.1247400019475165e-05, "loss": 0.0005, "step": 12394 }, { "epoch": 11.91826923076923, "grad_norm": 3.409682273864746, "learning_rate": 1.1246164064070715e-05, "loss": 0.0245, "step": 12395 }, { "epoch": 11.919230769230769, "grad_norm": 0.09613452851772308, "learning_rate": 1.1244928089329455e-05, "loss": 0.0006, "step": 12396 }, { "epoch": 11.920192307692307, "grad_norm": 0.51278156042099, "learning_rate": 1.1243692095270565e-05, "loss": 0.003, "step": 12397 }, { "epoch": 11.921153846153846, "grad_norm": 1.086679220199585, "learning_rate": 1.1242456081913222e-05, "loss": 0.0073, "step": 12398 }, { "epoch": 11.922115384615385, "grad_norm": 1.6595158576965332, "learning_rate": 1.1241220049276609e-05, "loss": 0.0134, "step": 12399 }, { "epoch": 11.923076923076923, "grad_norm": 1.272936224937439, "learning_rate": 1.1239983997379904e-05, "loss": 0.0143, "step": 12400 }, { "epoch": 11.924038461538462, "grad_norm": 2.646167039871216, "learning_rate": 1.1238747926242286e-05, "loss": 0.0183, "step": 12401 }, { "epoch": 11.925, "grad_norm": 1.468679428100586, "learning_rate": 1.1237511835882936e-05, "loss": 0.035, "step": 12402 }, { "epoch": 11.92596153846154, "grad_norm": 3.767516851425171, "learning_rate": 1.1236275726321035e-05, "loss": 0.0538, "step": 12403 }, { "epoch": 11.926923076923076, "grad_norm": 0.2806527614593506, "learning_rate": 1.1235039597575761e-05, "loss": 0.0011, "step": 12404 }, { "epoch": 11.927884615384615, "grad_norm": 2.419640064239502, "learning_rate": 1.12338034496663e-05, "loss": 0.0089, "step": 12405 }, { "epoch": 11.928846153846154, "grad_norm": 1.7253220081329346, "learning_rate": 1.1232567282611831e-05, "loss": 0.0285, "step": 12406 }, { "epoch": 11.929807692307692, "grad_norm": 3.184619188308716, "learning_rate": 1.1231331096431535e-05, "loss": 0.0353, "step": 12407 }, { "epoch": 11.930769230769231, "grad_norm": 0.2671585977077484, "learning_rate": 1.123009489114459e-05, "loss": 0.002, "step": 12408 }, { "epoch": 11.93173076923077, "grad_norm": 0.30926528573036194, "learning_rate": 1.1228858666770186e-05, "loss": 0.0012, "step": 12409 }, { "epoch": 11.932692307692308, "grad_norm": 0.4514785408973694, "learning_rate": 1.1227622423327501e-05, "loss": 0.0026, "step": 12410 }, { "epoch": 11.933653846153845, "grad_norm": 2.033203125, "learning_rate": 1.1226386160835722e-05, "loss": 0.0062, "step": 12411 }, { "epoch": 11.934615384615384, "grad_norm": 2.9867143630981445, "learning_rate": 1.1225149879314026e-05, "loss": 0.0242, "step": 12412 }, { "epoch": 11.935576923076923, "grad_norm": 0.3085574209690094, "learning_rate": 1.12239135787816e-05, "loss": 0.0012, "step": 12413 }, { "epoch": 11.936538461538461, "grad_norm": 4.340213298797607, "learning_rate": 1.1222677259257628e-05, "loss": 0.0498, "step": 12414 }, { "epoch": 11.9375, "grad_norm": 1.991869568824768, "learning_rate": 1.1221440920761295e-05, "loss": 0.0496, "step": 12415 }, { "epoch": 11.938461538461539, "grad_norm": 0.47262144088745117, "learning_rate": 1.1220204563311781e-05, "loss": 0.0024, "step": 12416 }, { "epoch": 11.939423076923077, "grad_norm": 3.763995885848999, "learning_rate": 1.1218968186928274e-05, "loss": 0.0167, "step": 12417 }, { "epoch": 11.940384615384616, "grad_norm": 0.2522519826889038, "learning_rate": 1.121773179162996e-05, "loss": 0.0019, "step": 12418 }, { "epoch": 11.941346153846155, "grad_norm": 2.2478649616241455, "learning_rate": 1.1216495377436023e-05, "loss": 0.0228, "step": 12419 }, { "epoch": 11.942307692307692, "grad_norm": 0.7312126755714417, "learning_rate": 1.1215258944365646e-05, "loss": 0.0032, "step": 12420 }, { "epoch": 11.94326923076923, "grad_norm": 1.203500509262085, "learning_rate": 1.1214022492438016e-05, "loss": 0.0042, "step": 12421 }, { "epoch": 11.944230769230769, "grad_norm": 1.8490865230560303, "learning_rate": 1.1212786021672324e-05, "loss": 0.0053, "step": 12422 }, { "epoch": 11.945192307692308, "grad_norm": 1.247776746749878, "learning_rate": 1.1211549532087749e-05, "loss": 0.0062, "step": 12423 }, { "epoch": 11.946153846153846, "grad_norm": 2.90933895111084, "learning_rate": 1.1210313023703482e-05, "loss": 0.0509, "step": 12424 }, { "epoch": 11.947115384615385, "grad_norm": 0.2451854646205902, "learning_rate": 1.1209076496538712e-05, "loss": 0.0016, "step": 12425 }, { "epoch": 11.948076923076924, "grad_norm": 4.143494606018066, "learning_rate": 1.120783995061262e-05, "loss": 0.0478, "step": 12426 }, { "epoch": 11.94903846153846, "grad_norm": 1.8996978998184204, "learning_rate": 1.1206603385944396e-05, "loss": 0.018, "step": 12427 }, { "epoch": 11.95, "grad_norm": 2.1995980739593506, "learning_rate": 1.1205366802553231e-05, "loss": 0.035, "step": 12428 }, { "epoch": 11.950961538461538, "grad_norm": 1.0366666316986084, "learning_rate": 1.1204130200458311e-05, "loss": 0.0052, "step": 12429 }, { "epoch": 11.951923076923077, "grad_norm": 0.38953882455825806, "learning_rate": 1.1202893579678824e-05, "loss": 0.0018, "step": 12430 }, { "epoch": 11.952884615384615, "grad_norm": 0.4562341272830963, "learning_rate": 1.1201656940233959e-05, "loss": 0.003, "step": 12431 }, { "epoch": 11.953846153846154, "grad_norm": 1.2523715496063232, "learning_rate": 1.1200420282142906e-05, "loss": 0.0066, "step": 12432 }, { "epoch": 11.954807692307693, "grad_norm": 2.129584789276123, "learning_rate": 1.1199183605424852e-05, "loss": 0.0188, "step": 12433 }, { "epoch": 11.955769230769231, "grad_norm": 3.245764970779419, "learning_rate": 1.1197946910098986e-05, "loss": 0.0186, "step": 12434 }, { "epoch": 11.95673076923077, "grad_norm": 0.4311573803424835, "learning_rate": 1.1196710196184504e-05, "loss": 0.0025, "step": 12435 }, { "epoch": 11.957692307692307, "grad_norm": 2.264404296875, "learning_rate": 1.119547346370059e-05, "loss": 0.0146, "step": 12436 }, { "epoch": 11.958653846153846, "grad_norm": 0.36719411611557007, "learning_rate": 1.1194236712666438e-05, "loss": 0.0017, "step": 12437 }, { "epoch": 11.959615384615384, "grad_norm": 0.7090450525283813, "learning_rate": 1.1192999943101235e-05, "loss": 0.0032, "step": 12438 }, { "epoch": 11.960576923076923, "grad_norm": 1.0175081491470337, "learning_rate": 1.1191763155024176e-05, "loss": 0.0058, "step": 12439 }, { "epoch": 11.961538461538462, "grad_norm": 1.4494209289550781, "learning_rate": 1.1190526348454452e-05, "loss": 0.0176, "step": 12440 }, { "epoch": 11.9625, "grad_norm": 3.0087876319885254, "learning_rate": 1.1189289523411251e-05, "loss": 0.0321, "step": 12441 }, { "epoch": 11.963461538461539, "grad_norm": 0.18269303441047668, "learning_rate": 1.1188052679913766e-05, "loss": 0.0014, "step": 12442 }, { "epoch": 11.964423076923078, "grad_norm": 1.58562171459198, "learning_rate": 1.1186815817981191e-05, "loss": 0.0211, "step": 12443 }, { "epoch": 11.965384615384615, "grad_norm": 0.9711982607841492, "learning_rate": 1.118557893763272e-05, "loss": 0.0029, "step": 12444 }, { "epoch": 11.966346153846153, "grad_norm": 3.2881271839141846, "learning_rate": 1.118434203888754e-05, "loss": 0.1117, "step": 12445 }, { "epoch": 11.967307692307692, "grad_norm": 1.5971283912658691, "learning_rate": 1.118310512176485e-05, "loss": 0.0166, "step": 12446 }, { "epoch": 11.96826923076923, "grad_norm": 1.7393728494644165, "learning_rate": 1.1181868186283842e-05, "loss": 0.0104, "step": 12447 }, { "epoch": 11.96923076923077, "grad_norm": 0.22905482351779938, "learning_rate": 1.1180631232463707e-05, "loss": 0.0013, "step": 12448 }, { "epoch": 11.970192307692308, "grad_norm": 0.2413584589958191, "learning_rate": 1.1179394260323639e-05, "loss": 0.0013, "step": 12449 }, { "epoch": 11.971153846153847, "grad_norm": 1.703189730644226, "learning_rate": 1.1178157269882837e-05, "loss": 0.0266, "step": 12450 }, { "epoch": 11.972115384615385, "grad_norm": 0.20155096054077148, "learning_rate": 1.117692026116049e-05, "loss": 0.0011, "step": 12451 }, { "epoch": 11.973076923076922, "grad_norm": 2.492672920227051, "learning_rate": 1.1175683234175794e-05, "loss": 0.0241, "step": 12452 }, { "epoch": 11.974038461538461, "grad_norm": 1.292592167854309, "learning_rate": 1.1174446188947945e-05, "loss": 0.0051, "step": 12453 }, { "epoch": 11.975, "grad_norm": 3.1982288360595703, "learning_rate": 1.1173209125496139e-05, "loss": 0.0387, "step": 12454 }, { "epoch": 11.975961538461538, "grad_norm": 1.6986082792282104, "learning_rate": 1.117197204383957e-05, "loss": 0.0254, "step": 12455 }, { "epoch": 11.976923076923077, "grad_norm": 0.04546324536204338, "learning_rate": 1.1170734943997435e-05, "loss": 0.0003, "step": 12456 }, { "epoch": 11.977884615384616, "grad_norm": 2.6535451412200928, "learning_rate": 1.116949782598893e-05, "loss": 0.0252, "step": 12457 }, { "epoch": 11.978846153846154, "grad_norm": 0.18476459383964539, "learning_rate": 1.116826068983325e-05, "loss": 0.0013, "step": 12458 }, { "epoch": 11.979807692307693, "grad_norm": 0.8532966375350952, "learning_rate": 1.1167023535549594e-05, "loss": 0.0039, "step": 12459 }, { "epoch": 11.98076923076923, "grad_norm": 2.970383644104004, "learning_rate": 1.1165786363157156e-05, "loss": 0.0816, "step": 12460 }, { "epoch": 11.981730769230769, "grad_norm": 0.13315892219543457, "learning_rate": 1.1164549172675139e-05, "loss": 0.0008, "step": 12461 }, { "epoch": 11.982692307692307, "grad_norm": 4.374106407165527, "learning_rate": 1.1163311964122733e-05, "loss": 0.0459, "step": 12462 }, { "epoch": 11.983653846153846, "grad_norm": 1.3996440172195435, "learning_rate": 1.1162074737519143e-05, "loss": 0.0058, "step": 12463 }, { "epoch": 11.984615384615385, "grad_norm": 0.949974536895752, "learning_rate": 1.116083749288356e-05, "loss": 0.024, "step": 12464 }, { "epoch": 11.985576923076923, "grad_norm": 0.4286958873271942, "learning_rate": 1.115960023023519e-05, "loss": 0.0024, "step": 12465 }, { "epoch": 11.986538461538462, "grad_norm": 4.786700248718262, "learning_rate": 1.1158362949593227e-05, "loss": 0.1679, "step": 12466 }, { "epoch": 11.9875, "grad_norm": 0.09373269975185394, "learning_rate": 1.1157125650976867e-05, "loss": 0.0008, "step": 12467 }, { "epoch": 11.98846153846154, "grad_norm": 6.18698787689209, "learning_rate": 1.1155888334405316e-05, "loss": 0.1481, "step": 12468 }, { "epoch": 11.989423076923076, "grad_norm": 0.3145369291305542, "learning_rate": 1.1154650999897771e-05, "loss": 0.0016, "step": 12469 }, { "epoch": 11.990384615384615, "grad_norm": 3.078092575073242, "learning_rate": 1.115341364747343e-05, "loss": 0.0227, "step": 12470 }, { "epoch": 11.991346153846154, "grad_norm": 0.1850864738225937, "learning_rate": 1.1152176277151493e-05, "loss": 0.0017, "step": 12471 }, { "epoch": 11.992307692307692, "grad_norm": 1.399560570716858, "learning_rate": 1.1150938888951165e-05, "loss": 0.0163, "step": 12472 }, { "epoch": 11.993269230769231, "grad_norm": 2.1531410217285156, "learning_rate": 1.1149701482891644e-05, "loss": 0.0115, "step": 12473 }, { "epoch": 11.99423076923077, "grad_norm": 0.09137868136167526, "learning_rate": 1.1148464058992126e-05, "loss": 0.0007, "step": 12474 }, { "epoch": 11.995192307692308, "grad_norm": 0.3343891501426697, "learning_rate": 1.114722661727182e-05, "loss": 0.0018, "step": 12475 }, { "epoch": 11.996153846153845, "grad_norm": 8.151801109313965, "learning_rate": 1.1145989157749923e-05, "loss": 0.1223, "step": 12476 }, { "epoch": 11.997115384615384, "grad_norm": 2.8546805381774902, "learning_rate": 1.1144751680445637e-05, "loss": 0.0342, "step": 12477 }, { "epoch": 11.998076923076923, "grad_norm": 0.25443777441978455, "learning_rate": 1.1143514185378164e-05, "loss": 0.0017, "step": 12478 }, { "epoch": 11.999038461538461, "grad_norm": 0.12162187695503235, "learning_rate": 1.114227667256671e-05, "loss": 0.0013, "step": 12479 }, { "epoch": 12.0, "grad_norm": 0.14082197844982147, "learning_rate": 1.1141039142030472e-05, "loss": 0.0015, "step": 12480 }, { "epoch": 12.000961538461539, "grad_norm": 0.12106452882289886, "learning_rate": 1.1139801593788657e-05, "loss": 0.0016, "step": 12481 }, { "epoch": 12.001923076923077, "grad_norm": 3.761728286743164, "learning_rate": 1.1138564027860466e-05, "loss": 0.0296, "step": 12482 }, { "epoch": 12.002884615384616, "grad_norm": 2.7856061458587646, "learning_rate": 1.1137326444265102e-05, "loss": 0.0568, "step": 12483 }, { "epoch": 12.003846153846155, "grad_norm": 0.27630671858787537, "learning_rate": 1.113608884302177e-05, "loss": 0.0015, "step": 12484 }, { "epoch": 12.004807692307692, "grad_norm": 2.048569679260254, "learning_rate": 1.1134851224149675e-05, "loss": 0.0291, "step": 12485 }, { "epoch": 12.00576923076923, "grad_norm": 0.3682458698749542, "learning_rate": 1.113361358766802e-05, "loss": 0.0017, "step": 12486 }, { "epoch": 12.006730769230769, "grad_norm": 1.7677489519119263, "learning_rate": 1.1132375933596006e-05, "loss": 0.0123, "step": 12487 }, { "epoch": 12.007692307692308, "grad_norm": 2.9058868885040283, "learning_rate": 1.1131138261952845e-05, "loss": 0.0175, "step": 12488 }, { "epoch": 12.008653846153846, "grad_norm": 1.3415812253952026, "learning_rate": 1.1129900572757734e-05, "loss": 0.0074, "step": 12489 }, { "epoch": 12.009615384615385, "grad_norm": 0.580966591835022, "learning_rate": 1.1128662866029884e-05, "loss": 0.0022, "step": 12490 }, { "epoch": 12.010576923076924, "grad_norm": 1.2469745874404907, "learning_rate": 1.1127425141788502e-05, "loss": 0.0098, "step": 12491 }, { "epoch": 12.011538461538462, "grad_norm": 0.9989943504333496, "learning_rate": 1.1126187400052788e-05, "loss": 0.006, "step": 12492 }, { "epoch": 12.0125, "grad_norm": 0.17153407633304596, "learning_rate": 1.112494964084195e-05, "loss": 0.0024, "step": 12493 }, { "epoch": 12.013461538461538, "grad_norm": 0.5034084916114807, "learning_rate": 1.1123711864175195e-05, "loss": 0.0032, "step": 12494 }, { "epoch": 12.014423076923077, "grad_norm": 2.653383255004883, "learning_rate": 1.1122474070071732e-05, "loss": 0.0558, "step": 12495 }, { "epoch": 12.015384615384615, "grad_norm": 0.9995128512382507, "learning_rate": 1.1121236258550762e-05, "loss": 0.0066, "step": 12496 }, { "epoch": 12.016346153846154, "grad_norm": 0.4274672269821167, "learning_rate": 1.11199984296315e-05, "loss": 0.0028, "step": 12497 }, { "epoch": 12.017307692307693, "grad_norm": 0.2511261999607086, "learning_rate": 1.111876058333315e-05, "loss": 0.0023, "step": 12498 }, { "epoch": 12.018269230769231, "grad_norm": 0.10348544269800186, "learning_rate": 1.1117522719674918e-05, "loss": 0.0015, "step": 12499 }, { "epoch": 12.01923076923077, "grad_norm": 1.338636040687561, "learning_rate": 1.111628483867601e-05, "loss": 0.0046, "step": 12500 }, { "epoch": 12.020192307692307, "grad_norm": 0.5519689917564392, "learning_rate": 1.1115046940355643e-05, "loss": 0.0041, "step": 12501 }, { "epoch": 12.021153846153846, "grad_norm": 1.072367548942566, "learning_rate": 1.1113809024733018e-05, "loss": 0.0047, "step": 12502 }, { "epoch": 12.022115384615384, "grad_norm": 0.7807968258857727, "learning_rate": 1.1112571091827345e-05, "loss": 0.0085, "step": 12503 }, { "epoch": 12.023076923076923, "grad_norm": 0.056336965411901474, "learning_rate": 1.1111333141657834e-05, "loss": 0.0006, "step": 12504 }, { "epoch": 12.024038461538462, "grad_norm": 0.7315201163291931, "learning_rate": 1.1110095174243695e-05, "loss": 0.0037, "step": 12505 }, { "epoch": 12.025, "grad_norm": 0.08506700396537781, "learning_rate": 1.1108857189604138e-05, "loss": 0.0011, "step": 12506 }, { "epoch": 12.025961538461539, "grad_norm": 1.0005784034729004, "learning_rate": 1.110761918775837e-05, "loss": 0.0171, "step": 12507 }, { "epoch": 12.026923076923078, "grad_norm": 0.043448302894830704, "learning_rate": 1.1106381168725604e-05, "loss": 0.0003, "step": 12508 }, { "epoch": 12.027884615384615, "grad_norm": 0.06293129920959473, "learning_rate": 1.110514313252505e-05, "loss": 0.0008, "step": 12509 }, { "epoch": 12.028846153846153, "grad_norm": 3.576561689376831, "learning_rate": 1.1103905079175918e-05, "loss": 0.0465, "step": 12510 }, { "epoch": 12.029807692307692, "grad_norm": 2.0959455966949463, "learning_rate": 1.1102667008697417e-05, "loss": 0.0157, "step": 12511 }, { "epoch": 12.03076923076923, "grad_norm": 0.23014777898788452, "learning_rate": 1.1101428921108762e-05, "loss": 0.0016, "step": 12512 }, { "epoch": 12.03173076923077, "grad_norm": 0.15777955949306488, "learning_rate": 1.1100190816429164e-05, "loss": 0.0011, "step": 12513 }, { "epoch": 12.032692307692308, "grad_norm": 0.0906348004937172, "learning_rate": 1.109895269467783e-05, "loss": 0.0009, "step": 12514 }, { "epoch": 12.033653846153847, "grad_norm": 1.272945523262024, "learning_rate": 1.1097714555873978e-05, "loss": 0.0058, "step": 12515 }, { "epoch": 12.034615384615385, "grad_norm": 3.7688305377960205, "learning_rate": 1.1096476400036817e-05, "loss": 0.042, "step": 12516 }, { "epoch": 12.035576923076922, "grad_norm": 0.06940958648920059, "learning_rate": 1.1095238227185564e-05, "loss": 0.0008, "step": 12517 }, { "epoch": 12.036538461538461, "grad_norm": 0.055749230086803436, "learning_rate": 1.1094000037339422e-05, "loss": 0.0006, "step": 12518 }, { "epoch": 12.0375, "grad_norm": 1.701501488685608, "learning_rate": 1.1092761830517616e-05, "loss": 0.0106, "step": 12519 }, { "epoch": 12.038461538461538, "grad_norm": 2.1865382194519043, "learning_rate": 1.109152360673935e-05, "loss": 0.0255, "step": 12520 }, { "epoch": 12.039423076923077, "grad_norm": 0.10723701864480972, "learning_rate": 1.1090285366023843e-05, "loss": 0.0013, "step": 12521 }, { "epoch": 12.040384615384616, "grad_norm": 0.15722671151161194, "learning_rate": 1.1089047108390305e-05, "loss": 0.0011, "step": 12522 }, { "epoch": 12.041346153846154, "grad_norm": 2.597104787826538, "learning_rate": 1.1087808833857954e-05, "loss": 0.0278, "step": 12523 }, { "epoch": 12.042307692307693, "grad_norm": 0.25047606229782104, "learning_rate": 1.1086570542446003e-05, "loss": 0.0014, "step": 12524 }, { "epoch": 12.04326923076923, "grad_norm": 1.9918506145477295, "learning_rate": 1.1085332234173664e-05, "loss": 0.0188, "step": 12525 }, { "epoch": 12.044230769230769, "grad_norm": 0.10912220180034637, "learning_rate": 1.1084093909060156e-05, "loss": 0.0007, "step": 12526 }, { "epoch": 12.045192307692307, "grad_norm": 1.314948320388794, "learning_rate": 1.1082855567124693e-05, "loss": 0.008, "step": 12527 }, { "epoch": 12.046153846153846, "grad_norm": 0.7907436490058899, "learning_rate": 1.1081617208386489e-05, "loss": 0.0038, "step": 12528 }, { "epoch": 12.047115384615385, "grad_norm": 0.1326225847005844, "learning_rate": 1.1080378832864757e-05, "loss": 0.0011, "step": 12529 }, { "epoch": 12.048076923076923, "grad_norm": 0.13562464714050293, "learning_rate": 1.1079140440578722e-05, "loss": 0.0012, "step": 12530 }, { "epoch": 12.049038461538462, "grad_norm": 0.04478078708052635, "learning_rate": 1.1077902031547591e-05, "loss": 0.0004, "step": 12531 }, { "epoch": 12.05, "grad_norm": 0.28908881545066833, "learning_rate": 1.1076663605790585e-05, "loss": 0.0013, "step": 12532 }, { "epoch": 12.050961538461538, "grad_norm": 0.2872157394886017, "learning_rate": 1.1075425163326918e-05, "loss": 0.0022, "step": 12533 }, { "epoch": 12.051923076923076, "grad_norm": 0.3559732437133789, "learning_rate": 1.1074186704175813e-05, "loss": 0.0029, "step": 12534 }, { "epoch": 12.052884615384615, "grad_norm": 1.9689579010009766, "learning_rate": 1.1072948228356481e-05, "loss": 0.0148, "step": 12535 }, { "epoch": 12.053846153846154, "grad_norm": 0.05605272203683853, "learning_rate": 1.1071709735888139e-05, "loss": 0.0006, "step": 12536 }, { "epoch": 12.054807692307692, "grad_norm": 0.10808445513248444, "learning_rate": 1.1070471226790011e-05, "loss": 0.0009, "step": 12537 }, { "epoch": 12.055769230769231, "grad_norm": 0.5667967200279236, "learning_rate": 1.106923270108131e-05, "loss": 0.0014, "step": 12538 }, { "epoch": 12.05673076923077, "grad_norm": 0.13909997045993805, "learning_rate": 1.1067994158781257e-05, "loss": 0.0009, "step": 12539 }, { "epoch": 12.057692307692308, "grad_norm": 1.8844835758209229, "learning_rate": 1.1066755599909065e-05, "loss": 0.026, "step": 12540 }, { "epoch": 12.058653846153845, "grad_norm": 0.7298424243927002, "learning_rate": 1.1065517024483961e-05, "loss": 0.002, "step": 12541 }, { "epoch": 12.059615384615384, "grad_norm": 0.7581204175949097, "learning_rate": 1.106427843252516e-05, "loss": 0.0057, "step": 12542 }, { "epoch": 12.060576923076923, "grad_norm": 1.6956069469451904, "learning_rate": 1.106303982405188e-05, "loss": 0.0081, "step": 12543 }, { "epoch": 12.061538461538461, "grad_norm": 0.2190195918083191, "learning_rate": 1.106180119908334e-05, "loss": 0.0007, "step": 12544 }, { "epoch": 12.0625, "grad_norm": 0.49436694383621216, "learning_rate": 1.1060562557638765e-05, "loss": 0.0038, "step": 12545 }, { "epoch": 12.063461538461539, "grad_norm": 0.09532908350229263, "learning_rate": 1.105932389973737e-05, "loss": 0.0009, "step": 12546 }, { "epoch": 12.064423076923077, "grad_norm": 2.3527493476867676, "learning_rate": 1.1058085225398378e-05, "loss": 0.0189, "step": 12547 }, { "epoch": 12.065384615384616, "grad_norm": 0.8930640816688538, "learning_rate": 1.1056846534641008e-05, "loss": 0.004, "step": 12548 }, { "epoch": 12.066346153846155, "grad_norm": 0.22181196510791779, "learning_rate": 1.1055607827484483e-05, "loss": 0.0018, "step": 12549 }, { "epoch": 12.067307692307692, "grad_norm": 0.7085650563240051, "learning_rate": 1.1054369103948021e-05, "loss": 0.0046, "step": 12550 }, { "epoch": 12.06826923076923, "grad_norm": 0.20633023977279663, "learning_rate": 1.1053130364050845e-05, "loss": 0.0014, "step": 12551 }, { "epoch": 12.069230769230769, "grad_norm": 1.5830061435699463, "learning_rate": 1.105189160781218e-05, "loss": 0.0188, "step": 12552 }, { "epoch": 12.070192307692308, "grad_norm": 2.6992361545562744, "learning_rate": 1.105065283525124e-05, "loss": 0.1, "step": 12553 }, { "epoch": 12.071153846153846, "grad_norm": 0.16213281452655792, "learning_rate": 1.1049414046387254e-05, "loss": 0.0011, "step": 12554 }, { "epoch": 12.072115384615385, "grad_norm": 0.7462780475616455, "learning_rate": 1.1048175241239442e-05, "loss": 0.01, "step": 12555 }, { "epoch": 12.073076923076924, "grad_norm": 0.07652000337839127, "learning_rate": 1.1046936419827026e-05, "loss": 0.0008, "step": 12556 }, { "epoch": 12.074038461538462, "grad_norm": 0.2506360113620758, "learning_rate": 1.104569758216923e-05, "loss": 0.0018, "step": 12557 }, { "epoch": 12.075, "grad_norm": 0.29567447304725647, "learning_rate": 1.1044458728285275e-05, "loss": 0.002, "step": 12558 }, { "epoch": 12.075961538461538, "grad_norm": 1.5480581521987915, "learning_rate": 1.104321985819439e-05, "loss": 0.0058, "step": 12559 }, { "epoch": 12.076923076923077, "grad_norm": 1.7885425090789795, "learning_rate": 1.1041980971915791e-05, "loss": 0.0194, "step": 12560 }, { "epoch": 12.077884615384615, "grad_norm": 0.20471031963825226, "learning_rate": 1.1040742069468707e-05, "loss": 0.0017, "step": 12561 }, { "epoch": 12.078846153846154, "grad_norm": 2.27433443069458, "learning_rate": 1.103950315087236e-05, "loss": 0.0683, "step": 12562 }, { "epoch": 12.079807692307693, "grad_norm": 0.0431198924779892, "learning_rate": 1.1038264216145973e-05, "loss": 0.0002, "step": 12563 }, { "epoch": 12.080769230769231, "grad_norm": 0.6440499424934387, "learning_rate": 1.1037025265308776e-05, "loss": 0.0032, "step": 12564 }, { "epoch": 12.08173076923077, "grad_norm": 0.1999792605638504, "learning_rate": 1.1035786298379989e-05, "loss": 0.0019, "step": 12565 }, { "epoch": 12.082692307692307, "grad_norm": 0.6697495579719543, "learning_rate": 1.1034547315378838e-05, "loss": 0.0032, "step": 12566 }, { "epoch": 12.083653846153846, "grad_norm": 0.24703949689865112, "learning_rate": 1.103330831632455e-05, "loss": 0.0015, "step": 12567 }, { "epoch": 12.084615384615384, "grad_norm": 1.1980174779891968, "learning_rate": 1.103206930123635e-05, "loss": 0.0117, "step": 12568 }, { "epoch": 12.085576923076923, "grad_norm": 2.5248324871063232, "learning_rate": 1.1030830270133462e-05, "loss": 0.0142, "step": 12569 }, { "epoch": 12.086538461538462, "grad_norm": 0.14072087407112122, "learning_rate": 1.1029591223035116e-05, "loss": 0.001, "step": 12570 }, { "epoch": 12.0875, "grad_norm": 0.5654207468032837, "learning_rate": 1.1028352159960535e-05, "loss": 0.0029, "step": 12571 }, { "epoch": 12.088461538461539, "grad_norm": 0.17257708311080933, "learning_rate": 1.1027113080928947e-05, "loss": 0.0007, "step": 12572 }, { "epoch": 12.089423076923078, "grad_norm": 0.05893371254205704, "learning_rate": 1.1025873985959576e-05, "loss": 0.0003, "step": 12573 }, { "epoch": 12.090384615384615, "grad_norm": 0.28812482953071594, "learning_rate": 1.1024634875071653e-05, "loss": 0.0011, "step": 12574 }, { "epoch": 12.091346153846153, "grad_norm": 0.07174195349216461, "learning_rate": 1.1023395748284406e-05, "loss": 0.0004, "step": 12575 }, { "epoch": 12.092307692307692, "grad_norm": 0.1671285182237625, "learning_rate": 1.1022156605617059e-05, "loss": 0.0013, "step": 12576 }, { "epoch": 12.09326923076923, "grad_norm": 0.9830246567726135, "learning_rate": 1.1020917447088841e-05, "loss": 0.0028, "step": 12577 }, { "epoch": 12.09423076923077, "grad_norm": 1.262211799621582, "learning_rate": 1.1019678272718983e-05, "loss": 0.0061, "step": 12578 }, { "epoch": 12.095192307692308, "grad_norm": 0.33916714787483215, "learning_rate": 1.101843908252671e-05, "loss": 0.002, "step": 12579 }, { "epoch": 12.096153846153847, "grad_norm": 0.07403421401977539, "learning_rate": 1.1017199876531248e-05, "loss": 0.0006, "step": 12580 }, { "epoch": 12.097115384615385, "grad_norm": 1.2160145044326782, "learning_rate": 1.1015960654751833e-05, "loss": 0.0137, "step": 12581 }, { "epoch": 12.098076923076922, "grad_norm": 3.4756529331207275, "learning_rate": 1.101472141720769e-05, "loss": 0.0086, "step": 12582 }, { "epoch": 12.099038461538461, "grad_norm": 0.024302098900079727, "learning_rate": 1.1013482163918048e-05, "loss": 0.0003, "step": 12583 }, { "epoch": 12.1, "grad_norm": 0.05177683010697365, "learning_rate": 1.1012242894902136e-05, "loss": 0.0005, "step": 12584 }, { "epoch": 12.100961538461538, "grad_norm": 0.436638742685318, "learning_rate": 1.101100361017919e-05, "loss": 0.0013, "step": 12585 }, { "epoch": 12.101923076923077, "grad_norm": 0.03997527062892914, "learning_rate": 1.1009764309768432e-05, "loss": 0.0003, "step": 12586 }, { "epoch": 12.102884615384616, "grad_norm": 1.661329746246338, "learning_rate": 1.1008524993689094e-05, "loss": 0.0084, "step": 12587 }, { "epoch": 12.103846153846154, "grad_norm": 1.7204997539520264, "learning_rate": 1.100728566196041e-05, "loss": 0.0201, "step": 12588 }, { "epoch": 12.104807692307693, "grad_norm": 0.8784460425376892, "learning_rate": 1.1006046314601608e-05, "loss": 0.0105, "step": 12589 }, { "epoch": 12.10576923076923, "grad_norm": 0.12538325786590576, "learning_rate": 1.1004806951631921e-05, "loss": 0.0006, "step": 12590 }, { "epoch": 12.106730769230769, "grad_norm": 3.488985776901245, "learning_rate": 1.1003567573070576e-05, "loss": 0.0348, "step": 12591 }, { "epoch": 12.107692307692307, "grad_norm": 2.3506033420562744, "learning_rate": 1.1002328178936813e-05, "loss": 0.0354, "step": 12592 }, { "epoch": 12.108653846153846, "grad_norm": 1.3485808372497559, "learning_rate": 1.1001088769249855e-05, "loss": 0.0075, "step": 12593 }, { "epoch": 12.109615384615385, "grad_norm": 1.8876333236694336, "learning_rate": 1.0999849344028937e-05, "loss": 0.0142, "step": 12594 }, { "epoch": 12.110576923076923, "grad_norm": 0.14640820026397705, "learning_rate": 1.0998609903293294e-05, "loss": 0.0005, "step": 12595 }, { "epoch": 12.111538461538462, "grad_norm": 0.08946304023265839, "learning_rate": 1.0997370447062155e-05, "loss": 0.0005, "step": 12596 }, { "epoch": 12.1125, "grad_norm": 0.15014943480491638, "learning_rate": 1.0996130975354756e-05, "loss": 0.0008, "step": 12597 }, { "epoch": 12.113461538461538, "grad_norm": 1.6471959352493286, "learning_rate": 1.0994891488190325e-05, "loss": 0.0137, "step": 12598 }, { "epoch": 12.114423076923076, "grad_norm": 1.1152607202529907, "learning_rate": 1.0993651985588098e-05, "loss": 0.0039, "step": 12599 }, { "epoch": 12.115384615384615, "grad_norm": 0.686077356338501, "learning_rate": 1.0992412467567311e-05, "loss": 0.0024, "step": 12600 }, { "epoch": 12.116346153846154, "grad_norm": 0.08704381436109543, "learning_rate": 1.0991172934147193e-05, "loss": 0.0006, "step": 12601 }, { "epoch": 12.117307692307692, "grad_norm": 1.8517982959747314, "learning_rate": 1.098993338534698e-05, "loss": 0.0091, "step": 12602 }, { "epoch": 12.118269230769231, "grad_norm": 0.4719809591770172, "learning_rate": 1.0988693821185907e-05, "loss": 0.0013, "step": 12603 }, { "epoch": 12.11923076923077, "grad_norm": 3.7152750492095947, "learning_rate": 1.0987454241683208e-05, "loss": 0.1556, "step": 12604 }, { "epoch": 12.120192307692308, "grad_norm": 0.01153324544429779, "learning_rate": 1.0986214646858115e-05, "loss": 0.0001, "step": 12605 }, { "epoch": 12.121153846153845, "grad_norm": 0.14183717966079712, "learning_rate": 1.098497503672987e-05, "loss": 0.0009, "step": 12606 }, { "epoch": 12.122115384615384, "grad_norm": 1.7084009647369385, "learning_rate": 1.09837354113177e-05, "loss": 0.0047, "step": 12607 }, { "epoch": 12.123076923076923, "grad_norm": 1.4064853191375732, "learning_rate": 1.0982495770640845e-05, "loss": 0.0052, "step": 12608 }, { "epoch": 12.124038461538461, "grad_norm": 3.414609909057617, "learning_rate": 1.0981256114718536e-05, "loss": 0.0839, "step": 12609 }, { "epoch": 12.125, "grad_norm": 0.03836580738425255, "learning_rate": 1.0980016443570016e-05, "loss": 0.0004, "step": 12610 }, { "epoch": 12.125961538461539, "grad_norm": 0.7082359790802002, "learning_rate": 1.0978776757214515e-05, "loss": 0.0039, "step": 12611 }, { "epoch": 12.126923076923077, "grad_norm": 2.200817584991455, "learning_rate": 1.0977537055671275e-05, "loss": 0.0609, "step": 12612 }, { "epoch": 12.127884615384616, "grad_norm": 1.4805808067321777, "learning_rate": 1.0976297338959523e-05, "loss": 0.007, "step": 12613 }, { "epoch": 12.128846153846155, "grad_norm": 0.11481287330389023, "learning_rate": 1.0975057607098505e-05, "loss": 0.0007, "step": 12614 }, { "epoch": 12.129807692307692, "grad_norm": 0.024276409298181534, "learning_rate": 1.0973817860107454e-05, "loss": 0.0002, "step": 12615 }, { "epoch": 12.13076923076923, "grad_norm": 0.08306270092725754, "learning_rate": 1.0972578098005609e-05, "loss": 0.0006, "step": 12616 }, { "epoch": 12.131730769230769, "grad_norm": 1.676113247871399, "learning_rate": 1.0971338320812206e-05, "loss": 0.0298, "step": 12617 }, { "epoch": 12.132692307692308, "grad_norm": 0.06764877587556839, "learning_rate": 1.0970098528546482e-05, "loss": 0.0006, "step": 12618 }, { "epoch": 12.133653846153846, "grad_norm": 0.9523903131484985, "learning_rate": 1.0968858721227678e-05, "loss": 0.0072, "step": 12619 }, { "epoch": 12.134615384615385, "grad_norm": 0.2782733142375946, "learning_rate": 1.0967618898875027e-05, "loss": 0.0023, "step": 12620 }, { "epoch": 12.135576923076924, "grad_norm": 0.15790513157844543, "learning_rate": 1.0966379061507775e-05, "loss": 0.0011, "step": 12621 }, { "epoch": 12.136538461538462, "grad_norm": 0.745266854763031, "learning_rate": 1.0965139209145153e-05, "loss": 0.0019, "step": 12622 }, { "epoch": 12.1375, "grad_norm": 0.9065245985984802, "learning_rate": 1.0963899341806406e-05, "loss": 0.0033, "step": 12623 }, { "epoch": 12.138461538461538, "grad_norm": 0.0761108547449112, "learning_rate": 1.0962659459510765e-05, "loss": 0.0007, "step": 12624 }, { "epoch": 12.139423076923077, "grad_norm": 0.23085658252239227, "learning_rate": 1.096141956227748e-05, "loss": 0.0026, "step": 12625 }, { "epoch": 12.140384615384615, "grad_norm": 0.17977061867713928, "learning_rate": 1.0960179650125781e-05, "loss": 0.0016, "step": 12626 }, { "epoch": 12.141346153846154, "grad_norm": 0.869281530380249, "learning_rate": 1.0958939723074913e-05, "loss": 0.0069, "step": 12627 }, { "epoch": 12.142307692307693, "grad_norm": 1.2588739395141602, "learning_rate": 1.0957699781144119e-05, "loss": 0.0487, "step": 12628 }, { "epoch": 12.143269230769231, "grad_norm": 0.13760414719581604, "learning_rate": 1.0956459824352629e-05, "loss": 0.0009, "step": 12629 }, { "epoch": 12.14423076923077, "grad_norm": 1.3473690748214722, "learning_rate": 1.0955219852719694e-05, "loss": 0.0045, "step": 12630 }, { "epoch": 12.145192307692307, "grad_norm": 0.3104250431060791, "learning_rate": 1.0953979866264549e-05, "loss": 0.0016, "step": 12631 }, { "epoch": 12.146153846153846, "grad_norm": 0.04270775988698006, "learning_rate": 1.0952739865006437e-05, "loss": 0.0004, "step": 12632 }, { "epoch": 12.147115384615384, "grad_norm": 1.5598726272583008, "learning_rate": 1.0951499848964598e-05, "loss": 0.0044, "step": 12633 }, { "epoch": 12.148076923076923, "grad_norm": 0.026590734720230103, "learning_rate": 1.0950259818158274e-05, "loss": 0.0001, "step": 12634 }, { "epoch": 12.149038461538462, "grad_norm": 0.008748073130846024, "learning_rate": 1.0949019772606707e-05, "loss": 0.0001, "step": 12635 }, { "epoch": 12.15, "grad_norm": 0.31863483786582947, "learning_rate": 1.0947779712329138e-05, "loss": 0.0018, "step": 12636 }, { "epoch": 12.150961538461539, "grad_norm": 0.0675705075263977, "learning_rate": 1.094653963734481e-05, "loss": 0.0006, "step": 12637 }, { "epoch": 12.151923076923078, "grad_norm": 0.11103840917348862, "learning_rate": 1.0945299547672963e-05, "loss": 0.0006, "step": 12638 }, { "epoch": 12.152884615384615, "grad_norm": 0.3537456691265106, "learning_rate": 1.0944059443332844e-05, "loss": 0.003, "step": 12639 }, { "epoch": 12.153846153846153, "grad_norm": 1.5972988605499268, "learning_rate": 1.0942819324343693e-05, "loss": 0.0089, "step": 12640 }, { "epoch": 12.154807692307692, "grad_norm": 0.14768250286579132, "learning_rate": 1.0941579190724752e-05, "loss": 0.0011, "step": 12641 }, { "epoch": 12.15576923076923, "grad_norm": 0.9831545352935791, "learning_rate": 1.0940339042495264e-05, "loss": 0.0041, "step": 12642 }, { "epoch": 12.15673076923077, "grad_norm": 0.0961238443851471, "learning_rate": 1.0939098879674476e-05, "loss": 0.0003, "step": 12643 }, { "epoch": 12.157692307692308, "grad_norm": 4.055691242218018, "learning_rate": 1.0937858702281631e-05, "loss": 0.0274, "step": 12644 }, { "epoch": 12.158653846153847, "grad_norm": 1.4214649200439453, "learning_rate": 1.0936618510335967e-05, "loss": 0.0072, "step": 12645 }, { "epoch": 12.159615384615385, "grad_norm": 0.6551876068115234, "learning_rate": 1.093537830385674e-05, "loss": 0.0029, "step": 12646 }, { "epoch": 12.160576923076922, "grad_norm": 0.15267343819141388, "learning_rate": 1.0934138082863182e-05, "loss": 0.0014, "step": 12647 }, { "epoch": 12.161538461538461, "grad_norm": 0.14777466654777527, "learning_rate": 1.0932897847374542e-05, "loss": 0.0008, "step": 12648 }, { "epoch": 12.1625, "grad_norm": 0.18459279835224152, "learning_rate": 1.0931657597410066e-05, "loss": 0.0012, "step": 12649 }, { "epoch": 12.163461538461538, "grad_norm": 0.07076963037252426, "learning_rate": 1.0930417332989e-05, "loss": 0.0004, "step": 12650 }, { "epoch": 12.164423076923077, "grad_norm": 0.1545545756816864, "learning_rate": 1.0929177054130587e-05, "loss": 0.0007, "step": 12651 }, { "epoch": 12.165384615384616, "grad_norm": 0.06445667892694473, "learning_rate": 1.0927936760854075e-05, "loss": 0.0005, "step": 12652 }, { "epoch": 12.166346153846154, "grad_norm": 1.5351146459579468, "learning_rate": 1.0926696453178702e-05, "loss": 0.0309, "step": 12653 }, { "epoch": 12.167307692307693, "grad_norm": 3.135935068130493, "learning_rate": 1.0925456131123726e-05, "loss": 0.0133, "step": 12654 }, { "epoch": 12.16826923076923, "grad_norm": 1.062523603439331, "learning_rate": 1.0924215794708385e-05, "loss": 0.0059, "step": 12655 }, { "epoch": 12.169230769230769, "grad_norm": 1.9185422658920288, "learning_rate": 1.0922975443951926e-05, "loss": 0.0438, "step": 12656 }, { "epoch": 12.170192307692307, "grad_norm": 0.16750061511993408, "learning_rate": 1.0921735078873599e-05, "loss": 0.001, "step": 12657 }, { "epoch": 12.171153846153846, "grad_norm": 0.0715881958603859, "learning_rate": 1.0920494699492649e-05, "loss": 0.0004, "step": 12658 }, { "epoch": 12.172115384615385, "grad_norm": 0.15572960674762726, "learning_rate": 1.0919254305828322e-05, "loss": 0.0009, "step": 12659 }, { "epoch": 12.173076923076923, "grad_norm": 0.24385346472263336, "learning_rate": 1.0918013897899866e-05, "loss": 0.0018, "step": 12660 }, { "epoch": 12.174038461538462, "grad_norm": 0.0456964336335659, "learning_rate": 1.091677347572653e-05, "loss": 0.0003, "step": 12661 }, { "epoch": 12.175, "grad_norm": 4.113883972167969, "learning_rate": 1.091553303932756e-05, "loss": 0.0572, "step": 12662 }, { "epoch": 12.175961538461538, "grad_norm": 0.03710641339421272, "learning_rate": 1.0914292588722201e-05, "loss": 0.0004, "step": 12663 }, { "epoch": 12.176923076923076, "grad_norm": 0.9435673952102661, "learning_rate": 1.0913052123929707e-05, "loss": 0.0059, "step": 12664 }, { "epoch": 12.177884615384615, "grad_norm": 1.555614948272705, "learning_rate": 1.0911811644969324e-05, "loss": 0.0065, "step": 12665 }, { "epoch": 12.178846153846154, "grad_norm": 0.055378932505846024, "learning_rate": 1.0910571151860301e-05, "loss": 0.0005, "step": 12666 }, { "epoch": 12.179807692307692, "grad_norm": 2.0380349159240723, "learning_rate": 1.0909330644621884e-05, "loss": 0.0249, "step": 12667 }, { "epoch": 12.180769230769231, "grad_norm": 0.5792089700698853, "learning_rate": 1.0908090123273327e-05, "loss": 0.002, "step": 12668 }, { "epoch": 12.18173076923077, "grad_norm": 0.22394324839115143, "learning_rate": 1.0906849587833875e-05, "loss": 0.0014, "step": 12669 }, { "epoch": 12.182692307692308, "grad_norm": 2.534531831741333, "learning_rate": 1.090560903832278e-05, "loss": 0.0338, "step": 12670 }, { "epoch": 12.183653846153845, "grad_norm": 0.548193633556366, "learning_rate": 1.0904368474759288e-05, "loss": 0.0019, "step": 12671 }, { "epoch": 12.184615384615384, "grad_norm": 0.28046998381614685, "learning_rate": 1.0903127897162654e-05, "loss": 0.0017, "step": 12672 }, { "epoch": 12.185576923076923, "grad_norm": 0.3359149098396301, "learning_rate": 1.0901887305552125e-05, "loss": 0.0012, "step": 12673 }, { "epoch": 12.186538461538461, "grad_norm": 1.308074712753296, "learning_rate": 1.0900646699946953e-05, "loss": 0.0124, "step": 12674 }, { "epoch": 12.1875, "grad_norm": 0.058399129658937454, "learning_rate": 1.0899406080366385e-05, "loss": 0.0006, "step": 12675 }, { "epoch": 12.188461538461539, "grad_norm": 0.908534049987793, "learning_rate": 1.0898165446829676e-05, "loss": 0.0052, "step": 12676 }, { "epoch": 12.189423076923077, "grad_norm": 0.39925193786621094, "learning_rate": 1.0896924799356076e-05, "loss": 0.0018, "step": 12677 }, { "epoch": 12.190384615384616, "grad_norm": 0.24545976519584656, "learning_rate": 1.0895684137964834e-05, "loss": 0.0011, "step": 12678 }, { "epoch": 12.191346153846155, "grad_norm": 0.8439859747886658, "learning_rate": 1.0894443462675204e-05, "loss": 0.0049, "step": 12679 }, { "epoch": 12.192307692307692, "grad_norm": 0.24299240112304688, "learning_rate": 1.0893202773506437e-05, "loss": 0.0012, "step": 12680 }, { "epoch": 12.19326923076923, "grad_norm": 1.7235771417617798, "learning_rate": 1.0891962070477786e-05, "loss": 0.0519, "step": 12681 }, { "epoch": 12.194230769230769, "grad_norm": 1.6127924919128418, "learning_rate": 1.0890721353608498e-05, "loss": 0.0049, "step": 12682 }, { "epoch": 12.195192307692308, "grad_norm": 1.700370192527771, "learning_rate": 1.088948062291783e-05, "loss": 0.0258, "step": 12683 }, { "epoch": 12.196153846153846, "grad_norm": 1.186755895614624, "learning_rate": 1.0888239878425035e-05, "loss": 0.0064, "step": 12684 }, { "epoch": 12.197115384615385, "grad_norm": 0.4977875351905823, "learning_rate": 1.088699912014936e-05, "loss": 0.0021, "step": 12685 }, { "epoch": 12.198076923076924, "grad_norm": 1.5281614065170288, "learning_rate": 1.0885758348110065e-05, "loss": 0.0471, "step": 12686 }, { "epoch": 12.199038461538462, "grad_norm": 2.3017547130584717, "learning_rate": 1.0884517562326402e-05, "loss": 0.0252, "step": 12687 }, { "epoch": 12.2, "grad_norm": 0.0736604779958725, "learning_rate": 1.088327676281762e-05, "loss": 0.0007, "step": 12688 }, { "epoch": 12.200961538461538, "grad_norm": 5.043656349182129, "learning_rate": 1.0882035949602973e-05, "loss": 0.0198, "step": 12689 }, { "epoch": 12.201923076923077, "grad_norm": 0.03613566607236862, "learning_rate": 1.088079512270172e-05, "loss": 0.0003, "step": 12690 }, { "epoch": 12.202884615384615, "grad_norm": 0.07449167966842651, "learning_rate": 1.0879554282133111e-05, "loss": 0.0006, "step": 12691 }, { "epoch": 12.203846153846154, "grad_norm": 0.016246922314167023, "learning_rate": 1.0878313427916403e-05, "loss": 0.0001, "step": 12692 }, { "epoch": 12.204807692307693, "grad_norm": 3.5430688858032227, "learning_rate": 1.0877072560070844e-05, "loss": 0.0863, "step": 12693 }, { "epoch": 12.205769230769231, "grad_norm": 0.06223323941230774, "learning_rate": 1.0875831678615696e-05, "loss": 0.0004, "step": 12694 }, { "epoch": 12.20673076923077, "grad_norm": 0.2355339378118515, "learning_rate": 1.0874590783570212e-05, "loss": 0.001, "step": 12695 }, { "epoch": 12.207692307692307, "grad_norm": 0.03366750106215477, "learning_rate": 1.087334987495364e-05, "loss": 0.0003, "step": 12696 }, { "epoch": 12.208653846153846, "grad_norm": 0.26705360412597656, "learning_rate": 1.0872108952785247e-05, "loss": 0.0013, "step": 12697 }, { "epoch": 12.209615384615384, "grad_norm": 1.5470869541168213, "learning_rate": 1.0870868017084282e-05, "loss": 0.0862, "step": 12698 }, { "epoch": 12.210576923076923, "grad_norm": 0.09777224063873291, "learning_rate": 1.0869627067870001e-05, "loss": 0.0008, "step": 12699 }, { "epoch": 12.211538461538462, "grad_norm": 2.439574956893921, "learning_rate": 1.0868386105161657e-05, "loss": 0.0171, "step": 12700 }, { "epoch": 12.2125, "grad_norm": 0.09803532809019089, "learning_rate": 1.0867145128978513e-05, "loss": 0.0007, "step": 12701 }, { "epoch": 12.213461538461539, "grad_norm": 0.04978317767381668, "learning_rate": 1.086590413933982e-05, "loss": 0.0005, "step": 12702 }, { "epoch": 12.214423076923078, "grad_norm": 1.772131085395813, "learning_rate": 1.086466313626484e-05, "loss": 0.031, "step": 12703 }, { "epoch": 12.215384615384615, "grad_norm": 0.04167703539133072, "learning_rate": 1.0863422119772819e-05, "loss": 0.0003, "step": 12704 }, { "epoch": 12.216346153846153, "grad_norm": 0.1202940046787262, "learning_rate": 1.0862181089883025e-05, "loss": 0.0008, "step": 12705 }, { "epoch": 12.217307692307692, "grad_norm": 0.17492163181304932, "learning_rate": 1.0860940046614712e-05, "loss": 0.0015, "step": 12706 }, { "epoch": 12.21826923076923, "grad_norm": 2.7537074089050293, "learning_rate": 1.0859698989987133e-05, "loss": 0.0074, "step": 12707 }, { "epoch": 12.21923076923077, "grad_norm": 0.0849200189113617, "learning_rate": 1.0858457920019552e-05, "loss": 0.0005, "step": 12708 }, { "epoch": 12.220192307692308, "grad_norm": 0.329420804977417, "learning_rate": 1.0857216836731221e-05, "loss": 0.0018, "step": 12709 }, { "epoch": 12.221153846153847, "grad_norm": 0.07972877472639084, "learning_rate": 1.0855975740141402e-05, "loss": 0.0005, "step": 12710 }, { "epoch": 12.222115384615385, "grad_norm": 0.2369213104248047, "learning_rate": 1.085473463026935e-05, "loss": 0.0013, "step": 12711 }, { "epoch": 12.223076923076922, "grad_norm": 0.2577395439147949, "learning_rate": 1.0853493507134327e-05, "loss": 0.0018, "step": 12712 }, { "epoch": 12.224038461538461, "grad_norm": 0.09464507550001144, "learning_rate": 1.085225237075559e-05, "loss": 0.0008, "step": 12713 }, { "epoch": 12.225, "grad_norm": 1.4190661907196045, "learning_rate": 1.0851011221152398e-05, "loss": 0.0069, "step": 12714 }, { "epoch": 12.225961538461538, "grad_norm": 0.045774564146995544, "learning_rate": 1.0849770058344007e-05, "loss": 0.0003, "step": 12715 }, { "epoch": 12.226923076923077, "grad_norm": 0.32302984595298767, "learning_rate": 1.084852888234968e-05, "loss": 0.001, "step": 12716 }, { "epoch": 12.227884615384616, "grad_norm": 0.245289146900177, "learning_rate": 1.0847287693188677e-05, "loss": 0.0016, "step": 12717 }, { "epoch": 12.228846153846154, "grad_norm": 0.9121506810188293, "learning_rate": 1.0846046490880253e-05, "loss": 0.0257, "step": 12718 }, { "epoch": 12.229807692307693, "grad_norm": 0.12511612474918365, "learning_rate": 1.0844805275443673e-05, "loss": 0.0009, "step": 12719 }, { "epoch": 12.23076923076923, "grad_norm": 0.018711160868406296, "learning_rate": 1.0843564046898192e-05, "loss": 0.0002, "step": 12720 }, { "epoch": 12.231730769230769, "grad_norm": 0.05261053517460823, "learning_rate": 1.0842322805263074e-05, "loss": 0.0003, "step": 12721 }, { "epoch": 12.232692307692307, "grad_norm": 1.3645799160003662, "learning_rate": 1.0841081550557577e-05, "loss": 0.004, "step": 12722 }, { "epoch": 12.233653846153846, "grad_norm": 1.720461130142212, "learning_rate": 1.0839840282800966e-05, "loss": 0.0092, "step": 12723 }, { "epoch": 12.234615384615385, "grad_norm": 0.23214611411094666, "learning_rate": 1.0838599002012498e-05, "loss": 0.001, "step": 12724 }, { "epoch": 12.235576923076923, "grad_norm": 6.48012638092041, "learning_rate": 1.0837357708211431e-05, "loss": 0.0443, "step": 12725 }, { "epoch": 12.236538461538462, "grad_norm": 0.07444849610328674, "learning_rate": 1.0836116401417033e-05, "loss": 0.0004, "step": 12726 }, { "epoch": 12.2375, "grad_norm": 0.40313801169395447, "learning_rate": 1.0834875081648564e-05, "loss": 0.0015, "step": 12727 }, { "epoch": 12.238461538461538, "grad_norm": 0.845645010471344, "learning_rate": 1.0833633748925283e-05, "loss": 0.0035, "step": 12728 }, { "epoch": 12.239423076923076, "grad_norm": 0.2158411294221878, "learning_rate": 1.083239240326645e-05, "loss": 0.002, "step": 12729 }, { "epoch": 12.240384615384615, "grad_norm": 0.8460617065429688, "learning_rate": 1.0831151044691332e-05, "loss": 0.0031, "step": 12730 }, { "epoch": 12.241346153846154, "grad_norm": 0.5309967398643494, "learning_rate": 1.082990967321919e-05, "loss": 0.0029, "step": 12731 }, { "epoch": 12.242307692307692, "grad_norm": 1.7077538967132568, "learning_rate": 1.0828668288869287e-05, "loss": 0.0134, "step": 12732 }, { "epoch": 12.243269230769231, "grad_norm": 0.0749744325876236, "learning_rate": 1.0827426891660879e-05, "loss": 0.0004, "step": 12733 }, { "epoch": 12.24423076923077, "grad_norm": 2.647963762283325, "learning_rate": 1.0826185481613239e-05, "loss": 0.0411, "step": 12734 }, { "epoch": 12.245192307692308, "grad_norm": 0.20625609159469604, "learning_rate": 1.0824944058745623e-05, "loss": 0.001, "step": 12735 }, { "epoch": 12.246153846153845, "grad_norm": 3.955803155899048, "learning_rate": 1.0823702623077295e-05, "loss": 0.0315, "step": 12736 }, { "epoch": 12.247115384615384, "grad_norm": 0.06250067055225372, "learning_rate": 1.0822461174627523e-05, "loss": 0.0005, "step": 12737 }, { "epoch": 12.248076923076923, "grad_norm": 0.9073411822319031, "learning_rate": 1.0821219713415568e-05, "loss": 0.0037, "step": 12738 }, { "epoch": 12.249038461538461, "grad_norm": 0.13010317087173462, "learning_rate": 1.0819978239460691e-05, "loss": 0.0006, "step": 12739 }, { "epoch": 12.25, "grad_norm": 0.2699049413204193, "learning_rate": 1.0818736752782157e-05, "loss": 0.0007, "step": 12740 }, { "epoch": 12.250961538461539, "grad_norm": 1.6142336130142212, "learning_rate": 1.0817495253399233e-05, "loss": 0.0364, "step": 12741 }, { "epoch": 12.251923076923077, "grad_norm": 0.9510759711265564, "learning_rate": 1.0816253741331184e-05, "loss": 0.0034, "step": 12742 }, { "epoch": 12.252884615384616, "grad_norm": 0.03277614712715149, "learning_rate": 1.0815012216597272e-05, "loss": 0.0003, "step": 12743 }, { "epoch": 12.253846153846155, "grad_norm": 0.3105195462703705, "learning_rate": 1.0813770679216758e-05, "loss": 0.0016, "step": 12744 }, { "epoch": 12.254807692307692, "grad_norm": 0.7513300180435181, "learning_rate": 1.0812529129208916e-05, "loss": 0.0027, "step": 12745 }, { "epoch": 12.25576923076923, "grad_norm": 3.010287046432495, "learning_rate": 1.0811287566593005e-05, "loss": 0.0244, "step": 12746 }, { "epoch": 12.256730769230769, "grad_norm": 0.045294709503650665, "learning_rate": 1.0810045991388291e-05, "loss": 0.0003, "step": 12747 }, { "epoch": 12.257692307692308, "grad_norm": 2.1923727989196777, "learning_rate": 1.0808804403614044e-05, "loss": 0.0258, "step": 12748 }, { "epoch": 12.258653846153846, "grad_norm": 0.054981496185064316, "learning_rate": 1.0807562803289524e-05, "loss": 0.0003, "step": 12749 }, { "epoch": 12.259615384615385, "grad_norm": 0.3439871668815613, "learning_rate": 1.0806321190434002e-05, "loss": 0.0029, "step": 12750 }, { "epoch": 12.260576923076924, "grad_norm": 0.13910529017448425, "learning_rate": 1.0805079565066736e-05, "loss": 0.0012, "step": 12751 }, { "epoch": 12.261538461538462, "grad_norm": 0.140613853931427, "learning_rate": 1.0803837927207004e-05, "loss": 0.001, "step": 12752 }, { "epoch": 12.2625, "grad_norm": 0.6235657930374146, "learning_rate": 1.0802596276874064e-05, "loss": 0.0025, "step": 12753 }, { "epoch": 12.263461538461538, "grad_norm": 0.3225812613964081, "learning_rate": 1.0801354614087188e-05, "loss": 0.0008, "step": 12754 }, { "epoch": 12.264423076923077, "grad_norm": 0.684496283531189, "learning_rate": 1.0800112938865635e-05, "loss": 0.0024, "step": 12755 }, { "epoch": 12.265384615384615, "grad_norm": 3.495835781097412, "learning_rate": 1.0798871251228681e-05, "loss": 0.0122, "step": 12756 }, { "epoch": 12.266346153846154, "grad_norm": 3.350217580795288, "learning_rate": 1.079762955119559e-05, "loss": 0.0216, "step": 12757 }, { "epoch": 12.267307692307693, "grad_norm": 0.07222815603017807, "learning_rate": 1.0796387838785626e-05, "loss": 0.0006, "step": 12758 }, { "epoch": 12.268269230769231, "grad_norm": 0.6584571599960327, "learning_rate": 1.0795146114018065e-05, "loss": 0.005, "step": 12759 }, { "epoch": 12.26923076923077, "grad_norm": 0.12975329160690308, "learning_rate": 1.0793904376912168e-05, "loss": 0.0011, "step": 12760 }, { "epoch": 12.270192307692307, "grad_norm": 0.05544493347406387, "learning_rate": 1.0792662627487207e-05, "loss": 0.0004, "step": 12761 }, { "epoch": 12.271153846153846, "grad_norm": 0.435680091381073, "learning_rate": 1.0791420865762443e-05, "loss": 0.0022, "step": 12762 }, { "epoch": 12.272115384615384, "grad_norm": 0.07209448516368866, "learning_rate": 1.0790179091757156e-05, "loss": 0.0005, "step": 12763 }, { "epoch": 12.273076923076923, "grad_norm": 0.012118201702833176, "learning_rate": 1.0788937305490605e-05, "loss": 0.0001, "step": 12764 }, { "epoch": 12.274038461538462, "grad_norm": 0.11820241808891296, "learning_rate": 1.0787695506982065e-05, "loss": 0.0012, "step": 12765 }, { "epoch": 12.275, "grad_norm": 0.20377185940742493, "learning_rate": 1.07864536962508e-05, "loss": 0.0013, "step": 12766 }, { "epoch": 12.275961538461539, "grad_norm": 1.0526254177093506, "learning_rate": 1.0785211873316085e-05, "loss": 0.0085, "step": 12767 }, { "epoch": 12.276923076923078, "grad_norm": 2.8634748458862305, "learning_rate": 1.0783970038197183e-05, "loss": 0.0415, "step": 12768 }, { "epoch": 12.277884615384615, "grad_norm": 0.348502516746521, "learning_rate": 1.0782728190913367e-05, "loss": 0.001, "step": 12769 }, { "epoch": 12.278846153846153, "grad_norm": 2.44376802444458, "learning_rate": 1.0781486331483908e-05, "loss": 0.0149, "step": 12770 }, { "epoch": 12.279807692307692, "grad_norm": 0.11362778395414352, "learning_rate": 1.0780244459928075e-05, "loss": 0.001, "step": 12771 }, { "epoch": 12.28076923076923, "grad_norm": 0.8746069073677063, "learning_rate": 1.077900257626514e-05, "loss": 0.0035, "step": 12772 }, { "epoch": 12.28173076923077, "grad_norm": 2.514371871948242, "learning_rate": 1.0777760680514367e-05, "loss": 0.034, "step": 12773 }, { "epoch": 12.282692307692308, "grad_norm": 0.02334620989859104, "learning_rate": 1.0776518772695035e-05, "loss": 0.0003, "step": 12774 }, { "epoch": 12.283653846153847, "grad_norm": 0.023551812395453453, "learning_rate": 1.0775276852826408e-05, "loss": 0.0003, "step": 12775 }, { "epoch": 12.284615384615385, "grad_norm": 0.47882863879203796, "learning_rate": 1.0774034920927758e-05, "loss": 0.0028, "step": 12776 }, { "epoch": 12.285576923076922, "grad_norm": 1.393618106842041, "learning_rate": 1.0772792977018362e-05, "loss": 0.0066, "step": 12777 }, { "epoch": 12.286538461538461, "grad_norm": 0.09410542249679565, "learning_rate": 1.0771551021117486e-05, "loss": 0.0004, "step": 12778 }, { "epoch": 12.2875, "grad_norm": 2.2241334915161133, "learning_rate": 1.0770309053244403e-05, "loss": 0.0375, "step": 12779 }, { "epoch": 12.288461538461538, "grad_norm": 0.20775532722473145, "learning_rate": 1.0769067073418383e-05, "loss": 0.0012, "step": 12780 }, { "epoch": 12.289423076923077, "grad_norm": 0.4474319815635681, "learning_rate": 1.0767825081658698e-05, "loss": 0.0024, "step": 12781 }, { "epoch": 12.290384615384616, "grad_norm": 0.7538203597068787, "learning_rate": 1.0766583077984624e-05, "loss": 0.007, "step": 12782 }, { "epoch": 12.291346153846154, "grad_norm": 0.08084800839424133, "learning_rate": 1.076534106241543e-05, "loss": 0.0006, "step": 12783 }, { "epoch": 12.292307692307693, "grad_norm": 0.580491840839386, "learning_rate": 1.0764099034970385e-05, "loss": 0.0027, "step": 12784 }, { "epoch": 12.29326923076923, "grad_norm": 1.056301474571228, "learning_rate": 1.076285699566877e-05, "loss": 0.0168, "step": 12785 }, { "epoch": 12.294230769230769, "grad_norm": 3.354421854019165, "learning_rate": 1.0761614944529855e-05, "loss": 0.0108, "step": 12786 }, { "epoch": 12.295192307692307, "grad_norm": 1.2533679008483887, "learning_rate": 1.0760372881572904e-05, "loss": 0.0053, "step": 12787 }, { "epoch": 12.296153846153846, "grad_norm": 0.3553096652030945, "learning_rate": 1.0759130806817204e-05, "loss": 0.0017, "step": 12788 }, { "epoch": 12.297115384615385, "grad_norm": 0.03969399258494377, "learning_rate": 1.0757888720282022e-05, "loss": 0.0005, "step": 12789 }, { "epoch": 12.298076923076923, "grad_norm": 1.8121720552444458, "learning_rate": 1.0756646621986629e-05, "loss": 0.0029, "step": 12790 }, { "epoch": 12.299038461538462, "grad_norm": 1.0147910118103027, "learning_rate": 1.0755404511950301e-05, "loss": 0.0041, "step": 12791 }, { "epoch": 12.3, "grad_norm": 2.659701347351074, "learning_rate": 1.0754162390192313e-05, "loss": 0.0099, "step": 12792 }, { "epoch": 12.300961538461538, "grad_norm": 0.7722313404083252, "learning_rate": 1.0752920256731939e-05, "loss": 0.0018, "step": 12793 }, { "epoch": 12.301923076923076, "grad_norm": 0.2625766098499298, "learning_rate": 1.0751678111588451e-05, "loss": 0.0018, "step": 12794 }, { "epoch": 12.302884615384615, "grad_norm": 0.23870311677455902, "learning_rate": 1.0750435954781124e-05, "loss": 0.0013, "step": 12795 }, { "epoch": 12.303846153846154, "grad_norm": 0.2866942286491394, "learning_rate": 1.0749193786329235e-05, "loss": 0.0014, "step": 12796 }, { "epoch": 12.304807692307692, "grad_norm": 0.054373908787965775, "learning_rate": 1.0747951606252059e-05, "loss": 0.0004, "step": 12797 }, { "epoch": 12.305769230769231, "grad_norm": 0.6754065155982971, "learning_rate": 1.0746709414568863e-05, "loss": 0.0032, "step": 12798 }, { "epoch": 12.30673076923077, "grad_norm": 1.5993849039077759, "learning_rate": 1.0745467211298934e-05, "loss": 0.01, "step": 12799 }, { "epoch": 12.307692307692308, "grad_norm": 0.08874324709177017, "learning_rate": 1.0744224996461541e-05, "loss": 0.0007, "step": 12800 }, { "epoch": 12.308653846153845, "grad_norm": 0.027811894193291664, "learning_rate": 1.074298277007596e-05, "loss": 0.0003, "step": 12801 }, { "epoch": 12.309615384615384, "grad_norm": 2.5920443534851074, "learning_rate": 1.0741740532161467e-05, "loss": 0.0249, "step": 12802 }, { "epoch": 12.310576923076923, "grad_norm": 0.29936549067497253, "learning_rate": 1.0740498282737337e-05, "loss": 0.0016, "step": 12803 }, { "epoch": 12.311538461538461, "grad_norm": 3.6229283809661865, "learning_rate": 1.0739256021822849e-05, "loss": 0.0341, "step": 12804 }, { "epoch": 12.3125, "grad_norm": 0.05809810012578964, "learning_rate": 1.0738013749437278e-05, "loss": 0.0005, "step": 12805 }, { "epoch": 12.313461538461539, "grad_norm": 0.032954297959804535, "learning_rate": 1.0736771465599894e-05, "loss": 0.0003, "step": 12806 }, { "epoch": 12.314423076923077, "grad_norm": 6.043939113616943, "learning_rate": 1.0735529170329981e-05, "loss": 0.0256, "step": 12807 }, { "epoch": 12.315384615384616, "grad_norm": 1.3817254304885864, "learning_rate": 1.0734286863646817e-05, "loss": 0.0037, "step": 12808 }, { "epoch": 12.316346153846155, "grad_norm": 0.07212735712528229, "learning_rate": 1.0733044545569675e-05, "loss": 0.0003, "step": 12809 }, { "epoch": 12.317307692307692, "grad_norm": 0.03895391896367073, "learning_rate": 1.0731802216117832e-05, "loss": 0.0003, "step": 12810 }, { "epoch": 12.31826923076923, "grad_norm": 0.019389593973755836, "learning_rate": 1.0730559875310566e-05, "loss": 0.0001, "step": 12811 }, { "epoch": 12.319230769230769, "grad_norm": 0.20439735054969788, "learning_rate": 1.0729317523167158e-05, "loss": 0.0013, "step": 12812 }, { "epoch": 12.320192307692308, "grad_norm": 0.013729101978242397, "learning_rate": 1.0728075159706881e-05, "loss": 0.0001, "step": 12813 }, { "epoch": 12.321153846153846, "grad_norm": 0.13729827105998993, "learning_rate": 1.0726832784949013e-05, "loss": 0.0008, "step": 12814 }, { "epoch": 12.322115384615385, "grad_norm": 1.304315447807312, "learning_rate": 1.0725590398912835e-05, "loss": 0.0051, "step": 12815 }, { "epoch": 12.323076923076924, "grad_norm": 1.610916256904602, "learning_rate": 1.0724348001617626e-05, "loss": 0.0069, "step": 12816 }, { "epoch": 12.324038461538462, "grad_norm": 1.5617018938064575, "learning_rate": 1.0723105593082657e-05, "loss": 0.0054, "step": 12817 }, { "epoch": 12.325, "grad_norm": 0.16750425100326538, "learning_rate": 1.0721863173327214e-05, "loss": 0.0006, "step": 12818 }, { "epoch": 12.325961538461538, "grad_norm": 0.01885414868593216, "learning_rate": 1.0720620742370573e-05, "loss": 0.0001, "step": 12819 }, { "epoch": 12.326923076923077, "grad_norm": 0.38921037316322327, "learning_rate": 1.0719378300232013e-05, "loss": 0.0023, "step": 12820 }, { "epoch": 12.327884615384615, "grad_norm": 0.5147849917411804, "learning_rate": 1.0718135846930813e-05, "loss": 0.002, "step": 12821 }, { "epoch": 12.328846153846154, "grad_norm": 1.6135640144348145, "learning_rate": 1.0716893382486253e-05, "loss": 0.013, "step": 12822 }, { "epoch": 12.329807692307693, "grad_norm": 2.3395156860351562, "learning_rate": 1.0715650906917612e-05, "loss": 0.0064, "step": 12823 }, { "epoch": 12.330769230769231, "grad_norm": 0.25073930621147156, "learning_rate": 1.0714408420244169e-05, "loss": 0.0012, "step": 12824 }, { "epoch": 12.33173076923077, "grad_norm": 1.773189902305603, "learning_rate": 1.0713165922485203e-05, "loss": 0.0106, "step": 12825 }, { "epoch": 12.332692307692307, "grad_norm": 1.8992704153060913, "learning_rate": 1.0711923413659995e-05, "loss": 0.0026, "step": 12826 }, { "epoch": 12.333653846153846, "grad_norm": 0.25244519114494324, "learning_rate": 1.0710680893787825e-05, "loss": 0.0012, "step": 12827 }, { "epoch": 12.334615384615384, "grad_norm": 0.48392772674560547, "learning_rate": 1.0709438362887975e-05, "loss": 0.005, "step": 12828 }, { "epoch": 12.335576923076923, "grad_norm": 0.05806070566177368, "learning_rate": 1.0708195820979721e-05, "loss": 0.0004, "step": 12829 }, { "epoch": 12.336538461538462, "grad_norm": 5.3341779708862305, "learning_rate": 1.0706953268082349e-05, "loss": 0.0037, "step": 12830 }, { "epoch": 12.3375, "grad_norm": 0.03522014617919922, "learning_rate": 1.0705710704215138e-05, "loss": 0.0003, "step": 12831 }, { "epoch": 12.338461538461539, "grad_norm": 0.6240330338478088, "learning_rate": 1.0704468129397366e-05, "loss": 0.0022, "step": 12832 }, { "epoch": 12.339423076923078, "grad_norm": 0.6913948059082031, "learning_rate": 1.0703225543648317e-05, "loss": 0.0053, "step": 12833 }, { "epoch": 12.340384615384615, "grad_norm": 1.8083540201187134, "learning_rate": 1.0701982946987271e-05, "loss": 0.0098, "step": 12834 }, { "epoch": 12.341346153846153, "grad_norm": 1.6725202798843384, "learning_rate": 1.0700740339433509e-05, "loss": 0.0051, "step": 12835 }, { "epoch": 12.342307692307692, "grad_norm": 0.047108668833971024, "learning_rate": 1.0699497721006315e-05, "loss": 0.0001, "step": 12836 }, { "epoch": 12.34326923076923, "grad_norm": 0.5918806195259094, "learning_rate": 1.069825509172497e-05, "loss": 0.0024, "step": 12837 }, { "epoch": 12.34423076923077, "grad_norm": 0.04511353746056557, "learning_rate": 1.0697012451608756e-05, "loss": 0.0002, "step": 12838 }, { "epoch": 12.345192307692308, "grad_norm": 0.15584205090999603, "learning_rate": 1.069576980067695e-05, "loss": 0.0008, "step": 12839 }, { "epoch": 12.346153846153847, "grad_norm": 0.8780108094215393, "learning_rate": 1.0694527138948843e-05, "loss": 0.0039, "step": 12840 }, { "epoch": 12.347115384615385, "grad_norm": 0.1454925686120987, "learning_rate": 1.0693284466443711e-05, "loss": 0.0006, "step": 12841 }, { "epoch": 12.348076923076922, "grad_norm": 4.2751054763793945, "learning_rate": 1.0692041783180841e-05, "loss": 0.0718, "step": 12842 }, { "epoch": 12.349038461538461, "grad_norm": 2.0837888717651367, "learning_rate": 1.0690799089179511e-05, "loss": 0.0413, "step": 12843 }, { "epoch": 12.35, "grad_norm": 2.632570505142212, "learning_rate": 1.0689556384459008e-05, "loss": 0.0094, "step": 12844 }, { "epoch": 12.350961538461538, "grad_norm": 1.6082042455673218, "learning_rate": 1.0688313669038614e-05, "loss": 0.0035, "step": 12845 }, { "epoch": 12.351923076923077, "grad_norm": 0.5492042899131775, "learning_rate": 1.0687070942937612e-05, "loss": 0.0026, "step": 12846 }, { "epoch": 12.352884615384616, "grad_norm": 1.2764114141464233, "learning_rate": 1.0685828206175285e-05, "loss": 0.0073, "step": 12847 }, { "epoch": 12.353846153846154, "grad_norm": 0.2046603411436081, "learning_rate": 1.0684585458770918e-05, "loss": 0.0005, "step": 12848 }, { "epoch": 12.354807692307693, "grad_norm": 0.09084887057542801, "learning_rate": 1.0683342700743796e-05, "loss": 0.0004, "step": 12849 }, { "epoch": 12.35576923076923, "grad_norm": 1.197593331336975, "learning_rate": 1.0682099932113195e-05, "loss": 0.0052, "step": 12850 }, { "epoch": 12.356730769230769, "grad_norm": 0.027694206684827805, "learning_rate": 1.0680857152898407e-05, "loss": 0.0002, "step": 12851 }, { "epoch": 12.357692307692307, "grad_norm": 1.2943370342254639, "learning_rate": 1.0679614363118718e-05, "loss": 0.0044, "step": 12852 }, { "epoch": 12.358653846153846, "grad_norm": 3.731065034866333, "learning_rate": 1.0678371562793407e-05, "loss": 0.0214, "step": 12853 }, { "epoch": 12.359615384615385, "grad_norm": 0.07883709669113159, "learning_rate": 1.0677128751941757e-05, "loss": 0.0007, "step": 12854 }, { "epoch": 12.360576923076923, "grad_norm": 2.4859936237335205, "learning_rate": 1.0675885930583059e-05, "loss": 0.0254, "step": 12855 }, { "epoch": 12.361538461538462, "grad_norm": 1.8115739822387695, "learning_rate": 1.0674643098736596e-05, "loss": 0.0151, "step": 12856 }, { "epoch": 12.3625, "grad_norm": 1.2630912065505981, "learning_rate": 1.0673400256421649e-05, "loss": 0.0128, "step": 12857 }, { "epoch": 12.363461538461538, "grad_norm": 0.1518162190914154, "learning_rate": 1.0672157403657508e-05, "loss": 0.0007, "step": 12858 }, { "epoch": 12.364423076923076, "grad_norm": 0.780174970626831, "learning_rate": 1.067091454046346e-05, "loss": 0.0029, "step": 12859 }, { "epoch": 12.365384615384615, "grad_norm": 0.25113674998283386, "learning_rate": 1.0669671666858783e-05, "loss": 0.0016, "step": 12860 }, { "epoch": 12.366346153846154, "grad_norm": 1.3399908542633057, "learning_rate": 1.0668428782862767e-05, "loss": 0.0039, "step": 12861 }, { "epoch": 12.367307692307692, "grad_norm": 0.13219086825847626, "learning_rate": 1.0667185888494703e-05, "loss": 0.0007, "step": 12862 }, { "epoch": 12.368269230769231, "grad_norm": 0.40104618668556213, "learning_rate": 1.0665942983773867e-05, "loss": 0.0015, "step": 12863 }, { "epoch": 12.36923076923077, "grad_norm": 0.836529552936554, "learning_rate": 1.0664700068719554e-05, "loss": 0.0052, "step": 12864 }, { "epoch": 12.370192307692308, "grad_norm": 0.07361453026533127, "learning_rate": 1.0663457143351044e-05, "loss": 0.0003, "step": 12865 }, { "epoch": 12.371153846153845, "grad_norm": 0.0655575543642044, "learning_rate": 1.066221420768763e-05, "loss": 0.0003, "step": 12866 }, { "epoch": 12.372115384615384, "grad_norm": 0.6574098467826843, "learning_rate": 1.0660971261748592e-05, "loss": 0.0029, "step": 12867 }, { "epoch": 12.373076923076923, "grad_norm": 0.518675684928894, "learning_rate": 1.0659728305553218e-05, "loss": 0.0018, "step": 12868 }, { "epoch": 12.374038461538461, "grad_norm": 0.005541603546589613, "learning_rate": 1.0658485339120801e-05, "loss": 0.0, "step": 12869 }, { "epoch": 12.375, "grad_norm": 3.0120160579681396, "learning_rate": 1.0657242362470623e-05, "loss": 0.0155, "step": 12870 }, { "epoch": 12.375961538461539, "grad_norm": 1.4248875379562378, "learning_rate": 1.0655999375621973e-05, "loss": 0.009, "step": 12871 }, { "epoch": 12.376923076923077, "grad_norm": 2.723252534866333, "learning_rate": 1.0654756378594135e-05, "loss": 0.0309, "step": 12872 }, { "epoch": 12.377884615384616, "grad_norm": 0.18890836834907532, "learning_rate": 1.0653513371406403e-05, "loss": 0.0007, "step": 12873 }, { "epoch": 12.378846153846155, "grad_norm": 1.3459570407867432, "learning_rate": 1.0652270354078062e-05, "loss": 0.0172, "step": 12874 }, { "epoch": 12.379807692307692, "grad_norm": 1.7470600605010986, "learning_rate": 1.0651027326628396e-05, "loss": 0.0079, "step": 12875 }, { "epoch": 12.38076923076923, "grad_norm": 1.1231439113616943, "learning_rate": 1.0649784289076699e-05, "loss": 0.0038, "step": 12876 }, { "epoch": 12.381730769230769, "grad_norm": 1.779711127281189, "learning_rate": 1.0648541241442258e-05, "loss": 0.0166, "step": 12877 }, { "epoch": 12.382692307692308, "grad_norm": 0.0340263806283474, "learning_rate": 1.0647298183744359e-05, "loss": 0.0002, "step": 12878 }, { "epoch": 12.383653846153846, "grad_norm": 4.1869893074035645, "learning_rate": 1.064605511600229e-05, "loss": 0.021, "step": 12879 }, { "epoch": 12.384615384615385, "grad_norm": 0.02667084150016308, "learning_rate": 1.0644812038235343e-05, "loss": 0.0003, "step": 12880 }, { "epoch": 12.385576923076924, "grad_norm": 0.01040783803910017, "learning_rate": 1.0643568950462809e-05, "loss": 0.0001, "step": 12881 }, { "epoch": 12.386538461538462, "grad_norm": 6.970524787902832, "learning_rate": 1.0642325852703972e-05, "loss": 0.0356, "step": 12882 }, { "epoch": 12.3875, "grad_norm": 0.018726233392953873, "learning_rate": 1.064108274497812e-05, "loss": 0.0002, "step": 12883 }, { "epoch": 12.388461538461538, "grad_norm": 3.1878769397735596, "learning_rate": 1.0639839627304548e-05, "loss": 0.0703, "step": 12884 }, { "epoch": 12.389423076923077, "grad_norm": 2.411550998687744, "learning_rate": 1.0638596499702543e-05, "loss": 0.0232, "step": 12885 }, { "epoch": 12.390384615384615, "grad_norm": 0.03173087537288666, "learning_rate": 1.0637353362191392e-05, "loss": 0.0002, "step": 12886 }, { "epoch": 12.391346153846154, "grad_norm": 0.3077256381511688, "learning_rate": 1.063611021479039e-05, "loss": 0.0007, "step": 12887 }, { "epoch": 12.392307692307693, "grad_norm": 0.22052599489688873, "learning_rate": 1.0634867057518824e-05, "loss": 0.0016, "step": 12888 }, { "epoch": 12.393269230769231, "grad_norm": 1.5156445503234863, "learning_rate": 1.0633623890395985e-05, "loss": 0.0148, "step": 12889 }, { "epoch": 12.39423076923077, "grad_norm": 0.017875762656331062, "learning_rate": 1.0632380713441161e-05, "loss": 0.0001, "step": 12890 }, { "epoch": 12.395192307692307, "grad_norm": 1.9765260219573975, "learning_rate": 1.0631137526673647e-05, "loss": 0.004, "step": 12891 }, { "epoch": 12.396153846153846, "grad_norm": 0.23891369998455048, "learning_rate": 1.0629894330112729e-05, "loss": 0.0009, "step": 12892 }, { "epoch": 12.397115384615384, "grad_norm": 0.04781315103173256, "learning_rate": 1.0628651123777702e-05, "loss": 0.0003, "step": 12893 }, { "epoch": 12.398076923076923, "grad_norm": 0.7571129202842712, "learning_rate": 1.062740790768785e-05, "loss": 0.0031, "step": 12894 }, { "epoch": 12.399038461538462, "grad_norm": 0.06421919167041779, "learning_rate": 1.0626164681862473e-05, "loss": 0.0005, "step": 12895 }, { "epoch": 12.4, "grad_norm": 3.878005266189575, "learning_rate": 1.0624921446320857e-05, "loss": 0.0289, "step": 12896 }, { "epoch": 12.400961538461539, "grad_norm": 0.5189083814620972, "learning_rate": 1.0623678201082292e-05, "loss": 0.0017, "step": 12897 }, { "epoch": 12.401923076923078, "grad_norm": 0.023825006559491158, "learning_rate": 1.0622434946166073e-05, "loss": 0.0002, "step": 12898 }, { "epoch": 12.402884615384615, "grad_norm": 0.18121862411499023, "learning_rate": 1.0621191681591493e-05, "loss": 0.0006, "step": 12899 }, { "epoch": 12.403846153846153, "grad_norm": 3.262754201889038, "learning_rate": 1.061994840737784e-05, "loss": 0.0284, "step": 12900 }, { "epoch": 12.404807692307692, "grad_norm": 0.01758374273777008, "learning_rate": 1.0618705123544403e-05, "loss": 0.0002, "step": 12901 }, { "epoch": 12.40576923076923, "grad_norm": 4.050249099731445, "learning_rate": 1.0617461830110486e-05, "loss": 0.0819, "step": 12902 }, { "epoch": 12.40673076923077, "grad_norm": 1.4686908721923828, "learning_rate": 1.0616218527095368e-05, "loss": 0.0107, "step": 12903 }, { "epoch": 12.407692307692308, "grad_norm": 1.0079340934753418, "learning_rate": 1.061497521451835e-05, "loss": 0.0059, "step": 12904 }, { "epoch": 12.408653846153847, "grad_norm": 0.08593595772981644, "learning_rate": 1.0613731892398717e-05, "loss": 0.0004, "step": 12905 }, { "epoch": 12.409615384615385, "grad_norm": 1.1780136823654175, "learning_rate": 1.0612488560755772e-05, "loss": 0.0088, "step": 12906 }, { "epoch": 12.410576923076922, "grad_norm": 0.745063841342926, "learning_rate": 1.0611245219608798e-05, "loss": 0.003, "step": 12907 }, { "epoch": 12.411538461538461, "grad_norm": 0.7167472839355469, "learning_rate": 1.0610001868977094e-05, "loss": 0.0044, "step": 12908 }, { "epoch": 12.4125, "grad_norm": 0.1147441565990448, "learning_rate": 1.060875850887995e-05, "loss": 0.0007, "step": 12909 }, { "epoch": 12.413461538461538, "grad_norm": 0.4491330683231354, "learning_rate": 1.0607515139336663e-05, "loss": 0.0022, "step": 12910 }, { "epoch": 12.414423076923077, "grad_norm": 0.3067284822463989, "learning_rate": 1.0606271760366524e-05, "loss": 0.0008, "step": 12911 }, { "epoch": 12.415384615384616, "grad_norm": 2.755213737487793, "learning_rate": 1.0605028371988824e-05, "loss": 0.0445, "step": 12912 }, { "epoch": 12.416346153846154, "grad_norm": 0.014123410917818546, "learning_rate": 1.0603784974222862e-05, "loss": 0.0001, "step": 12913 }, { "epoch": 12.417307692307693, "grad_norm": 1.252386212348938, "learning_rate": 1.0602541567087927e-05, "loss": 0.0032, "step": 12914 }, { "epoch": 12.41826923076923, "grad_norm": 0.04413997009396553, "learning_rate": 1.0601298150603316e-05, "loss": 0.0003, "step": 12915 }, { "epoch": 12.419230769230769, "grad_norm": 0.054510537534952164, "learning_rate": 1.0600054724788325e-05, "loss": 0.0005, "step": 12916 }, { "epoch": 12.420192307692307, "grad_norm": 0.3372943103313446, "learning_rate": 1.0598811289662243e-05, "loss": 0.0023, "step": 12917 }, { "epoch": 12.421153846153846, "grad_norm": 1.8576765060424805, "learning_rate": 1.0597567845244369e-05, "loss": 0.0584, "step": 12918 }, { "epoch": 12.422115384615385, "grad_norm": 2.3520727157592773, "learning_rate": 1.0596324391553993e-05, "loss": 0.0099, "step": 12919 }, { "epoch": 12.423076923076923, "grad_norm": 0.5331741571426392, "learning_rate": 1.0595080928610416e-05, "loss": 0.004, "step": 12920 }, { "epoch": 12.424038461538462, "grad_norm": 0.1755894273519516, "learning_rate": 1.059383745643293e-05, "loss": 0.0006, "step": 12921 }, { "epoch": 12.425, "grad_norm": 11.890891075134277, "learning_rate": 1.0592593975040825e-05, "loss": 0.0463, "step": 12922 }, { "epoch": 12.425961538461538, "grad_norm": 0.2928774356842041, "learning_rate": 1.0591350484453404e-05, "loss": 0.0012, "step": 12923 }, { "epoch": 12.426923076923076, "grad_norm": 0.6348920464515686, "learning_rate": 1.0590106984689959e-05, "loss": 0.0022, "step": 12924 }, { "epoch": 12.427884615384615, "grad_norm": 1.2374638319015503, "learning_rate": 1.0588863475769785e-05, "loss": 0.0051, "step": 12925 }, { "epoch": 12.428846153846154, "grad_norm": 0.2948172986507416, "learning_rate": 1.0587619957712176e-05, "loss": 0.0018, "step": 12926 }, { "epoch": 12.429807692307692, "grad_norm": 0.6722233891487122, "learning_rate": 1.0586376430536434e-05, "loss": 0.0023, "step": 12927 }, { "epoch": 12.430769230769231, "grad_norm": 0.321811705827713, "learning_rate": 1.058513289426185e-05, "loss": 0.0013, "step": 12928 }, { "epoch": 12.43173076923077, "grad_norm": 0.6362225413322449, "learning_rate": 1.0583889348907718e-05, "loss": 0.0013, "step": 12929 }, { "epoch": 12.432692307692308, "grad_norm": 0.8152300119400024, "learning_rate": 1.0582645794493337e-05, "loss": 0.0046, "step": 12930 }, { "epoch": 12.433653846153845, "grad_norm": 0.24810177087783813, "learning_rate": 1.0581402231038007e-05, "loss": 0.0009, "step": 12931 }, { "epoch": 12.434615384615384, "grad_norm": 2.8752493858337402, "learning_rate": 1.058015865856102e-05, "loss": 0.0131, "step": 12932 }, { "epoch": 12.435576923076923, "grad_norm": 3.2277281284332275, "learning_rate": 1.0578915077081673e-05, "loss": 0.0369, "step": 12933 }, { "epoch": 12.436538461538461, "grad_norm": 0.16608433425426483, "learning_rate": 1.057767148661926e-05, "loss": 0.0007, "step": 12934 }, { "epoch": 12.4375, "grad_norm": 1.5514500141143799, "learning_rate": 1.0576427887193083e-05, "loss": 0.0073, "step": 12935 }, { "epoch": 12.438461538461539, "grad_norm": 0.25253239274024963, "learning_rate": 1.057518427882244e-05, "loss": 0.0014, "step": 12936 }, { "epoch": 12.439423076923077, "grad_norm": 1.2818557024002075, "learning_rate": 1.0573940661526621e-05, "loss": 0.033, "step": 12937 }, { "epoch": 12.440384615384616, "grad_norm": 0.2293855994939804, "learning_rate": 1.0572697035324928e-05, "loss": 0.001, "step": 12938 }, { "epoch": 12.441346153846155, "grad_norm": 0.9707685708999634, "learning_rate": 1.057145340023666e-05, "loss": 0.0101, "step": 12939 }, { "epoch": 12.442307692307692, "grad_norm": 0.8353589177131653, "learning_rate": 1.0570209756281111e-05, "loss": 0.0048, "step": 12940 }, { "epoch": 12.44326923076923, "grad_norm": 1.3781774044036865, "learning_rate": 1.0568966103477579e-05, "loss": 0.0081, "step": 12941 }, { "epoch": 12.444230769230769, "grad_norm": 3.2007453441619873, "learning_rate": 1.0567722441845364e-05, "loss": 0.0429, "step": 12942 }, { "epoch": 12.445192307692308, "grad_norm": 1.9243446588516235, "learning_rate": 1.0566478771403763e-05, "loss": 0.0118, "step": 12943 }, { "epoch": 12.446153846153846, "grad_norm": 0.0800950899720192, "learning_rate": 1.0565235092172075e-05, "loss": 0.0005, "step": 12944 }, { "epoch": 12.447115384615385, "grad_norm": 3.948979377746582, "learning_rate": 1.0563991404169595e-05, "loss": 0.0186, "step": 12945 }, { "epoch": 12.448076923076924, "grad_norm": 0.5059509873390198, "learning_rate": 1.0562747707415626e-05, "loss": 0.0073, "step": 12946 }, { "epoch": 12.449038461538462, "grad_norm": 1.108991026878357, "learning_rate": 1.0561504001929466e-05, "loss": 0.003, "step": 12947 }, { "epoch": 12.45, "grad_norm": 3.366262435913086, "learning_rate": 1.0560260287730406e-05, "loss": 0.0353, "step": 12948 }, { "epoch": 12.450961538461538, "grad_norm": 0.6539415717124939, "learning_rate": 1.0559016564837753e-05, "loss": 0.0039, "step": 12949 }, { "epoch": 12.451923076923077, "grad_norm": 0.19487035274505615, "learning_rate": 1.0557772833270807e-05, "loss": 0.0008, "step": 12950 }, { "epoch": 12.452884615384615, "grad_norm": 1.7657582759857178, "learning_rate": 1.0556529093048862e-05, "loss": 0.0049, "step": 12951 }, { "epoch": 12.453846153846154, "grad_norm": 0.8857653141021729, "learning_rate": 1.0555285344191217e-05, "loss": 0.0033, "step": 12952 }, { "epoch": 12.454807692307693, "grad_norm": 0.8771275877952576, "learning_rate": 1.0554041586717174e-05, "loss": 0.0026, "step": 12953 }, { "epoch": 12.455769230769231, "grad_norm": 0.9680629372596741, "learning_rate": 1.0552797820646033e-05, "loss": 0.0061, "step": 12954 }, { "epoch": 12.45673076923077, "grad_norm": 0.03628453612327576, "learning_rate": 1.0551554045997092e-05, "loss": 0.0003, "step": 12955 }, { "epoch": 12.457692307692307, "grad_norm": 0.031593482941389084, "learning_rate": 1.055031026278965e-05, "loss": 0.0002, "step": 12956 }, { "epoch": 12.458653846153846, "grad_norm": 0.01789233088493347, "learning_rate": 1.0549066471043008e-05, "loss": 0.0001, "step": 12957 }, { "epoch": 12.459615384615384, "grad_norm": 0.6747738718986511, "learning_rate": 1.0547822670776466e-05, "loss": 0.0025, "step": 12958 }, { "epoch": 12.460576923076923, "grad_norm": 0.025344964116811752, "learning_rate": 1.0546578862009323e-05, "loss": 0.0003, "step": 12959 }, { "epoch": 12.461538461538462, "grad_norm": 0.3214167058467865, "learning_rate": 1.054533504476088e-05, "loss": 0.0013, "step": 12960 }, { "epoch": 12.4625, "grad_norm": 0.25936299562454224, "learning_rate": 1.054409121905044e-05, "loss": 0.0013, "step": 12961 }, { "epoch": 12.463461538461539, "grad_norm": 1.1198982000350952, "learning_rate": 1.0542847384897301e-05, "loss": 0.0032, "step": 12962 }, { "epoch": 12.464423076923078, "grad_norm": 0.04585712030529976, "learning_rate": 1.0541603542320762e-05, "loss": 0.0003, "step": 12963 }, { "epoch": 12.465384615384615, "grad_norm": 1.3969789743423462, "learning_rate": 1.0540359691340127e-05, "loss": 0.0064, "step": 12964 }, { "epoch": 12.466346153846153, "grad_norm": 0.06250379979610443, "learning_rate": 1.0539115831974695e-05, "loss": 0.0004, "step": 12965 }, { "epoch": 12.467307692307692, "grad_norm": 1.4007281064987183, "learning_rate": 1.0537871964243766e-05, "loss": 0.0085, "step": 12966 }, { "epoch": 12.46826923076923, "grad_norm": 0.31679028272628784, "learning_rate": 1.0536628088166645e-05, "loss": 0.0022, "step": 12967 }, { "epoch": 12.46923076923077, "grad_norm": 1.3376857042312622, "learning_rate": 1.053538420376263e-05, "loss": 0.0093, "step": 12968 }, { "epoch": 12.470192307692308, "grad_norm": 0.5849937796592712, "learning_rate": 1.0534140311051026e-05, "loss": 0.0032, "step": 12969 }, { "epoch": 12.471153846153847, "grad_norm": 0.268352210521698, "learning_rate": 1.0532896410051128e-05, "loss": 0.001, "step": 12970 }, { "epoch": 12.472115384615385, "grad_norm": 2.5980000495910645, "learning_rate": 1.0531652500782246e-05, "loss": 0.0078, "step": 12971 }, { "epoch": 12.473076923076922, "grad_norm": 4.163290500640869, "learning_rate": 1.0530408583263674e-05, "loss": 0.2063, "step": 12972 }, { "epoch": 12.474038461538461, "grad_norm": 2.1663331985473633, "learning_rate": 1.0529164657514722e-05, "loss": 0.0409, "step": 12973 }, { "epoch": 12.475, "grad_norm": 0.008271679282188416, "learning_rate": 1.052792072355468e-05, "loss": 0.0001, "step": 12974 }, { "epoch": 12.475961538461538, "grad_norm": 0.02633022703230381, "learning_rate": 1.0526676781402862e-05, "loss": 0.0003, "step": 12975 }, { "epoch": 12.476923076923077, "grad_norm": 0.052973672747612, "learning_rate": 1.0525432831078568e-05, "loss": 0.0003, "step": 12976 }, { "epoch": 12.477884615384616, "grad_norm": 1.2087470293045044, "learning_rate": 1.0524188872601095e-05, "loss": 0.0084, "step": 12977 }, { "epoch": 12.478846153846154, "grad_norm": 0.04555433243513107, "learning_rate": 1.0522944905989751e-05, "loss": 0.0004, "step": 12978 }, { "epoch": 12.479807692307693, "grad_norm": 0.35728880763053894, "learning_rate": 1.0521700931263838e-05, "loss": 0.0016, "step": 12979 }, { "epoch": 12.48076923076923, "grad_norm": 0.20160740613937378, "learning_rate": 1.0520456948442659e-05, "loss": 0.0016, "step": 12980 }, { "epoch": 12.481730769230769, "grad_norm": 1.5992234945297241, "learning_rate": 1.051921295754551e-05, "loss": 0.0075, "step": 12981 }, { "epoch": 12.482692307692307, "grad_norm": 0.11144516617059708, "learning_rate": 1.0517968958591705e-05, "loss": 0.0006, "step": 12982 }, { "epoch": 12.483653846153846, "grad_norm": 0.24836036562919617, "learning_rate": 1.0516724951600541e-05, "loss": 0.0017, "step": 12983 }, { "epoch": 12.484615384615385, "grad_norm": 2.3929476737976074, "learning_rate": 1.0515480936591323e-05, "loss": 0.0091, "step": 12984 }, { "epoch": 12.485576923076923, "grad_norm": 0.1479024887084961, "learning_rate": 1.0514236913583349e-05, "loss": 0.0006, "step": 12985 }, { "epoch": 12.486538461538462, "grad_norm": 0.05701201409101486, "learning_rate": 1.0512992882595931e-05, "loss": 0.0006, "step": 12986 }, { "epoch": 12.4875, "grad_norm": 1.1958463191986084, "learning_rate": 1.051174884364837e-05, "loss": 0.0037, "step": 12987 }, { "epoch": 12.488461538461538, "grad_norm": 0.5944148302078247, "learning_rate": 1.0510504796759966e-05, "loss": 0.0019, "step": 12988 }, { "epoch": 12.489423076923076, "grad_norm": 3.5516295433044434, "learning_rate": 1.0509260741950026e-05, "loss": 0.0269, "step": 12989 }, { "epoch": 12.490384615384615, "grad_norm": 0.043633706867694855, "learning_rate": 1.0508016679237859e-05, "loss": 0.0005, "step": 12990 }, { "epoch": 12.491346153846154, "grad_norm": 2.8535735607147217, "learning_rate": 1.0506772608642759e-05, "loss": 0.0154, "step": 12991 }, { "epoch": 12.492307692307692, "grad_norm": 4.405535697937012, "learning_rate": 1.0505528530184037e-05, "loss": 0.0316, "step": 12992 }, { "epoch": 12.493269230769231, "grad_norm": 0.19347435235977173, "learning_rate": 1.0504284443880997e-05, "loss": 0.0012, "step": 12993 }, { "epoch": 12.49423076923077, "grad_norm": 2.2149617671966553, "learning_rate": 1.050304034975294e-05, "loss": 0.0546, "step": 12994 }, { "epoch": 12.495192307692308, "grad_norm": 0.042185381054878235, "learning_rate": 1.0501796247819176e-05, "loss": 0.0004, "step": 12995 }, { "epoch": 12.496153846153845, "grad_norm": 1.1340030431747437, "learning_rate": 1.0500552138099004e-05, "loss": 0.0243, "step": 12996 }, { "epoch": 12.497115384615384, "grad_norm": 2.1321239471435547, "learning_rate": 1.0499308020611733e-05, "loss": 0.0368, "step": 12997 }, { "epoch": 12.498076923076923, "grad_norm": 0.28462961316108704, "learning_rate": 1.0498063895376669e-05, "loss": 0.0011, "step": 12998 }, { "epoch": 12.499038461538461, "grad_norm": 3.7389442920684814, "learning_rate": 1.0496819762413114e-05, "loss": 0.0469, "step": 12999 }, { "epoch": 12.5, "grad_norm": 0.24181044101715088, "learning_rate": 1.0495575621740372e-05, "loss": 0.0012, "step": 13000 }, { "epoch": 12.500961538461539, "grad_norm": 3.658289909362793, "learning_rate": 1.0494331473377754e-05, "loss": 0.0208, "step": 13001 }, { "epoch": 12.501923076923077, "grad_norm": 0.044955380260944366, "learning_rate": 1.0493087317344562e-05, "loss": 0.0005, "step": 13002 }, { "epoch": 12.502884615384616, "grad_norm": 3.5015487670898438, "learning_rate": 1.0491843153660099e-05, "loss": 0.0508, "step": 13003 }, { "epoch": 12.503846153846155, "grad_norm": 0.04912986978888512, "learning_rate": 1.0490598982343678e-05, "loss": 0.0005, "step": 13004 }, { "epoch": 12.504807692307692, "grad_norm": 1.5155342817306519, "learning_rate": 1.04893548034146e-05, "loss": 0.0082, "step": 13005 }, { "epoch": 12.50576923076923, "grad_norm": 0.10590625554323196, "learning_rate": 1.0488110616892169e-05, "loss": 0.0005, "step": 13006 }, { "epoch": 12.506730769230769, "grad_norm": 0.19465182721614838, "learning_rate": 1.0486866422795694e-05, "loss": 0.001, "step": 13007 }, { "epoch": 12.507692307692308, "grad_norm": 0.18840277194976807, "learning_rate": 1.0485622221144485e-05, "loss": 0.001, "step": 13008 }, { "epoch": 12.508653846153846, "grad_norm": 1.6630247831344604, "learning_rate": 1.0484378011957841e-05, "loss": 0.0096, "step": 13009 }, { "epoch": 12.509615384615385, "grad_norm": 1.9665940999984741, "learning_rate": 1.0483133795255072e-05, "loss": 0.0106, "step": 13010 }, { "epoch": 12.510576923076924, "grad_norm": 0.1051097884774208, "learning_rate": 1.0481889571055487e-05, "loss": 0.0008, "step": 13011 }, { "epoch": 12.51153846153846, "grad_norm": 2.274458646774292, "learning_rate": 1.0480645339378386e-05, "loss": 0.0234, "step": 13012 }, { "epoch": 12.5125, "grad_norm": 1.9745206832885742, "learning_rate": 1.0479401100243083e-05, "loss": 0.0124, "step": 13013 }, { "epoch": 12.513461538461538, "grad_norm": 0.10695755481719971, "learning_rate": 1.0478156853668882e-05, "loss": 0.0008, "step": 13014 }, { "epoch": 12.514423076923077, "grad_norm": 0.13234592974185944, "learning_rate": 1.0476912599675089e-05, "loss": 0.0011, "step": 13015 }, { "epoch": 12.515384615384615, "grad_norm": 0.05419882759451866, "learning_rate": 1.0475668338281013e-05, "loss": 0.0003, "step": 13016 }, { "epoch": 12.516346153846154, "grad_norm": 0.008113151416182518, "learning_rate": 1.0474424069505958e-05, "loss": 0.0001, "step": 13017 }, { "epoch": 12.517307692307693, "grad_norm": 2.8704042434692383, "learning_rate": 1.0473179793369237e-05, "loss": 0.0117, "step": 13018 }, { "epoch": 12.518269230769231, "grad_norm": 0.07705240696668625, "learning_rate": 1.0471935509890152e-05, "loss": 0.0006, "step": 13019 }, { "epoch": 12.51923076923077, "grad_norm": 0.059182703495025635, "learning_rate": 1.0470691219088016e-05, "loss": 0.0005, "step": 13020 }, { "epoch": 12.520192307692307, "grad_norm": 0.20132391154766083, "learning_rate": 1.046944692098213e-05, "loss": 0.0006, "step": 13021 }, { "epoch": 12.521153846153846, "grad_norm": 2.1562037467956543, "learning_rate": 1.0468202615591807e-05, "loss": 0.0094, "step": 13022 }, { "epoch": 12.522115384615384, "grad_norm": 1.117196798324585, "learning_rate": 1.0466958302936356e-05, "loss": 0.0426, "step": 13023 }, { "epoch": 12.523076923076923, "grad_norm": 1.4403139352798462, "learning_rate": 1.0465713983035082e-05, "loss": 0.0208, "step": 13024 }, { "epoch": 12.524038461538462, "grad_norm": 0.8783336877822876, "learning_rate": 1.046446965590729e-05, "loss": 0.0053, "step": 13025 }, { "epoch": 12.525, "grad_norm": 0.2622036635875702, "learning_rate": 1.0463225321572294e-05, "loss": 0.0021, "step": 13026 }, { "epoch": 12.525961538461539, "grad_norm": 2.9394357204437256, "learning_rate": 1.0461980980049401e-05, "loss": 0.0336, "step": 13027 }, { "epoch": 12.526923076923078, "grad_norm": 0.26923656463623047, "learning_rate": 1.046073663135792e-05, "loss": 0.0013, "step": 13028 }, { "epoch": 12.527884615384615, "grad_norm": 1.835957407951355, "learning_rate": 1.0459492275517158e-05, "loss": 0.011, "step": 13029 }, { "epoch": 12.528846153846153, "grad_norm": 0.2271452248096466, "learning_rate": 1.0458247912546427e-05, "loss": 0.0008, "step": 13030 }, { "epoch": 12.529807692307692, "grad_norm": 0.22667674720287323, "learning_rate": 1.0457003542465029e-05, "loss": 0.0018, "step": 13031 }, { "epoch": 12.53076923076923, "grad_norm": 1.0188361406326294, "learning_rate": 1.0455759165292278e-05, "loss": 0.0054, "step": 13032 }, { "epoch": 12.53173076923077, "grad_norm": 0.5189208388328552, "learning_rate": 1.0454514781047484e-05, "loss": 0.0018, "step": 13033 }, { "epoch": 12.532692307692308, "grad_norm": 2.048388957977295, "learning_rate": 1.0453270389749956e-05, "loss": 0.0171, "step": 13034 }, { "epoch": 12.533653846153847, "grad_norm": 2.7769272327423096, "learning_rate": 1.0452025991419e-05, "loss": 0.0605, "step": 13035 }, { "epoch": 12.534615384615385, "grad_norm": 2.9137630462646484, "learning_rate": 1.0450781586073926e-05, "loss": 0.054, "step": 13036 }, { "epoch": 12.535576923076922, "grad_norm": 0.6355502605438232, "learning_rate": 1.0449537173734045e-05, "loss": 0.0019, "step": 13037 }, { "epoch": 12.536538461538461, "grad_norm": 0.4164009392261505, "learning_rate": 1.0448292754418669e-05, "loss": 0.0018, "step": 13038 }, { "epoch": 12.5375, "grad_norm": 0.04128037765622139, "learning_rate": 1.0447048328147103e-05, "loss": 0.0004, "step": 13039 }, { "epoch": 12.538461538461538, "grad_norm": 4.039883613586426, "learning_rate": 1.044580389493866e-05, "loss": 0.0488, "step": 13040 }, { "epoch": 12.539423076923077, "grad_norm": 0.04632886126637459, "learning_rate": 1.044455945481265e-05, "loss": 0.0004, "step": 13041 }, { "epoch": 12.540384615384616, "grad_norm": 0.5954341888427734, "learning_rate": 1.0443315007788381e-05, "loss": 0.0035, "step": 13042 }, { "epoch": 12.541346153846154, "grad_norm": 0.5300947427749634, "learning_rate": 1.0442070553885162e-05, "loss": 0.0046, "step": 13043 }, { "epoch": 12.542307692307693, "grad_norm": 0.04000864923000336, "learning_rate": 1.0440826093122309e-05, "loss": 0.0002, "step": 13044 }, { "epoch": 12.54326923076923, "grad_norm": 1.4774646759033203, "learning_rate": 1.043958162551913e-05, "loss": 0.0038, "step": 13045 }, { "epoch": 12.544230769230769, "grad_norm": 0.8323326706886292, "learning_rate": 1.043833715109493e-05, "loss": 0.0071, "step": 13046 }, { "epoch": 12.545192307692307, "grad_norm": 1.1549139022827148, "learning_rate": 1.0437092669869025e-05, "loss": 0.0062, "step": 13047 }, { "epoch": 12.546153846153846, "grad_norm": 1.8280996084213257, "learning_rate": 1.0435848181860724e-05, "loss": 0.018, "step": 13048 }, { "epoch": 12.547115384615385, "grad_norm": 1.6253085136413574, "learning_rate": 1.043460368708934e-05, "loss": 0.0086, "step": 13049 }, { "epoch": 12.548076923076923, "grad_norm": 5.367619514465332, "learning_rate": 1.043335918557418e-05, "loss": 0.1269, "step": 13050 }, { "epoch": 12.549038461538462, "grad_norm": 1.5968382358551025, "learning_rate": 1.043211467733456e-05, "loss": 0.0055, "step": 13051 }, { "epoch": 12.55, "grad_norm": 1.057041049003601, "learning_rate": 1.0430870162389787e-05, "loss": 0.0035, "step": 13052 }, { "epoch": 12.55096153846154, "grad_norm": 1.6837108135223389, "learning_rate": 1.0429625640759175e-05, "loss": 0.009, "step": 13053 }, { "epoch": 12.551923076923076, "grad_norm": 0.04519711062312126, "learning_rate": 1.0428381112462035e-05, "loss": 0.0005, "step": 13054 }, { "epoch": 12.552884615384615, "grad_norm": 0.9189910292625427, "learning_rate": 1.0427136577517674e-05, "loss": 0.004, "step": 13055 }, { "epoch": 12.553846153846154, "grad_norm": 0.12138361483812332, "learning_rate": 1.0425892035945411e-05, "loss": 0.001, "step": 13056 }, { "epoch": 12.554807692307692, "grad_norm": 0.13487447798252106, "learning_rate": 1.042464748776455e-05, "loss": 0.0013, "step": 13057 }, { "epoch": 12.555769230769231, "grad_norm": 0.057577334344387054, "learning_rate": 1.0423402932994406e-05, "loss": 0.0006, "step": 13058 }, { "epoch": 12.55673076923077, "grad_norm": 2.0741443634033203, "learning_rate": 1.0422158371654295e-05, "loss": 0.0231, "step": 13059 }, { "epoch": 12.557692307692308, "grad_norm": 0.11513052880764008, "learning_rate": 1.0420913803763522e-05, "loss": 0.0009, "step": 13060 }, { "epoch": 12.558653846153845, "grad_norm": 1.667931318283081, "learning_rate": 1.0419669229341402e-05, "loss": 0.0176, "step": 13061 }, { "epoch": 12.559615384615384, "grad_norm": 3.7646634578704834, "learning_rate": 1.0418424648407249e-05, "loss": 0.0325, "step": 13062 }, { "epoch": 12.560576923076923, "grad_norm": 0.42679908871650696, "learning_rate": 1.0417180060980375e-05, "loss": 0.0017, "step": 13063 }, { "epoch": 12.561538461538461, "grad_norm": 0.3037842810153961, "learning_rate": 1.0415935467080087e-05, "loss": 0.0014, "step": 13064 }, { "epoch": 12.5625, "grad_norm": 0.17589236795902252, "learning_rate": 1.04146908667257e-05, "loss": 0.0012, "step": 13065 }, { "epoch": 12.563461538461539, "grad_norm": 0.24927708506584167, "learning_rate": 1.0413446259936532e-05, "loss": 0.0014, "step": 13066 }, { "epoch": 12.564423076923077, "grad_norm": 2.164278984069824, "learning_rate": 1.0412201646731893e-05, "loss": 0.0107, "step": 13067 }, { "epoch": 12.565384615384616, "grad_norm": 1.7535004615783691, "learning_rate": 1.0410957027131089e-05, "loss": 0.013, "step": 13068 }, { "epoch": 12.566346153846155, "grad_norm": 0.018743300810456276, "learning_rate": 1.0409712401153441e-05, "loss": 0.0002, "step": 13069 }, { "epoch": 12.567307692307692, "grad_norm": 0.28333014249801636, "learning_rate": 1.040846776881826e-05, "loss": 0.0015, "step": 13070 }, { "epoch": 12.56826923076923, "grad_norm": 2.808687210083008, "learning_rate": 1.0407223130144856e-05, "loss": 0.0227, "step": 13071 }, { "epoch": 12.569230769230769, "grad_norm": 1.015148401260376, "learning_rate": 1.0405978485152544e-05, "loss": 0.0043, "step": 13072 }, { "epoch": 12.570192307692308, "grad_norm": 0.8553012609481812, "learning_rate": 1.0404733833860639e-05, "loss": 0.0107, "step": 13073 }, { "epoch": 12.571153846153846, "grad_norm": 1.4858802556991577, "learning_rate": 1.0403489176288452e-05, "loss": 0.0082, "step": 13074 }, { "epoch": 12.572115384615385, "grad_norm": 1.025696039199829, "learning_rate": 1.0402244512455298e-05, "loss": 0.0028, "step": 13075 }, { "epoch": 12.573076923076924, "grad_norm": 1.025507926940918, "learning_rate": 1.0400999842380488e-05, "loss": 0.0062, "step": 13076 }, { "epoch": 12.57403846153846, "grad_norm": 2.903841495513916, "learning_rate": 1.0399755166083339e-05, "loss": 0.0336, "step": 13077 }, { "epoch": 12.575, "grad_norm": 5.54362154006958, "learning_rate": 1.0398510483583164e-05, "loss": 0.0078, "step": 13078 }, { "epoch": 12.575961538461538, "grad_norm": 0.19152046740055084, "learning_rate": 1.0397265794899271e-05, "loss": 0.0016, "step": 13079 }, { "epoch": 12.576923076923077, "grad_norm": 0.8413310647010803, "learning_rate": 1.0396021100050984e-05, "loss": 0.0045, "step": 13080 }, { "epoch": 12.577884615384615, "grad_norm": 2.518596649169922, "learning_rate": 1.0394776399057611e-05, "loss": 0.0527, "step": 13081 }, { "epoch": 12.578846153846154, "grad_norm": 1.0154248476028442, "learning_rate": 1.0393531691938465e-05, "loss": 0.0073, "step": 13082 }, { "epoch": 12.579807692307693, "grad_norm": 2.541156530380249, "learning_rate": 1.0392286978712862e-05, "loss": 0.031, "step": 13083 }, { "epoch": 12.580769230769231, "grad_norm": 0.12102396041154861, "learning_rate": 1.0391042259400119e-05, "loss": 0.0008, "step": 13084 }, { "epoch": 12.58173076923077, "grad_norm": 1.6276075839996338, "learning_rate": 1.0389797534019548e-05, "loss": 0.0059, "step": 13085 }, { "epoch": 12.582692307692307, "grad_norm": 1.5903984308242798, "learning_rate": 1.0388552802590461e-05, "loss": 0.0072, "step": 13086 }, { "epoch": 12.583653846153846, "grad_norm": 2.597376823425293, "learning_rate": 1.0387308065132173e-05, "loss": 0.0211, "step": 13087 }, { "epoch": 12.584615384615384, "grad_norm": 2.366765260696411, "learning_rate": 1.0386063321664007e-05, "loss": 0.0162, "step": 13088 }, { "epoch": 12.585576923076923, "grad_norm": 0.21967312693595886, "learning_rate": 1.0384818572205266e-05, "loss": 0.0009, "step": 13089 }, { "epoch": 12.586538461538462, "grad_norm": 1.063942551612854, "learning_rate": 1.038357381677527e-05, "loss": 0.0042, "step": 13090 }, { "epoch": 12.5875, "grad_norm": 1.1130024194717407, "learning_rate": 1.0382329055393337e-05, "loss": 0.0132, "step": 13091 }, { "epoch": 12.588461538461539, "grad_norm": 0.21439988911151886, "learning_rate": 1.0381084288078778e-05, "loss": 0.0008, "step": 13092 }, { "epoch": 12.589423076923078, "grad_norm": 0.4493701457977295, "learning_rate": 1.0379839514850912e-05, "loss": 0.0034, "step": 13093 }, { "epoch": 12.590384615384615, "grad_norm": 0.8124949932098389, "learning_rate": 1.0378594735729046e-05, "loss": 0.0036, "step": 13094 }, { "epoch": 12.591346153846153, "grad_norm": 0.16714511811733246, "learning_rate": 1.0377349950732505e-05, "loss": 0.0008, "step": 13095 }, { "epoch": 12.592307692307692, "grad_norm": 0.32720234990119934, "learning_rate": 1.0376105159880599e-05, "loss": 0.0011, "step": 13096 }, { "epoch": 12.59326923076923, "grad_norm": 0.018943261355161667, "learning_rate": 1.0374860363192642e-05, "loss": 0.0002, "step": 13097 }, { "epoch": 12.59423076923077, "grad_norm": 2.1579811573028564, "learning_rate": 1.0373615560687957e-05, "loss": 0.0199, "step": 13098 }, { "epoch": 12.595192307692308, "grad_norm": 0.39956390857696533, "learning_rate": 1.0372370752385854e-05, "loss": 0.0028, "step": 13099 }, { "epoch": 12.596153846153847, "grad_norm": 0.6971926689147949, "learning_rate": 1.0371125938305648e-05, "loss": 0.0043, "step": 13100 }, { "epoch": 12.597115384615385, "grad_norm": 0.2298157811164856, "learning_rate": 1.0369881118466658e-05, "loss": 0.0015, "step": 13101 }, { "epoch": 12.598076923076922, "grad_norm": 0.3029814064502716, "learning_rate": 1.0368636292888199e-05, "loss": 0.0014, "step": 13102 }, { "epoch": 12.599038461538461, "grad_norm": 1.127758264541626, "learning_rate": 1.0367391461589587e-05, "loss": 0.0044, "step": 13103 }, { "epoch": 12.6, "grad_norm": 0.41802793741226196, "learning_rate": 1.0366146624590137e-05, "loss": 0.002, "step": 13104 }, { "epoch": 12.600961538461538, "grad_norm": 0.01956181973218918, "learning_rate": 1.0364901781909165e-05, "loss": 0.0002, "step": 13105 }, { "epoch": 12.601923076923077, "grad_norm": 1.7490787506103516, "learning_rate": 1.0363656933565989e-05, "loss": 0.0034, "step": 13106 }, { "epoch": 12.602884615384616, "grad_norm": 2.984802722930908, "learning_rate": 1.0362412079579925e-05, "loss": 0.0078, "step": 13107 }, { "epoch": 12.603846153846154, "grad_norm": 0.9601401090621948, "learning_rate": 1.0361167219970287e-05, "loss": 0.0041, "step": 13108 }, { "epoch": 12.604807692307693, "grad_norm": 0.47813910245895386, "learning_rate": 1.0359922354756398e-05, "loss": 0.0012, "step": 13109 }, { "epoch": 12.60576923076923, "grad_norm": 0.13444975018501282, "learning_rate": 1.0358677483957567e-05, "loss": 0.0009, "step": 13110 }, { "epoch": 12.606730769230769, "grad_norm": 2.1158745288848877, "learning_rate": 1.0357432607593116e-05, "loss": 0.0146, "step": 13111 }, { "epoch": 12.607692307692307, "grad_norm": 0.21265414357185364, "learning_rate": 1.0356187725682359e-05, "loss": 0.0013, "step": 13112 }, { "epoch": 12.608653846153846, "grad_norm": 1.8977785110473633, "learning_rate": 1.0354942838244614e-05, "loss": 0.0138, "step": 13113 }, { "epoch": 12.609615384615385, "grad_norm": 0.017503982409834862, "learning_rate": 1.0353697945299198e-05, "loss": 0.0002, "step": 13114 }, { "epoch": 12.610576923076923, "grad_norm": 3.4364469051361084, "learning_rate": 1.035245304686543e-05, "loss": 0.0151, "step": 13115 }, { "epoch": 12.611538461538462, "grad_norm": 1.7513517141342163, "learning_rate": 1.0351208142962621e-05, "loss": 0.0068, "step": 13116 }, { "epoch": 12.6125, "grad_norm": 0.2557159960269928, "learning_rate": 1.0349963233610096e-05, "loss": 0.001, "step": 13117 }, { "epoch": 12.61346153846154, "grad_norm": 2.1641366481781006, "learning_rate": 1.034871831882717e-05, "loss": 0.0578, "step": 13118 }, { "epoch": 12.614423076923076, "grad_norm": 0.12644915282726288, "learning_rate": 1.0347473398633153e-05, "loss": 0.0007, "step": 13119 }, { "epoch": 12.615384615384615, "grad_norm": 0.4361416697502136, "learning_rate": 1.0346228473047373e-05, "loss": 0.0012, "step": 13120 }, { "epoch": 12.616346153846154, "grad_norm": 2.7799510955810547, "learning_rate": 1.0344983542089142e-05, "loss": 0.0284, "step": 13121 }, { "epoch": 12.617307692307692, "grad_norm": 0.8642948865890503, "learning_rate": 1.034373860577778e-05, "loss": 0.0026, "step": 13122 }, { "epoch": 12.618269230769231, "grad_norm": 0.17859198153018951, "learning_rate": 1.0342493664132602e-05, "loss": 0.0008, "step": 13123 }, { "epoch": 12.61923076923077, "grad_norm": 0.6901010274887085, "learning_rate": 1.034124871717293e-05, "loss": 0.0023, "step": 13124 }, { "epoch": 12.620192307692308, "grad_norm": 0.4454607367515564, "learning_rate": 1.0340003764918078e-05, "loss": 0.0017, "step": 13125 }, { "epoch": 12.621153846153845, "grad_norm": 1.5328130722045898, "learning_rate": 1.0338758807387369e-05, "loss": 0.0031, "step": 13126 }, { "epoch": 12.622115384615384, "grad_norm": 0.04443776607513428, "learning_rate": 1.033751384460011e-05, "loss": 0.0003, "step": 13127 }, { "epoch": 12.623076923076923, "grad_norm": 1.1904683113098145, "learning_rate": 1.0336268876575632e-05, "loss": 0.0038, "step": 13128 }, { "epoch": 12.624038461538461, "grad_norm": 3.3061180114746094, "learning_rate": 1.0335023903333248e-05, "loss": 0.0285, "step": 13129 }, { "epoch": 12.625, "grad_norm": 0.5620142817497253, "learning_rate": 1.0333778924892272e-05, "loss": 0.0178, "step": 13130 }, { "epoch": 12.625961538461539, "grad_norm": 0.05839763954281807, "learning_rate": 1.0332533941272032e-05, "loss": 0.0004, "step": 13131 }, { "epoch": 12.626923076923077, "grad_norm": 0.14016805589199066, "learning_rate": 1.033128895249184e-05, "loss": 0.0008, "step": 13132 }, { "epoch": 12.627884615384616, "grad_norm": 0.11092953383922577, "learning_rate": 1.0330043958571015e-05, "loss": 0.0006, "step": 13133 }, { "epoch": 12.628846153846155, "grad_norm": 3.849493980407715, "learning_rate": 1.0328798959528873e-05, "loss": 0.0801, "step": 13134 }, { "epoch": 12.629807692307692, "grad_norm": 0.1163124367594719, "learning_rate": 1.0327553955384742e-05, "loss": 0.0006, "step": 13135 }, { "epoch": 12.63076923076923, "grad_norm": 1.3215917348861694, "learning_rate": 1.0326308946157933e-05, "loss": 0.0048, "step": 13136 }, { "epoch": 12.631730769230769, "grad_norm": 0.010965057648718357, "learning_rate": 1.0325063931867767e-05, "loss": 0.0001, "step": 13137 }, { "epoch": 12.632692307692308, "grad_norm": 3.168210744857788, "learning_rate": 1.0323818912533561e-05, "loss": 0.0114, "step": 13138 }, { "epoch": 12.633653846153846, "grad_norm": 1.7324881553649902, "learning_rate": 1.0322573888174636e-05, "loss": 0.0646, "step": 13139 }, { "epoch": 12.634615384615385, "grad_norm": 0.0302962064743042, "learning_rate": 1.0321328858810314e-05, "loss": 0.0002, "step": 13140 }, { "epoch": 12.635576923076924, "grad_norm": 0.32502058148384094, "learning_rate": 1.0320083824459907e-05, "loss": 0.0016, "step": 13141 }, { "epoch": 12.63653846153846, "grad_norm": 0.3855636715888977, "learning_rate": 1.0318838785142743e-05, "loss": 0.0012, "step": 13142 }, { "epoch": 12.6375, "grad_norm": 2.6573896408081055, "learning_rate": 1.0317593740878133e-05, "loss": 0.1176, "step": 13143 }, { "epoch": 12.638461538461538, "grad_norm": 1.0369220972061157, "learning_rate": 1.0316348691685403e-05, "loss": 0.0479, "step": 13144 }, { "epoch": 12.639423076923077, "grad_norm": 2.9585654735565186, "learning_rate": 1.0315103637583868e-05, "loss": 0.0643, "step": 13145 }, { "epoch": 12.640384615384615, "grad_norm": 0.3815719485282898, "learning_rate": 1.0313858578592851e-05, "loss": 0.0021, "step": 13146 }, { "epoch": 12.641346153846154, "grad_norm": 0.027627091854810715, "learning_rate": 1.0312613514731669e-05, "loss": 0.0002, "step": 13147 }, { "epoch": 12.642307692307693, "grad_norm": 0.04094978794455528, "learning_rate": 1.031136844601964e-05, "loss": 0.0002, "step": 13148 }, { "epoch": 12.643269230769231, "grad_norm": 1.8871792554855347, "learning_rate": 1.031012337247609e-05, "loss": 0.0197, "step": 13149 }, { "epoch": 12.64423076923077, "grad_norm": 5.194286823272705, "learning_rate": 1.0308878294120336e-05, "loss": 0.1044, "step": 13150 }, { "epoch": 12.645192307692307, "grad_norm": 1.2725976705551147, "learning_rate": 1.0307633210971697e-05, "loss": 0.0254, "step": 13151 }, { "epoch": 12.646153846153846, "grad_norm": 0.06743686646223068, "learning_rate": 1.0306388123049492e-05, "loss": 0.0007, "step": 13152 }, { "epoch": 12.647115384615384, "grad_norm": 0.3535303473472595, "learning_rate": 1.0305143030373045e-05, "loss": 0.0016, "step": 13153 }, { "epoch": 12.648076923076923, "grad_norm": 0.06745323538780212, "learning_rate": 1.030389793296167e-05, "loss": 0.0002, "step": 13154 }, { "epoch": 12.649038461538462, "grad_norm": 1.1014615297317505, "learning_rate": 1.0302652830834693e-05, "loss": 0.0052, "step": 13155 }, { "epoch": 12.65, "grad_norm": 0.5235792994499207, "learning_rate": 1.0301407724011432e-05, "loss": 0.0043, "step": 13156 }, { "epoch": 12.650961538461539, "grad_norm": 0.06852295249700546, "learning_rate": 1.030016261251121e-05, "loss": 0.0005, "step": 13157 }, { "epoch": 12.651923076923078, "grad_norm": 0.9891589879989624, "learning_rate": 1.0298917496353342e-05, "loss": 0.0059, "step": 13158 }, { "epoch": 12.652884615384615, "grad_norm": 2.7095389366149902, "learning_rate": 1.0297672375557153e-05, "loss": 0.0266, "step": 13159 }, { "epoch": 12.653846153846153, "grad_norm": 0.11936715990304947, "learning_rate": 1.0296427250141962e-05, "loss": 0.0009, "step": 13160 }, { "epoch": 12.654807692307692, "grad_norm": 0.28154197335243225, "learning_rate": 1.0295182120127092e-05, "loss": 0.0018, "step": 13161 }, { "epoch": 12.65576923076923, "grad_norm": 0.6880713701248169, "learning_rate": 1.029393698553186e-05, "loss": 0.0018, "step": 13162 }, { "epoch": 12.65673076923077, "grad_norm": 1.2670947313308716, "learning_rate": 1.0292691846375586e-05, "loss": 0.0125, "step": 13163 }, { "epoch": 12.657692307692308, "grad_norm": 0.018114451318979263, "learning_rate": 1.0291446702677598e-05, "loss": 0.0001, "step": 13164 }, { "epoch": 12.658653846153847, "grad_norm": 3.8374619483947754, "learning_rate": 1.0290201554457213e-05, "loss": 0.0619, "step": 13165 }, { "epoch": 12.659615384615385, "grad_norm": 0.07213067263364792, "learning_rate": 1.028895640173375e-05, "loss": 0.0006, "step": 13166 }, { "epoch": 12.660576923076922, "grad_norm": 1.7331042289733887, "learning_rate": 1.028771124452653e-05, "loss": 0.0169, "step": 13167 }, { "epoch": 12.661538461538461, "grad_norm": 3.7520081996917725, "learning_rate": 1.0286466082854878e-05, "loss": 0.06, "step": 13168 }, { "epoch": 12.6625, "grad_norm": 2.3066534996032715, "learning_rate": 1.0285220916738112e-05, "loss": 0.0081, "step": 13169 }, { "epoch": 12.663461538461538, "grad_norm": 0.22013598680496216, "learning_rate": 1.0283975746195554e-05, "loss": 0.0007, "step": 13170 }, { "epoch": 12.664423076923077, "grad_norm": 0.19643838703632355, "learning_rate": 1.0282730571246529e-05, "loss": 0.0012, "step": 13171 }, { "epoch": 12.665384615384616, "grad_norm": 0.4352911710739136, "learning_rate": 1.0281485391910353e-05, "loss": 0.0016, "step": 13172 }, { "epoch": 12.666346153846154, "grad_norm": 1.067794680595398, "learning_rate": 1.0280240208206351e-05, "loss": 0.0058, "step": 13173 }, { "epoch": 12.667307692307693, "grad_norm": 1.712936520576477, "learning_rate": 1.0278995020153842e-05, "loss": 0.0426, "step": 13174 }, { "epoch": 12.66826923076923, "grad_norm": 2.19675612449646, "learning_rate": 1.027774982777215e-05, "loss": 0.0082, "step": 13175 }, { "epoch": 12.669230769230769, "grad_norm": 0.3550432026386261, "learning_rate": 1.0276504631080595e-05, "loss": 0.0023, "step": 13176 }, { "epoch": 12.670192307692307, "grad_norm": 0.16880202293395996, "learning_rate": 1.0275259430098502e-05, "loss": 0.0008, "step": 13177 }, { "epoch": 12.671153846153846, "grad_norm": 0.1874985247850418, "learning_rate": 1.0274014224845186e-05, "loss": 0.0013, "step": 13178 }, { "epoch": 12.672115384615385, "grad_norm": 0.8068612217903137, "learning_rate": 1.0272769015339978e-05, "loss": 0.0576, "step": 13179 }, { "epoch": 12.673076923076923, "grad_norm": 2.521536111831665, "learning_rate": 1.0271523801602194e-05, "loss": 0.0302, "step": 13180 }, { "epoch": 12.674038461538462, "grad_norm": 0.0699869766831398, "learning_rate": 1.0270278583651156e-05, "loss": 0.0004, "step": 13181 }, { "epoch": 12.675, "grad_norm": 0.03778696432709694, "learning_rate": 1.0269033361506189e-05, "loss": 0.0003, "step": 13182 }, { "epoch": 12.67596153846154, "grad_norm": 2.379920482635498, "learning_rate": 1.0267788135186613e-05, "loss": 0.0201, "step": 13183 }, { "epoch": 12.676923076923076, "grad_norm": 0.022361960262060165, "learning_rate": 1.0266542904711753e-05, "loss": 0.0002, "step": 13184 }, { "epoch": 12.677884615384615, "grad_norm": 2.6250503063201904, "learning_rate": 1.0265297670100927e-05, "loss": 0.0156, "step": 13185 }, { "epoch": 12.678846153846154, "grad_norm": 0.146754652261734, "learning_rate": 1.026405243137346e-05, "loss": 0.0008, "step": 13186 }, { "epoch": 12.679807692307692, "grad_norm": 0.23049287497997284, "learning_rate": 1.0262807188548679e-05, "loss": 0.0006, "step": 13187 }, { "epoch": 12.680769230769231, "grad_norm": 2.5638294219970703, "learning_rate": 1.0261561941645894e-05, "loss": 0.0087, "step": 13188 }, { "epoch": 12.68173076923077, "grad_norm": 0.21387585997581482, "learning_rate": 1.0260316690684442e-05, "loss": 0.0011, "step": 13189 }, { "epoch": 12.682692307692308, "grad_norm": 0.1608017086982727, "learning_rate": 1.0259071435683636e-05, "loss": 0.0007, "step": 13190 }, { "epoch": 12.683653846153845, "grad_norm": 1.3211764097213745, "learning_rate": 1.0257826176662802e-05, "loss": 0.0095, "step": 13191 }, { "epoch": 12.684615384615384, "grad_norm": 3.2302088737487793, "learning_rate": 1.0256580913641261e-05, "loss": 0.0121, "step": 13192 }, { "epoch": 12.685576923076923, "grad_norm": 3.243105173110962, "learning_rate": 1.025533564663834e-05, "loss": 0.019, "step": 13193 }, { "epoch": 12.686538461538461, "grad_norm": 1.7876758575439453, "learning_rate": 1.0254090375673357e-05, "loss": 0.0107, "step": 13194 }, { "epoch": 12.6875, "grad_norm": 2.6519951820373535, "learning_rate": 1.025284510076564e-05, "loss": 0.0972, "step": 13195 }, { "epoch": 12.688461538461539, "grad_norm": 2.2265193462371826, "learning_rate": 1.0251599821934506e-05, "loss": 0.0129, "step": 13196 }, { "epoch": 12.689423076923077, "grad_norm": 1.3948290348052979, "learning_rate": 1.0250354539199284e-05, "loss": 0.0047, "step": 13197 }, { "epoch": 12.690384615384616, "grad_norm": 0.03300514817237854, "learning_rate": 1.0249109252579292e-05, "loss": 0.0005, "step": 13198 }, { "epoch": 12.691346153846155, "grad_norm": 0.20557966828346252, "learning_rate": 1.0247863962093853e-05, "loss": 0.0018, "step": 13199 }, { "epoch": 12.692307692307692, "grad_norm": 0.413767009973526, "learning_rate": 1.0246618667762299e-05, "loss": 0.0013, "step": 13200 }, { "epoch": 12.69326923076923, "grad_norm": 0.0461464561522007, "learning_rate": 1.0245373369603941e-05, "loss": 0.0005, "step": 13201 }, { "epoch": 12.694230769230769, "grad_norm": 0.031725041568279266, "learning_rate": 1.0244128067638112e-05, "loss": 0.0003, "step": 13202 }, { "epoch": 12.695192307692308, "grad_norm": 1.1519205570220947, "learning_rate": 1.0242882761884132e-05, "loss": 0.0066, "step": 13203 }, { "epoch": 12.696153846153846, "grad_norm": 6.267227649688721, "learning_rate": 1.0241637452361323e-05, "loss": 0.0488, "step": 13204 }, { "epoch": 12.697115384615385, "grad_norm": 0.06438738107681274, "learning_rate": 1.0240392139089011e-05, "loss": 0.0005, "step": 13205 }, { "epoch": 12.698076923076924, "grad_norm": 0.1937682181596756, "learning_rate": 1.0239146822086518e-05, "loss": 0.0008, "step": 13206 }, { "epoch": 12.69903846153846, "grad_norm": 1.3856083154678345, "learning_rate": 1.0237901501373169e-05, "loss": 0.0054, "step": 13207 }, { "epoch": 12.7, "grad_norm": 1.7884215116500854, "learning_rate": 1.0236656176968287e-05, "loss": 0.0347, "step": 13208 }, { "epoch": 12.700961538461538, "grad_norm": 0.6306294798851013, "learning_rate": 1.0235410848891195e-05, "loss": 0.0016, "step": 13209 }, { "epoch": 12.701923076923077, "grad_norm": 0.718529224395752, "learning_rate": 1.0234165517161218e-05, "loss": 0.0024, "step": 13210 }, { "epoch": 12.702884615384615, "grad_norm": 2.932239532470703, "learning_rate": 1.0232920181797678e-05, "loss": 0.0142, "step": 13211 }, { "epoch": 12.703846153846154, "grad_norm": 1.4756594896316528, "learning_rate": 1.0231674842819901e-05, "loss": 0.0195, "step": 13212 }, { "epoch": 12.704807692307693, "grad_norm": 0.31583213806152344, "learning_rate": 1.0230429500247212e-05, "loss": 0.0015, "step": 13213 }, { "epoch": 12.705769230769231, "grad_norm": 5.074815273284912, "learning_rate": 1.0229184154098935e-05, "loss": 0.0407, "step": 13214 }, { "epoch": 12.70673076923077, "grad_norm": 1.450802206993103, "learning_rate": 1.0227938804394386e-05, "loss": 0.0281, "step": 13215 }, { "epoch": 12.707692307692307, "grad_norm": 0.10679943859577179, "learning_rate": 1.02266934511529e-05, "loss": 0.0006, "step": 13216 }, { "epoch": 12.708653846153846, "grad_norm": 0.62040776014328, "learning_rate": 1.0225448094393799e-05, "loss": 0.0028, "step": 13217 }, { "epoch": 12.709615384615384, "grad_norm": 2.0459585189819336, "learning_rate": 1.0224202734136401e-05, "loss": 0.0187, "step": 13218 }, { "epoch": 12.710576923076923, "grad_norm": 0.012942005880177021, "learning_rate": 1.0222957370400038e-05, "loss": 0.0001, "step": 13219 }, { "epoch": 12.711538461538462, "grad_norm": 0.07308763265609741, "learning_rate": 1.022171200320403e-05, "loss": 0.0005, "step": 13220 }, { "epoch": 12.7125, "grad_norm": 1.2069823741912842, "learning_rate": 1.0220466632567703e-05, "loss": 0.0061, "step": 13221 }, { "epoch": 12.713461538461539, "grad_norm": 0.9428746700286865, "learning_rate": 1.0219221258510377e-05, "loss": 0.0102, "step": 13222 }, { "epoch": 12.714423076923078, "grad_norm": 0.16316598653793335, "learning_rate": 1.0217975881051382e-05, "loss": 0.0006, "step": 13223 }, { "epoch": 12.715384615384615, "grad_norm": 0.1403864175081253, "learning_rate": 1.0216730500210046e-05, "loss": 0.0014, "step": 13224 }, { "epoch": 12.716346153846153, "grad_norm": 0.040529441088438034, "learning_rate": 1.0215485116005685e-05, "loss": 0.0003, "step": 13225 }, { "epoch": 12.717307692307692, "grad_norm": 0.11508054286241531, "learning_rate": 1.0214239728457626e-05, "loss": 0.0008, "step": 13226 }, { "epoch": 12.71826923076923, "grad_norm": 1.4444704055786133, "learning_rate": 1.0212994337585199e-05, "loss": 0.0152, "step": 13227 }, { "epoch": 12.71923076923077, "grad_norm": 0.3304170072078705, "learning_rate": 1.0211748943407723e-05, "loss": 0.0022, "step": 13228 }, { "epoch": 12.720192307692308, "grad_norm": 0.044845081865787506, "learning_rate": 1.0210503545944522e-05, "loss": 0.0004, "step": 13229 }, { "epoch": 12.721153846153847, "grad_norm": 3.4257466793060303, "learning_rate": 1.0209258145214926e-05, "loss": 0.0718, "step": 13230 }, { "epoch": 12.722115384615385, "grad_norm": 1.8746284246444702, "learning_rate": 1.020801274123826e-05, "loss": 0.0218, "step": 13231 }, { "epoch": 12.723076923076922, "grad_norm": 0.0737040638923645, "learning_rate": 1.0206767334033845e-05, "loss": 0.0004, "step": 13232 }, { "epoch": 12.724038461538461, "grad_norm": 0.3864276707172394, "learning_rate": 1.0205521923621007e-05, "loss": 0.001, "step": 13233 }, { "epoch": 12.725, "grad_norm": 2.271214723587036, "learning_rate": 1.0204276510019074e-05, "loss": 0.0068, "step": 13234 }, { "epoch": 12.725961538461538, "grad_norm": 0.05471230670809746, "learning_rate": 1.0203031093247367e-05, "loss": 0.0004, "step": 13235 }, { "epoch": 12.726923076923077, "grad_norm": 2.512439727783203, "learning_rate": 1.0201785673325214e-05, "loss": 0.0153, "step": 13236 }, { "epoch": 12.727884615384616, "grad_norm": 1.7344738245010376, "learning_rate": 1.020054025027194e-05, "loss": 0.0038, "step": 13237 }, { "epoch": 12.728846153846154, "grad_norm": 0.6288772225379944, "learning_rate": 1.0199294824106868e-05, "loss": 0.0038, "step": 13238 }, { "epoch": 12.729807692307693, "grad_norm": 0.03280697017908096, "learning_rate": 1.019804939484933e-05, "loss": 0.0002, "step": 13239 }, { "epoch": 12.73076923076923, "grad_norm": 0.504184365272522, "learning_rate": 1.0196803962518643e-05, "loss": 0.0021, "step": 13240 }, { "epoch": 12.731730769230769, "grad_norm": 0.25841468572616577, "learning_rate": 1.0195558527134136e-05, "loss": 0.0012, "step": 13241 }, { "epoch": 12.732692307692307, "grad_norm": 0.1866898536682129, "learning_rate": 1.0194313088715135e-05, "loss": 0.0012, "step": 13242 }, { "epoch": 12.733653846153846, "grad_norm": 0.057821959257125854, "learning_rate": 1.0193067647280968e-05, "loss": 0.0004, "step": 13243 }, { "epoch": 12.734615384615385, "grad_norm": 0.17323371767997742, "learning_rate": 1.0191822202850952e-05, "loss": 0.0008, "step": 13244 }, { "epoch": 12.735576923076923, "grad_norm": 4.9241557121276855, "learning_rate": 1.0190576755444423e-05, "loss": 0.0937, "step": 13245 }, { "epoch": 12.736538461538462, "grad_norm": 2.2673285007476807, "learning_rate": 1.01893313050807e-05, "loss": 0.0534, "step": 13246 }, { "epoch": 12.7375, "grad_norm": 1.348069429397583, "learning_rate": 1.018808585177911e-05, "loss": 0.0076, "step": 13247 }, { "epoch": 12.73846153846154, "grad_norm": 4.24481201171875, "learning_rate": 1.0186840395558984e-05, "loss": 0.0254, "step": 13248 }, { "epoch": 12.739423076923076, "grad_norm": 1.7930784225463867, "learning_rate": 1.018559493643964e-05, "loss": 0.0113, "step": 13249 }, { "epoch": 12.740384615384615, "grad_norm": 4.2725749015808105, "learning_rate": 1.0184349474440409e-05, "loss": 0.0346, "step": 13250 }, { "epoch": 12.741346153846154, "grad_norm": 1.9459789991378784, "learning_rate": 1.0183104009580612e-05, "loss": 0.0108, "step": 13251 }, { "epoch": 12.742307692307692, "grad_norm": 0.09060880541801453, "learning_rate": 1.0181858541879582e-05, "loss": 0.0006, "step": 13252 }, { "epoch": 12.743269230769231, "grad_norm": 0.2528371214866638, "learning_rate": 1.018061307135664e-05, "loss": 0.0012, "step": 13253 }, { "epoch": 12.74423076923077, "grad_norm": 1.0206702947616577, "learning_rate": 1.0179367598031111e-05, "loss": 0.0045, "step": 13254 }, { "epoch": 12.745192307692308, "grad_norm": 0.03803299367427826, "learning_rate": 1.0178122121922324e-05, "loss": 0.0005, "step": 13255 }, { "epoch": 12.746153846153845, "grad_norm": 3.6406846046447754, "learning_rate": 1.0176876643049607e-05, "loss": 0.0447, "step": 13256 }, { "epoch": 12.747115384615384, "grad_norm": 1.2862765789031982, "learning_rate": 1.0175631161432282e-05, "loss": 0.0058, "step": 13257 }, { "epoch": 12.748076923076923, "grad_norm": 2.7283549308776855, "learning_rate": 1.0174385677089675e-05, "loss": 0.0335, "step": 13258 }, { "epoch": 12.749038461538461, "grad_norm": 1.3387610912322998, "learning_rate": 1.0173140190041117e-05, "loss": 0.0093, "step": 13259 }, { "epoch": 12.75, "grad_norm": 0.19651533663272858, "learning_rate": 1.0171894700305931e-05, "loss": 0.0006, "step": 13260 }, { "epoch": 12.750961538461539, "grad_norm": 0.14124217629432678, "learning_rate": 1.0170649207903445e-05, "loss": 0.0006, "step": 13261 }, { "epoch": 12.751923076923077, "grad_norm": 0.4438075125217438, "learning_rate": 1.016940371285298e-05, "loss": 0.0014, "step": 13262 }, { "epoch": 12.752884615384616, "grad_norm": 0.37132352590560913, "learning_rate": 1.0168158215173869e-05, "loss": 0.0012, "step": 13263 }, { "epoch": 12.753846153846155, "grad_norm": 1.6248950958251953, "learning_rate": 1.0166912714885437e-05, "loss": 0.0051, "step": 13264 }, { "epoch": 12.754807692307692, "grad_norm": 0.12563833594322205, "learning_rate": 1.0165667212007008e-05, "loss": 0.0009, "step": 13265 }, { "epoch": 12.75576923076923, "grad_norm": 1.1617714166641235, "learning_rate": 1.016442170655791e-05, "loss": 0.0033, "step": 13266 }, { "epoch": 12.756730769230769, "grad_norm": 2.6330833435058594, "learning_rate": 1.0163176198557471e-05, "loss": 0.0663, "step": 13267 }, { "epoch": 12.757692307692308, "grad_norm": 0.9080213308334351, "learning_rate": 1.0161930688025018e-05, "loss": 0.0226, "step": 13268 }, { "epoch": 12.758653846153846, "grad_norm": 2.643683910369873, "learning_rate": 1.016068517497987e-05, "loss": 0.0075, "step": 13269 }, { "epoch": 12.759615384615385, "grad_norm": 1.6733415126800537, "learning_rate": 1.0159439659441363e-05, "loss": 0.0201, "step": 13270 }, { "epoch": 12.760576923076924, "grad_norm": 2.217071056365967, "learning_rate": 1.0158194141428823e-05, "loss": 0.0966, "step": 13271 }, { "epoch": 12.76153846153846, "grad_norm": 0.2256079763174057, "learning_rate": 1.0156948620961574e-05, "loss": 0.0013, "step": 13272 }, { "epoch": 12.7625, "grad_norm": 2.373594284057617, "learning_rate": 1.0155703098058937e-05, "loss": 0.0195, "step": 13273 }, { "epoch": 12.763461538461538, "grad_norm": 0.29644832015037537, "learning_rate": 1.015445757274025e-05, "loss": 0.002, "step": 13274 }, { "epoch": 12.764423076923077, "grad_norm": 1.5055890083312988, "learning_rate": 1.0153212045024833e-05, "loss": 0.0137, "step": 13275 }, { "epoch": 12.765384615384615, "grad_norm": 0.9358724355697632, "learning_rate": 1.0151966514932017e-05, "loss": 0.0066, "step": 13276 }, { "epoch": 12.766346153846154, "grad_norm": 0.052529603242874146, "learning_rate": 1.0150720982481124e-05, "loss": 0.0004, "step": 13277 }, { "epoch": 12.767307692307693, "grad_norm": 3.095048427581787, "learning_rate": 1.0149475447691485e-05, "loss": 0.0538, "step": 13278 }, { "epoch": 12.768269230769231, "grad_norm": 0.6534105539321899, "learning_rate": 1.0148229910582427e-05, "loss": 0.0041, "step": 13279 }, { "epoch": 12.76923076923077, "grad_norm": 0.14967824518680573, "learning_rate": 1.0146984371173272e-05, "loss": 0.0016, "step": 13280 }, { "epoch": 12.770192307692307, "grad_norm": 0.03796662017703056, "learning_rate": 1.0145738829483354e-05, "loss": 0.0004, "step": 13281 }, { "epoch": 12.771153846153846, "grad_norm": 1.2614591121673584, "learning_rate": 1.0144493285531998e-05, "loss": 0.0115, "step": 13282 }, { "epoch": 12.772115384615384, "grad_norm": 2.1656532287597656, "learning_rate": 1.0143247739338529e-05, "loss": 0.011, "step": 13283 }, { "epoch": 12.773076923076923, "grad_norm": 2.4230287075042725, "learning_rate": 1.0142002190922273e-05, "loss": 0.0155, "step": 13284 }, { "epoch": 12.774038461538462, "grad_norm": 0.0802575871348381, "learning_rate": 1.0140756640302563e-05, "loss": 0.0007, "step": 13285 }, { "epoch": 12.775, "grad_norm": 0.5440566539764404, "learning_rate": 1.0139511087498724e-05, "loss": 0.002, "step": 13286 }, { "epoch": 12.775961538461539, "grad_norm": 0.2844947278499603, "learning_rate": 1.0138265532530078e-05, "loss": 0.002, "step": 13287 }, { "epoch": 12.776923076923078, "grad_norm": 3.0848684310913086, "learning_rate": 1.013701997541596e-05, "loss": 0.0503, "step": 13288 }, { "epoch": 12.777884615384615, "grad_norm": 0.5209133625030518, "learning_rate": 1.0135774416175694e-05, "loss": 0.0016, "step": 13289 }, { "epoch": 12.778846153846153, "grad_norm": 3.022753953933716, "learning_rate": 1.0134528854828607e-05, "loss": 0.0208, "step": 13290 }, { "epoch": 12.779807692307692, "grad_norm": 1.196162223815918, "learning_rate": 1.0133283291394025e-05, "loss": 0.0081, "step": 13291 }, { "epoch": 12.78076923076923, "grad_norm": 0.3934309184551239, "learning_rate": 1.0132037725891281e-05, "loss": 0.0025, "step": 13292 }, { "epoch": 12.78173076923077, "grad_norm": 1.4523062705993652, "learning_rate": 1.0130792158339697e-05, "loss": 0.011, "step": 13293 }, { "epoch": 12.782692307692308, "grad_norm": 1.5391908884048462, "learning_rate": 1.0129546588758605e-05, "loss": 0.0859, "step": 13294 }, { "epoch": 12.783653846153847, "grad_norm": 1.9499242305755615, "learning_rate": 1.0128301017167325e-05, "loss": 0.0145, "step": 13295 }, { "epoch": 12.784615384615385, "grad_norm": 0.8234105706214905, "learning_rate": 1.0127055443585195e-05, "loss": 0.0057, "step": 13296 }, { "epoch": 12.785576923076922, "grad_norm": 0.9985339641571045, "learning_rate": 1.0125809868031535e-05, "loss": 0.0047, "step": 13297 }, { "epoch": 12.786538461538461, "grad_norm": 0.16270940005779266, "learning_rate": 1.0124564290525673e-05, "loss": 0.0011, "step": 13298 }, { "epoch": 12.7875, "grad_norm": 1.2385352849960327, "learning_rate": 1.0123318711086942e-05, "loss": 0.0132, "step": 13299 }, { "epoch": 12.788461538461538, "grad_norm": 0.730448842048645, "learning_rate": 1.0122073129734668e-05, "loss": 0.0037, "step": 13300 }, { "epoch": 12.789423076923077, "grad_norm": 3.2623541355133057, "learning_rate": 1.0120827546488175e-05, "loss": 0.0296, "step": 13301 }, { "epoch": 12.790384615384616, "grad_norm": 0.792998194694519, "learning_rate": 1.0119581961366791e-05, "loss": 0.0025, "step": 13302 }, { "epoch": 12.791346153846154, "grad_norm": 0.05796067789196968, "learning_rate": 1.011833637438985e-05, "loss": 0.0006, "step": 13303 }, { "epoch": 12.792307692307693, "grad_norm": 1.001593828201294, "learning_rate": 1.0117090785576676e-05, "loss": 0.0104, "step": 13304 }, { "epoch": 12.79326923076923, "grad_norm": 0.46507611870765686, "learning_rate": 1.0115845194946595e-05, "loss": 0.0034, "step": 13305 }, { "epoch": 12.794230769230769, "grad_norm": 1.5888400077819824, "learning_rate": 1.0114599602518934e-05, "loss": 0.0057, "step": 13306 }, { "epoch": 12.795192307692307, "grad_norm": 0.5725600719451904, "learning_rate": 1.0113354008313025e-05, "loss": 0.002, "step": 13307 }, { "epoch": 12.796153846153846, "grad_norm": 0.5806070566177368, "learning_rate": 1.0112108412348196e-05, "loss": 0.0029, "step": 13308 }, { "epoch": 12.797115384615385, "grad_norm": 0.38167011737823486, "learning_rate": 1.0110862814643772e-05, "loss": 0.0024, "step": 13309 }, { "epoch": 12.798076923076923, "grad_norm": 1.2568376064300537, "learning_rate": 1.0109617215219083e-05, "loss": 0.0047, "step": 13310 }, { "epoch": 12.799038461538462, "grad_norm": 0.1056249663233757, "learning_rate": 1.0108371614093458e-05, "loss": 0.0008, "step": 13311 }, { "epoch": 12.8, "grad_norm": 0.048983681946992874, "learning_rate": 1.010712601128622e-05, "loss": 0.0005, "step": 13312 }, { "epoch": 12.80096153846154, "grad_norm": 0.1418263018131256, "learning_rate": 1.0105880406816704e-05, "loss": 0.001, "step": 13313 }, { "epoch": 12.801923076923076, "grad_norm": 0.7137091755867004, "learning_rate": 1.0104634800704234e-05, "loss": 0.006, "step": 13314 }, { "epoch": 12.802884615384615, "grad_norm": 1.7156678438186646, "learning_rate": 1.0103389192968141e-05, "loss": 0.0057, "step": 13315 }, { "epoch": 12.803846153846154, "grad_norm": 0.148686945438385, "learning_rate": 1.0102143583627748e-05, "loss": 0.0009, "step": 13316 }, { "epoch": 12.804807692307692, "grad_norm": 2.199920654296875, "learning_rate": 1.0100897972702386e-05, "loss": 0.0123, "step": 13317 }, { "epoch": 12.805769230769231, "grad_norm": 0.3905460238456726, "learning_rate": 1.0099652360211386e-05, "loss": 0.0025, "step": 13318 }, { "epoch": 12.80673076923077, "grad_norm": 0.13940338790416718, "learning_rate": 1.0098406746174074e-05, "loss": 0.001, "step": 13319 }, { "epoch": 12.807692307692308, "grad_norm": 0.4619697630405426, "learning_rate": 1.0097161130609774e-05, "loss": 0.0025, "step": 13320 }, { "epoch": 12.808653846153845, "grad_norm": 0.06279021501541138, "learning_rate": 1.0095915513537821e-05, "loss": 0.0003, "step": 13321 }, { "epoch": 12.809615384615384, "grad_norm": 2.1272029876708984, "learning_rate": 1.0094669894977542e-05, "loss": 0.0147, "step": 13322 }, { "epoch": 12.810576923076923, "grad_norm": 0.14286324381828308, "learning_rate": 1.0093424274948265e-05, "loss": 0.0009, "step": 13323 }, { "epoch": 12.811538461538461, "grad_norm": 0.0377381294965744, "learning_rate": 1.0092178653469313e-05, "loss": 0.0003, "step": 13324 }, { "epoch": 12.8125, "grad_norm": 5.004672527313232, "learning_rate": 1.0090933030560024e-05, "loss": 0.0179, "step": 13325 }, { "epoch": 12.813461538461539, "grad_norm": 2.325791835784912, "learning_rate": 1.008968740623972e-05, "loss": 0.0105, "step": 13326 }, { "epoch": 12.814423076923077, "grad_norm": 0.2983807325363159, "learning_rate": 1.0088441780527729e-05, "loss": 0.0008, "step": 13327 }, { "epoch": 12.815384615384616, "grad_norm": 0.1999426931142807, "learning_rate": 1.0087196153443379e-05, "loss": 0.001, "step": 13328 }, { "epoch": 12.816346153846155, "grad_norm": 2.845676898956299, "learning_rate": 1.0085950525006006e-05, "loss": 0.0157, "step": 13329 }, { "epoch": 12.817307692307692, "grad_norm": 0.5513539910316467, "learning_rate": 1.0084704895234929e-05, "loss": 0.002, "step": 13330 }, { "epoch": 12.81826923076923, "grad_norm": 1.9669028520584106, "learning_rate": 1.008345926414948e-05, "loss": 0.0245, "step": 13331 }, { "epoch": 12.819230769230769, "grad_norm": 2.221156120300293, "learning_rate": 1.008221363176899e-05, "loss": 0.0383, "step": 13332 }, { "epoch": 12.820192307692308, "grad_norm": 0.2589831054210663, "learning_rate": 1.0080967998112787e-05, "loss": 0.0016, "step": 13333 }, { "epoch": 12.821153846153846, "grad_norm": 1.3294631242752075, "learning_rate": 1.0079722363200197e-05, "loss": 0.0063, "step": 13334 }, { "epoch": 12.822115384615385, "grad_norm": 0.8918498754501343, "learning_rate": 1.007847672705055e-05, "loss": 0.0048, "step": 13335 }, { "epoch": 12.823076923076924, "grad_norm": 0.34584489464759827, "learning_rate": 1.0077231089683175e-05, "loss": 0.0014, "step": 13336 }, { "epoch": 12.82403846153846, "grad_norm": 0.4529060423374176, "learning_rate": 1.00759854511174e-05, "loss": 0.0018, "step": 13337 }, { "epoch": 12.825, "grad_norm": 0.7828044891357422, "learning_rate": 1.0074739811372552e-05, "loss": 0.0034, "step": 13338 }, { "epoch": 12.825961538461538, "grad_norm": 0.4041091501712799, "learning_rate": 1.0073494170467964e-05, "loss": 0.001, "step": 13339 }, { "epoch": 12.826923076923077, "grad_norm": 1.9953519105911255, "learning_rate": 1.007224852842296e-05, "loss": 0.0141, "step": 13340 }, { "epoch": 12.827884615384615, "grad_norm": 3.616929769515991, "learning_rate": 1.0071002885256875e-05, "loss": 0.0613, "step": 13341 }, { "epoch": 12.828846153846154, "grad_norm": 3.1856627464294434, "learning_rate": 1.006975724098903e-05, "loss": 0.0567, "step": 13342 }, { "epoch": 12.829807692307693, "grad_norm": 0.36723336577415466, "learning_rate": 1.0068511595638756e-05, "loss": 0.0014, "step": 13343 }, { "epoch": 12.830769230769231, "grad_norm": 0.06649099290370941, "learning_rate": 1.0067265949225387e-05, "loss": 0.0004, "step": 13344 }, { "epoch": 12.83173076923077, "grad_norm": 0.03921882063150406, "learning_rate": 1.0066020301768247e-05, "loss": 0.0003, "step": 13345 }, { "epoch": 12.832692307692307, "grad_norm": 3.2308874130249023, "learning_rate": 1.0064774653286662e-05, "loss": 0.0077, "step": 13346 }, { "epoch": 12.833653846153846, "grad_norm": 0.15657436847686768, "learning_rate": 1.0063529003799968e-05, "loss": 0.0023, "step": 13347 }, { "epoch": 12.834615384615384, "grad_norm": 3.136119842529297, "learning_rate": 1.006228335332749e-05, "loss": 0.0502, "step": 13348 }, { "epoch": 12.835576923076923, "grad_norm": 0.41293027997016907, "learning_rate": 1.0061037701888556e-05, "loss": 0.0012, "step": 13349 }, { "epoch": 12.836538461538462, "grad_norm": 0.08480944484472275, "learning_rate": 1.0059792049502497e-05, "loss": 0.0008, "step": 13350 }, { "epoch": 12.8375, "grad_norm": 0.7628644108772278, "learning_rate": 1.0058546396188642e-05, "loss": 0.0032, "step": 13351 }, { "epoch": 12.838461538461539, "grad_norm": 3.7671682834625244, "learning_rate": 1.0057300741966316e-05, "loss": 0.0468, "step": 13352 }, { "epoch": 12.839423076923078, "grad_norm": 0.19945526123046875, "learning_rate": 1.0056055086854852e-05, "loss": 0.0006, "step": 13353 }, { "epoch": 12.840384615384615, "grad_norm": 2.6401243209838867, "learning_rate": 1.0054809430873579e-05, "loss": 0.0097, "step": 13354 }, { "epoch": 12.841346153846153, "grad_norm": 4.163458824157715, "learning_rate": 1.0053563774041821e-05, "loss": 0.0306, "step": 13355 }, { "epoch": 12.842307692307692, "grad_norm": 0.07611644268035889, "learning_rate": 1.0052318116378915e-05, "loss": 0.0005, "step": 13356 }, { "epoch": 12.84326923076923, "grad_norm": 2.974032163619995, "learning_rate": 1.0051072457904181e-05, "loss": 0.065, "step": 13357 }, { "epoch": 12.84423076923077, "grad_norm": 1.7717682123184204, "learning_rate": 1.0049826798636955e-05, "loss": 0.0083, "step": 13358 }, { "epoch": 12.845192307692308, "grad_norm": 0.287387877702713, "learning_rate": 1.0048581138596563e-05, "loss": 0.0023, "step": 13359 }, { "epoch": 12.846153846153847, "grad_norm": 1.1617439985275269, "learning_rate": 1.0047335477802331e-05, "loss": 0.0159, "step": 13360 }, { "epoch": 12.847115384615385, "grad_norm": 3.181128978729248, "learning_rate": 1.0046089816273596e-05, "loss": 0.0816, "step": 13361 }, { "epoch": 12.848076923076922, "grad_norm": 0.5064250230789185, "learning_rate": 1.004484415402968e-05, "loss": 0.0032, "step": 13362 }, { "epoch": 12.849038461538461, "grad_norm": 2.372894287109375, "learning_rate": 1.0043598491089917e-05, "loss": 0.0223, "step": 13363 }, { "epoch": 12.85, "grad_norm": 0.7628025412559509, "learning_rate": 1.004235282747363e-05, "loss": 0.0041, "step": 13364 }, { "epoch": 12.850961538461538, "grad_norm": 0.5008853673934937, "learning_rate": 1.0041107163200152e-05, "loss": 0.0016, "step": 13365 }, { "epoch": 12.851923076923077, "grad_norm": 0.6600611209869385, "learning_rate": 1.0039861498288812e-05, "loss": 0.0032, "step": 13366 }, { "epoch": 12.852884615384616, "grad_norm": 0.10012739151716232, "learning_rate": 1.003861583275894e-05, "loss": 0.0007, "step": 13367 }, { "epoch": 12.853846153846154, "grad_norm": 1.5526361465454102, "learning_rate": 1.003737016662986e-05, "loss": 0.0078, "step": 13368 }, { "epoch": 12.854807692307693, "grad_norm": 0.18874625861644745, "learning_rate": 1.003612449992091e-05, "loss": 0.0011, "step": 13369 }, { "epoch": 12.85576923076923, "grad_norm": 3.0991427898406982, "learning_rate": 1.003487883265141e-05, "loss": 0.0673, "step": 13370 }, { "epoch": 12.856730769230769, "grad_norm": 0.08831234276294708, "learning_rate": 1.0033633164840692e-05, "loss": 0.0005, "step": 13371 }, { "epoch": 12.857692307692307, "grad_norm": 1.9267239570617676, "learning_rate": 1.003238749650809e-05, "loss": 0.0414, "step": 13372 }, { "epoch": 12.858653846153846, "grad_norm": 0.4188450276851654, "learning_rate": 1.0031141827672925e-05, "loss": 0.0018, "step": 13373 }, { "epoch": 12.859615384615385, "grad_norm": 2.0799529552459717, "learning_rate": 1.0029896158354533e-05, "loss": 0.0173, "step": 13374 }, { "epoch": 12.860576923076923, "grad_norm": 2.9634346961975098, "learning_rate": 1.0028650488572239e-05, "loss": 0.0122, "step": 13375 }, { "epoch": 12.861538461538462, "grad_norm": 3.1779141426086426, "learning_rate": 1.0027404818345374e-05, "loss": 0.025, "step": 13376 }, { "epoch": 12.8625, "grad_norm": 0.4492286145687103, "learning_rate": 1.0026159147693266e-05, "loss": 0.0023, "step": 13377 }, { "epoch": 12.86346153846154, "grad_norm": 0.024789854884147644, "learning_rate": 1.0024913476635245e-05, "loss": 0.0002, "step": 13378 }, { "epoch": 12.864423076923076, "grad_norm": 0.3926750123500824, "learning_rate": 1.002366780519064e-05, "loss": 0.0031, "step": 13379 }, { "epoch": 12.865384615384615, "grad_norm": 2.116440534591675, "learning_rate": 1.0022422133378783e-05, "loss": 0.0194, "step": 13380 }, { "epoch": 12.866346153846154, "grad_norm": 0.6863877177238464, "learning_rate": 1.0021176461218997e-05, "loss": 0.0035, "step": 13381 }, { "epoch": 12.867307692307692, "grad_norm": 1.4693022966384888, "learning_rate": 1.0019930788730615e-05, "loss": 0.0083, "step": 13382 }, { "epoch": 12.868269230769231, "grad_norm": 0.26212066411972046, "learning_rate": 1.0018685115932964e-05, "loss": 0.0018, "step": 13383 }, { "epoch": 12.86923076923077, "grad_norm": 1.1972308158874512, "learning_rate": 1.0017439442845378e-05, "loss": 0.0046, "step": 13384 }, { "epoch": 12.870192307692308, "grad_norm": 1.5674736499786377, "learning_rate": 1.001619376948718e-05, "loss": 0.0078, "step": 13385 }, { "epoch": 12.871153846153845, "grad_norm": 0.12611348927021027, "learning_rate": 1.0014948095877705e-05, "loss": 0.0008, "step": 13386 }, { "epoch": 12.872115384615384, "grad_norm": 4.152618408203125, "learning_rate": 1.0013702422036278e-05, "loss": 0.0375, "step": 13387 }, { "epoch": 12.873076923076923, "grad_norm": 0.2689970135688782, "learning_rate": 1.0012456747982231e-05, "loss": 0.0021, "step": 13388 }, { "epoch": 12.874038461538461, "grad_norm": 0.2977825999259949, "learning_rate": 1.001121107373489e-05, "loss": 0.002, "step": 13389 }, { "epoch": 12.875, "grad_norm": 1.818510890007019, "learning_rate": 1.0009965399313586e-05, "loss": 0.0381, "step": 13390 }, { "epoch": 12.875961538461539, "grad_norm": 0.6561331152915955, "learning_rate": 1.000871972473765e-05, "loss": 0.0033, "step": 13391 }, { "epoch": 12.876923076923077, "grad_norm": 2.515085458755493, "learning_rate": 1.0007474050026409e-05, "loss": 0.0342, "step": 13392 }, { "epoch": 12.877884615384616, "grad_norm": 1.2852903604507446, "learning_rate": 1.0006228375199191e-05, "loss": 0.0254, "step": 13393 }, { "epoch": 12.878846153846155, "grad_norm": 0.17167401313781738, "learning_rate": 1.000498270027533e-05, "loss": 0.0015, "step": 13394 }, { "epoch": 12.879807692307692, "grad_norm": 0.25043851137161255, "learning_rate": 1.000373702527415e-05, "loss": 0.0017, "step": 13395 }, { "epoch": 12.88076923076923, "grad_norm": 2.231560230255127, "learning_rate": 1.0002491350214983e-05, "loss": 0.0072, "step": 13396 }, { "epoch": 12.881730769230769, "grad_norm": 2.8279645442962646, "learning_rate": 1.0001245675117155e-05, "loss": 0.071, "step": 13397 }, { "epoch": 12.882692307692308, "grad_norm": 0.8497869372367859, "learning_rate": 1e-05, "loss": 0.0052, "step": 13398 }, { "epoch": 12.883653846153846, "grad_norm": 0.10288689285516739, "learning_rate": 9.998754324882847e-06, "loss": 0.0011, "step": 13399 }, { "epoch": 12.884615384615385, "grad_norm": 1.6888618469238281, "learning_rate": 9.997508649785019e-06, "loss": 0.0269, "step": 13400 }, { "epoch": 12.885576923076924, "grad_norm": 2.077746868133545, "learning_rate": 9.996262974725853e-06, "loss": 0.0142, "step": 13401 }, { "epoch": 12.88653846153846, "grad_norm": 0.059532083570957184, "learning_rate": 9.995017299724675e-06, "loss": 0.0006, "step": 13402 }, { "epoch": 12.8875, "grad_norm": 0.37477827072143555, "learning_rate": 9.99377162480081e-06, "loss": 0.002, "step": 13403 }, { "epoch": 12.888461538461538, "grad_norm": 0.29245176911354065, "learning_rate": 9.992525949973593e-06, "loss": 0.0024, "step": 13404 }, { "epoch": 12.889423076923077, "grad_norm": 0.11133789271116257, "learning_rate": 9.991280275262352e-06, "loss": 0.0011, "step": 13405 }, { "epoch": 12.890384615384615, "grad_norm": 1.7573609352111816, "learning_rate": 9.990034600686415e-06, "loss": 0.0167, "step": 13406 }, { "epoch": 12.891346153846154, "grad_norm": 0.9081305861473083, "learning_rate": 9.988788926265114e-06, "loss": 0.0067, "step": 13407 }, { "epoch": 12.892307692307693, "grad_norm": 0.47726988792419434, "learning_rate": 9.987543252017772e-06, "loss": 0.0023, "step": 13408 }, { "epoch": 12.893269230769231, "grad_norm": 0.40645721554756165, "learning_rate": 9.986297577963724e-06, "loss": 0.0016, "step": 13409 }, { "epoch": 12.89423076923077, "grad_norm": 1.0666166543960571, "learning_rate": 9.985051904122298e-06, "loss": 0.0053, "step": 13410 }, { "epoch": 12.895192307692307, "grad_norm": 1.1568280458450317, "learning_rate": 9.98380623051282e-06, "loss": 0.0037, "step": 13411 }, { "epoch": 12.896153846153846, "grad_norm": 0.05856168642640114, "learning_rate": 9.982560557154624e-06, "loss": 0.0006, "step": 13412 }, { "epoch": 12.897115384615384, "grad_norm": 0.5542242527008057, "learning_rate": 9.981314884067037e-06, "loss": 0.0041, "step": 13413 }, { "epoch": 12.898076923076923, "grad_norm": 1.284308671951294, "learning_rate": 9.98006921126939e-06, "loss": 0.0187, "step": 13414 }, { "epoch": 12.899038461538462, "grad_norm": 0.017018195241689682, "learning_rate": 9.978823538781006e-06, "loss": 0.0002, "step": 13415 }, { "epoch": 12.9, "grad_norm": 0.4985523521900177, "learning_rate": 9.977577866621222e-06, "loss": 0.0021, "step": 13416 }, { "epoch": 12.900961538461539, "grad_norm": 0.09068042784929276, "learning_rate": 9.976332194809362e-06, "loss": 0.0007, "step": 13417 }, { "epoch": 12.901923076923078, "grad_norm": 2.59795880317688, "learning_rate": 9.975086523364759e-06, "loss": 0.0204, "step": 13418 }, { "epoch": 12.902884615384615, "grad_norm": 2.219938039779663, "learning_rate": 9.973840852306736e-06, "loss": 0.0613, "step": 13419 }, { "epoch": 12.903846153846153, "grad_norm": 2.1042871475219727, "learning_rate": 9.97259518165463e-06, "loss": 0.0188, "step": 13420 }, { "epoch": 12.904807692307692, "grad_norm": 0.32991132140159607, "learning_rate": 9.971349511427765e-06, "loss": 0.0023, "step": 13421 }, { "epoch": 12.90576923076923, "grad_norm": 1.9290004968643188, "learning_rate": 9.97010384164547e-06, "loss": 0.045, "step": 13422 }, { "epoch": 12.90673076923077, "grad_norm": 0.8642967343330383, "learning_rate": 9.968858172327077e-06, "loss": 0.0039, "step": 13423 }, { "epoch": 12.907692307692308, "grad_norm": 0.37711918354034424, "learning_rate": 9.967612503491915e-06, "loss": 0.0012, "step": 13424 }, { "epoch": 12.908653846153847, "grad_norm": 0.9735574126243591, "learning_rate": 9.966366835159311e-06, "loss": 0.0034, "step": 13425 }, { "epoch": 12.909615384615385, "grad_norm": 1.6461377143859863, "learning_rate": 9.965121167348592e-06, "loss": 0.0061, "step": 13426 }, { "epoch": 12.910576923076922, "grad_norm": 0.11862151324748993, "learning_rate": 9.963875500079095e-06, "loss": 0.0011, "step": 13427 }, { "epoch": 12.911538461538461, "grad_norm": 0.24334990978240967, "learning_rate": 9.962629833370143e-06, "loss": 0.0012, "step": 13428 }, { "epoch": 12.9125, "grad_norm": 1.2080763578414917, "learning_rate": 9.961384167241062e-06, "loss": 0.0157, "step": 13429 }, { "epoch": 12.913461538461538, "grad_norm": 1.7277041673660278, "learning_rate": 9.96013850171119e-06, "loss": 0.009, "step": 13430 }, { "epoch": 12.914423076923077, "grad_norm": 0.25425246357917786, "learning_rate": 9.958892836799851e-06, "loss": 0.0015, "step": 13431 }, { "epoch": 12.915384615384616, "grad_norm": 1.2683820724487305, "learning_rate": 9.957647172526374e-06, "loss": 0.0061, "step": 13432 }, { "epoch": 12.916346153846154, "grad_norm": 1.235440969467163, "learning_rate": 9.956401508910086e-06, "loss": 0.0061, "step": 13433 }, { "epoch": 12.917307692307693, "grad_norm": 0.9320327639579773, "learning_rate": 9.955155845970322e-06, "loss": 0.0145, "step": 13434 }, { "epoch": 12.91826923076923, "grad_norm": 0.03040430136024952, "learning_rate": 9.953910183726407e-06, "loss": 0.0004, "step": 13435 }, { "epoch": 12.919230769230769, "grad_norm": 2.033578634262085, "learning_rate": 9.95266452219767e-06, "loss": 0.0119, "step": 13436 }, { "epoch": 12.920192307692307, "grad_norm": 0.3318825662136078, "learning_rate": 9.95141886140344e-06, "loss": 0.0023, "step": 13437 }, { "epoch": 12.921153846153846, "grad_norm": 1.1243817806243896, "learning_rate": 9.950173201363048e-06, "loss": 0.0234, "step": 13438 }, { "epoch": 12.922115384615385, "grad_norm": 0.17003387212753296, "learning_rate": 9.948927542095822e-06, "loss": 0.0008, "step": 13439 }, { "epoch": 12.923076923076923, "grad_norm": 2.2567636966705322, "learning_rate": 9.947681883621089e-06, "loss": 0.0375, "step": 13440 }, { "epoch": 12.924038461538462, "grad_norm": 2.452033042907715, "learning_rate": 9.94643622595818e-06, "loss": 0.0517, "step": 13441 }, { "epoch": 12.925, "grad_norm": 2.465052366256714, "learning_rate": 9.945190569126425e-06, "loss": 0.0558, "step": 13442 }, { "epoch": 12.92596153846154, "grad_norm": 0.029065493494272232, "learning_rate": 9.943944913145151e-06, "loss": 0.0004, "step": 13443 }, { "epoch": 12.926923076923076, "grad_norm": 0.0782037228345871, "learning_rate": 9.942699258033686e-06, "loss": 0.0005, "step": 13444 }, { "epoch": 12.927884615384615, "grad_norm": 0.05407828837633133, "learning_rate": 9.941453603811363e-06, "loss": 0.0004, "step": 13445 }, { "epoch": 12.928846153846154, "grad_norm": 1.0310319662094116, "learning_rate": 9.940207950497506e-06, "loss": 0.0055, "step": 13446 }, { "epoch": 12.929807692307692, "grad_norm": 0.13818477094173431, "learning_rate": 9.938962298111447e-06, "loss": 0.0007, "step": 13447 }, { "epoch": 12.930769230769231, "grad_norm": 0.04590281471610069, "learning_rate": 9.937716646672513e-06, "loss": 0.0003, "step": 13448 }, { "epoch": 12.93173076923077, "grad_norm": 2.6275346279144287, "learning_rate": 9.936470996200035e-06, "loss": 0.0134, "step": 13449 }, { "epoch": 12.932692307692308, "grad_norm": 0.5663359761238098, "learning_rate": 9.935225346713341e-06, "loss": 0.004, "step": 13450 }, { "epoch": 12.933653846153845, "grad_norm": 0.14680446684360504, "learning_rate": 9.933979698231757e-06, "loss": 0.0005, "step": 13451 }, { "epoch": 12.934615384615384, "grad_norm": 1.8111881017684937, "learning_rate": 9.932734050774616e-06, "loss": 0.062, "step": 13452 }, { "epoch": 12.935576923076923, "grad_norm": 0.983606219291687, "learning_rate": 9.931488404361246e-06, "loss": 0.0047, "step": 13453 }, { "epoch": 12.936538461538461, "grad_norm": 1.6829874515533447, "learning_rate": 9.930242759010974e-06, "loss": 0.0058, "step": 13454 }, { "epoch": 12.9375, "grad_norm": 0.42663291096687317, "learning_rate": 9.928997114743128e-06, "loss": 0.002, "step": 13455 }, { "epoch": 12.938461538461539, "grad_norm": 0.693517804145813, "learning_rate": 9.927751471577041e-06, "loss": 0.0043, "step": 13456 }, { "epoch": 12.939423076923077, "grad_norm": 0.5637263059616089, "learning_rate": 9.92650582953204e-06, "loss": 0.0032, "step": 13457 }, { "epoch": 12.940384615384616, "grad_norm": 0.15945865213871002, "learning_rate": 9.92526018862745e-06, "loss": 0.001, "step": 13458 }, { "epoch": 12.941346153846155, "grad_norm": 0.09036140143871307, "learning_rate": 9.924014548882602e-06, "loss": 0.0006, "step": 13459 }, { "epoch": 12.942307692307692, "grad_norm": 0.7415173053741455, "learning_rate": 9.922768910316829e-06, "loss": 0.0052, "step": 13460 }, { "epoch": 12.94326923076923, "grad_norm": 0.33809173107147217, "learning_rate": 9.921523272949453e-06, "loss": 0.0017, "step": 13461 }, { "epoch": 12.944230769230769, "grad_norm": 2.7474145889282227, "learning_rate": 9.920277636799804e-06, "loss": 0.0303, "step": 13462 }, { "epoch": 12.945192307692308, "grad_norm": 3.781632661819458, "learning_rate": 9.919032001887215e-06, "loss": 0.1082, "step": 13463 }, { "epoch": 12.946153846153846, "grad_norm": 0.636538565158844, "learning_rate": 9.917786368231011e-06, "loss": 0.0037, "step": 13464 }, { "epoch": 12.947115384615385, "grad_norm": 0.08988644927740097, "learning_rate": 9.916540735850523e-06, "loss": 0.0007, "step": 13465 }, { "epoch": 12.948076923076924, "grad_norm": 4.39815616607666, "learning_rate": 9.915295104765073e-06, "loss": 0.0221, "step": 13466 }, { "epoch": 12.94903846153846, "grad_norm": 1.106715202331543, "learning_rate": 9.914049474993999e-06, "loss": 0.0089, "step": 13467 }, { "epoch": 12.95, "grad_norm": 1.7038311958312988, "learning_rate": 9.912803846556623e-06, "loss": 0.0079, "step": 13468 }, { "epoch": 12.950961538461538, "grad_norm": 1.7061470746994019, "learning_rate": 9.911558219472275e-06, "loss": 0.02, "step": 13469 }, { "epoch": 12.951923076923077, "grad_norm": 0.8557735085487366, "learning_rate": 9.910312593760284e-06, "loss": 0.0102, "step": 13470 }, { "epoch": 12.952884615384615, "grad_norm": 4.2993364334106445, "learning_rate": 9.90906696943998e-06, "loss": 0.0213, "step": 13471 }, { "epoch": 12.953846153846154, "grad_norm": 3.5413522720336914, "learning_rate": 9.907821346530689e-06, "loss": 0.0142, "step": 13472 }, { "epoch": 12.954807692307693, "grad_norm": 1.1904760599136353, "learning_rate": 9.906575725051739e-06, "loss": 0.0047, "step": 13473 }, { "epoch": 12.955769230769231, "grad_norm": 1.3083077669143677, "learning_rate": 9.90533010502246e-06, "loss": 0.0072, "step": 13474 }, { "epoch": 12.95673076923077, "grad_norm": 0.4145882725715637, "learning_rate": 9.90408448646218e-06, "loss": 0.0023, "step": 13475 }, { "epoch": 12.957692307692307, "grad_norm": 0.6487550735473633, "learning_rate": 9.90283886939023e-06, "loss": 0.006, "step": 13476 }, { "epoch": 12.958653846153846, "grad_norm": 2.970641613006592, "learning_rate": 9.90159325382593e-06, "loss": 0.0317, "step": 13477 }, { "epoch": 12.959615384615384, "grad_norm": 0.5372416377067566, "learning_rate": 9.900347639788616e-06, "loss": 0.0021, "step": 13478 }, { "epoch": 12.960576923076923, "grad_norm": 3.999927520751953, "learning_rate": 9.899102027297617e-06, "loss": 0.0113, "step": 13479 }, { "epoch": 12.961538461538462, "grad_norm": 0.9591711163520813, "learning_rate": 9.897856416372254e-06, "loss": 0.0061, "step": 13480 }, { "epoch": 12.9625, "grad_norm": 3.0970592498779297, "learning_rate": 9.896610807031864e-06, "loss": 0.0148, "step": 13481 }, { "epoch": 12.963461538461539, "grad_norm": 0.3346675932407379, "learning_rate": 9.89536519929577e-06, "loss": 0.0025, "step": 13482 }, { "epoch": 12.964423076923078, "grad_norm": 2.71413254737854, "learning_rate": 9.8941195931833e-06, "loss": 0.0305, "step": 13483 }, { "epoch": 12.965384615384615, "grad_norm": 0.3038986921310425, "learning_rate": 9.892873988713781e-06, "loss": 0.0019, "step": 13484 }, { "epoch": 12.966346153846153, "grad_norm": 0.11490613222122192, "learning_rate": 9.891628385906546e-06, "loss": 0.0006, "step": 13485 }, { "epoch": 12.967307692307692, "grad_norm": 0.20461517572402954, "learning_rate": 9.89038278478092e-06, "loss": 0.0017, "step": 13486 }, { "epoch": 12.96826923076923, "grad_norm": 0.6607310175895691, "learning_rate": 9.889137185356231e-06, "loss": 0.0018, "step": 13487 }, { "epoch": 12.96923076923077, "grad_norm": 1.1781394481658936, "learning_rate": 9.887891587651807e-06, "loss": 0.0087, "step": 13488 }, { "epoch": 12.970192307692308, "grad_norm": 1.6963191032409668, "learning_rate": 9.886645991686977e-06, "loss": 0.0477, "step": 13489 }, { "epoch": 12.971153846153847, "grad_norm": 0.10408511757850647, "learning_rate": 9.88540039748107e-06, "loss": 0.0007, "step": 13490 }, { "epoch": 12.972115384615385, "grad_norm": 0.7693787813186646, "learning_rate": 9.88415480505341e-06, "loss": 0.0033, "step": 13491 }, { "epoch": 12.973076923076922, "grad_norm": 0.021833989769220352, "learning_rate": 9.882909214423329e-06, "loss": 0.0002, "step": 13492 }, { "epoch": 12.974038461538461, "grad_norm": 0.07001233100891113, "learning_rate": 9.881663625610152e-06, "loss": 0.0005, "step": 13493 }, { "epoch": 12.975, "grad_norm": 0.1707511693239212, "learning_rate": 9.88041803863321e-06, "loss": 0.0007, "step": 13494 }, { "epoch": 12.975961538461538, "grad_norm": 0.5752386450767517, "learning_rate": 9.879172453511827e-06, "loss": 0.0022, "step": 13495 }, { "epoch": 12.976923076923077, "grad_norm": 0.9026447534561157, "learning_rate": 9.877926870265335e-06, "loss": 0.0033, "step": 13496 }, { "epoch": 12.977884615384616, "grad_norm": 3.219813346862793, "learning_rate": 9.87668128891306e-06, "loss": 0.0421, "step": 13497 }, { "epoch": 12.978846153846154, "grad_norm": 0.5021775364875793, "learning_rate": 9.875435709474328e-06, "loss": 0.0032, "step": 13498 }, { "epoch": 12.979807692307693, "grad_norm": 0.03877215459942818, "learning_rate": 9.874190131968467e-06, "loss": 0.0004, "step": 13499 }, { "epoch": 12.98076923076923, "grad_norm": 2.381362199783325, "learning_rate": 9.872944556414808e-06, "loss": 0.026, "step": 13500 }, { "epoch": 12.981730769230769, "grad_norm": 0.032973356544971466, "learning_rate": 9.871698982832676e-06, "loss": 0.0003, "step": 13501 }, { "epoch": 12.982692307692307, "grad_norm": 1.2372018098831177, "learning_rate": 9.870453411241399e-06, "loss": 0.0056, "step": 13502 }, { "epoch": 12.983653846153846, "grad_norm": 0.31677189469337463, "learning_rate": 9.869207841660305e-06, "loss": 0.0011, "step": 13503 }, { "epoch": 12.984615384615385, "grad_norm": 0.18745455145835876, "learning_rate": 9.867962274108722e-06, "loss": 0.0011, "step": 13504 }, { "epoch": 12.985576923076923, "grad_norm": 0.5187129974365234, "learning_rate": 9.866716708605977e-06, "loss": 0.0029, "step": 13505 }, { "epoch": 12.986538461538462, "grad_norm": 2.166341781616211, "learning_rate": 9.865471145171396e-06, "loss": 0.0252, "step": 13506 }, { "epoch": 12.9875, "grad_norm": 3.631258964538574, "learning_rate": 9.86422558382431e-06, "loss": 0.077, "step": 13507 }, { "epoch": 12.98846153846154, "grad_norm": 2.7997963428497314, "learning_rate": 9.862980024584044e-06, "loss": 0.0591, "step": 13508 }, { "epoch": 12.989423076923076, "grad_norm": 0.5453376770019531, "learning_rate": 9.861734467469925e-06, "loss": 0.0025, "step": 13509 }, { "epoch": 12.990384615384615, "grad_norm": 0.14993475377559662, "learning_rate": 9.86048891250128e-06, "loss": 0.0009, "step": 13510 }, { "epoch": 12.991346153846154, "grad_norm": 0.03246854990720749, "learning_rate": 9.859243359697438e-06, "loss": 0.0002, "step": 13511 }, { "epoch": 12.992307692307692, "grad_norm": 0.40959247946739197, "learning_rate": 9.857997809077729e-06, "loss": 0.0017, "step": 13512 }, { "epoch": 12.993269230769231, "grad_norm": 2.8990752696990967, "learning_rate": 9.856752260661475e-06, "loss": 0.015, "step": 13513 }, { "epoch": 12.99423076923077, "grad_norm": 0.4474875032901764, "learning_rate": 9.855506714468006e-06, "loss": 0.0019, "step": 13514 }, { "epoch": 12.995192307692308, "grad_norm": 2.8433997631073, "learning_rate": 9.854261170516648e-06, "loss": 0.0802, "step": 13515 }, { "epoch": 12.996153846153845, "grad_norm": 1.6809024810791016, "learning_rate": 9.85301562882673e-06, "loss": 0.0085, "step": 13516 }, { "epoch": 12.997115384615384, "grad_norm": 0.021648988127708435, "learning_rate": 9.851770089417576e-06, "loss": 0.0002, "step": 13517 }, { "epoch": 12.998076923076923, "grad_norm": 1.3578468561172485, "learning_rate": 9.850524552308517e-06, "loss": 0.0048, "step": 13518 }, { "epoch": 12.999038461538461, "grad_norm": 4.601034164428711, "learning_rate": 9.849279017518879e-06, "loss": 0.0418, "step": 13519 }, { "epoch": 13.0, "grad_norm": 0.6584930419921875, "learning_rate": 9.848033485067984e-06, "loss": 0.0027, "step": 13520 }, { "epoch": 13.000961538461539, "grad_norm": 2.112273693084717, "learning_rate": 9.846787954975168e-06, "loss": 0.0477, "step": 13521 }, { "epoch": 13.001923076923077, "grad_norm": 1.3831193447113037, "learning_rate": 9.845542427259752e-06, "loss": 0.008, "step": 13522 }, { "epoch": 13.002884615384616, "grad_norm": 0.051789913326501846, "learning_rate": 9.844296901941064e-06, "loss": 0.0007, "step": 13523 }, { "epoch": 13.003846153846155, "grad_norm": 3.2437000274658203, "learning_rate": 9.843051379038431e-06, "loss": 0.0403, "step": 13524 }, { "epoch": 13.004807692307692, "grad_norm": 4.128025054931641, "learning_rate": 9.84180585857118e-06, "loss": 0.062, "step": 13525 }, { "epoch": 13.00576923076923, "grad_norm": 0.16811946034431458, "learning_rate": 9.840560340558639e-06, "loss": 0.0007, "step": 13526 }, { "epoch": 13.006730769230769, "grad_norm": 0.46903905272483826, "learning_rate": 9.839314825020132e-06, "loss": 0.0028, "step": 13527 }, { "epoch": 13.007692307692308, "grad_norm": 0.08220943808555603, "learning_rate": 9.838069311974986e-06, "loss": 0.0009, "step": 13528 }, { "epoch": 13.008653846153846, "grad_norm": 0.08894357085227966, "learning_rate": 9.836823801442532e-06, "loss": 0.0005, "step": 13529 }, { "epoch": 13.009615384615385, "grad_norm": 2.4003305435180664, "learning_rate": 9.835578293442092e-06, "loss": 0.0172, "step": 13530 }, { "epoch": 13.010576923076924, "grad_norm": 3.249966859817505, "learning_rate": 9.834332787992993e-06, "loss": 0.035, "step": 13531 }, { "epoch": 13.011538461538462, "grad_norm": 0.9106889367103577, "learning_rate": 9.833087285114565e-06, "loss": 0.0032, "step": 13532 }, { "epoch": 13.0125, "grad_norm": 1.2614637613296509, "learning_rate": 9.831841784826133e-06, "loss": 0.0201, "step": 13533 }, { "epoch": 13.013461538461538, "grad_norm": 1.2614637613296509, "learning_rate": 9.830596287147023e-06, "loss": 0.0349, "step": 13534 }, { "epoch": 13.014423076923077, "grad_norm": 0.12203714996576309, "learning_rate": 9.830596287147023e-06, "loss": 0.001, "step": 13535 }, { "epoch": 13.015384615384615, "grad_norm": 0.8940363526344299, "learning_rate": 9.829350792096558e-06, "loss": 0.0055, "step": 13536 }, { "epoch": 13.016346153846154, "grad_norm": 0.24844004213809967, "learning_rate": 9.828105299694072e-06, "loss": 0.001, "step": 13537 }, { "epoch": 13.017307692307693, "grad_norm": 0.16228842735290527, "learning_rate": 9.826859809958885e-06, "loss": 0.0009, "step": 13538 }, { "epoch": 13.018269230769231, "grad_norm": 0.24660997092723846, "learning_rate": 9.825614322910328e-06, "loss": 0.0014, "step": 13539 }, { "epoch": 13.01923076923077, "grad_norm": 0.05308062583208084, "learning_rate": 9.82436883856772e-06, "loss": 0.0004, "step": 13540 }, { "epoch": 13.020192307692307, "grad_norm": 0.03066113591194153, "learning_rate": 9.823123356950396e-06, "loss": 0.0003, "step": 13541 }, { "epoch": 13.021153846153846, "grad_norm": 0.4851778447628021, "learning_rate": 9.821877878077678e-06, "loss": 0.002, "step": 13542 }, { "epoch": 13.022115384615384, "grad_norm": 0.6992058753967285, "learning_rate": 9.82063240196889e-06, "loss": 0.002, "step": 13543 }, { "epoch": 13.023076923076923, "grad_norm": 1.601256012916565, "learning_rate": 9.819386928643363e-06, "loss": 0.0077, "step": 13544 }, { "epoch": 13.024038461538462, "grad_norm": 0.5095118880271912, "learning_rate": 9.818141458120423e-06, "loss": 0.0027, "step": 13545 }, { "epoch": 13.025, "grad_norm": 5.262826919555664, "learning_rate": 9.81689599041939e-06, "loss": 0.0484, "step": 13546 }, { "epoch": 13.025961538461539, "grad_norm": 3.890787124633789, "learning_rate": 9.815650525559595e-06, "loss": 0.0567, "step": 13547 }, { "epoch": 13.026923076923078, "grad_norm": 0.05049990117549896, "learning_rate": 9.814405063560362e-06, "loss": 0.0007, "step": 13548 }, { "epoch": 13.027884615384615, "grad_norm": 3.218751907348633, "learning_rate": 9.813159604441021e-06, "loss": 0.0115, "step": 13549 }, { "epoch": 13.028846153846153, "grad_norm": 2.7297635078430176, "learning_rate": 9.811914148220893e-06, "loss": 0.0407, "step": 13550 }, { "epoch": 13.029807692307692, "grad_norm": 0.24287927150726318, "learning_rate": 9.810668694919303e-06, "loss": 0.0007, "step": 13551 }, { "epoch": 13.03076923076923, "grad_norm": 0.1641256958246231, "learning_rate": 9.80942324455558e-06, "loss": 0.0007, "step": 13552 }, { "epoch": 13.03173076923077, "grad_norm": 0.11267459392547607, "learning_rate": 9.808177797149052e-06, "loss": 0.0007, "step": 13553 }, { "epoch": 13.032692307692308, "grad_norm": 1.9329100847244263, "learning_rate": 9.806932352719036e-06, "loss": 0.0187, "step": 13554 }, { "epoch": 13.033653846153847, "grad_norm": 3.5835189819335938, "learning_rate": 9.805686911284867e-06, "loss": 0.0455, "step": 13555 }, { "epoch": 13.034615384615385, "grad_norm": 0.986600935459137, "learning_rate": 9.804441472865868e-06, "loss": 0.0052, "step": 13556 }, { "epoch": 13.035576923076922, "grad_norm": 0.7722305059432983, "learning_rate": 9.803196037481362e-06, "loss": 0.0038, "step": 13557 }, { "epoch": 13.036538461538461, "grad_norm": 0.032191015779972076, "learning_rate": 9.801950605150673e-06, "loss": 0.0004, "step": 13558 }, { "epoch": 13.0375, "grad_norm": 0.032996248453855515, "learning_rate": 9.800705175893133e-06, "loss": 0.0003, "step": 13559 }, { "epoch": 13.038461538461538, "grad_norm": 0.14831939339637756, "learning_rate": 9.799459749728063e-06, "loss": 0.0007, "step": 13560 }, { "epoch": 13.039423076923077, "grad_norm": 0.21633850038051605, "learning_rate": 9.798214326674787e-06, "loss": 0.0018, "step": 13561 }, { "epoch": 13.040384615384616, "grad_norm": 1.1893010139465332, "learning_rate": 9.796968906752635e-06, "loss": 0.0088, "step": 13562 }, { "epoch": 13.041346153846154, "grad_norm": 1.0061439275741577, "learning_rate": 9.795723489980931e-06, "loss": 0.0794, "step": 13563 }, { "epoch": 13.042307692307693, "grad_norm": 0.044550392776727676, "learning_rate": 9.794478076378996e-06, "loss": 0.0005, "step": 13564 }, { "epoch": 13.04326923076923, "grad_norm": 0.4627264142036438, "learning_rate": 9.793232665966159e-06, "loss": 0.0021, "step": 13565 }, { "epoch": 13.044230769230769, "grad_norm": 0.5507422089576721, "learning_rate": 9.791987258761743e-06, "loss": 0.0017, "step": 13566 }, { "epoch": 13.045192307692307, "grad_norm": 0.043376658111810684, "learning_rate": 9.790741854785076e-06, "loss": 0.0004, "step": 13567 }, { "epoch": 13.046153846153846, "grad_norm": 0.05226188898086548, "learning_rate": 9.789496454055482e-06, "loss": 0.0004, "step": 13568 }, { "epoch": 13.047115384615385, "grad_norm": 1.2610148191452026, "learning_rate": 9.788251056592282e-06, "loss": 0.0075, "step": 13569 }, { "epoch": 13.048076923076923, "grad_norm": 0.04998696967959404, "learning_rate": 9.787005662414806e-06, "loss": 0.0006, "step": 13570 }, { "epoch": 13.049038461538462, "grad_norm": 2.630359172821045, "learning_rate": 9.785760271542377e-06, "loss": 0.0256, "step": 13571 }, { "epoch": 13.05, "grad_norm": 0.11947593837976456, "learning_rate": 9.784514883994318e-06, "loss": 0.001, "step": 13572 }, { "epoch": 13.050961538461538, "grad_norm": 0.1267455667257309, "learning_rate": 9.783269499789958e-06, "loss": 0.0011, "step": 13573 }, { "epoch": 13.051923076923076, "grad_norm": 1.9618395566940308, "learning_rate": 9.78202411894862e-06, "loss": 0.0194, "step": 13574 }, { "epoch": 13.052884615384615, "grad_norm": 0.03028164617717266, "learning_rate": 9.780778741489626e-06, "loss": 0.0003, "step": 13575 }, { "epoch": 13.053846153846154, "grad_norm": 0.07325103878974915, "learning_rate": 9.779533367432302e-06, "loss": 0.0006, "step": 13576 }, { "epoch": 13.054807692307692, "grad_norm": 0.07920703291893005, "learning_rate": 9.778287996795974e-06, "loss": 0.0006, "step": 13577 }, { "epoch": 13.055769230769231, "grad_norm": 2.1916160583496094, "learning_rate": 9.777042629599965e-06, "loss": 0.0216, "step": 13578 }, { "epoch": 13.05673076923077, "grad_norm": 1.5433073043823242, "learning_rate": 9.775797265863602e-06, "loss": 0.0233, "step": 13579 }, { "epoch": 13.057692307692308, "grad_norm": 0.046972524374723434, "learning_rate": 9.774551905606204e-06, "loss": 0.0006, "step": 13580 }, { "epoch": 13.058653846153845, "grad_norm": 0.5979985594749451, "learning_rate": 9.773306548847102e-06, "loss": 0.0023, "step": 13581 }, { "epoch": 13.059615384615384, "grad_norm": 2.4143118858337402, "learning_rate": 9.772061195605616e-06, "loss": 0.0117, "step": 13582 }, { "epoch": 13.060576923076923, "grad_norm": 0.05029718950390816, "learning_rate": 9.77081584590107e-06, "loss": 0.0006, "step": 13583 }, { "epoch": 13.061538461538461, "grad_norm": 0.9940075278282166, "learning_rate": 9.769570499752791e-06, "loss": 0.0084, "step": 13584 }, { "epoch": 13.0625, "grad_norm": 0.05551696568727493, "learning_rate": 9.7683251571801e-06, "loss": 0.0006, "step": 13585 }, { "epoch": 13.063461538461539, "grad_norm": 3.955387592315674, "learning_rate": 9.767079818202325e-06, "loss": 0.0926, "step": 13586 }, { "epoch": 13.064423076923077, "grad_norm": 0.10887948423624039, "learning_rate": 9.765834482838785e-06, "loss": 0.0005, "step": 13587 }, { "epoch": 13.065384615384616, "grad_norm": 0.05989442393183708, "learning_rate": 9.764589151108807e-06, "loss": 0.0004, "step": 13588 }, { "epoch": 13.066346153846155, "grad_norm": 0.12228859215974808, "learning_rate": 9.763343823031718e-06, "loss": 0.001, "step": 13589 }, { "epoch": 13.067307692307692, "grad_norm": 0.04869630187749863, "learning_rate": 9.762098498626834e-06, "loss": 0.0005, "step": 13590 }, { "epoch": 13.06826923076923, "grad_norm": 3.1066701412200928, "learning_rate": 9.760853177913483e-06, "loss": 0.0171, "step": 13591 }, { "epoch": 13.069230769230769, "grad_norm": 0.0304148867726326, "learning_rate": 9.75960786091099e-06, "loss": 0.0002, "step": 13592 }, { "epoch": 13.070192307692308, "grad_norm": 0.03075549565255642, "learning_rate": 9.75836254763868e-06, "loss": 0.0002, "step": 13593 }, { "epoch": 13.071153846153846, "grad_norm": 0.12831801176071167, "learning_rate": 9.757117238115871e-06, "loss": 0.0012, "step": 13594 }, { "epoch": 13.072115384615385, "grad_norm": 1.3912204504013062, "learning_rate": 9.75587193236189e-06, "loss": 0.0206, "step": 13595 }, { "epoch": 13.073076923076924, "grad_norm": 0.13893480598926544, "learning_rate": 9.75462663039606e-06, "loss": 0.0008, "step": 13596 }, { "epoch": 13.074038461538462, "grad_norm": 0.20539036393165588, "learning_rate": 9.753381332237706e-06, "loss": 0.0018, "step": 13597 }, { "epoch": 13.075, "grad_norm": 0.12119510769844055, "learning_rate": 9.752136037906149e-06, "loss": 0.0006, "step": 13598 }, { "epoch": 13.075961538461538, "grad_norm": 0.14174985885620117, "learning_rate": 9.750890747420713e-06, "loss": 0.0009, "step": 13599 }, { "epoch": 13.076923076923077, "grad_norm": 0.03315449133515358, "learning_rate": 9.749645460800723e-06, "loss": 0.0002, "step": 13600 }, { "epoch": 13.077884615384615, "grad_norm": 0.17912641167640686, "learning_rate": 9.748400178065499e-06, "loss": 0.0013, "step": 13601 }, { "epoch": 13.078846153846154, "grad_norm": 2.264065980911255, "learning_rate": 9.747154899234366e-06, "loss": 0.0221, "step": 13602 }, { "epoch": 13.079807692307693, "grad_norm": 1.244747519493103, "learning_rate": 9.745909624326648e-06, "loss": 0.0154, "step": 13603 }, { "epoch": 13.080769230769231, "grad_norm": 0.5433885455131531, "learning_rate": 9.744664353361666e-06, "loss": 0.0028, "step": 13604 }, { "epoch": 13.08173076923077, "grad_norm": 1.2027407884597778, "learning_rate": 9.743419086358742e-06, "loss": 0.0037, "step": 13605 }, { "epoch": 13.082692307692307, "grad_norm": 0.02374919131398201, "learning_rate": 9.742173823337201e-06, "loss": 0.0002, "step": 13606 }, { "epoch": 13.083653846153846, "grad_norm": 0.08481986820697784, "learning_rate": 9.740928564316369e-06, "loss": 0.0005, "step": 13607 }, { "epoch": 13.084615384615384, "grad_norm": 0.22709335386753082, "learning_rate": 9.739683309315564e-06, "loss": 0.0013, "step": 13608 }, { "epoch": 13.085576923076923, "grad_norm": 0.6839786171913147, "learning_rate": 9.738438058354108e-06, "loss": 0.0026, "step": 13609 }, { "epoch": 13.086538461538462, "grad_norm": 0.31788453459739685, "learning_rate": 9.737192811451326e-06, "loss": 0.002, "step": 13610 }, { "epoch": 13.0875, "grad_norm": 0.03167009353637695, "learning_rate": 9.735947568626544e-06, "loss": 0.0002, "step": 13611 }, { "epoch": 13.088461538461539, "grad_norm": 1.5328431129455566, "learning_rate": 9.734702329899076e-06, "loss": 0.0083, "step": 13612 }, { "epoch": 13.089423076923078, "grad_norm": 0.05998031795024872, "learning_rate": 9.733457095288252e-06, "loss": 0.0004, "step": 13613 }, { "epoch": 13.090384615384615, "grad_norm": 0.06743212044239044, "learning_rate": 9.732211864813392e-06, "loss": 0.0005, "step": 13614 }, { "epoch": 13.091346153846153, "grad_norm": 0.17920249700546265, "learning_rate": 9.730966638493816e-06, "loss": 0.001, "step": 13615 }, { "epoch": 13.092307692307692, "grad_norm": 2.291456699371338, "learning_rate": 9.729721416348847e-06, "loss": 0.0362, "step": 13616 }, { "epoch": 13.09326923076923, "grad_norm": 0.37689340114593506, "learning_rate": 9.728476198397811e-06, "loss": 0.002, "step": 13617 }, { "epoch": 13.09423076923077, "grad_norm": 0.8828418850898743, "learning_rate": 9.727230984660027e-06, "loss": 0.0035, "step": 13618 }, { "epoch": 13.095192307692308, "grad_norm": 0.08423537760972977, "learning_rate": 9.725985775154818e-06, "loss": 0.0006, "step": 13619 }, { "epoch": 13.096153846153847, "grad_norm": 0.21670101583003998, "learning_rate": 9.724740569901503e-06, "loss": 0.0016, "step": 13620 }, { "epoch": 13.097115384615385, "grad_norm": 0.1811690777540207, "learning_rate": 9.72349536891941e-06, "loss": 0.0005, "step": 13621 }, { "epoch": 13.098076923076922, "grad_norm": 0.21420025825500488, "learning_rate": 9.722250172227856e-06, "loss": 0.0018, "step": 13622 }, { "epoch": 13.099038461538461, "grad_norm": 1.4468045234680176, "learning_rate": 9.721004979846161e-06, "loss": 0.0045, "step": 13623 }, { "epoch": 13.1, "grad_norm": 0.03543826565146446, "learning_rate": 9.719759791793654e-06, "loss": 0.0004, "step": 13624 }, { "epoch": 13.100961538461538, "grad_norm": 0.07287126779556274, "learning_rate": 9.718514608089652e-06, "loss": 0.0004, "step": 13625 }, { "epoch": 13.101923076923077, "grad_norm": 1.7271552085876465, "learning_rate": 9.717269428753478e-06, "loss": 0.0129, "step": 13626 }, { "epoch": 13.102884615384616, "grad_norm": 1.5498286485671997, "learning_rate": 9.716024253804448e-06, "loss": 0.0245, "step": 13627 }, { "epoch": 13.103846153846154, "grad_norm": 0.041461486369371414, "learning_rate": 9.714779083261892e-06, "loss": 0.0004, "step": 13628 }, { "epoch": 13.104807692307693, "grad_norm": 1.2912927865982056, "learning_rate": 9.713533917145127e-06, "loss": 0.0193, "step": 13629 }, { "epoch": 13.10576923076923, "grad_norm": 0.13081970810890198, "learning_rate": 9.712288755473475e-06, "loss": 0.0004, "step": 13630 }, { "epoch": 13.106730769230769, "grad_norm": 1.8322136402130127, "learning_rate": 9.711043598266255e-06, "loss": 0.0166, "step": 13631 }, { "epoch": 13.107692307692307, "grad_norm": 0.2014252096414566, "learning_rate": 9.709798445542792e-06, "loss": 0.0009, "step": 13632 }, { "epoch": 13.108653846153846, "grad_norm": 2.675631523132324, "learning_rate": 9.708553297322407e-06, "loss": 0.0255, "step": 13633 }, { "epoch": 13.109615384615385, "grad_norm": 0.10532531887292862, "learning_rate": 9.707308153624415e-06, "loss": 0.0004, "step": 13634 }, { "epoch": 13.110576923076923, "grad_norm": 0.041528165340423584, "learning_rate": 9.706063014468145e-06, "loss": 0.0004, "step": 13635 }, { "epoch": 13.111538461538462, "grad_norm": 1.699718713760376, "learning_rate": 9.704817879872915e-06, "loss": 0.006, "step": 13636 }, { "epoch": 13.1125, "grad_norm": 0.20978593826293945, "learning_rate": 9.703572749858044e-06, "loss": 0.001, "step": 13637 }, { "epoch": 13.113461538461538, "grad_norm": 0.060758545994758606, "learning_rate": 9.70232762444285e-06, "loss": 0.0003, "step": 13638 }, { "epoch": 13.114423076923076, "grad_norm": 3.102832794189453, "learning_rate": 9.701082503646663e-06, "loss": 0.0256, "step": 13639 }, { "epoch": 13.115384615384615, "grad_norm": 0.44499725103378296, "learning_rate": 9.699837387488796e-06, "loss": 0.0016, "step": 13640 }, { "epoch": 13.116346153846154, "grad_norm": 1.4931565523147583, "learning_rate": 9.698592275988573e-06, "loss": 0.008, "step": 13641 }, { "epoch": 13.117307692307692, "grad_norm": 1.7161014080047607, "learning_rate": 9.69734716916531e-06, "loss": 0.0114, "step": 13642 }, { "epoch": 13.118269230769231, "grad_norm": 0.8873035907745361, "learning_rate": 9.696102067038334e-06, "loss": 0.0039, "step": 13643 }, { "epoch": 13.11923076923077, "grad_norm": 3.2051453590393066, "learning_rate": 9.694856969626962e-06, "loss": 0.0164, "step": 13644 }, { "epoch": 13.120192307692308, "grad_norm": 0.14518755674362183, "learning_rate": 9.693611876950512e-06, "loss": 0.0013, "step": 13645 }, { "epoch": 13.121153846153845, "grad_norm": 4.622775554656982, "learning_rate": 9.692366789028308e-06, "loss": 0.0674, "step": 13646 }, { "epoch": 13.122115384615384, "grad_norm": 1.3496681451797485, "learning_rate": 9.691121705879669e-06, "loss": 0.0059, "step": 13647 }, { "epoch": 13.123076923076923, "grad_norm": 0.13013310730457306, "learning_rate": 9.689876627523915e-06, "loss": 0.0005, "step": 13648 }, { "epoch": 13.124038461538461, "grad_norm": 0.11263865977525711, "learning_rate": 9.688631553980361e-06, "loss": 0.0009, "step": 13649 }, { "epoch": 13.125, "grad_norm": 0.46225932240486145, "learning_rate": 9.687386485268336e-06, "loss": 0.0019, "step": 13650 }, { "epoch": 13.125961538461539, "grad_norm": 2.131791114807129, "learning_rate": 9.686141421407154e-06, "loss": 0.0099, "step": 13651 }, { "epoch": 13.126923076923077, "grad_norm": 0.050600841641426086, "learning_rate": 9.684896362416135e-06, "loss": 0.0004, "step": 13652 }, { "epoch": 13.127884615384616, "grad_norm": 0.2376263439655304, "learning_rate": 9.683651308314602e-06, "loss": 0.0019, "step": 13653 }, { "epoch": 13.128846153846155, "grad_norm": 1.8572252988815308, "learning_rate": 9.682406259121872e-06, "loss": 0.017, "step": 13654 }, { "epoch": 13.129807692307692, "grad_norm": 0.5457060933113098, "learning_rate": 9.681161214857262e-06, "loss": 0.0023, "step": 13655 }, { "epoch": 13.13076923076923, "grad_norm": 0.4246559143066406, "learning_rate": 9.679916175540095e-06, "loss": 0.0016, "step": 13656 }, { "epoch": 13.131730769230769, "grad_norm": 0.07240910083055496, "learning_rate": 9.678671141189691e-06, "loss": 0.0005, "step": 13657 }, { "epoch": 13.132692307692308, "grad_norm": 0.11261851340532303, "learning_rate": 9.677426111825367e-06, "loss": 0.0007, "step": 13658 }, { "epoch": 13.133653846153846, "grad_norm": 2.7040152549743652, "learning_rate": 9.676181087466444e-06, "loss": 0.0136, "step": 13659 }, { "epoch": 13.134615384615385, "grad_norm": 1.2362922430038452, "learning_rate": 9.674936068132238e-06, "loss": 0.0046, "step": 13660 }, { "epoch": 13.135576923076924, "grad_norm": 2.562627077102661, "learning_rate": 9.673691053842072e-06, "loss": 0.0119, "step": 13661 }, { "epoch": 13.136538461538462, "grad_norm": 0.03599165752530098, "learning_rate": 9.672446044615263e-06, "loss": 0.0002, "step": 13662 }, { "epoch": 13.1375, "grad_norm": 0.1690894216299057, "learning_rate": 9.671201040471128e-06, "loss": 0.0009, "step": 13663 }, { "epoch": 13.138461538461538, "grad_norm": 0.145746648311615, "learning_rate": 9.66995604142899e-06, "loss": 0.0011, "step": 13664 }, { "epoch": 13.139423076923077, "grad_norm": 0.08640655875205994, "learning_rate": 9.668711047508167e-06, "loss": 0.0007, "step": 13665 }, { "epoch": 13.140384615384615, "grad_norm": 0.6706119775772095, "learning_rate": 9.667466058727973e-06, "loss": 0.0035, "step": 13666 }, { "epoch": 13.141346153846154, "grad_norm": 1.553555965423584, "learning_rate": 9.66622107510773e-06, "loss": 0.0035, "step": 13667 }, { "epoch": 13.142307692307693, "grad_norm": 4.127216815948486, "learning_rate": 9.664976096666757e-06, "loss": 0.0438, "step": 13668 }, { "epoch": 13.143269230769231, "grad_norm": 0.11326560378074646, "learning_rate": 9.663731123424373e-06, "loss": 0.0006, "step": 13669 }, { "epoch": 13.14423076923077, "grad_norm": 2.663102865219116, "learning_rate": 9.662486155399895e-06, "loss": 0.0122, "step": 13670 }, { "epoch": 13.145192307692307, "grad_norm": 2.087629556655884, "learning_rate": 9.661241192612638e-06, "loss": 0.0034, "step": 13671 }, { "epoch": 13.146153846153846, "grad_norm": 0.07876189798116684, "learning_rate": 9.659996235081926e-06, "loss": 0.0006, "step": 13672 }, { "epoch": 13.147115384615384, "grad_norm": 1.5466134548187256, "learning_rate": 9.658751282827075e-06, "loss": 0.0129, "step": 13673 }, { "epoch": 13.148076923076923, "grad_norm": 0.2638617157936096, "learning_rate": 9.6575063358674e-06, "loss": 0.0011, "step": 13674 }, { "epoch": 13.149038461538462, "grad_norm": 3.851874589920044, "learning_rate": 9.656261394222223e-06, "loss": 0.0713, "step": 13675 }, { "epoch": 13.15, "grad_norm": 0.8234162926673889, "learning_rate": 9.655016457910862e-06, "loss": 0.0057, "step": 13676 }, { "epoch": 13.150961538461539, "grad_norm": 2.470482349395752, "learning_rate": 9.653771526952632e-06, "loss": 0.0766, "step": 13677 }, { "epoch": 13.151923076923078, "grad_norm": 0.022732023149728775, "learning_rate": 9.652526601366849e-06, "loss": 0.0003, "step": 13678 }, { "epoch": 13.152884615384615, "grad_norm": 1.7406632900238037, "learning_rate": 9.651281681172837e-06, "loss": 0.0065, "step": 13679 }, { "epoch": 13.153846153846153, "grad_norm": 1.4770573377609253, "learning_rate": 9.65003676638991e-06, "loss": 0.0058, "step": 13680 }, { "epoch": 13.154807692307692, "grad_norm": 0.1297728270292282, "learning_rate": 9.648791857037384e-06, "loss": 0.0006, "step": 13681 }, { "epoch": 13.15576923076923, "grad_norm": 5.324253559112549, "learning_rate": 9.647546953134576e-06, "loss": 0.0924, "step": 13682 }, { "epoch": 13.15673076923077, "grad_norm": 0.562057375907898, "learning_rate": 9.646302054700805e-06, "loss": 0.0024, "step": 13683 }, { "epoch": 13.157692307692308, "grad_norm": 0.1768413931131363, "learning_rate": 9.645057161755391e-06, "loss": 0.001, "step": 13684 }, { "epoch": 13.158653846153847, "grad_norm": 0.7886571288108826, "learning_rate": 9.643812274317644e-06, "loss": 0.0033, "step": 13685 }, { "epoch": 13.159615384615385, "grad_norm": 0.08067379891872406, "learning_rate": 9.64256739240689e-06, "loss": 0.0005, "step": 13686 }, { "epoch": 13.160576923076922, "grad_norm": 0.2115776687860489, "learning_rate": 9.641322516042437e-06, "loss": 0.0017, "step": 13687 }, { "epoch": 13.161538461538461, "grad_norm": 2.1250715255737305, "learning_rate": 9.640077645243609e-06, "loss": 0.0075, "step": 13688 }, { "epoch": 13.1625, "grad_norm": 0.8205819725990295, "learning_rate": 9.638832780029716e-06, "loss": 0.0023, "step": 13689 }, { "epoch": 13.163461538461538, "grad_norm": 0.01911177858710289, "learning_rate": 9.63758792042008e-06, "loss": 0.0002, "step": 13690 }, { "epoch": 13.164423076923077, "grad_norm": 0.04281498119235039, "learning_rate": 9.636343066434016e-06, "loss": 0.0003, "step": 13691 }, { "epoch": 13.165384615384616, "grad_norm": 0.3879423439502716, "learning_rate": 9.63509821809084e-06, "loss": 0.0017, "step": 13692 }, { "epoch": 13.166346153846154, "grad_norm": 0.3948606252670288, "learning_rate": 9.633853375409868e-06, "loss": 0.0017, "step": 13693 }, { "epoch": 13.167307692307693, "grad_norm": 1.073081374168396, "learning_rate": 9.63260853841042e-06, "loss": 0.0057, "step": 13694 }, { "epoch": 13.16826923076923, "grad_norm": 0.863258421421051, "learning_rate": 9.631363707111808e-06, "loss": 0.0032, "step": 13695 }, { "epoch": 13.169230769230769, "grad_norm": 3.0502753257751465, "learning_rate": 9.630118881533345e-06, "loss": 0.0675, "step": 13696 }, { "epoch": 13.170192307692307, "grad_norm": 0.3216947913169861, "learning_rate": 9.628874061694355e-06, "loss": 0.0014, "step": 13697 }, { "epoch": 13.171153846153846, "grad_norm": 1.09727144241333, "learning_rate": 9.627629247614151e-06, "loss": 0.0024, "step": 13698 }, { "epoch": 13.172115384615385, "grad_norm": 2.3243520259857178, "learning_rate": 9.626384439312048e-06, "loss": 0.0093, "step": 13699 }, { "epoch": 13.173076923076923, "grad_norm": 1.7699909210205078, "learning_rate": 9.62513963680736e-06, "loss": 0.0076, "step": 13700 }, { "epoch": 13.174038461538462, "grad_norm": 0.28116974234580994, "learning_rate": 9.623894840119406e-06, "loss": 0.0017, "step": 13701 }, { "epoch": 13.175, "grad_norm": 2.670640230178833, "learning_rate": 9.622650049267502e-06, "loss": 0.1224, "step": 13702 }, { "epoch": 13.175961538461538, "grad_norm": 0.026260212063789368, "learning_rate": 9.621405264270957e-06, "loss": 0.0002, "step": 13703 }, { "epoch": 13.176923076923076, "grad_norm": 0.709369421005249, "learning_rate": 9.620160485149095e-06, "loss": 0.0029, "step": 13704 }, { "epoch": 13.177884615384615, "grad_norm": 0.10552637279033661, "learning_rate": 9.618915711921226e-06, "loss": 0.0008, "step": 13705 }, { "epoch": 13.178846153846154, "grad_norm": 0.05572531744837761, "learning_rate": 9.617670944606668e-06, "loss": 0.0005, "step": 13706 }, { "epoch": 13.179807692307692, "grad_norm": 0.05328722670674324, "learning_rate": 9.616426183224734e-06, "loss": 0.0006, "step": 13707 }, { "epoch": 13.180769230769231, "grad_norm": 0.9786396026611328, "learning_rate": 9.61518142779474e-06, "loss": 0.0028, "step": 13708 }, { "epoch": 13.18173076923077, "grad_norm": 0.2245115488767624, "learning_rate": 9.613936678336e-06, "loss": 0.0012, "step": 13709 }, { "epoch": 13.182692307692308, "grad_norm": 0.14926272630691528, "learning_rate": 9.61269193486783e-06, "loss": 0.0011, "step": 13710 }, { "epoch": 13.183653846153845, "grad_norm": 0.03238913044333458, "learning_rate": 9.611447197409544e-06, "loss": 0.0002, "step": 13711 }, { "epoch": 13.184615384615384, "grad_norm": 0.02542509324848652, "learning_rate": 9.610202465980458e-06, "loss": 0.0003, "step": 13712 }, { "epoch": 13.185576923076923, "grad_norm": 0.1936386525630951, "learning_rate": 9.608957740599888e-06, "loss": 0.0013, "step": 13713 }, { "epoch": 13.186538461538461, "grad_norm": 0.0456409677863121, "learning_rate": 9.60771302128714e-06, "loss": 0.0005, "step": 13714 }, { "epoch": 13.1875, "grad_norm": 0.0657615065574646, "learning_rate": 9.606468308061539e-06, "loss": 0.0004, "step": 13715 }, { "epoch": 13.188461538461539, "grad_norm": 0.16034799814224243, "learning_rate": 9.605223600942395e-06, "loss": 0.0012, "step": 13716 }, { "epoch": 13.189423076923077, "grad_norm": 1.0915733575820923, "learning_rate": 9.603978899949021e-06, "loss": 0.0264, "step": 13717 }, { "epoch": 13.190384615384616, "grad_norm": 0.24340413510799408, "learning_rate": 9.60273420510073e-06, "loss": 0.0019, "step": 13718 }, { "epoch": 13.191346153846155, "grad_norm": 0.052495431154966354, "learning_rate": 9.601489516416843e-06, "loss": 0.0005, "step": 13719 }, { "epoch": 13.192307692307692, "grad_norm": 1.9112037420272827, "learning_rate": 9.600244833916666e-06, "loss": 0.0242, "step": 13720 }, { "epoch": 13.19326923076923, "grad_norm": 0.6459853053092957, "learning_rate": 9.599000157619517e-06, "loss": 0.002, "step": 13721 }, { "epoch": 13.194230769230769, "grad_norm": 0.10826186090707779, "learning_rate": 9.597755487544707e-06, "loss": 0.0008, "step": 13722 }, { "epoch": 13.195192307692308, "grad_norm": 3.0701940059661865, "learning_rate": 9.596510823711553e-06, "loss": 0.0589, "step": 13723 }, { "epoch": 13.196153846153846, "grad_norm": 0.18251940608024597, "learning_rate": 9.595266166139366e-06, "loss": 0.001, "step": 13724 }, { "epoch": 13.197115384615385, "grad_norm": 0.16411972045898438, "learning_rate": 9.59402151484746e-06, "loss": 0.0006, "step": 13725 }, { "epoch": 13.198076923076924, "grad_norm": 0.3095862567424774, "learning_rate": 9.592776869855149e-06, "loss": 0.0024, "step": 13726 }, { "epoch": 13.199038461538462, "grad_norm": 0.04626540467143059, "learning_rate": 9.591532231181746e-06, "loss": 0.0005, "step": 13727 }, { "epoch": 13.2, "grad_norm": 0.03441021218895912, "learning_rate": 9.590287598846564e-06, "loss": 0.0003, "step": 13728 }, { "epoch": 13.200961538461538, "grad_norm": 0.8365755081176758, "learning_rate": 9.589042972868913e-06, "loss": 0.0044, "step": 13729 }, { "epoch": 13.201923076923077, "grad_norm": 0.3907444477081299, "learning_rate": 9.587798353268112e-06, "loss": 0.002, "step": 13730 }, { "epoch": 13.202884615384615, "grad_norm": 0.05054188519716263, "learning_rate": 9.586553740063473e-06, "loss": 0.0003, "step": 13731 }, { "epoch": 13.203846153846154, "grad_norm": 2.0927014350891113, "learning_rate": 9.585309133274303e-06, "loss": 0.0174, "step": 13732 }, { "epoch": 13.204807692307693, "grad_norm": 0.042433660477399826, "learning_rate": 9.584064532919918e-06, "loss": 0.0004, "step": 13733 }, { "epoch": 13.205769230769231, "grad_norm": 1.2629203796386719, "learning_rate": 9.582819939019632e-06, "loss": 0.0054, "step": 13734 }, { "epoch": 13.20673076923077, "grad_norm": 0.06158905103802681, "learning_rate": 9.581575351592756e-06, "loss": 0.0005, "step": 13735 }, { "epoch": 13.207692307692307, "grad_norm": 2.1063592433929443, "learning_rate": 9.580330770658601e-06, "loss": 0.0352, "step": 13736 }, { "epoch": 13.208653846153846, "grad_norm": 1.0271803140640259, "learning_rate": 9.579086196236483e-06, "loss": 0.0029, "step": 13737 }, { "epoch": 13.209615384615384, "grad_norm": 0.9126257300376892, "learning_rate": 9.57784162834571e-06, "loss": 0.005, "step": 13738 }, { "epoch": 13.210576923076923, "grad_norm": 0.04155062139034271, "learning_rate": 9.5765970670056e-06, "loss": 0.0003, "step": 13739 }, { "epoch": 13.211538461538462, "grad_norm": 0.822744607925415, "learning_rate": 9.575352512235454e-06, "loss": 0.0047, "step": 13740 }, { "epoch": 13.2125, "grad_norm": 0.7906516790390015, "learning_rate": 9.574107964054596e-06, "loss": 0.0032, "step": 13741 }, { "epoch": 13.213461538461539, "grad_norm": 0.017278239130973816, "learning_rate": 9.572863422482331e-06, "loss": 0.0001, "step": 13742 }, { "epoch": 13.214423076923078, "grad_norm": 3.2385056018829346, "learning_rate": 9.571618887537969e-06, "loss": 0.054, "step": 13743 }, { "epoch": 13.215384615384615, "grad_norm": 0.23202385008335114, "learning_rate": 9.570374359240828e-06, "loss": 0.0016, "step": 13744 }, { "epoch": 13.216346153846153, "grad_norm": 0.13687804341316223, "learning_rate": 9.569129837610216e-06, "loss": 0.0009, "step": 13745 }, { "epoch": 13.217307692307692, "grad_norm": 0.7045667767524719, "learning_rate": 9.567885322665444e-06, "loss": 0.0027, "step": 13746 }, { "epoch": 13.21826923076923, "grad_norm": 0.02746746316552162, "learning_rate": 9.566640814425823e-06, "loss": 0.0003, "step": 13747 }, { "epoch": 13.21923076923077, "grad_norm": 0.012771832756698132, "learning_rate": 9.565396312910665e-06, "loss": 0.0001, "step": 13748 }, { "epoch": 13.220192307692308, "grad_norm": 2.388943672180176, "learning_rate": 9.564151818139281e-06, "loss": 0.0354, "step": 13749 }, { "epoch": 13.221153846153847, "grad_norm": 0.6750142574310303, "learning_rate": 9.562907330130981e-06, "loss": 0.0038, "step": 13750 }, { "epoch": 13.222115384615385, "grad_norm": 0.3741263151168823, "learning_rate": 9.561662848905076e-06, "loss": 0.0013, "step": 13751 }, { "epoch": 13.223076923076922, "grad_norm": 0.8063076734542847, "learning_rate": 9.560418374480877e-06, "loss": 0.0043, "step": 13752 }, { "epoch": 13.224038461538461, "grad_norm": 0.12049004435539246, "learning_rate": 9.559173906877696e-06, "loss": 0.001, "step": 13753 }, { "epoch": 13.225, "grad_norm": 2.0056347846984863, "learning_rate": 9.557929446114841e-06, "loss": 0.0045, "step": 13754 }, { "epoch": 13.225961538461538, "grad_norm": 0.29537519812583923, "learning_rate": 9.556684992211624e-06, "loss": 0.0021, "step": 13755 }, { "epoch": 13.226923076923077, "grad_norm": 2.8304028511047363, "learning_rate": 9.555440545187355e-06, "loss": 0.0416, "step": 13756 }, { "epoch": 13.227884615384616, "grad_norm": 1.9005322456359863, "learning_rate": 9.554196105061345e-06, "loss": 0.0137, "step": 13757 }, { "epoch": 13.228846153846154, "grad_norm": 0.026133039966225624, "learning_rate": 9.5529516718529e-06, "loss": 0.0002, "step": 13758 }, { "epoch": 13.229807692307693, "grad_norm": 0.12591004371643066, "learning_rate": 9.551707245581336e-06, "loss": 0.001, "step": 13759 }, { "epoch": 13.23076923076923, "grad_norm": 0.14507262408733368, "learning_rate": 9.550462826265958e-06, "loss": 0.0009, "step": 13760 }, { "epoch": 13.231730769230769, "grad_norm": 0.083193339407444, "learning_rate": 9.54921841392608e-06, "loss": 0.0008, "step": 13761 }, { "epoch": 13.232692307692307, "grad_norm": 0.15349933505058289, "learning_rate": 9.547974008581004e-06, "loss": 0.0012, "step": 13762 }, { "epoch": 13.233653846153846, "grad_norm": 0.11097660660743713, "learning_rate": 9.54672961025005e-06, "loss": 0.001, "step": 13763 }, { "epoch": 13.234615384615385, "grad_norm": 1.73355233669281, "learning_rate": 9.54548521895252e-06, "loss": 0.0327, "step": 13764 }, { "epoch": 13.235576923076923, "grad_norm": 2.550968647003174, "learning_rate": 9.544240834707724e-06, "loss": 0.0221, "step": 13765 }, { "epoch": 13.236538461538462, "grad_norm": 0.2841843068599701, "learning_rate": 9.542996457534976e-06, "loss": 0.0011, "step": 13766 }, { "epoch": 13.2375, "grad_norm": 0.4821646511554718, "learning_rate": 9.54175208745358e-06, "loss": 0.003, "step": 13767 }, { "epoch": 13.238461538461538, "grad_norm": 0.06020659580826759, "learning_rate": 9.540507724482847e-06, "loss": 0.0006, "step": 13768 }, { "epoch": 13.239423076923076, "grad_norm": 1.4323660135269165, "learning_rate": 9.539263368642084e-06, "loss": 0.005, "step": 13769 }, { "epoch": 13.240384615384615, "grad_norm": 1.3750463724136353, "learning_rate": 9.538019019950602e-06, "loss": 0.0064, "step": 13770 }, { "epoch": 13.241346153846154, "grad_norm": 0.07458653301000595, "learning_rate": 9.536774678427709e-06, "loss": 0.0006, "step": 13771 }, { "epoch": 13.242307692307692, "grad_norm": 1.750540018081665, "learning_rate": 9.535530344092716e-06, "loss": 0.0231, "step": 13772 }, { "epoch": 13.243269230769231, "grad_norm": 0.02512240968644619, "learning_rate": 9.534286016964924e-06, "loss": 0.0002, "step": 13773 }, { "epoch": 13.24423076923077, "grad_norm": 2.4333274364471436, "learning_rate": 9.53304169706365e-06, "loss": 0.0372, "step": 13774 }, { "epoch": 13.245192307692308, "grad_norm": 2.1962342262268066, "learning_rate": 9.531797384408196e-06, "loss": 0.0098, "step": 13775 }, { "epoch": 13.246153846153845, "grad_norm": 1.2249855995178223, "learning_rate": 9.530553079017872e-06, "loss": 0.0081, "step": 13776 }, { "epoch": 13.247115384615384, "grad_norm": 0.9035472273826599, "learning_rate": 9.52930878091199e-06, "loss": 0.0049, "step": 13777 }, { "epoch": 13.248076923076923, "grad_norm": 0.260408878326416, "learning_rate": 9.528064490109853e-06, "loss": 0.0016, "step": 13778 }, { "epoch": 13.249038461538461, "grad_norm": 0.4875631332397461, "learning_rate": 9.526820206630768e-06, "loss": 0.0018, "step": 13779 }, { "epoch": 13.25, "grad_norm": 0.7806977033615112, "learning_rate": 9.525575930494045e-06, "loss": 0.0023, "step": 13780 }, { "epoch": 13.250961538461539, "grad_norm": 0.23194995522499084, "learning_rate": 9.524331661718994e-06, "loss": 0.0011, "step": 13781 }, { "epoch": 13.251923076923077, "grad_norm": 0.35487431287765503, "learning_rate": 9.523087400324916e-06, "loss": 0.002, "step": 13782 }, { "epoch": 13.252884615384616, "grad_norm": 0.025501159951090813, "learning_rate": 9.521843146331122e-06, "loss": 0.0002, "step": 13783 }, { "epoch": 13.253846153846155, "grad_norm": 0.038002531975507736, "learning_rate": 9.52059889975692e-06, "loss": 0.0002, "step": 13784 }, { "epoch": 13.254807692307692, "grad_norm": 0.019057895988225937, "learning_rate": 9.519354660621617e-06, "loss": 0.0002, "step": 13785 }, { "epoch": 13.25576923076923, "grad_norm": 4.811450958251953, "learning_rate": 9.51811042894452e-06, "loss": 0.0204, "step": 13786 }, { "epoch": 13.256730769230769, "grad_norm": 1.6626110076904297, "learning_rate": 9.516866204744932e-06, "loss": 0.0046, "step": 13787 }, { "epoch": 13.257692307692308, "grad_norm": 2.4906468391418457, "learning_rate": 9.515621988042164e-06, "loss": 0.0386, "step": 13788 }, { "epoch": 13.258653846153846, "grad_norm": 1.3029365539550781, "learning_rate": 9.514377778855521e-06, "loss": 0.0198, "step": 13789 }, { "epoch": 13.259615384615385, "grad_norm": 1.5632363557815552, "learning_rate": 9.51313357720431e-06, "loss": 0.0144, "step": 13790 }, { "epoch": 13.260576923076924, "grad_norm": 0.7582276463508606, "learning_rate": 9.511889383107835e-06, "loss": 0.0031, "step": 13791 }, { "epoch": 13.261538461538462, "grad_norm": 0.00917435996234417, "learning_rate": 9.510645196585407e-06, "loss": 0.0001, "step": 13792 }, { "epoch": 13.2625, "grad_norm": 1.2054651975631714, "learning_rate": 9.509401017656327e-06, "loss": 0.0108, "step": 13793 }, { "epoch": 13.263461538461538, "grad_norm": 0.2905459702014923, "learning_rate": 9.508156846339901e-06, "loss": 0.0012, "step": 13794 }, { "epoch": 13.264423076923077, "grad_norm": 0.5817355513572693, "learning_rate": 9.50691268265544e-06, "loss": 0.0026, "step": 13795 }, { "epoch": 13.265384615384615, "grad_norm": 1.371711254119873, "learning_rate": 9.505668526622246e-06, "loss": 0.0069, "step": 13796 }, { "epoch": 13.266346153846154, "grad_norm": 1.9050414562225342, "learning_rate": 9.504424378259628e-06, "loss": 0.0324, "step": 13797 }, { "epoch": 13.267307692307693, "grad_norm": 0.09349819272756577, "learning_rate": 9.503180237586888e-06, "loss": 0.0006, "step": 13798 }, { "epoch": 13.268269230769231, "grad_norm": 0.6722882986068726, "learning_rate": 9.501936104623334e-06, "loss": 0.0039, "step": 13799 }, { "epoch": 13.26923076923077, "grad_norm": 0.20788079500198364, "learning_rate": 9.500691979388267e-06, "loss": 0.0007, "step": 13800 }, { "epoch": 13.270192307692307, "grad_norm": 2.980783224105835, "learning_rate": 9.499447861900997e-06, "loss": 0.0116, "step": 13801 }, { "epoch": 13.271153846153846, "grad_norm": 0.4640553295612335, "learning_rate": 9.498203752180827e-06, "loss": 0.0025, "step": 13802 }, { "epoch": 13.272115384615384, "grad_norm": 0.708453893661499, "learning_rate": 9.49695965024706e-06, "loss": 0.002, "step": 13803 }, { "epoch": 13.273076923076923, "grad_norm": 0.028218822553753853, "learning_rate": 9.495715556119004e-06, "loss": 0.0003, "step": 13804 }, { "epoch": 13.274038461538462, "grad_norm": 0.2153215855360031, "learning_rate": 9.494471469815965e-06, "loss": 0.0011, "step": 13805 }, { "epoch": 13.275, "grad_norm": 0.045934706926345825, "learning_rate": 9.493227391357243e-06, "loss": 0.0005, "step": 13806 }, { "epoch": 13.275961538461539, "grad_norm": 0.5850118398666382, "learning_rate": 9.491983320762143e-06, "loss": 0.0023, "step": 13807 }, { "epoch": 13.276923076923078, "grad_norm": 0.33380743861198425, "learning_rate": 9.490739258049972e-06, "loss": 0.0011, "step": 13808 }, { "epoch": 13.277884615384615, "grad_norm": 0.08776084333658218, "learning_rate": 9.489495203240036e-06, "loss": 0.0006, "step": 13809 }, { "epoch": 13.278846153846153, "grad_norm": 0.08977522701025009, "learning_rate": 9.488251156351631e-06, "loss": 0.0006, "step": 13810 }, { "epoch": 13.279807692307692, "grad_norm": 0.05649280548095703, "learning_rate": 9.487007117404069e-06, "loss": 0.0005, "step": 13811 }, { "epoch": 13.28076923076923, "grad_norm": 1.6413829326629639, "learning_rate": 9.485763086416651e-06, "loss": 0.0086, "step": 13812 }, { "epoch": 13.28173076923077, "grad_norm": 3.7005155086517334, "learning_rate": 9.484519063408682e-06, "loss": 0.057, "step": 13813 }, { "epoch": 13.282692307692308, "grad_norm": 0.08864450454711914, "learning_rate": 9.48327504839946e-06, "loss": 0.0007, "step": 13814 }, { "epoch": 13.283653846153847, "grad_norm": 0.2085338830947876, "learning_rate": 9.482031041408296e-06, "loss": 0.0006, "step": 13815 }, { "epoch": 13.284615384615385, "grad_norm": 0.07618389278650284, "learning_rate": 9.48078704245449e-06, "loss": 0.0004, "step": 13816 }, { "epoch": 13.285576923076922, "grad_norm": 0.5220499634742737, "learning_rate": 9.479543051557344e-06, "loss": 0.0017, "step": 13817 }, { "epoch": 13.286538461538461, "grad_norm": 2.3160035610198975, "learning_rate": 9.478299068736162e-06, "loss": 0.0484, "step": 13818 }, { "epoch": 13.2875, "grad_norm": 0.10524428635835648, "learning_rate": 9.477055094010248e-06, "loss": 0.0005, "step": 13819 }, { "epoch": 13.288461538461538, "grad_norm": 0.6924018859863281, "learning_rate": 9.475811127398905e-06, "loss": 0.0022, "step": 13820 }, { "epoch": 13.289423076923077, "grad_norm": 0.10089709609746933, "learning_rate": 9.474567168921433e-06, "loss": 0.0006, "step": 13821 }, { "epoch": 13.290384615384616, "grad_norm": 0.1517111212015152, "learning_rate": 9.473323218597138e-06, "loss": 0.001, "step": 13822 }, { "epoch": 13.291346153846154, "grad_norm": 0.07440183311700821, "learning_rate": 9.47207927644532e-06, "loss": 0.0005, "step": 13823 }, { "epoch": 13.292307692307693, "grad_norm": 0.055682454258203506, "learning_rate": 9.470835342485283e-06, "loss": 0.0005, "step": 13824 }, { "epoch": 13.29326923076923, "grad_norm": 0.08970833569765091, "learning_rate": 9.469591416736326e-06, "loss": 0.0009, "step": 13825 }, { "epoch": 13.294230769230769, "grad_norm": 0.18652208149433136, "learning_rate": 9.468347499217755e-06, "loss": 0.0014, "step": 13826 }, { "epoch": 13.295192307692307, "grad_norm": 0.021023577079176903, "learning_rate": 9.467103589948872e-06, "loss": 0.0002, "step": 13827 }, { "epoch": 13.296153846153846, "grad_norm": 0.3882075846195221, "learning_rate": 9.465859688948977e-06, "loss": 0.0012, "step": 13828 }, { "epoch": 13.297115384615385, "grad_norm": 0.0691906288266182, "learning_rate": 9.46461579623737e-06, "loss": 0.0007, "step": 13829 }, { "epoch": 13.298076923076923, "grad_norm": 0.02824728563427925, "learning_rate": 9.463371911833355e-06, "loss": 0.0003, "step": 13830 }, { "epoch": 13.299038461538462, "grad_norm": 0.4201439321041107, "learning_rate": 9.462128035756234e-06, "loss": 0.0017, "step": 13831 }, { "epoch": 13.3, "grad_norm": 0.029719706624746323, "learning_rate": 9.460884168025305e-06, "loss": 0.0002, "step": 13832 }, { "epoch": 13.300961538461538, "grad_norm": 0.20132939517498016, "learning_rate": 9.459640308659873e-06, "loss": 0.0016, "step": 13833 }, { "epoch": 13.301923076923076, "grad_norm": 3.695033311843872, "learning_rate": 9.458396457679238e-06, "loss": 0.0546, "step": 13834 }, { "epoch": 13.302884615384615, "grad_norm": 1.3625980615615845, "learning_rate": 9.457152615102702e-06, "loss": 0.01, "step": 13835 }, { "epoch": 13.303846153846154, "grad_norm": 1.6022765636444092, "learning_rate": 9.45590878094956e-06, "loss": 0.0105, "step": 13836 }, { "epoch": 13.304807692307692, "grad_norm": 0.06685220450162888, "learning_rate": 9.454664955239119e-06, "loss": 0.0003, "step": 13837 }, { "epoch": 13.305769230769231, "grad_norm": 3.449000835418701, "learning_rate": 9.453421137990679e-06, "loss": 0.0321, "step": 13838 }, { "epoch": 13.30673076923077, "grad_norm": 0.4089224934577942, "learning_rate": 9.452177329223537e-06, "loss": 0.0016, "step": 13839 }, { "epoch": 13.307692307692308, "grad_norm": 0.15861006081104279, "learning_rate": 9.450933528956993e-06, "loss": 0.001, "step": 13840 }, { "epoch": 13.308653846153845, "grad_norm": 0.12735767662525177, "learning_rate": 9.449689737210352e-06, "loss": 0.0007, "step": 13841 }, { "epoch": 13.309615384615384, "grad_norm": 0.32055389881134033, "learning_rate": 9.448445954002911e-06, "loss": 0.0015, "step": 13842 }, { "epoch": 13.310576923076923, "grad_norm": 1.3945012092590332, "learning_rate": 9.447202179353967e-06, "loss": 0.0063, "step": 13843 }, { "epoch": 13.311538461538461, "grad_norm": 0.04198875278234482, "learning_rate": 9.445958413282826e-06, "loss": 0.0003, "step": 13844 }, { "epoch": 13.3125, "grad_norm": 4.90524959564209, "learning_rate": 9.444714655808784e-06, "loss": 0.0249, "step": 13845 }, { "epoch": 13.313461538461539, "grad_norm": 0.8093125224113464, "learning_rate": 9.443470906951141e-06, "loss": 0.0025, "step": 13846 }, { "epoch": 13.314423076923077, "grad_norm": 1.6793054342269897, "learning_rate": 9.442227166729193e-06, "loss": 0.0348, "step": 13847 }, { "epoch": 13.315384615384616, "grad_norm": 3.1546812057495117, "learning_rate": 9.440983435162245e-06, "loss": 0.0187, "step": 13848 }, { "epoch": 13.316346153846155, "grad_norm": 0.049590058624744415, "learning_rate": 9.439739712269594e-06, "loss": 0.0004, "step": 13849 }, { "epoch": 13.317307692307692, "grad_norm": 0.07704620063304901, "learning_rate": 9.438495998070539e-06, "loss": 0.0006, "step": 13850 }, { "epoch": 13.31826923076923, "grad_norm": 0.7449840307235718, "learning_rate": 9.437252292584373e-06, "loss": 0.0038, "step": 13851 }, { "epoch": 13.319230769230769, "grad_norm": 0.0814557671546936, "learning_rate": 9.436008595830405e-06, "loss": 0.0006, "step": 13852 }, { "epoch": 13.320192307692308, "grad_norm": 2.316720962524414, "learning_rate": 9.434764907827927e-06, "loss": 0.0509, "step": 13853 }, { "epoch": 13.321153846153846, "grad_norm": 0.8569480180740356, "learning_rate": 9.433521228596237e-06, "loss": 0.0025, "step": 13854 }, { "epoch": 13.322115384615385, "grad_norm": 0.12321621924638748, "learning_rate": 9.432277558154637e-06, "loss": 0.0008, "step": 13855 }, { "epoch": 13.323076923076924, "grad_norm": 0.12757109105587006, "learning_rate": 9.431033896522423e-06, "loss": 0.0008, "step": 13856 }, { "epoch": 13.324038461538462, "grad_norm": 1.5227967500686646, "learning_rate": 9.429790243718892e-06, "loss": 0.0066, "step": 13857 }, { "epoch": 13.325, "grad_norm": 0.12729914486408234, "learning_rate": 9.428546599763342e-06, "loss": 0.0011, "step": 13858 }, { "epoch": 13.325961538461538, "grad_norm": 0.6279481053352356, "learning_rate": 9.427302964675072e-06, "loss": 0.0013, "step": 13859 }, { "epoch": 13.326923076923077, "grad_norm": 0.2535472512245178, "learning_rate": 9.42605933847338e-06, "loss": 0.0014, "step": 13860 }, { "epoch": 13.327884615384615, "grad_norm": 0.047353822737932205, "learning_rate": 9.42481572117756e-06, "loss": 0.0005, "step": 13861 }, { "epoch": 13.328846153846154, "grad_norm": 0.02307625114917755, "learning_rate": 9.423572112806917e-06, "loss": 0.0002, "step": 13862 }, { "epoch": 13.329807692307693, "grad_norm": 0.018826473504304886, "learning_rate": 9.422328513380741e-06, "loss": 0.0002, "step": 13863 }, { "epoch": 13.330769230769231, "grad_norm": 0.2662539482116699, "learning_rate": 9.42108492291833e-06, "loss": 0.0029, "step": 13864 }, { "epoch": 13.33173076923077, "grad_norm": 2.5740060806274414, "learning_rate": 9.41984134143898e-06, "loss": 0.0366, "step": 13865 }, { "epoch": 13.332692307692307, "grad_norm": 0.6522115468978882, "learning_rate": 9.418597768961993e-06, "loss": 0.0039, "step": 13866 }, { "epoch": 13.333653846153846, "grad_norm": 0.053870368748903275, "learning_rate": 9.417354205506663e-06, "loss": 0.0006, "step": 13867 }, { "epoch": 13.334615384615384, "grad_norm": 0.8760233521461487, "learning_rate": 9.416110651092284e-06, "loss": 0.0038, "step": 13868 }, { "epoch": 13.335576923076923, "grad_norm": 0.6469208002090454, "learning_rate": 9.414867105738152e-06, "loss": 0.0016, "step": 13869 }, { "epoch": 13.336538461538462, "grad_norm": 2.860201120376587, "learning_rate": 9.413623569463566e-06, "loss": 0.0068, "step": 13870 }, { "epoch": 13.3375, "grad_norm": 0.08510427176952362, "learning_rate": 9.412380042287824e-06, "loss": 0.0006, "step": 13871 }, { "epoch": 13.338461538461539, "grad_norm": 0.29785436391830444, "learning_rate": 9.411136524230216e-06, "loss": 0.001, "step": 13872 }, { "epoch": 13.339423076923078, "grad_norm": 2.6751554012298584, "learning_rate": 9.409893015310041e-06, "loss": 0.02, "step": 13873 }, { "epoch": 13.340384615384615, "grad_norm": 0.07683470100164413, "learning_rate": 9.408649515546597e-06, "loss": 0.0006, "step": 13874 }, { "epoch": 13.341346153846153, "grad_norm": 0.2437744140625, "learning_rate": 9.407406024959176e-06, "loss": 0.0026, "step": 13875 }, { "epoch": 13.342307692307692, "grad_norm": 1.0218461751937866, "learning_rate": 9.406162543567072e-06, "loss": 0.0072, "step": 13876 }, { "epoch": 13.34326923076923, "grad_norm": 1.8336831331253052, "learning_rate": 9.404919071389586e-06, "loss": 0.0459, "step": 13877 }, { "epoch": 13.34423076923077, "grad_norm": 0.0395728275179863, "learning_rate": 9.403675608446008e-06, "loss": 0.0004, "step": 13878 }, { "epoch": 13.345192307692308, "grad_norm": 1.3158669471740723, "learning_rate": 9.402432154755634e-06, "loss": 0.0141, "step": 13879 }, { "epoch": 13.346153846153847, "grad_norm": 0.08341480791568756, "learning_rate": 9.401188710337757e-06, "loss": 0.0004, "step": 13880 }, { "epoch": 13.347115384615385, "grad_norm": 0.04104525223374367, "learning_rate": 9.399945275211677e-06, "loss": 0.0004, "step": 13881 }, { "epoch": 13.348076923076922, "grad_norm": 3.268967866897583, "learning_rate": 9.398701849396684e-06, "loss": 0.0463, "step": 13882 }, { "epoch": 13.349038461538461, "grad_norm": 1.2370442152023315, "learning_rate": 9.397458432912073e-06, "loss": 0.0063, "step": 13883 }, { "epoch": 13.35, "grad_norm": 0.5542938709259033, "learning_rate": 9.39621502577714e-06, "loss": 0.0021, "step": 13884 }, { "epoch": 13.350961538461538, "grad_norm": 1.2106724977493286, "learning_rate": 9.394971628011177e-06, "loss": 0.0064, "step": 13885 }, { "epoch": 13.351923076923077, "grad_norm": 0.9792291522026062, "learning_rate": 9.393728239633479e-06, "loss": 0.0046, "step": 13886 }, { "epoch": 13.352884615384616, "grad_norm": 0.07355838268995285, "learning_rate": 9.392484860663337e-06, "loss": 0.0004, "step": 13887 }, { "epoch": 13.353846153846154, "grad_norm": 0.19433541595935822, "learning_rate": 9.391241491120048e-06, "loss": 0.0007, "step": 13888 }, { "epoch": 13.354807692307693, "grad_norm": 0.10514909029006958, "learning_rate": 9.389998131022906e-06, "loss": 0.0007, "step": 13889 }, { "epoch": 13.35576923076923, "grad_norm": 0.05453801527619362, "learning_rate": 9.388754780391204e-06, "loss": 0.0003, "step": 13890 }, { "epoch": 13.356730769230769, "grad_norm": 0.12763433158397675, "learning_rate": 9.38751143924423e-06, "loss": 0.0007, "step": 13891 }, { "epoch": 13.357692307692307, "grad_norm": 0.03532175347208977, "learning_rate": 9.386268107601283e-06, "loss": 0.0003, "step": 13892 }, { "epoch": 13.358653846153846, "grad_norm": 0.9564370512962341, "learning_rate": 9.385024785481653e-06, "loss": 0.0105, "step": 13893 }, { "epoch": 13.359615384615385, "grad_norm": 0.04800568148493767, "learning_rate": 9.383781472904633e-06, "loss": 0.0004, "step": 13894 }, { "epoch": 13.360576923076923, "grad_norm": 3.521127462387085, "learning_rate": 9.382538169889516e-06, "loss": 0.0103, "step": 13895 }, { "epoch": 13.361538461538462, "grad_norm": 0.12597928941249847, "learning_rate": 9.381294876455595e-06, "loss": 0.0005, "step": 13896 }, { "epoch": 13.3625, "grad_norm": 0.026210660114884377, "learning_rate": 9.380051592622164e-06, "loss": 0.0003, "step": 13897 }, { "epoch": 13.363461538461538, "grad_norm": 0.06981226056814194, "learning_rate": 9.378808318408509e-06, "loss": 0.0003, "step": 13898 }, { "epoch": 13.364423076923076, "grad_norm": 0.2654113471508026, "learning_rate": 9.377565053833927e-06, "loss": 0.0014, "step": 13899 }, { "epoch": 13.365384615384615, "grad_norm": 1.5337378978729248, "learning_rate": 9.37632179891771e-06, "loss": 0.0054, "step": 13900 }, { "epoch": 13.366346153846154, "grad_norm": 1.5737977027893066, "learning_rate": 9.375078553679143e-06, "loss": 0.0116, "step": 13901 }, { "epoch": 13.367307692307692, "grad_norm": 1.049486756324768, "learning_rate": 9.373835318137527e-06, "loss": 0.0139, "step": 13902 }, { "epoch": 13.368269230769231, "grad_norm": 0.029447326436638832, "learning_rate": 9.37259209231215e-06, "loss": 0.0002, "step": 13903 }, { "epoch": 13.36923076923077, "grad_norm": 0.05978201702237129, "learning_rate": 9.371348876222301e-06, "loss": 0.0003, "step": 13904 }, { "epoch": 13.370192307692308, "grad_norm": 2.0963857173919678, "learning_rate": 9.37010566988727e-06, "loss": 0.0242, "step": 13905 }, { "epoch": 13.371153846153845, "grad_norm": 1.7520322799682617, "learning_rate": 9.368862473326355e-06, "loss": 0.0086, "step": 13906 }, { "epoch": 13.372115384615384, "grad_norm": 0.1026306003332138, "learning_rate": 9.36761928655884e-06, "loss": 0.0004, "step": 13907 }, { "epoch": 13.373076923076923, "grad_norm": 1.0779836177825928, "learning_rate": 9.366376109604017e-06, "loss": 0.0057, "step": 13908 }, { "epoch": 13.374038461538461, "grad_norm": 3.053441047668457, "learning_rate": 9.365132942481177e-06, "loss": 0.1214, "step": 13909 }, { "epoch": 13.375, "grad_norm": 4.261529922485352, "learning_rate": 9.36388978520961e-06, "loss": 0.0349, "step": 13910 }, { "epoch": 13.375961538461539, "grad_norm": 0.10367806255817413, "learning_rate": 9.362646637808608e-06, "loss": 0.0007, "step": 13911 }, { "epoch": 13.376923076923077, "grad_norm": 0.12131909281015396, "learning_rate": 9.361403500297459e-06, "loss": 0.0008, "step": 13912 }, { "epoch": 13.377884615384616, "grad_norm": 2.56834077835083, "learning_rate": 9.360160372695453e-06, "loss": 0.0187, "step": 13913 }, { "epoch": 13.378846153846155, "grad_norm": 0.640964925289154, "learning_rate": 9.35891725502188e-06, "loss": 0.0033, "step": 13914 }, { "epoch": 13.379807692307692, "grad_norm": 0.6266770362854004, "learning_rate": 9.357674147296032e-06, "loss": 0.0077, "step": 13915 }, { "epoch": 13.38076923076923, "grad_norm": 0.07261373847723007, "learning_rate": 9.356431049537191e-06, "loss": 0.0006, "step": 13916 }, { "epoch": 13.381730769230769, "grad_norm": 1.3231667280197144, "learning_rate": 9.355187961764655e-06, "loss": 0.0085, "step": 13917 }, { "epoch": 13.382692307692308, "grad_norm": 1.549757957458496, "learning_rate": 9.35394488399771e-06, "loss": 0.0218, "step": 13918 }, { "epoch": 13.383653846153846, "grad_norm": 0.03949762508273125, "learning_rate": 9.352701816255643e-06, "loss": 0.0004, "step": 13919 }, { "epoch": 13.384615384615385, "grad_norm": 1.584418535232544, "learning_rate": 9.351458758557744e-06, "loss": 0.0047, "step": 13920 }, { "epoch": 13.385576923076924, "grad_norm": 2.449612617492676, "learning_rate": 9.350215710923301e-06, "loss": 0.0315, "step": 13921 }, { "epoch": 13.386538461538462, "grad_norm": 0.039437416940927505, "learning_rate": 9.348972673371604e-06, "loss": 0.0002, "step": 13922 }, { "epoch": 13.3875, "grad_norm": 0.2024950385093689, "learning_rate": 9.34772964592194e-06, "loss": 0.0007, "step": 13923 }, { "epoch": 13.388461538461538, "grad_norm": 0.5055713057518005, "learning_rate": 9.346486628593597e-06, "loss": 0.0023, "step": 13924 }, { "epoch": 13.389423076923077, "grad_norm": 0.12089471518993378, "learning_rate": 9.345243621405865e-06, "loss": 0.0004, "step": 13925 }, { "epoch": 13.390384615384615, "grad_norm": 0.037322912365198135, "learning_rate": 9.34400062437803e-06, "loss": 0.0003, "step": 13926 }, { "epoch": 13.391346153846154, "grad_norm": 0.21694478392601013, "learning_rate": 9.342757637529377e-06, "loss": 0.0012, "step": 13927 }, { "epoch": 13.392307692307693, "grad_norm": 1.5344761610031128, "learning_rate": 9.3415146608792e-06, "loss": 0.0054, "step": 13928 }, { "epoch": 13.393269230769231, "grad_norm": 0.32708725333213806, "learning_rate": 9.340271694446782e-06, "loss": 0.0015, "step": 13929 }, { "epoch": 13.39423076923077, "grad_norm": 0.11772560328245163, "learning_rate": 9.339028738251411e-06, "loss": 0.0007, "step": 13930 }, { "epoch": 13.395192307692307, "grad_norm": 0.42242327332496643, "learning_rate": 9.337785792312372e-06, "loss": 0.0017, "step": 13931 }, { "epoch": 13.396153846153846, "grad_norm": 0.2611260712146759, "learning_rate": 9.336542856648958e-06, "loss": 0.0016, "step": 13932 }, { "epoch": 13.397115384615384, "grad_norm": 1.2221980094909668, "learning_rate": 9.335299931280448e-06, "loss": 0.009, "step": 13933 }, { "epoch": 13.398076923076923, "grad_norm": 0.025284219533205032, "learning_rate": 9.334057016226132e-06, "loss": 0.0003, "step": 13934 }, { "epoch": 13.399038461538462, "grad_norm": 0.43998679518699646, "learning_rate": 9.332814111505299e-06, "loss": 0.0019, "step": 13935 }, { "epoch": 13.4, "grad_norm": 0.19885997474193573, "learning_rate": 9.331571217137233e-06, "loss": 0.0011, "step": 13936 }, { "epoch": 13.400961538461539, "grad_norm": 1.9658480882644653, "learning_rate": 9.330328333141219e-06, "loss": 0.0313, "step": 13937 }, { "epoch": 13.401923076923078, "grad_norm": 0.038285937160253525, "learning_rate": 9.329085459536542e-06, "loss": 0.0003, "step": 13938 }, { "epoch": 13.402884615384615, "grad_norm": 0.04758104309439659, "learning_rate": 9.327842596342492e-06, "loss": 0.0006, "step": 13939 }, { "epoch": 13.403846153846153, "grad_norm": 0.01698361337184906, "learning_rate": 9.326599743578351e-06, "loss": 0.0002, "step": 13940 }, { "epoch": 13.404807692307692, "grad_norm": 0.016507985070347786, "learning_rate": 9.325356901263407e-06, "loss": 0.0002, "step": 13941 }, { "epoch": 13.40576923076923, "grad_norm": 0.13446956872940063, "learning_rate": 9.324114069416941e-06, "loss": 0.0008, "step": 13942 }, { "epoch": 13.40673076923077, "grad_norm": 0.08026309311389923, "learning_rate": 9.322871248058245e-06, "loss": 0.0005, "step": 13943 }, { "epoch": 13.407692307692308, "grad_norm": 0.5449901819229126, "learning_rate": 9.321628437206597e-06, "loss": 0.0017, "step": 13944 }, { "epoch": 13.408653846153847, "grad_norm": 0.7629894614219666, "learning_rate": 9.320385636881283e-06, "loss": 0.0028, "step": 13945 }, { "epoch": 13.409615384615385, "grad_norm": 0.927852988243103, "learning_rate": 9.319142847101593e-06, "loss": 0.0056, "step": 13946 }, { "epoch": 13.410576923076922, "grad_norm": 2.659829616546631, "learning_rate": 9.317900067886805e-06, "loss": 0.0343, "step": 13947 }, { "epoch": 13.411538461538461, "grad_norm": 0.08902093023061752, "learning_rate": 9.316657299256209e-06, "loss": 0.0005, "step": 13948 }, { "epoch": 13.4125, "grad_norm": 2.6171207427978516, "learning_rate": 9.315414541229082e-06, "loss": 0.0566, "step": 13949 }, { "epoch": 13.413461538461538, "grad_norm": 0.7116682529449463, "learning_rate": 9.314171793824716e-06, "loss": 0.002, "step": 13950 }, { "epoch": 13.414423076923077, "grad_norm": 0.3277761936187744, "learning_rate": 9.31292905706239e-06, "loss": 0.0011, "step": 13951 }, { "epoch": 13.415384615384616, "grad_norm": 0.19458256661891937, "learning_rate": 9.311686330961386e-06, "loss": 0.0012, "step": 13952 }, { "epoch": 13.416346153846154, "grad_norm": 0.22752048075199127, "learning_rate": 9.310443615540992e-06, "loss": 0.0012, "step": 13953 }, { "epoch": 13.417307692307693, "grad_norm": 2.97855281829834, "learning_rate": 9.30920091082049e-06, "loss": 0.0113, "step": 13954 }, { "epoch": 13.41826923076923, "grad_norm": 1.8067920207977295, "learning_rate": 9.307958216819162e-06, "loss": 0.0145, "step": 13955 }, { "epoch": 13.419230769230769, "grad_norm": 0.11505552381277084, "learning_rate": 9.306715533556289e-06, "loss": 0.0007, "step": 13956 }, { "epoch": 13.420192307692307, "grad_norm": 0.7420883774757385, "learning_rate": 9.305472861051157e-06, "loss": 0.0038, "step": 13957 }, { "epoch": 13.421153846153846, "grad_norm": 0.091959647834301, "learning_rate": 9.30423019932305e-06, "loss": 0.0005, "step": 13958 }, { "epoch": 13.422115384615385, "grad_norm": 0.19514571130275726, "learning_rate": 9.302987548391248e-06, "loss": 0.001, "step": 13959 }, { "epoch": 13.423076923076923, "grad_norm": 0.7023006081581116, "learning_rate": 9.301744908275032e-06, "loss": 0.003, "step": 13960 }, { "epoch": 13.424038461538462, "grad_norm": 0.1323644071817398, "learning_rate": 9.300502278993687e-06, "loss": 0.0007, "step": 13961 }, { "epoch": 13.425, "grad_norm": 0.018139254301786423, "learning_rate": 9.299259660566491e-06, "loss": 0.0002, "step": 13962 }, { "epoch": 13.425961538461538, "grad_norm": 0.06896733492612839, "learning_rate": 9.29801705301273e-06, "loss": 0.0003, "step": 13963 }, { "epoch": 13.426923076923076, "grad_norm": 1.3517720699310303, "learning_rate": 9.296774456351685e-06, "loss": 0.0039, "step": 13964 }, { "epoch": 13.427884615384615, "grad_norm": 0.08334790915250778, "learning_rate": 9.295531870602636e-06, "loss": 0.0005, "step": 13965 }, { "epoch": 13.428846153846154, "grad_norm": 1.7448780536651611, "learning_rate": 9.294289295784866e-06, "loss": 0.0572, "step": 13966 }, { "epoch": 13.429807692307692, "grad_norm": 0.025313979014754295, "learning_rate": 9.293046731917651e-06, "loss": 0.0002, "step": 13967 }, { "epoch": 13.430769230769231, "grad_norm": 0.5058680176734924, "learning_rate": 9.291804179020279e-06, "loss": 0.0037, "step": 13968 }, { "epoch": 13.43173076923077, "grad_norm": 1.1046744585037231, "learning_rate": 9.290561637112027e-06, "loss": 0.0135, "step": 13969 }, { "epoch": 13.432692307692308, "grad_norm": 0.046207401901483536, "learning_rate": 9.289319106212176e-06, "loss": 0.0003, "step": 13970 }, { "epoch": 13.433653846153845, "grad_norm": 0.06374334543943405, "learning_rate": 9.288076586340005e-06, "loss": 0.0005, "step": 13971 }, { "epoch": 13.434615384615384, "grad_norm": 0.16142217814922333, "learning_rate": 9.286834077514799e-06, "loss": 0.001, "step": 13972 }, { "epoch": 13.435576923076923, "grad_norm": 0.2658643424510956, "learning_rate": 9.285591579755834e-06, "loss": 0.0018, "step": 13973 }, { "epoch": 13.436538461538461, "grad_norm": 0.4053686857223511, "learning_rate": 9.28434909308239e-06, "loss": 0.002, "step": 13974 }, { "epoch": 13.4375, "grad_norm": 0.6488365530967712, "learning_rate": 9.283106617513748e-06, "loss": 0.0042, "step": 13975 }, { "epoch": 13.438461538461539, "grad_norm": 1.3581889867782593, "learning_rate": 9.281864153069189e-06, "loss": 0.0111, "step": 13976 }, { "epoch": 13.439423076923077, "grad_norm": 0.5310563445091248, "learning_rate": 9.280621699767988e-06, "loss": 0.0039, "step": 13977 }, { "epoch": 13.440384615384616, "grad_norm": 0.2155277132987976, "learning_rate": 9.279379257629427e-06, "loss": 0.001, "step": 13978 }, { "epoch": 13.441346153846155, "grad_norm": 0.0484469011425972, "learning_rate": 9.278136826672786e-06, "loss": 0.0003, "step": 13979 }, { "epoch": 13.442307692307692, "grad_norm": 0.08625829964876175, "learning_rate": 9.276894406917345e-06, "loss": 0.0007, "step": 13980 }, { "epoch": 13.44326923076923, "grad_norm": 1.6578174829483032, "learning_rate": 9.275651998382377e-06, "loss": 0.005, "step": 13981 }, { "epoch": 13.444230769230769, "grad_norm": 0.4363859295845032, "learning_rate": 9.274409601087165e-06, "loss": 0.002, "step": 13982 }, { "epoch": 13.445192307692308, "grad_norm": 0.05926606431603432, "learning_rate": 9.273167215050987e-06, "loss": 0.0004, "step": 13983 }, { "epoch": 13.446153846153846, "grad_norm": 0.1572830229997635, "learning_rate": 9.27192484029312e-06, "loss": 0.001, "step": 13984 }, { "epoch": 13.447115384615385, "grad_norm": 0.19737082719802856, "learning_rate": 9.270682476832842e-06, "loss": 0.0014, "step": 13985 }, { "epoch": 13.448076923076924, "grad_norm": 0.24114595353603363, "learning_rate": 9.269440124689432e-06, "loss": 0.0009, "step": 13986 }, { "epoch": 13.449038461538462, "grad_norm": 0.49769896268844604, "learning_rate": 9.26819778388217e-06, "loss": 0.0024, "step": 13987 }, { "epoch": 13.45, "grad_norm": 0.06728290021419525, "learning_rate": 9.266955454430328e-06, "loss": 0.0003, "step": 13988 }, { "epoch": 13.450961538461538, "grad_norm": 0.07936647534370422, "learning_rate": 9.265713136353184e-06, "loss": 0.0005, "step": 13989 }, { "epoch": 13.451923076923077, "grad_norm": 5.294432163238525, "learning_rate": 9.264470829670019e-06, "loss": 0.0767, "step": 13990 }, { "epoch": 13.452884615384615, "grad_norm": 1.0160331726074219, "learning_rate": 9.263228534400108e-06, "loss": 0.0071, "step": 13991 }, { "epoch": 13.453846153846154, "grad_norm": 0.012258421629667282, "learning_rate": 9.261986250562728e-06, "loss": 0.0001, "step": 13992 }, { "epoch": 13.454807692307693, "grad_norm": 0.3387789726257324, "learning_rate": 9.260743978177155e-06, "loss": 0.0021, "step": 13993 }, { "epoch": 13.455769230769231, "grad_norm": 3.260960340499878, "learning_rate": 9.259501717262666e-06, "loss": 0.0192, "step": 13994 }, { "epoch": 13.45673076923077, "grad_norm": 0.07719700038433075, "learning_rate": 9.258259467838537e-06, "loss": 0.0006, "step": 13995 }, { "epoch": 13.457692307692307, "grad_norm": 2.8206002712249756, "learning_rate": 9.257017229924042e-06, "loss": 0.0243, "step": 13996 }, { "epoch": 13.458653846153846, "grad_norm": 0.04732263833284378, "learning_rate": 9.255775003538462e-06, "loss": 0.0003, "step": 13997 }, { "epoch": 13.459615384615384, "grad_norm": 1.2455658912658691, "learning_rate": 9.254532788701068e-06, "loss": 0.008, "step": 13998 }, { "epoch": 13.460576923076923, "grad_norm": 3.684373140335083, "learning_rate": 9.253290585431139e-06, "loss": 0.0202, "step": 13999 }, { "epoch": 13.461538461538462, "grad_norm": 1.1310712099075317, "learning_rate": 9.252048393747945e-06, "loss": 0.0079, "step": 14000 }, { "epoch": 13.4625, "grad_norm": 0.01502522174268961, "learning_rate": 9.250806213670767e-06, "loss": 0.0001, "step": 14001 }, { "epoch": 13.463461538461539, "grad_norm": 1.539556860923767, "learning_rate": 9.24956404521888e-06, "loss": 0.0036, "step": 14002 }, { "epoch": 13.464423076923078, "grad_norm": 0.5701944231987, "learning_rate": 9.248321888411552e-06, "loss": 0.0027, "step": 14003 }, { "epoch": 13.465384615384615, "grad_norm": 2.6257426738739014, "learning_rate": 9.247079743268065e-06, "loss": 0.0733, "step": 14004 }, { "epoch": 13.466346153846153, "grad_norm": 1.6699386835098267, "learning_rate": 9.24583760980769e-06, "loss": 0.0038, "step": 14005 }, { "epoch": 13.467307692307692, "grad_norm": 0.04849240183830261, "learning_rate": 9.244595488049702e-06, "loss": 0.0004, "step": 14006 }, { "epoch": 13.46826923076923, "grad_norm": 0.027850717306137085, "learning_rate": 9.243353378013372e-06, "loss": 0.0002, "step": 14007 }, { "epoch": 13.46923076923077, "grad_norm": 0.054851774126291275, "learning_rate": 9.242111279717981e-06, "loss": 0.0004, "step": 14008 }, { "epoch": 13.470192307692308, "grad_norm": 0.7422464489936829, "learning_rate": 9.2408691931828e-06, "loss": 0.0035, "step": 14009 }, { "epoch": 13.471153846153847, "grad_norm": 0.3975062668323517, "learning_rate": 9.239627118427098e-06, "loss": 0.0016, "step": 14010 }, { "epoch": 13.472115384615385, "grad_norm": 0.49908962845802307, "learning_rate": 9.23838505547015e-06, "loss": 0.0057, "step": 14011 }, { "epoch": 13.473076923076922, "grad_norm": 2.3883018493652344, "learning_rate": 9.237143004331231e-06, "loss": 0.0204, "step": 14012 }, { "epoch": 13.474038461538461, "grad_norm": 1.7199593782424927, "learning_rate": 9.235900965029616e-06, "loss": 0.0217, "step": 14013 }, { "epoch": 13.475, "grad_norm": 2.125215530395508, "learning_rate": 9.234658937584572e-06, "loss": 0.0124, "step": 14014 }, { "epoch": 13.475961538461538, "grad_norm": 0.20752352476119995, "learning_rate": 9.23341692201538e-06, "loss": 0.0015, "step": 14015 }, { "epoch": 13.476923076923077, "grad_norm": 0.02639841102063656, "learning_rate": 9.232174918341305e-06, "loss": 0.0003, "step": 14016 }, { "epoch": 13.477884615384616, "grad_norm": 0.05218852683901787, "learning_rate": 9.230932926581622e-06, "loss": 0.0004, "step": 14017 }, { "epoch": 13.478846153846154, "grad_norm": 0.2322084605693817, "learning_rate": 9.2296909467556e-06, "loss": 0.0017, "step": 14018 }, { "epoch": 13.479807692307693, "grad_norm": 1.352250099182129, "learning_rate": 9.228448978882517e-06, "loss": 0.0139, "step": 14019 }, { "epoch": 13.48076923076923, "grad_norm": 0.027425270527601242, "learning_rate": 9.227207022981642e-06, "loss": 0.0002, "step": 14020 }, { "epoch": 13.481730769230769, "grad_norm": 0.03942868858575821, "learning_rate": 9.225965079072244e-06, "loss": 0.0003, "step": 14021 }, { "epoch": 13.482692307692307, "grad_norm": 1.8578526973724365, "learning_rate": 9.224723147173594e-06, "loss": 0.0151, "step": 14022 }, { "epoch": 13.483653846153846, "grad_norm": 0.05272364988923073, "learning_rate": 9.22348122730497e-06, "loss": 0.0003, "step": 14023 }, { "epoch": 13.484615384615385, "grad_norm": 1.9751337766647339, "learning_rate": 9.222239319485635e-06, "loss": 0.0622, "step": 14024 }, { "epoch": 13.485576923076923, "grad_norm": 0.06047823280096054, "learning_rate": 9.220997423734863e-06, "loss": 0.0004, "step": 14025 }, { "epoch": 13.486538461538462, "grad_norm": 1.7114721536636353, "learning_rate": 9.219755540071927e-06, "loss": 0.0096, "step": 14026 }, { "epoch": 13.4875, "grad_norm": 2.9916441440582275, "learning_rate": 9.218513668516094e-06, "loss": 0.0334, "step": 14027 }, { "epoch": 13.488461538461538, "grad_norm": 0.2786063253879547, "learning_rate": 9.217271809086635e-06, "loss": 0.0019, "step": 14028 }, { "epoch": 13.489423076923076, "grad_norm": 0.015174517408013344, "learning_rate": 9.216029961802819e-06, "loss": 0.0001, "step": 14029 }, { "epoch": 13.490384615384615, "grad_norm": 0.09410418570041656, "learning_rate": 9.214788126683919e-06, "loss": 0.0005, "step": 14030 }, { "epoch": 13.491346153846154, "grad_norm": 0.7703243494033813, "learning_rate": 9.213546303749203e-06, "loss": 0.0031, "step": 14031 }, { "epoch": 13.492307692307692, "grad_norm": 0.9710452556610107, "learning_rate": 9.212304493017937e-06, "loss": 0.01, "step": 14032 }, { "epoch": 13.493269230769231, "grad_norm": 3.262187957763672, "learning_rate": 9.211062694509396e-06, "loss": 0.0286, "step": 14033 }, { "epoch": 13.49423076923077, "grad_norm": 0.2751384377479553, "learning_rate": 9.209820908242847e-06, "loss": 0.0012, "step": 14034 }, { "epoch": 13.495192307692308, "grad_norm": 2.4555904865264893, "learning_rate": 9.208579134237559e-06, "loss": 0.0402, "step": 14035 }, { "epoch": 13.496153846153845, "grad_norm": 0.24983859062194824, "learning_rate": 9.207337372512797e-06, "loss": 0.0013, "step": 14036 }, { "epoch": 13.497115384615384, "grad_norm": 0.14798420667648315, "learning_rate": 9.206095623087834e-06, "loss": 0.0006, "step": 14037 }, { "epoch": 13.498076923076923, "grad_norm": 1.5209611654281616, "learning_rate": 9.204853885981938e-06, "loss": 0.0164, "step": 14038 }, { "epoch": 13.499038461538461, "grad_norm": 0.5615701079368591, "learning_rate": 9.203612161214375e-06, "loss": 0.0017, "step": 14039 }, { "epoch": 13.5, "grad_norm": 0.20418262481689453, "learning_rate": 9.202370448804412e-06, "loss": 0.0006, "step": 14040 }, { "epoch": 13.500961538461539, "grad_norm": 1.1962825059890747, "learning_rate": 9.201128748771322e-06, "loss": 0.0079, "step": 14041 }, { "epoch": 13.501923076923077, "grad_norm": 0.5095519423484802, "learning_rate": 9.199887061134367e-06, "loss": 0.0013, "step": 14042 }, { "epoch": 13.502884615384616, "grad_norm": 0.09217650443315506, "learning_rate": 9.198645385912817e-06, "loss": 0.0007, "step": 14043 }, { "epoch": 13.503846153846155, "grad_norm": 0.8937057852745056, "learning_rate": 9.197403723125938e-06, "loss": 0.0023, "step": 14044 }, { "epoch": 13.504807692307692, "grad_norm": 0.47734856605529785, "learning_rate": 9.196162072793e-06, "loss": 0.0017, "step": 14045 }, { "epoch": 13.50576923076923, "grad_norm": 0.05405659228563309, "learning_rate": 9.194920434933265e-06, "loss": 0.0004, "step": 14046 }, { "epoch": 13.506730769230769, "grad_norm": 2.1334547996520996, "learning_rate": 9.193678809566003e-06, "loss": 0.0267, "step": 14047 }, { "epoch": 13.507692307692308, "grad_norm": 0.3413998782634735, "learning_rate": 9.192437196710478e-06, "loss": 0.0019, "step": 14048 }, { "epoch": 13.508653846153846, "grad_norm": 0.21049946546554565, "learning_rate": 9.19119559638596e-06, "loss": 0.0011, "step": 14049 }, { "epoch": 13.509615384615385, "grad_norm": 0.04236038029193878, "learning_rate": 9.18995400861171e-06, "loss": 0.0004, "step": 14050 }, { "epoch": 13.510576923076924, "grad_norm": 0.21287918090820312, "learning_rate": 9.188712433406996e-06, "loss": 0.0014, "step": 14051 }, { "epoch": 13.51153846153846, "grad_norm": 0.017047103494405746, "learning_rate": 9.187470870791087e-06, "loss": 0.0002, "step": 14052 }, { "epoch": 13.5125, "grad_norm": 0.07563039660453796, "learning_rate": 9.186229320783244e-06, "loss": 0.0003, "step": 14053 }, { "epoch": 13.513461538461538, "grad_norm": 0.13612419366836548, "learning_rate": 9.184987783402732e-06, "loss": 0.0011, "step": 14054 }, { "epoch": 13.514423076923077, "grad_norm": 0.4207908809185028, "learning_rate": 9.18374625866882e-06, "loss": 0.0006, "step": 14055 }, { "epoch": 13.515384615384615, "grad_norm": 0.32049086689949036, "learning_rate": 9.182504746600769e-06, "loss": 0.0015, "step": 14056 }, { "epoch": 13.516346153846154, "grad_norm": 2.9152891635894775, "learning_rate": 9.181263247217847e-06, "loss": 0.0144, "step": 14057 }, { "epoch": 13.517307692307693, "grad_norm": 0.28036046028137207, "learning_rate": 9.180021760539312e-06, "loss": 0.0008, "step": 14058 }, { "epoch": 13.518269230769231, "grad_norm": 0.04320087283849716, "learning_rate": 9.178780286584437e-06, "loss": 0.0004, "step": 14059 }, { "epoch": 13.51923076923077, "grad_norm": 0.15387900173664093, "learning_rate": 9.177538825372479e-06, "loss": 0.0006, "step": 14060 }, { "epoch": 13.520192307692307, "grad_norm": 1.3846042156219482, "learning_rate": 9.176297376922706e-06, "loss": 0.0042, "step": 14061 }, { "epoch": 13.521153846153846, "grad_norm": 0.11799434572458267, "learning_rate": 9.17505594125438e-06, "loss": 0.0008, "step": 14062 }, { "epoch": 13.522115384615384, "grad_norm": 0.4499785900115967, "learning_rate": 9.173814518386764e-06, "loss": 0.0012, "step": 14063 }, { "epoch": 13.523076923076923, "grad_norm": 2.738773822784424, "learning_rate": 9.172573108339123e-06, "loss": 0.0245, "step": 14064 }, { "epoch": 13.524038461538462, "grad_norm": 1.4560081958770752, "learning_rate": 9.171331711130717e-06, "loss": 0.0112, "step": 14065 }, { "epoch": 13.525, "grad_norm": 2.0283164978027344, "learning_rate": 9.170090326780811e-06, "loss": 0.0144, "step": 14066 }, { "epoch": 13.525961538461539, "grad_norm": 6.15492057800293, "learning_rate": 9.16884895530867e-06, "loss": 0.0909, "step": 14067 }, { "epoch": 13.526923076923078, "grad_norm": 1.8794302940368652, "learning_rate": 9.167607596733552e-06, "loss": 0.0609, "step": 14068 }, { "epoch": 13.527884615384615, "grad_norm": 0.010958188213407993, "learning_rate": 9.166366251074719e-06, "loss": 0.0001, "step": 14069 }, { "epoch": 13.528846153846153, "grad_norm": 0.23380939662456512, "learning_rate": 9.16512491835144e-06, "loss": 0.0016, "step": 14070 }, { "epoch": 13.529807692307692, "grad_norm": 0.9627770781517029, "learning_rate": 9.163883598582968e-06, "loss": 0.0052, "step": 14071 }, { "epoch": 13.53076923076923, "grad_norm": 1.0147486925125122, "learning_rate": 9.16264229178857e-06, "loss": 0.0026, "step": 14072 }, { "epoch": 13.53173076923077, "grad_norm": 2.0088157653808594, "learning_rate": 9.161400997987505e-06, "loss": 0.0225, "step": 14073 }, { "epoch": 13.532692307692308, "grad_norm": 0.32781916856765747, "learning_rate": 9.160159717199036e-06, "loss": 0.0015, "step": 14074 }, { "epoch": 13.533653846153847, "grad_norm": 0.04492061212658882, "learning_rate": 9.158918449442425e-06, "loss": 0.0002, "step": 14075 }, { "epoch": 13.534615384615385, "grad_norm": 0.059650298207998276, "learning_rate": 9.157677194736928e-06, "loss": 0.0003, "step": 14076 }, { "epoch": 13.535576923076922, "grad_norm": 1.426865816116333, "learning_rate": 9.15643595310181e-06, "loss": 0.0481, "step": 14077 }, { "epoch": 13.536538461538461, "grad_norm": 0.2718634307384491, "learning_rate": 9.15519472455633e-06, "loss": 0.0012, "step": 14078 }, { "epoch": 13.5375, "grad_norm": 0.0847880095243454, "learning_rate": 9.15395350911975e-06, "loss": 0.0004, "step": 14079 }, { "epoch": 13.538461538461538, "grad_norm": 1.0265086889266968, "learning_rate": 9.152712306811326e-06, "loss": 0.0045, "step": 14080 }, { "epoch": 13.539423076923077, "grad_norm": 0.09417717158794403, "learning_rate": 9.151471117650321e-06, "loss": 0.0005, "step": 14081 }, { "epoch": 13.540384615384616, "grad_norm": 0.7936080098152161, "learning_rate": 9.150229941655996e-06, "loss": 0.002, "step": 14082 }, { "epoch": 13.541346153846154, "grad_norm": 0.12870176136493683, "learning_rate": 9.148988778847604e-06, "loss": 0.0007, "step": 14083 }, { "epoch": 13.542307692307693, "grad_norm": 0.11763465404510498, "learning_rate": 9.147747629244413e-06, "loss": 0.0006, "step": 14084 }, { "epoch": 13.54326923076923, "grad_norm": 0.14627490937709808, "learning_rate": 9.146506492865674e-06, "loss": 0.0007, "step": 14085 }, { "epoch": 13.544230769230769, "grad_norm": 0.04762724041938782, "learning_rate": 9.145265369730652e-06, "loss": 0.0006, "step": 14086 }, { "epoch": 13.545192307692307, "grad_norm": 0.1318952590227127, "learning_rate": 9.1440242598586e-06, "loss": 0.0006, "step": 14087 }, { "epoch": 13.546153846153846, "grad_norm": 0.13975930213928223, "learning_rate": 9.142783163268782e-06, "loss": 0.0008, "step": 14088 }, { "epoch": 13.547115384615385, "grad_norm": 0.2019398808479309, "learning_rate": 9.141542079980453e-06, "loss": 0.0013, "step": 14089 }, { "epoch": 13.548076923076923, "grad_norm": 0.23372411727905273, "learning_rate": 9.14030101001287e-06, "loss": 0.0009, "step": 14090 }, { "epoch": 13.549038461538462, "grad_norm": 0.6477456092834473, "learning_rate": 9.139059953385293e-06, "loss": 0.0111, "step": 14091 }, { "epoch": 13.55, "grad_norm": 0.08618487417697906, "learning_rate": 9.137818910116978e-06, "loss": 0.0006, "step": 14092 }, { "epoch": 13.55096153846154, "grad_norm": 1.1575006246566772, "learning_rate": 9.136577880227183e-06, "loss": 0.0042, "step": 14093 }, { "epoch": 13.551923076923076, "grad_norm": 0.9622493982315063, "learning_rate": 9.135336863735165e-06, "loss": 0.0032, "step": 14094 }, { "epoch": 13.552884615384615, "grad_norm": 0.017804862931370735, "learning_rate": 9.134095860660182e-06, "loss": 0.0002, "step": 14095 }, { "epoch": 13.553846153846154, "grad_norm": 2.3252456188201904, "learning_rate": 9.13285487102149e-06, "loss": 0.0226, "step": 14096 }, { "epoch": 13.554807692307692, "grad_norm": 0.30848991870880127, "learning_rate": 9.131613894838345e-06, "loss": 0.001, "step": 14097 }, { "epoch": 13.555769230769231, "grad_norm": 0.06846131384372711, "learning_rate": 9.130372932130002e-06, "loss": 0.0003, "step": 14098 }, { "epoch": 13.55673076923077, "grad_norm": 0.0384177528321743, "learning_rate": 9.129131982915721e-06, "loss": 0.0003, "step": 14099 }, { "epoch": 13.557692307692308, "grad_norm": 0.07347381114959717, "learning_rate": 9.127891047214756e-06, "loss": 0.0003, "step": 14100 }, { "epoch": 13.558653846153845, "grad_norm": 1.4398049116134644, "learning_rate": 9.126650125046361e-06, "loss": 0.046, "step": 14101 }, { "epoch": 13.559615384615384, "grad_norm": 0.2690867781639099, "learning_rate": 9.125409216429793e-06, "loss": 0.0016, "step": 14102 }, { "epoch": 13.560576923076923, "grad_norm": 0.8337056040763855, "learning_rate": 9.124168321384306e-06, "loss": 0.0099, "step": 14103 }, { "epoch": 13.561538461538461, "grad_norm": 1.965971827507019, "learning_rate": 9.12292743992916e-06, "loss": 0.0115, "step": 14104 }, { "epoch": 13.5625, "grad_norm": 0.005210214294493198, "learning_rate": 9.1216865720836e-06, "loss": 0.0001, "step": 14105 }, { "epoch": 13.563461538461539, "grad_norm": 0.693333625793457, "learning_rate": 9.120445717866892e-06, "loss": 0.0107, "step": 14106 }, { "epoch": 13.564423076923077, "grad_norm": 0.06657833606004715, "learning_rate": 9.119204877298281e-06, "loss": 0.0003, "step": 14107 }, { "epoch": 13.565384615384616, "grad_norm": 0.021571507677435875, "learning_rate": 9.117964050397029e-06, "loss": 0.0001, "step": 14108 }, { "epoch": 13.566346153846155, "grad_norm": 3.2586727142333984, "learning_rate": 9.116723237182383e-06, "loss": 0.0898, "step": 14109 }, { "epoch": 13.567307692307692, "grad_norm": 0.26025405526161194, "learning_rate": 9.115482437673601e-06, "loss": 0.0007, "step": 14110 }, { "epoch": 13.56826923076923, "grad_norm": 3.4298601150512695, "learning_rate": 9.114241651889936e-06, "loss": 0.0108, "step": 14111 }, { "epoch": 13.569230769230769, "grad_norm": 0.5582941770553589, "learning_rate": 9.113000879850641e-06, "loss": 0.002, "step": 14112 }, { "epoch": 13.570192307692308, "grad_norm": 1.7330269813537598, "learning_rate": 9.111760121574968e-06, "loss": 0.0079, "step": 14113 }, { "epoch": 13.571153846153846, "grad_norm": 0.21254859864711761, "learning_rate": 9.110519377082174e-06, "loss": 0.0013, "step": 14114 }, { "epoch": 13.572115384615385, "grad_norm": 1.9926213026046753, "learning_rate": 9.109278646391505e-06, "loss": 0.0143, "step": 14115 }, { "epoch": 13.573076923076924, "grad_norm": 0.1273520141839981, "learning_rate": 9.108037929522218e-06, "loss": 0.0004, "step": 14116 }, { "epoch": 13.57403846153846, "grad_norm": 0.07785235345363617, "learning_rate": 9.106797226493565e-06, "loss": 0.0004, "step": 14117 }, { "epoch": 13.575, "grad_norm": 0.3762584328651428, "learning_rate": 9.105556537324797e-06, "loss": 0.0017, "step": 14118 }, { "epoch": 13.575961538461538, "grad_norm": 0.24473993480205536, "learning_rate": 9.10431586203517e-06, "loss": 0.0014, "step": 14119 }, { "epoch": 13.576923076923077, "grad_norm": 0.05499004200100899, "learning_rate": 9.103075200643925e-06, "loss": 0.0004, "step": 14120 }, { "epoch": 13.577884615384615, "grad_norm": 0.557540774345398, "learning_rate": 9.101834553170326e-06, "loss": 0.0024, "step": 14121 }, { "epoch": 13.578846153846154, "grad_norm": 0.4035305678844452, "learning_rate": 9.100593919633618e-06, "loss": 0.0026, "step": 14122 }, { "epoch": 13.579807692307693, "grad_norm": 2.925529956817627, "learning_rate": 9.09935330005305e-06, "loss": 0.0177, "step": 14123 }, { "epoch": 13.580769230769231, "grad_norm": 0.05226937681436539, "learning_rate": 9.098112694447878e-06, "loss": 0.0005, "step": 14124 }, { "epoch": 13.58173076923077, "grad_norm": 0.13687792420387268, "learning_rate": 9.096872102837348e-06, "loss": 0.0009, "step": 14125 }, { "epoch": 13.582692307692307, "grad_norm": 1.092311978340149, "learning_rate": 9.095631525240714e-06, "loss": 0.0066, "step": 14126 }, { "epoch": 13.583653846153846, "grad_norm": 0.1301344782114029, "learning_rate": 9.094390961677223e-06, "loss": 0.0007, "step": 14127 }, { "epoch": 13.584615384615384, "grad_norm": 1.1100292205810547, "learning_rate": 9.093150412166127e-06, "loss": 0.0047, "step": 14128 }, { "epoch": 13.585576923076923, "grad_norm": 0.3743419349193573, "learning_rate": 9.091909876726676e-06, "loss": 0.0015, "step": 14129 }, { "epoch": 13.586538461538462, "grad_norm": 0.09874153137207031, "learning_rate": 9.090669355378119e-06, "loss": 0.0006, "step": 14130 }, { "epoch": 13.5875, "grad_norm": 0.12970399856567383, "learning_rate": 9.089428848139702e-06, "loss": 0.0006, "step": 14131 }, { "epoch": 13.588461538461539, "grad_norm": 0.10258257389068604, "learning_rate": 9.088188355030678e-06, "loss": 0.0006, "step": 14132 }, { "epoch": 13.589423076923078, "grad_norm": 0.01943156309425831, "learning_rate": 9.086947876070295e-06, "loss": 0.0002, "step": 14133 }, { "epoch": 13.590384615384615, "grad_norm": 0.20188355445861816, "learning_rate": 9.0857074112778e-06, "loss": 0.001, "step": 14134 }, { "epoch": 13.591346153846153, "grad_norm": 0.06756862252950668, "learning_rate": 9.084466960672444e-06, "loss": 0.0005, "step": 14135 }, { "epoch": 13.592307692307692, "grad_norm": 0.9655303955078125, "learning_rate": 9.083226524273475e-06, "loss": 0.0046, "step": 14136 }, { "epoch": 13.59326923076923, "grad_norm": 0.014823097735643387, "learning_rate": 9.081986102100137e-06, "loss": 0.0001, "step": 14137 }, { "epoch": 13.59423076923077, "grad_norm": 0.331564337015152, "learning_rate": 9.08074569417168e-06, "loss": 0.002, "step": 14138 }, { "epoch": 13.595192307692308, "grad_norm": 0.059561774134635925, "learning_rate": 9.079505300507354e-06, "loss": 0.0003, "step": 14139 }, { "epoch": 13.596153846153847, "grad_norm": 0.503725528717041, "learning_rate": 9.078264921126405e-06, "loss": 0.0014, "step": 14140 }, { "epoch": 13.597115384615385, "grad_norm": 0.5654325485229492, "learning_rate": 9.077024556048077e-06, "loss": 0.0024, "step": 14141 }, { "epoch": 13.598076923076922, "grad_norm": 0.2885090708732605, "learning_rate": 9.075784205291618e-06, "loss": 0.0021, "step": 14142 }, { "epoch": 13.599038461538461, "grad_norm": 0.050939664244651794, "learning_rate": 9.074543868876277e-06, "loss": 0.0003, "step": 14143 }, { "epoch": 13.6, "grad_norm": 0.016259582713246346, "learning_rate": 9.0733035468213e-06, "loss": 0.0001, "step": 14144 }, { "epoch": 13.600961538461538, "grad_norm": 0.015368765220046043, "learning_rate": 9.072063239145929e-06, "loss": 0.0002, "step": 14145 }, { "epoch": 13.601923076923077, "grad_norm": 1.6750614643096924, "learning_rate": 9.070822945869415e-06, "loss": 0.0044, "step": 14146 }, { "epoch": 13.602884615384616, "grad_norm": 0.023545699194073677, "learning_rate": 9.069582667011003e-06, "loss": 0.0003, "step": 14147 }, { "epoch": 13.603846153846154, "grad_norm": 0.014773137867450714, "learning_rate": 9.068342402589938e-06, "loss": 0.0001, "step": 14148 }, { "epoch": 13.604807692307693, "grad_norm": 1.866268277168274, "learning_rate": 9.06710215262546e-06, "loss": 0.0104, "step": 14149 }, { "epoch": 13.60576923076923, "grad_norm": 0.004568951204419136, "learning_rate": 9.065861917136822e-06, "loss": 0.0001, "step": 14150 }, { "epoch": 13.606730769230769, "grad_norm": 0.02934028021991253, "learning_rate": 9.064621696143266e-06, "loss": 0.0002, "step": 14151 }, { "epoch": 13.607692307692307, "grad_norm": 0.6325311064720154, "learning_rate": 9.063381489664034e-06, "loss": 0.0017, "step": 14152 }, { "epoch": 13.608653846153846, "grad_norm": 0.07028261572122574, "learning_rate": 9.062141297718372e-06, "loss": 0.0003, "step": 14153 }, { "epoch": 13.609615384615385, "grad_norm": 0.03586158528923988, "learning_rate": 9.060901120325526e-06, "loss": 0.0002, "step": 14154 }, { "epoch": 13.610576923076923, "grad_norm": 0.21999436616897583, "learning_rate": 9.059660957504737e-06, "loss": 0.0005, "step": 14155 }, { "epoch": 13.611538461538462, "grad_norm": 0.5369793176651001, "learning_rate": 9.05842080927525e-06, "loss": 0.001, "step": 14156 }, { "epoch": 13.6125, "grad_norm": 0.016451086848974228, "learning_rate": 9.05718067565631e-06, "loss": 0.0001, "step": 14157 }, { "epoch": 13.61346153846154, "grad_norm": 0.041055671870708466, "learning_rate": 9.05594055666716e-06, "loss": 0.0003, "step": 14158 }, { "epoch": 13.614423076923076, "grad_norm": 0.4657597243785858, "learning_rate": 9.05470045232704e-06, "loss": 0.0019, "step": 14159 }, { "epoch": 13.615384615384615, "grad_norm": 1.771169662475586, "learning_rate": 9.053460362655192e-06, "loss": 0.0198, "step": 14160 }, { "epoch": 13.616346153846154, "grad_norm": 0.034799739718437195, "learning_rate": 9.052220287670865e-06, "loss": 0.0003, "step": 14161 }, { "epoch": 13.617307692307692, "grad_norm": 0.10132879763841629, "learning_rate": 9.050980227393297e-06, "loss": 0.0005, "step": 14162 }, { "epoch": 13.618269230769231, "grad_norm": 0.1363735944032669, "learning_rate": 9.049740181841731e-06, "loss": 0.001, "step": 14163 }, { "epoch": 13.61923076923077, "grad_norm": 1.8784763813018799, "learning_rate": 9.048500151035404e-06, "loss": 0.0108, "step": 14164 }, { "epoch": 13.620192307692308, "grad_norm": 2.7920968532562256, "learning_rate": 9.047260134993566e-06, "loss": 0.039, "step": 14165 }, { "epoch": 13.621153846153845, "grad_norm": 3.761951446533203, "learning_rate": 9.046020133735455e-06, "loss": 0.0475, "step": 14166 }, { "epoch": 13.622115384615384, "grad_norm": 0.5763080716133118, "learning_rate": 9.044780147280307e-06, "loss": 0.0034, "step": 14167 }, { "epoch": 13.623076923076923, "grad_norm": 1.8329709768295288, "learning_rate": 9.043540175647373e-06, "loss": 0.0106, "step": 14168 }, { "epoch": 13.624038461538461, "grad_norm": 0.05094677209854126, "learning_rate": 9.042300218855887e-06, "loss": 0.0002, "step": 14169 }, { "epoch": 13.625, "grad_norm": 0.04626480117440224, "learning_rate": 9.041060276925089e-06, "loss": 0.0004, "step": 14170 }, { "epoch": 13.625961538461539, "grad_norm": 0.19481168687343597, "learning_rate": 9.03982034987422e-06, "loss": 0.001, "step": 14171 }, { "epoch": 13.626923076923077, "grad_norm": 3.075362205505371, "learning_rate": 9.038580437722524e-06, "loss": 0.0426, "step": 14172 }, { "epoch": 13.627884615384616, "grad_norm": 1.8597875833511353, "learning_rate": 9.037340540489239e-06, "loss": 0.0317, "step": 14173 }, { "epoch": 13.628846153846155, "grad_norm": 0.08158185333013535, "learning_rate": 9.036100658193599e-06, "loss": 0.0005, "step": 14174 }, { "epoch": 13.629807692307692, "grad_norm": 0.12621182203292847, "learning_rate": 9.034860790854848e-06, "loss": 0.0004, "step": 14175 }, { "epoch": 13.63076923076923, "grad_norm": 0.13691328465938568, "learning_rate": 9.033620938492228e-06, "loss": 0.0005, "step": 14176 }, { "epoch": 13.631730769230769, "grad_norm": 1.8220313787460327, "learning_rate": 9.032381101124975e-06, "loss": 0.0135, "step": 14177 }, { "epoch": 13.632692307692308, "grad_norm": 0.6566946506500244, "learning_rate": 9.031141278772325e-06, "loss": 0.0013, "step": 14178 }, { "epoch": 13.633653846153846, "grad_norm": 0.012205034494400024, "learning_rate": 9.02990147145352e-06, "loss": 0.0001, "step": 14179 }, { "epoch": 13.634615384615385, "grad_norm": 0.04999975115060806, "learning_rate": 9.028661679187797e-06, "loss": 0.0004, "step": 14180 }, { "epoch": 13.635576923076924, "grad_norm": 0.4018135070800781, "learning_rate": 9.027421901994395e-06, "loss": 0.002, "step": 14181 }, { "epoch": 13.63653846153846, "grad_norm": 1.520686149597168, "learning_rate": 9.026182139892547e-06, "loss": 0.0608, "step": 14182 }, { "epoch": 13.6375, "grad_norm": 0.03186621889472008, "learning_rate": 9.024942392901498e-06, "loss": 0.0001, "step": 14183 }, { "epoch": 13.638461538461538, "grad_norm": 0.2366178333759308, "learning_rate": 9.02370266104048e-06, "loss": 0.0008, "step": 14184 }, { "epoch": 13.639423076923077, "grad_norm": 0.038913752883672714, "learning_rate": 9.02246294432873e-06, "loss": 0.0002, "step": 14185 }, { "epoch": 13.640384615384615, "grad_norm": 1.6017707586288452, "learning_rate": 9.021223242785486e-06, "loss": 0.0319, "step": 14186 }, { "epoch": 13.641346153846154, "grad_norm": 0.21202613413333893, "learning_rate": 9.019983556429987e-06, "loss": 0.0013, "step": 14187 }, { "epoch": 13.642307692307693, "grad_norm": 1.8160820007324219, "learning_rate": 9.018743885281465e-06, "loss": 0.0077, "step": 14188 }, { "epoch": 13.643269230769231, "grad_norm": 1.1843119859695435, "learning_rate": 9.017504229359157e-06, "loss": 0.002, "step": 14189 }, { "epoch": 13.64423076923077, "grad_norm": 0.0461348257958889, "learning_rate": 9.016264588682303e-06, "loss": 0.0004, "step": 14190 }, { "epoch": 13.645192307692307, "grad_norm": 0.0366993173956871, "learning_rate": 9.015024963270133e-06, "loss": 0.0003, "step": 14191 }, { "epoch": 13.646153846153846, "grad_norm": 0.009405607357621193, "learning_rate": 9.013785353141887e-06, "loss": 0.0001, "step": 14192 }, { "epoch": 13.647115384615384, "grad_norm": 0.8137969970703125, "learning_rate": 9.012545758316794e-06, "loss": 0.0027, "step": 14193 }, { "epoch": 13.648076923076923, "grad_norm": 0.07571166008710861, "learning_rate": 9.011306178814096e-06, "loss": 0.0003, "step": 14194 }, { "epoch": 13.649038461538462, "grad_norm": 0.07613855600357056, "learning_rate": 9.010066614653023e-06, "loss": 0.0004, "step": 14195 }, { "epoch": 13.65, "grad_norm": 2.471609354019165, "learning_rate": 9.00882706585281e-06, "loss": 0.0351, "step": 14196 }, { "epoch": 13.650961538461539, "grad_norm": 0.17215311527252197, "learning_rate": 9.007587532432692e-06, "loss": 0.0012, "step": 14197 }, { "epoch": 13.651923076923078, "grad_norm": 0.8049716353416443, "learning_rate": 9.006348014411905e-06, "loss": 0.0025, "step": 14198 }, { "epoch": 13.652884615384615, "grad_norm": 0.7562359571456909, "learning_rate": 9.00510851180968e-06, "loss": 0.0067, "step": 14199 }, { "epoch": 13.653846153846153, "grad_norm": 0.53839111328125, "learning_rate": 9.003869024645247e-06, "loss": 0.0024, "step": 14200 }, { "epoch": 13.654807692307692, "grad_norm": 2.01489520072937, "learning_rate": 9.002629552937846e-06, "loss": 0.0026, "step": 14201 }, { "epoch": 13.65576923076923, "grad_norm": 0.0910872295498848, "learning_rate": 9.00139009670671e-06, "loss": 0.0009, "step": 14202 }, { "epoch": 13.65673076923077, "grad_norm": 1.60099196434021, "learning_rate": 9.000150655971065e-06, "loss": 0.0068, "step": 14203 }, { "epoch": 13.657692307692308, "grad_norm": 2.416579008102417, "learning_rate": 8.998911230750147e-06, "loss": 0.0188, "step": 14204 }, { "epoch": 13.658653846153847, "grad_norm": 2.3560774326324463, "learning_rate": 8.99767182106319e-06, "loss": 0.0203, "step": 14205 }, { "epoch": 13.659615384615385, "grad_norm": 0.030278535559773445, "learning_rate": 8.996432426929425e-06, "loss": 0.0002, "step": 14206 }, { "epoch": 13.660576923076922, "grad_norm": 1.3286402225494385, "learning_rate": 8.99519304836808e-06, "loss": 0.0056, "step": 14207 }, { "epoch": 13.661538461538461, "grad_norm": 1.179677128791809, "learning_rate": 8.993953685398394e-06, "loss": 0.0391, "step": 14208 }, { "epoch": 13.6625, "grad_norm": 0.09147589653730392, "learning_rate": 8.992714338039595e-06, "loss": 0.0005, "step": 14209 }, { "epoch": 13.663461538461538, "grad_norm": 1.10010826587677, "learning_rate": 8.99147500631091e-06, "loss": 0.0062, "step": 14210 }, { "epoch": 13.664423076923077, "grad_norm": 0.0966184064745903, "learning_rate": 8.990235690231572e-06, "loss": 0.0007, "step": 14211 }, { "epoch": 13.665384615384616, "grad_norm": 0.06034734100103378, "learning_rate": 8.988996389820814e-06, "loss": 0.0004, "step": 14212 }, { "epoch": 13.666346153846154, "grad_norm": 0.02883438766002655, "learning_rate": 8.987757105097866e-06, "loss": 0.0002, "step": 14213 }, { "epoch": 13.667307692307693, "grad_norm": 0.029839379712939262, "learning_rate": 8.986517836081954e-06, "loss": 0.0003, "step": 14214 }, { "epoch": 13.66826923076923, "grad_norm": 0.052133701741695404, "learning_rate": 8.985278582792314e-06, "loss": 0.0004, "step": 14215 }, { "epoch": 13.669230769230769, "grad_norm": 0.0188501738011837, "learning_rate": 8.98403934524817e-06, "loss": 0.0001, "step": 14216 }, { "epoch": 13.670192307692307, "grad_norm": 0.1043599396944046, "learning_rate": 8.982800123468755e-06, "loss": 0.0005, "step": 14217 }, { "epoch": 13.671153846153846, "grad_norm": 0.2262905389070511, "learning_rate": 8.981560917473292e-06, "loss": 0.0012, "step": 14218 }, { "epoch": 13.672115384615385, "grad_norm": 0.6183112263679504, "learning_rate": 8.98032172728102e-06, "loss": 0.0026, "step": 14219 }, { "epoch": 13.673076923076923, "grad_norm": 0.03004412353038788, "learning_rate": 8.979082552911162e-06, "loss": 0.0002, "step": 14220 }, { "epoch": 13.674038461538462, "grad_norm": 0.06394214928150177, "learning_rate": 8.977843394382945e-06, "loss": 0.0007, "step": 14221 }, { "epoch": 13.675, "grad_norm": 0.536500871181488, "learning_rate": 8.976604251715595e-06, "loss": 0.0017, "step": 14222 }, { "epoch": 13.67596153846154, "grad_norm": 0.046730853617191315, "learning_rate": 8.975365124928349e-06, "loss": 0.0003, "step": 14223 }, { "epoch": 13.676923076923076, "grad_norm": 0.0371411107480526, "learning_rate": 8.974126014040427e-06, "loss": 0.0004, "step": 14224 }, { "epoch": 13.677884615384615, "grad_norm": 0.10816773772239685, "learning_rate": 8.972886919071055e-06, "loss": 0.0005, "step": 14225 }, { "epoch": 13.678846153846154, "grad_norm": 0.017552558332681656, "learning_rate": 8.971647840039468e-06, "loss": 0.0001, "step": 14226 }, { "epoch": 13.679807692307692, "grad_norm": 0.24216319620609283, "learning_rate": 8.970408776964888e-06, "loss": 0.0009, "step": 14227 }, { "epoch": 13.680769230769231, "grad_norm": 2.065361976623535, "learning_rate": 8.96916972986654e-06, "loss": 0.0134, "step": 14228 }, { "epoch": 13.68173076923077, "grad_norm": 0.03797922283411026, "learning_rate": 8.967930698763651e-06, "loss": 0.0004, "step": 14229 }, { "epoch": 13.682692307692308, "grad_norm": 0.004456866066902876, "learning_rate": 8.966691683675453e-06, "loss": 0.0, "step": 14230 }, { "epoch": 13.683653846153845, "grad_norm": 0.0207209549844265, "learning_rate": 8.965452684621164e-06, "loss": 0.0002, "step": 14231 }, { "epoch": 13.684615384615384, "grad_norm": 0.18765607476234436, "learning_rate": 8.964213701620015e-06, "loss": 0.0007, "step": 14232 }, { "epoch": 13.685576923076923, "grad_norm": 0.513537585735321, "learning_rate": 8.962974734691226e-06, "loss": 0.0022, "step": 14233 }, { "epoch": 13.686538461538461, "grad_norm": 1.6255996227264404, "learning_rate": 8.961735783854029e-06, "loss": 0.0029, "step": 14234 }, { "epoch": 13.6875, "grad_norm": 2.2071077823638916, "learning_rate": 8.960496849127645e-06, "loss": 0.0422, "step": 14235 }, { "epoch": 13.688461538461539, "grad_norm": 0.027077801525592804, "learning_rate": 8.959257930531295e-06, "loss": 0.0002, "step": 14236 }, { "epoch": 13.689423076923077, "grad_norm": 1.8206502199172974, "learning_rate": 8.958019028084212e-06, "loss": 0.006, "step": 14237 }, { "epoch": 13.690384615384616, "grad_norm": 0.10162651538848877, "learning_rate": 8.956780141805615e-06, "loss": 0.0004, "step": 14238 }, { "epoch": 13.691346153846155, "grad_norm": 0.025873878970742226, "learning_rate": 8.955541271714727e-06, "loss": 0.0002, "step": 14239 }, { "epoch": 13.692307692307692, "grad_norm": 0.014878073707222939, "learning_rate": 8.954302417830772e-06, "loss": 0.0001, "step": 14240 }, { "epoch": 13.69326923076923, "grad_norm": 0.04041562229394913, "learning_rate": 8.953063580172975e-06, "loss": 0.0002, "step": 14241 }, { "epoch": 13.694230769230769, "grad_norm": 0.20499107241630554, "learning_rate": 8.951824758760561e-06, "loss": 0.0013, "step": 14242 }, { "epoch": 13.695192307692308, "grad_norm": 0.07743586599826813, "learning_rate": 8.95058595361275e-06, "loss": 0.0006, "step": 14243 }, { "epoch": 13.696153846153846, "grad_norm": 0.12174084782600403, "learning_rate": 8.949347164748761e-06, "loss": 0.0007, "step": 14244 }, { "epoch": 13.697115384615385, "grad_norm": 1.5917553901672363, "learning_rate": 8.948108392187824e-06, "loss": 0.0041, "step": 14245 }, { "epoch": 13.698076923076924, "grad_norm": 0.022257747128605843, "learning_rate": 8.946869635949156e-06, "loss": 0.0002, "step": 14246 }, { "epoch": 13.69903846153846, "grad_norm": 0.23878155648708344, "learning_rate": 8.94563089605198e-06, "loss": 0.0009, "step": 14247 }, { "epoch": 13.7, "grad_norm": 0.26447877287864685, "learning_rate": 8.944392172515519e-06, "loss": 0.0012, "step": 14248 }, { "epoch": 13.700961538461538, "grad_norm": 0.07076526433229446, "learning_rate": 8.943153465358994e-06, "loss": 0.0004, "step": 14249 }, { "epoch": 13.701923076923077, "grad_norm": 1.7873948812484741, "learning_rate": 8.941914774601625e-06, "loss": 0.0721, "step": 14250 }, { "epoch": 13.702884615384615, "grad_norm": 0.01988236792385578, "learning_rate": 8.940676100262633e-06, "loss": 0.0002, "step": 14251 }, { "epoch": 13.703846153846154, "grad_norm": 0.30975019931793213, "learning_rate": 8.939437442361238e-06, "loss": 0.0012, "step": 14252 }, { "epoch": 13.704807692307693, "grad_norm": 0.039755918085575104, "learning_rate": 8.938198800916662e-06, "loss": 0.0002, "step": 14253 }, { "epoch": 13.705769230769231, "grad_norm": 1.2657649517059326, "learning_rate": 8.936960175948123e-06, "loss": 0.0039, "step": 14254 }, { "epoch": 13.70673076923077, "grad_norm": 1.3203448057174683, "learning_rate": 8.935721567474843e-06, "loss": 0.0045, "step": 14255 }, { "epoch": 13.707692307692307, "grad_norm": 0.22368787229061127, "learning_rate": 8.934482975516042e-06, "loss": 0.0011, "step": 14256 }, { "epoch": 13.708653846153846, "grad_norm": 0.04418536275625229, "learning_rate": 8.933244400090937e-06, "loss": 0.0003, "step": 14257 }, { "epoch": 13.709615384615384, "grad_norm": 0.03521670401096344, "learning_rate": 8.932005841218746e-06, "loss": 0.0003, "step": 14258 }, { "epoch": 13.710576923076923, "grad_norm": 0.07533716410398483, "learning_rate": 8.930767298918692e-06, "loss": 0.0006, "step": 14259 }, { "epoch": 13.711538461538462, "grad_norm": 0.06677451729774475, "learning_rate": 8.929528773209992e-06, "loss": 0.0004, "step": 14260 }, { "epoch": 13.7125, "grad_norm": 1.5614540576934814, "learning_rate": 8.928290264111863e-06, "loss": 0.0053, "step": 14261 }, { "epoch": 13.713461538461539, "grad_norm": 3.397301435470581, "learning_rate": 8.927051771643522e-06, "loss": 0.165, "step": 14262 }, { "epoch": 13.714423076923078, "grad_norm": 0.07567597925662994, "learning_rate": 8.92581329582419e-06, "loss": 0.0004, "step": 14263 }, { "epoch": 13.715384615384615, "grad_norm": 2.0944063663482666, "learning_rate": 8.924574836673084e-06, "loss": 0.0113, "step": 14264 }, { "epoch": 13.716346153846153, "grad_norm": 0.16665232181549072, "learning_rate": 8.923336394209416e-06, "loss": 0.001, "step": 14265 }, { "epoch": 13.717307692307692, "grad_norm": 0.9900015592575073, "learning_rate": 8.92209796845241e-06, "loss": 0.0053, "step": 14266 }, { "epoch": 13.71826923076923, "grad_norm": 0.5789763927459717, "learning_rate": 8.920859559421283e-06, "loss": 0.0013, "step": 14267 }, { "epoch": 13.71923076923077, "grad_norm": 0.01215578243136406, "learning_rate": 8.919621167135244e-06, "loss": 0.0001, "step": 14268 }, { "epoch": 13.720192307692308, "grad_norm": 3.9297022819519043, "learning_rate": 8.918382791613514e-06, "loss": 0.0056, "step": 14269 }, { "epoch": 13.721153846153847, "grad_norm": 2.8759047985076904, "learning_rate": 8.91714443287531e-06, "loss": 0.0694, "step": 14270 }, { "epoch": 13.722115384615385, "grad_norm": 1.8434516191482544, "learning_rate": 8.915906090939848e-06, "loss": 0.0575, "step": 14271 }, { "epoch": 13.723076923076922, "grad_norm": 2.3236255645751953, "learning_rate": 8.91466776582634e-06, "loss": 0.0168, "step": 14272 }, { "epoch": 13.724038461538461, "grad_norm": 0.08217298984527588, "learning_rate": 8.913429457553999e-06, "loss": 0.0003, "step": 14273 }, { "epoch": 13.725, "grad_norm": 0.13759195804595947, "learning_rate": 8.91219116614205e-06, "loss": 0.0013, "step": 14274 }, { "epoch": 13.725961538461538, "grad_norm": 0.505562961101532, "learning_rate": 8.910952891609698e-06, "loss": 0.0015, "step": 14275 }, { "epoch": 13.726923076923077, "grad_norm": 0.013802258297801018, "learning_rate": 8.90971463397616e-06, "loss": 0.0001, "step": 14276 }, { "epoch": 13.727884615384616, "grad_norm": 1.664616346359253, "learning_rate": 8.908476393260652e-06, "loss": 0.0082, "step": 14277 }, { "epoch": 13.728846153846154, "grad_norm": 0.2717426121234894, "learning_rate": 8.907238169482388e-06, "loss": 0.002, "step": 14278 }, { "epoch": 13.729807692307693, "grad_norm": 0.03487521409988403, "learning_rate": 8.90599996266058e-06, "loss": 0.0003, "step": 14279 }, { "epoch": 13.73076923076923, "grad_norm": 0.04534561559557915, "learning_rate": 8.90476177281444e-06, "loss": 0.0004, "step": 14280 }, { "epoch": 13.731730769230769, "grad_norm": 0.16040903329849243, "learning_rate": 8.903523599963184e-06, "loss": 0.0014, "step": 14281 }, { "epoch": 13.732692307692307, "grad_norm": 0.04408882558345795, "learning_rate": 8.902285444126024e-06, "loss": 0.0004, "step": 14282 }, { "epoch": 13.733653846153846, "grad_norm": 0.35939040780067444, "learning_rate": 8.901047305322172e-06, "loss": 0.0018, "step": 14283 }, { "epoch": 13.734615384615385, "grad_norm": 0.5059444308280945, "learning_rate": 8.89980918357084e-06, "loss": 0.0023, "step": 14284 }, { "epoch": 13.735576923076923, "grad_norm": 0.3030160367488861, "learning_rate": 8.89857107889124e-06, "loss": 0.0014, "step": 14285 }, { "epoch": 13.736538461538462, "grad_norm": 1.5127804279327393, "learning_rate": 8.897332991302587e-06, "loss": 0.0048, "step": 14286 }, { "epoch": 13.7375, "grad_norm": 0.7713960409164429, "learning_rate": 8.896094920824086e-06, "loss": 0.0062, "step": 14287 }, { "epoch": 13.73846153846154, "grad_norm": 0.3954831659793854, "learning_rate": 8.894856867474954e-06, "loss": 0.0021, "step": 14288 }, { "epoch": 13.739423076923076, "grad_norm": 0.9991529583930969, "learning_rate": 8.893618831274399e-06, "loss": 0.0043, "step": 14289 }, { "epoch": 13.740384615384615, "grad_norm": 0.03489350527524948, "learning_rate": 8.892380812241633e-06, "loss": 0.0005, "step": 14290 }, { "epoch": 13.741346153846154, "grad_norm": 3.553285837173462, "learning_rate": 8.891142810395865e-06, "loss": 0.0227, "step": 14291 }, { "epoch": 13.742307692307692, "grad_norm": 3.0485758781433105, "learning_rate": 8.889904825756307e-06, "loss": 0.0459, "step": 14292 }, { "epoch": 13.743269230769231, "grad_norm": 0.05287492275238037, "learning_rate": 8.888666858342169e-06, "loss": 0.0004, "step": 14293 }, { "epoch": 13.74423076923077, "grad_norm": 1.0008474588394165, "learning_rate": 8.88742890817266e-06, "loss": 0.0049, "step": 14294 }, { "epoch": 13.745192307692308, "grad_norm": 0.855556070804596, "learning_rate": 8.886190975266985e-06, "loss": 0.004, "step": 14295 }, { "epoch": 13.746153846153845, "grad_norm": 0.17373168468475342, "learning_rate": 8.88495305964436e-06, "loss": 0.0009, "step": 14296 }, { "epoch": 13.747115384615384, "grad_norm": 0.3969942331314087, "learning_rate": 8.883715161323991e-06, "loss": 0.001, "step": 14297 }, { "epoch": 13.748076923076923, "grad_norm": 1.9764049053192139, "learning_rate": 8.882477280325085e-06, "loss": 0.016, "step": 14298 }, { "epoch": 13.749038461538461, "grad_norm": 0.8805743455886841, "learning_rate": 8.881239416666854e-06, "loss": 0.0034, "step": 14299 }, { "epoch": 13.75, "grad_norm": 0.05666998028755188, "learning_rate": 8.880001570368503e-06, "loss": 0.0005, "step": 14300 }, { "epoch": 13.750961538461539, "grad_norm": 0.1158561110496521, "learning_rate": 8.87876374144924e-06, "loss": 0.0008, "step": 14301 }, { "epoch": 13.751923076923077, "grad_norm": 0.023373965173959732, "learning_rate": 8.877525929928272e-06, "loss": 0.0002, "step": 14302 }, { "epoch": 13.752884615384616, "grad_norm": 0.11321506649255753, "learning_rate": 8.876288135824808e-06, "loss": 0.0006, "step": 14303 }, { "epoch": 13.753846153846155, "grad_norm": 1.7781823873519897, "learning_rate": 8.875050359158055e-06, "loss": 0.0172, "step": 14304 }, { "epoch": 13.754807692307692, "grad_norm": 2.3442509174346924, "learning_rate": 8.873812599947215e-06, "loss": 0.0642, "step": 14305 }, { "epoch": 13.75576923076923, "grad_norm": 0.14942555129528046, "learning_rate": 8.872574858211503e-06, "loss": 0.0014, "step": 14306 }, { "epoch": 13.756730769230769, "grad_norm": 0.02236844226717949, "learning_rate": 8.871337133970118e-06, "loss": 0.0003, "step": 14307 }, { "epoch": 13.757692307692308, "grad_norm": 0.06549166142940521, "learning_rate": 8.870099427242269e-06, "loss": 0.0005, "step": 14308 }, { "epoch": 13.758653846153846, "grad_norm": 2.557387590408325, "learning_rate": 8.868861738047158e-06, "loss": 0.0073, "step": 14309 }, { "epoch": 13.759615384615385, "grad_norm": 0.019338149577379227, "learning_rate": 8.867624066403995e-06, "loss": 0.0002, "step": 14310 }, { "epoch": 13.760576923076924, "grad_norm": 1.0317796468734741, "learning_rate": 8.866386412331985e-06, "loss": 0.0412, "step": 14311 }, { "epoch": 13.76153846153846, "grad_norm": 0.15650516748428345, "learning_rate": 8.865148775850328e-06, "loss": 0.0008, "step": 14312 }, { "epoch": 13.7625, "grad_norm": 0.025185635313391685, "learning_rate": 8.863911156978231e-06, "loss": 0.0002, "step": 14313 }, { "epoch": 13.763461538461538, "grad_norm": 0.27275243401527405, "learning_rate": 8.8626735557349e-06, "loss": 0.0015, "step": 14314 }, { "epoch": 13.764423076923077, "grad_norm": 2.3877296447753906, "learning_rate": 8.861435972139538e-06, "loss": 0.0133, "step": 14315 }, { "epoch": 13.765384615384615, "grad_norm": 1.5234439373016357, "learning_rate": 8.860198406211345e-06, "loss": 0.0216, "step": 14316 }, { "epoch": 13.766346153846154, "grad_norm": 0.022459104657173157, "learning_rate": 8.85896085796953e-06, "loss": 0.0002, "step": 14317 }, { "epoch": 13.767307692307693, "grad_norm": 2.0928618907928467, "learning_rate": 8.857723327433292e-06, "loss": 0.0105, "step": 14318 }, { "epoch": 13.768269230769231, "grad_norm": 4.869027137756348, "learning_rate": 8.856485814621838e-06, "loss": 0.0365, "step": 14319 }, { "epoch": 13.76923076923077, "grad_norm": 0.0994873121380806, "learning_rate": 8.855248319554364e-06, "loss": 0.0008, "step": 14320 }, { "epoch": 13.770192307692307, "grad_norm": 0.07180380821228027, "learning_rate": 8.85401084225008e-06, "loss": 0.0004, "step": 14321 }, { "epoch": 13.771153846153846, "grad_norm": 1.3738555908203125, "learning_rate": 8.852773382728184e-06, "loss": 0.009, "step": 14322 }, { "epoch": 13.772115384615384, "grad_norm": 1.4439281225204468, "learning_rate": 8.851535941007877e-06, "loss": 0.0054, "step": 14323 }, { "epoch": 13.773076923076923, "grad_norm": 2.4910888671875, "learning_rate": 8.85029851710836e-06, "loss": 0.0242, "step": 14324 }, { "epoch": 13.774038461538462, "grad_norm": 1.3318908214569092, "learning_rate": 8.849061111048838e-06, "loss": 0.0062, "step": 14325 }, { "epoch": 13.775, "grad_norm": 0.17117835581302643, "learning_rate": 8.847823722848508e-06, "loss": 0.0008, "step": 14326 }, { "epoch": 13.775961538461539, "grad_norm": 5.368950366973877, "learning_rate": 8.846586352526572e-06, "loss": 0.1536, "step": 14327 }, { "epoch": 13.776923076923078, "grad_norm": 0.07148675620555878, "learning_rate": 8.845349000102232e-06, "loss": 0.0009, "step": 14328 }, { "epoch": 13.777884615384615, "grad_norm": 0.6404228806495667, "learning_rate": 8.844111665594687e-06, "loss": 0.0035, "step": 14329 }, { "epoch": 13.778846153846153, "grad_norm": 0.9865643382072449, "learning_rate": 8.842874349023136e-06, "loss": 0.0049, "step": 14330 }, { "epoch": 13.779807692307692, "grad_norm": 0.07682596147060394, "learning_rate": 8.841637050406778e-06, "loss": 0.0007, "step": 14331 }, { "epoch": 13.78076923076923, "grad_norm": 1.5708749294281006, "learning_rate": 8.840399769764814e-06, "loss": 0.0358, "step": 14332 }, { "epoch": 13.78173076923077, "grad_norm": 0.06312768906354904, "learning_rate": 8.839162507116443e-06, "loss": 0.0005, "step": 14333 }, { "epoch": 13.782692307692308, "grad_norm": 0.7357980608940125, "learning_rate": 8.837925262480862e-06, "loss": 0.0083, "step": 14334 }, { "epoch": 13.783653846153847, "grad_norm": 1.5570663213729858, "learning_rate": 8.836688035877268e-06, "loss": 0.0091, "step": 14335 }, { "epoch": 13.784615384615385, "grad_norm": 0.15850821137428284, "learning_rate": 8.835450827324864e-06, "loss": 0.0008, "step": 14336 }, { "epoch": 13.785576923076922, "grad_norm": 0.09846950322389603, "learning_rate": 8.834213636842846e-06, "loss": 0.0008, "step": 14337 }, { "epoch": 13.786538461538461, "grad_norm": 0.14071853458881378, "learning_rate": 8.832976464450407e-06, "loss": 0.0015, "step": 14338 }, { "epoch": 13.7875, "grad_norm": 0.0973549485206604, "learning_rate": 8.831739310166752e-06, "loss": 0.0008, "step": 14339 }, { "epoch": 13.788461538461538, "grad_norm": 0.05873899906873703, "learning_rate": 8.830502174011072e-06, "loss": 0.0004, "step": 14340 }, { "epoch": 13.789423076923077, "grad_norm": 0.09108523279428482, "learning_rate": 8.829265056002569e-06, "loss": 0.0006, "step": 14341 }, { "epoch": 13.790384615384616, "grad_norm": 0.05367155000567436, "learning_rate": 8.828027956160432e-06, "loss": 0.0005, "step": 14342 }, { "epoch": 13.791346153846154, "grad_norm": 0.17836014926433563, "learning_rate": 8.826790874503865e-06, "loss": 0.0013, "step": 14343 }, { "epoch": 13.792307692307693, "grad_norm": 0.9369357824325562, "learning_rate": 8.825553811052059e-06, "loss": 0.0031, "step": 14344 }, { "epoch": 13.79326923076923, "grad_norm": 3.856435775756836, "learning_rate": 8.824316765824207e-06, "loss": 0.0282, "step": 14345 }, { "epoch": 13.794230769230769, "grad_norm": 0.4863451421260834, "learning_rate": 8.823079738839514e-06, "loss": 0.003, "step": 14346 }, { "epoch": 13.795192307692307, "grad_norm": 5.593087196350098, "learning_rate": 8.821842730117166e-06, "loss": 0.0356, "step": 14347 }, { "epoch": 13.796153846153846, "grad_norm": 3.9933595657348633, "learning_rate": 8.820605739676363e-06, "loss": 0.0268, "step": 14348 }, { "epoch": 13.797115384615385, "grad_norm": 0.1971125304698944, "learning_rate": 8.819368767536296e-06, "loss": 0.0012, "step": 14349 }, { "epoch": 13.798076923076923, "grad_norm": 2.672853708267212, "learning_rate": 8.818131813716161e-06, "loss": 0.0271, "step": 14350 }, { "epoch": 13.799038461538462, "grad_norm": 0.06709292531013489, "learning_rate": 8.816894878235151e-06, "loss": 0.0007, "step": 14351 }, { "epoch": 13.8, "grad_norm": 0.7795166969299316, "learning_rate": 8.815657961112463e-06, "loss": 0.0225, "step": 14352 }, { "epoch": 13.80096153846154, "grad_norm": 1.4471874237060547, "learning_rate": 8.814421062367284e-06, "loss": 0.0057, "step": 14353 }, { "epoch": 13.801923076923076, "grad_norm": 0.12935452163219452, "learning_rate": 8.813184182018812e-06, "loss": 0.0009, "step": 14354 }, { "epoch": 13.802884615384615, "grad_norm": 0.6239891648292542, "learning_rate": 8.811947320086237e-06, "loss": 0.0023, "step": 14355 }, { "epoch": 13.803846153846154, "grad_norm": 0.049306225031614304, "learning_rate": 8.810710476588752e-06, "loss": 0.0004, "step": 14356 }, { "epoch": 13.804807692307692, "grad_norm": 2.701406717300415, "learning_rate": 8.809473651545553e-06, "loss": 0.0213, "step": 14357 }, { "epoch": 13.805769230769231, "grad_norm": 0.45054692029953003, "learning_rate": 8.808236844975828e-06, "loss": 0.0013, "step": 14358 }, { "epoch": 13.80673076923077, "grad_norm": 1.0710169076919556, "learning_rate": 8.807000056898768e-06, "loss": 0.0046, "step": 14359 }, { "epoch": 13.807692307692308, "grad_norm": 2.074777126312256, "learning_rate": 8.805763287333564e-06, "loss": 0.0743, "step": 14360 }, { "epoch": 13.808653846153845, "grad_norm": 1.774511694908142, "learning_rate": 8.804526536299413e-06, "loss": 0.0149, "step": 14361 }, { "epoch": 13.809615384615384, "grad_norm": 2.039220094680786, "learning_rate": 8.8032898038155e-06, "loss": 0.0131, "step": 14362 }, { "epoch": 13.810576923076923, "grad_norm": 1.14451265335083, "learning_rate": 8.802053089901016e-06, "loss": 0.0166, "step": 14363 }, { "epoch": 13.811538461538461, "grad_norm": 0.38139310479164124, "learning_rate": 8.800816394575151e-06, "loss": 0.0013, "step": 14364 }, { "epoch": 13.8125, "grad_norm": 0.04263882711529732, "learning_rate": 8.799579717857098e-06, "loss": 0.0005, "step": 14365 }, { "epoch": 13.813461538461539, "grad_norm": 0.08831843733787537, "learning_rate": 8.798343059766044e-06, "loss": 0.0006, "step": 14366 }, { "epoch": 13.814423076923077, "grad_norm": 0.4457251727581024, "learning_rate": 8.797106420321178e-06, "loss": 0.0012, "step": 14367 }, { "epoch": 13.815384615384616, "grad_norm": 0.13906435668468475, "learning_rate": 8.795869799541692e-06, "loss": 0.0009, "step": 14368 }, { "epoch": 13.816346153846155, "grad_norm": 1.04423189163208, "learning_rate": 8.79463319744677e-06, "loss": 0.0124, "step": 14369 }, { "epoch": 13.817307692307692, "grad_norm": 0.14028814435005188, "learning_rate": 8.793396614055607e-06, "loss": 0.0008, "step": 14370 }, { "epoch": 13.81826923076923, "grad_norm": 0.641346275806427, "learning_rate": 8.792160049387383e-06, "loss": 0.0034, "step": 14371 }, { "epoch": 13.819230769230769, "grad_norm": 3.756817579269409, "learning_rate": 8.790923503461294e-06, "loss": 0.0312, "step": 14372 }, { "epoch": 13.820192307692308, "grad_norm": 0.6215329766273499, "learning_rate": 8.789686976296521e-06, "loss": 0.0042, "step": 14373 }, { "epoch": 13.821153846153846, "grad_norm": 0.3157936632633209, "learning_rate": 8.788450467912254e-06, "loss": 0.0013, "step": 14374 }, { "epoch": 13.822115384615385, "grad_norm": 0.45376062393188477, "learning_rate": 8.78721397832768e-06, "loss": 0.0021, "step": 14375 }, { "epoch": 13.823076923076924, "grad_norm": 0.14770884811878204, "learning_rate": 8.785977507561985e-06, "loss": 0.001, "step": 14376 }, { "epoch": 13.82403846153846, "grad_norm": 1.5656051635742188, "learning_rate": 8.784741055634359e-06, "loss": 0.0309, "step": 14377 }, { "epoch": 13.825, "grad_norm": 3.1321051120758057, "learning_rate": 8.78350462256398e-06, "loss": 0.017, "step": 14378 }, { "epoch": 13.825961538461538, "grad_norm": 0.03627592697739601, "learning_rate": 8.782268208370042e-06, "loss": 0.0004, "step": 14379 }, { "epoch": 13.826923076923077, "grad_norm": 0.3065948188304901, "learning_rate": 8.781031813071728e-06, "loss": 0.0022, "step": 14380 }, { "epoch": 13.827884615384615, "grad_norm": 0.15368106961250305, "learning_rate": 8.779795436688224e-06, "loss": 0.0011, "step": 14381 }, { "epoch": 13.828846153846154, "grad_norm": 0.0763397216796875, "learning_rate": 8.77855907923871e-06, "loss": 0.0006, "step": 14382 }, { "epoch": 13.829807692307693, "grad_norm": 4.189023971557617, "learning_rate": 8.777322740742376e-06, "loss": 0.0634, "step": 14383 }, { "epoch": 13.830769230769231, "grad_norm": 0.2921808958053589, "learning_rate": 8.776086421218404e-06, "loss": 0.0013, "step": 14384 }, { "epoch": 13.83173076923077, "grad_norm": 0.27480629086494446, "learning_rate": 8.774850120685979e-06, "loss": 0.0016, "step": 14385 }, { "epoch": 13.832692307692307, "grad_norm": 0.0595429502427578, "learning_rate": 8.773613839164283e-06, "loss": 0.0007, "step": 14386 }, { "epoch": 13.833653846153846, "grad_norm": 0.13569039106369019, "learning_rate": 8.772377576672502e-06, "loss": 0.001, "step": 14387 }, { "epoch": 13.834615384615384, "grad_norm": 2.5400919914245605, "learning_rate": 8.77114133322982e-06, "loss": 0.021, "step": 14388 }, { "epoch": 13.835576923076923, "grad_norm": 0.7470234036445618, "learning_rate": 8.769905108855412e-06, "loss": 0.0028, "step": 14389 }, { "epoch": 13.836538461538462, "grad_norm": 0.844702422618866, "learning_rate": 8.768668903568472e-06, "loss": 0.003, "step": 14390 }, { "epoch": 13.8375, "grad_norm": 0.1654454916715622, "learning_rate": 8.767432717388174e-06, "loss": 0.0008, "step": 14391 }, { "epoch": 13.838461538461539, "grad_norm": 0.5554351806640625, "learning_rate": 8.766196550333706e-06, "loss": 0.0045, "step": 14392 }, { "epoch": 13.839423076923078, "grad_norm": 0.1074562594294548, "learning_rate": 8.76496040242424e-06, "loss": 0.0006, "step": 14393 }, { "epoch": 13.840384615384615, "grad_norm": 0.4533284306526184, "learning_rate": 8.763724273678969e-06, "loss": 0.0026, "step": 14394 }, { "epoch": 13.841346153846153, "grad_norm": 3.662567138671875, "learning_rate": 8.762488164117069e-06, "loss": 0.0454, "step": 14395 }, { "epoch": 13.842307692307692, "grad_norm": 1.419576644897461, "learning_rate": 8.761252073757717e-06, "loss": 0.0154, "step": 14396 }, { "epoch": 13.84326923076923, "grad_norm": 2.586611032485962, "learning_rate": 8.7600160026201e-06, "loss": 0.0237, "step": 14397 }, { "epoch": 13.84423076923077, "grad_norm": 0.6839630007743835, "learning_rate": 8.758779950723394e-06, "loss": 0.0022, "step": 14398 }, { "epoch": 13.845192307692308, "grad_norm": 0.12439145892858505, "learning_rate": 8.757543918086783e-06, "loss": 0.0007, "step": 14399 }, { "epoch": 13.846153846153847, "grad_norm": 0.1776663213968277, "learning_rate": 8.75630790472944e-06, "loss": 0.001, "step": 14400 }, { "epoch": 13.847115384615385, "grad_norm": 0.003959266934543848, "learning_rate": 8.75507191067055e-06, "loss": 0.0, "step": 14401 }, { "epoch": 13.848076923076922, "grad_norm": 0.8571129441261292, "learning_rate": 8.753835935929292e-06, "loss": 0.0051, "step": 14402 }, { "epoch": 13.849038461538461, "grad_norm": 0.05909985676407814, "learning_rate": 8.75259998052484e-06, "loss": 0.0005, "step": 14403 }, { "epoch": 13.85, "grad_norm": 3.602277994155884, "learning_rate": 8.751364044476374e-06, "loss": 0.0203, "step": 14404 }, { "epoch": 13.850961538461538, "grad_norm": 0.16952168941497803, "learning_rate": 8.750128127803077e-06, "loss": 0.0013, "step": 14405 }, { "epoch": 13.851923076923077, "grad_norm": 0.0795888602733612, "learning_rate": 8.748892230524121e-06, "loss": 0.0005, "step": 14406 }, { "epoch": 13.852884615384616, "grad_norm": 0.0823543518781662, "learning_rate": 8.747656352658685e-06, "loss": 0.0005, "step": 14407 }, { "epoch": 13.853846153846154, "grad_norm": 0.6608134508132935, "learning_rate": 8.746420494225948e-06, "loss": 0.0031, "step": 14408 }, { "epoch": 13.854807692307693, "grad_norm": 0.059162795543670654, "learning_rate": 8.745184655245088e-06, "loss": 0.0003, "step": 14409 }, { "epoch": 13.85576923076923, "grad_norm": 0.0487196259200573, "learning_rate": 8.743948835735277e-06, "loss": 0.0003, "step": 14410 }, { "epoch": 13.856730769230769, "grad_norm": 0.14835776388645172, "learning_rate": 8.74271303571569e-06, "loss": 0.0009, "step": 14411 }, { "epoch": 13.857692307692307, "grad_norm": 0.096718929708004, "learning_rate": 8.74147725520551e-06, "loss": 0.0008, "step": 14412 }, { "epoch": 13.858653846153846, "grad_norm": 0.0899098739027977, "learning_rate": 8.740241494223911e-06, "loss": 0.0002, "step": 14413 }, { "epoch": 13.859615384615385, "grad_norm": 0.17231781780719757, "learning_rate": 8.739005752790067e-06, "loss": 0.0012, "step": 14414 }, { "epoch": 13.860576923076923, "grad_norm": 0.3689117431640625, "learning_rate": 8.737770030923148e-06, "loss": 0.0014, "step": 14415 }, { "epoch": 13.861538461538462, "grad_norm": 1.0950980186462402, "learning_rate": 8.736534328642337e-06, "loss": 0.0113, "step": 14416 }, { "epoch": 13.8625, "grad_norm": 0.012823669239878654, "learning_rate": 8.735298645966806e-06, "loss": 0.0002, "step": 14417 }, { "epoch": 13.86346153846154, "grad_norm": 3.3845555782318115, "learning_rate": 8.734062982915724e-06, "loss": 0.0487, "step": 14418 }, { "epoch": 13.864423076923076, "grad_norm": 0.049912985414266586, "learning_rate": 8.732827339508272e-06, "loss": 0.0004, "step": 14419 }, { "epoch": 13.865384615384615, "grad_norm": 1.3485703468322754, "learning_rate": 8.731591715763621e-06, "loss": 0.0065, "step": 14420 }, { "epoch": 13.866346153846154, "grad_norm": 2.7215776443481445, "learning_rate": 8.730356111700944e-06, "loss": 0.0435, "step": 14421 }, { "epoch": 13.867307692307692, "grad_norm": 0.09826936572790146, "learning_rate": 8.729120527339412e-06, "loss": 0.0006, "step": 14422 }, { "epoch": 13.868269230769231, "grad_norm": 0.0328841470181942, "learning_rate": 8.7278849626982e-06, "loss": 0.0003, "step": 14423 }, { "epoch": 13.86923076923077, "grad_norm": 0.17478182911872864, "learning_rate": 8.72664941779648e-06, "loss": 0.0008, "step": 14424 }, { "epoch": 13.870192307692308, "grad_norm": 0.8940187096595764, "learning_rate": 8.725413892653424e-06, "loss": 0.0109, "step": 14425 }, { "epoch": 13.871153846153845, "grad_norm": 0.12369932234287262, "learning_rate": 8.724178387288202e-06, "loss": 0.0006, "step": 14426 }, { "epoch": 13.872115384615384, "grad_norm": 0.3854424059391022, "learning_rate": 8.722942901719989e-06, "loss": 0.0016, "step": 14427 }, { "epoch": 13.873076923076923, "grad_norm": 1.155983567237854, "learning_rate": 8.721707435967954e-06, "loss": 0.0042, "step": 14428 }, { "epoch": 13.874038461538461, "grad_norm": 1.0073012113571167, "learning_rate": 8.720471990051265e-06, "loss": 0.0039, "step": 14429 }, { "epoch": 13.875, "grad_norm": 0.13410331308841705, "learning_rate": 8.719236563989098e-06, "loss": 0.0007, "step": 14430 }, { "epoch": 13.875961538461539, "grad_norm": 1.9337061643600464, "learning_rate": 8.71800115780062e-06, "loss": 0.0068, "step": 14431 }, { "epoch": 13.876923076923077, "grad_norm": 2.0957818031311035, "learning_rate": 8.716765771505002e-06, "loss": 0.0063, "step": 14432 }, { "epoch": 13.877884615384616, "grad_norm": 2.0084009170532227, "learning_rate": 8.71553040512141e-06, "loss": 0.0113, "step": 14433 }, { "epoch": 13.878846153846155, "grad_norm": 1.2759813070297241, "learning_rate": 8.71429505866902e-06, "loss": 0.0151, "step": 14434 }, { "epoch": 13.879807692307692, "grad_norm": 2.4841082096099854, "learning_rate": 8.713059732166996e-06, "loss": 0.0245, "step": 14435 }, { "epoch": 13.88076923076923, "grad_norm": 0.030843224376440048, "learning_rate": 8.711824425634506e-06, "loss": 0.0002, "step": 14436 }, { "epoch": 13.881730769230769, "grad_norm": 0.21190011501312256, "learning_rate": 8.710589139090722e-06, "loss": 0.0009, "step": 14437 }, { "epoch": 13.882692307692308, "grad_norm": 0.1401132345199585, "learning_rate": 8.70935387255481e-06, "loss": 0.0006, "step": 14438 }, { "epoch": 13.883653846153846, "grad_norm": 0.5294821262359619, "learning_rate": 8.708118626045939e-06, "loss": 0.0029, "step": 14439 }, { "epoch": 13.884615384615385, "grad_norm": 0.7473779916763306, "learning_rate": 8.706883399583273e-06, "loss": 0.0033, "step": 14440 }, { "epoch": 13.885576923076924, "grad_norm": 0.5763852000236511, "learning_rate": 8.705648193185982e-06, "loss": 0.0026, "step": 14441 }, { "epoch": 13.88653846153846, "grad_norm": 2.227811813354492, "learning_rate": 8.704413006873235e-06, "loss": 0.0284, "step": 14442 }, { "epoch": 13.8875, "grad_norm": 0.025100531056523323, "learning_rate": 8.703177840664193e-06, "loss": 0.0003, "step": 14443 }, { "epoch": 13.888461538461538, "grad_norm": 0.05563563480973244, "learning_rate": 8.701942694578023e-06, "loss": 0.0005, "step": 14444 }, { "epoch": 13.889423076923077, "grad_norm": 2.467125415802002, "learning_rate": 8.700707568633896e-06, "loss": 0.0137, "step": 14445 }, { "epoch": 13.890384615384615, "grad_norm": 0.2489997148513794, "learning_rate": 8.699472462850973e-06, "loss": 0.0013, "step": 14446 }, { "epoch": 13.891346153846154, "grad_norm": 1.4227510690689087, "learning_rate": 8.698237377248419e-06, "loss": 0.009, "step": 14447 }, { "epoch": 13.892307692307693, "grad_norm": 0.03258412703871727, "learning_rate": 8.697002311845402e-06, "loss": 0.0002, "step": 14448 }, { "epoch": 13.893269230769231, "grad_norm": 2.259187698364258, "learning_rate": 8.695767266661085e-06, "loss": 0.013, "step": 14449 }, { "epoch": 13.89423076923077, "grad_norm": 0.05576653778553009, "learning_rate": 8.694532241714633e-06, "loss": 0.0004, "step": 14450 }, { "epoch": 13.895192307692307, "grad_norm": 0.26965242624282837, "learning_rate": 8.693297237025206e-06, "loss": 0.0013, "step": 14451 }, { "epoch": 13.896153846153846, "grad_norm": 3.2545270919799805, "learning_rate": 8.692062252611973e-06, "loss": 0.014, "step": 14452 }, { "epoch": 13.897115384615384, "grad_norm": 0.3532225489616394, "learning_rate": 8.690827288494094e-06, "loss": 0.0014, "step": 14453 }, { "epoch": 13.898076923076923, "grad_norm": 0.24792258441448212, "learning_rate": 8.689592344690734e-06, "loss": 0.0009, "step": 14454 }, { "epoch": 13.899038461538462, "grad_norm": 3.9553372859954834, "learning_rate": 8.688357421221054e-06, "loss": 0.0149, "step": 14455 }, { "epoch": 13.9, "grad_norm": 0.9404668807983398, "learning_rate": 8.687122518104218e-06, "loss": 0.01, "step": 14456 }, { "epoch": 13.900961538461539, "grad_norm": 3.1029117107391357, "learning_rate": 8.685887635359387e-06, "loss": 0.0501, "step": 14457 }, { "epoch": 13.901923076923078, "grad_norm": 0.05515362322330475, "learning_rate": 8.684652773005722e-06, "loss": 0.0003, "step": 14458 }, { "epoch": 13.902884615384615, "grad_norm": 0.3033919632434845, "learning_rate": 8.683417931062385e-06, "loss": 0.001, "step": 14459 }, { "epoch": 13.903846153846153, "grad_norm": 0.4784580171108246, "learning_rate": 8.682183109548541e-06, "loss": 0.0013, "step": 14460 }, { "epoch": 13.904807692307692, "grad_norm": 1.9861207008361816, "learning_rate": 8.680948308483346e-06, "loss": 0.01, "step": 14461 }, { "epoch": 13.90576923076923, "grad_norm": 0.8533010482788086, "learning_rate": 8.679713527885959e-06, "loss": 0.0029, "step": 14462 }, { "epoch": 13.90673076923077, "grad_norm": 0.27026182413101196, "learning_rate": 8.678478767775547e-06, "loss": 0.0008, "step": 14463 }, { "epoch": 13.907692307692308, "grad_norm": 0.06277553737163544, "learning_rate": 8.677244028171265e-06, "loss": 0.0006, "step": 14464 }, { "epoch": 13.908653846153847, "grad_norm": 0.013318896293640137, "learning_rate": 8.676009309092273e-06, "loss": 0.0001, "step": 14465 }, { "epoch": 13.909615384615385, "grad_norm": 0.5399419069290161, "learning_rate": 8.674774610557728e-06, "loss": 0.0028, "step": 14466 }, { "epoch": 13.910576923076922, "grad_norm": 2.121121406555176, "learning_rate": 8.673539932586794e-06, "loss": 0.0837, "step": 14467 }, { "epoch": 13.911538461538461, "grad_norm": 0.02753388322889805, "learning_rate": 8.672305275198628e-06, "loss": 0.0002, "step": 14468 }, { "epoch": 13.9125, "grad_norm": 3.6951780319213867, "learning_rate": 8.671070638412385e-06, "loss": 0.0426, "step": 14469 }, { "epoch": 13.913461538461538, "grad_norm": 0.021901661530137062, "learning_rate": 8.669836022247227e-06, "loss": 0.0002, "step": 14470 }, { "epoch": 13.914423076923077, "grad_norm": 0.9207298755645752, "learning_rate": 8.66860142672231e-06, "loss": 0.0041, "step": 14471 }, { "epoch": 13.915384615384616, "grad_norm": 4.104196071624756, "learning_rate": 8.667366851856791e-06, "loss": 0.0495, "step": 14472 }, { "epoch": 13.916346153846154, "grad_norm": 1.0044188499450684, "learning_rate": 8.666132297669827e-06, "loss": 0.004, "step": 14473 }, { "epoch": 13.917307692307693, "grad_norm": 0.06790638715028763, "learning_rate": 8.664897764180576e-06, "loss": 0.0005, "step": 14474 }, { "epoch": 13.91826923076923, "grad_norm": 0.16257664561271667, "learning_rate": 8.663663251408191e-06, "loss": 0.0009, "step": 14475 }, { "epoch": 13.919230769230769, "grad_norm": 0.15802162885665894, "learning_rate": 8.662428759371832e-06, "loss": 0.0007, "step": 14476 }, { "epoch": 13.920192307692307, "grad_norm": 0.9570573568344116, "learning_rate": 8.66119428809065e-06, "loss": 0.0041, "step": 14477 }, { "epoch": 13.921153846153846, "grad_norm": 0.0462370328605175, "learning_rate": 8.659959837583808e-06, "loss": 0.0004, "step": 14478 }, { "epoch": 13.922115384615385, "grad_norm": 0.19076432287693024, "learning_rate": 8.658725407870454e-06, "loss": 0.0015, "step": 14479 }, { "epoch": 13.923076923076923, "grad_norm": 0.09994098544120789, "learning_rate": 8.657490998969744e-06, "loss": 0.0009, "step": 14480 }, { "epoch": 13.924038461538462, "grad_norm": 0.03727278113365173, "learning_rate": 8.656256610900834e-06, "loss": 0.0003, "step": 14481 }, { "epoch": 13.925, "grad_norm": 0.05941688269376755, "learning_rate": 8.655022243682881e-06, "loss": 0.0004, "step": 14482 }, { "epoch": 13.92596153846154, "grad_norm": 0.2682640552520752, "learning_rate": 8.653787897335032e-06, "loss": 0.0011, "step": 14483 }, { "epoch": 13.926923076923076, "grad_norm": 0.194271981716156, "learning_rate": 8.652553571876443e-06, "loss": 0.0012, "step": 14484 }, { "epoch": 13.927884615384615, "grad_norm": 1.147826910018921, "learning_rate": 8.65131926732627e-06, "loss": 0.0048, "step": 14485 }, { "epoch": 13.928846153846154, "grad_norm": 0.5910943746566772, "learning_rate": 8.650084983703663e-06, "loss": 0.0031, "step": 14486 }, { "epoch": 13.929807692307692, "grad_norm": 0.035561561584472656, "learning_rate": 8.648850721027773e-06, "loss": 0.0003, "step": 14487 }, { "epoch": 13.930769230769231, "grad_norm": 0.06394880264997482, "learning_rate": 8.647616479317756e-06, "loss": 0.0006, "step": 14488 }, { "epoch": 13.93173076923077, "grad_norm": 0.023035231977701187, "learning_rate": 8.646382258592762e-06, "loss": 0.0003, "step": 14489 }, { "epoch": 13.932692307692308, "grad_norm": 1.7017613649368286, "learning_rate": 8.645148058871943e-06, "loss": 0.0079, "step": 14490 }, { "epoch": 13.933653846153845, "grad_norm": 1.6793137788772583, "learning_rate": 8.643913880174449e-06, "loss": 0.0045, "step": 14491 }, { "epoch": 13.934615384615384, "grad_norm": 2.6213645935058594, "learning_rate": 8.642679722519433e-06, "loss": 0.0167, "step": 14492 }, { "epoch": 13.935576923076923, "grad_norm": 0.03834038972854614, "learning_rate": 8.641445585926042e-06, "loss": 0.0002, "step": 14493 }, { "epoch": 13.936538461538461, "grad_norm": 0.2551247179508209, "learning_rate": 8.640211470413431e-06, "loss": 0.0014, "step": 14494 }, { "epoch": 13.9375, "grad_norm": 1.4967153072357178, "learning_rate": 8.638977376000743e-06, "loss": 0.0078, "step": 14495 }, { "epoch": 13.938461538461539, "grad_norm": 0.033601921051740646, "learning_rate": 8.637743302707134e-06, "loss": 0.0003, "step": 14496 }, { "epoch": 13.939423076923077, "grad_norm": 0.05008299648761749, "learning_rate": 8.636509250551752e-06, "loss": 0.0006, "step": 14497 }, { "epoch": 13.940384615384616, "grad_norm": 1.1687947511672974, "learning_rate": 8.63527521955374e-06, "loss": 0.0065, "step": 14498 }, { "epoch": 13.941346153846155, "grad_norm": 0.9718804359436035, "learning_rate": 8.634041209732255e-06, "loss": 0.0257, "step": 14499 }, { "epoch": 13.942307692307692, "grad_norm": 0.4475480616092682, "learning_rate": 8.632807221106442e-06, "loss": 0.0019, "step": 14500 }, { "epoch": 13.94326923076923, "grad_norm": 2.34332013130188, "learning_rate": 8.631573253695446e-06, "loss": 0.0236, "step": 14501 }, { "epoch": 13.944230769230769, "grad_norm": 0.5389596223831177, "learning_rate": 8.630339307518418e-06, "loss": 0.0019, "step": 14502 }, { "epoch": 13.945192307692308, "grad_norm": 2.353996753692627, "learning_rate": 8.629105382594503e-06, "loss": 0.0091, "step": 14503 }, { "epoch": 13.946153846153846, "grad_norm": 2.699310302734375, "learning_rate": 8.62787147894285e-06, "loss": 0.0102, "step": 14504 }, { "epoch": 13.947115384615385, "grad_norm": 0.18145911395549774, "learning_rate": 8.626637596582605e-06, "loss": 0.0011, "step": 14505 }, { "epoch": 13.948076923076924, "grad_norm": 1.3382176160812378, "learning_rate": 8.625403735532912e-06, "loss": 0.0191, "step": 14506 }, { "epoch": 13.94903846153846, "grad_norm": 0.06120594963431358, "learning_rate": 8.624169895812919e-06, "loss": 0.0002, "step": 14507 }, { "epoch": 13.95, "grad_norm": 3.7654693126678467, "learning_rate": 8.622936077441773e-06, "loss": 0.0371, "step": 14508 }, { "epoch": 13.950961538461538, "grad_norm": 0.06450977176427841, "learning_rate": 8.621702280438615e-06, "loss": 0.0004, "step": 14509 }, { "epoch": 13.951923076923077, "grad_norm": 3.1036767959594727, "learning_rate": 8.620468504822594e-06, "loss": 0.0192, "step": 14510 }, { "epoch": 13.952884615384615, "grad_norm": 0.9700639247894287, "learning_rate": 8.619234750612853e-06, "loss": 0.0048, "step": 14511 }, { "epoch": 13.953846153846154, "grad_norm": 0.07998764514923096, "learning_rate": 8.618001017828536e-06, "loss": 0.0005, "step": 14512 }, { "epoch": 13.954807692307693, "grad_norm": 2.232879161834717, "learning_rate": 8.616767306488785e-06, "loss": 0.0133, "step": 14513 }, { "epoch": 13.955769230769231, "grad_norm": 0.38793057203292847, "learning_rate": 8.615533616612748e-06, "loss": 0.0017, "step": 14514 }, { "epoch": 13.95673076923077, "grad_norm": 0.09726085513830185, "learning_rate": 8.614299948219566e-06, "loss": 0.0006, "step": 14515 }, { "epoch": 13.957692307692307, "grad_norm": 0.06466306746006012, "learning_rate": 8.613066301328382e-06, "loss": 0.0005, "step": 14516 }, { "epoch": 13.958653846153846, "grad_norm": 0.06517495959997177, "learning_rate": 8.611832675958335e-06, "loss": 0.0005, "step": 14517 }, { "epoch": 13.959615384615384, "grad_norm": 0.08023643493652344, "learning_rate": 8.610599072128575e-06, "loss": 0.0004, "step": 14518 }, { "epoch": 13.960576923076923, "grad_norm": 1.3300870656967163, "learning_rate": 8.609365489858237e-06, "loss": 0.0123, "step": 14519 }, { "epoch": 13.961538461538462, "grad_norm": 0.5769469141960144, "learning_rate": 8.608131929166466e-06, "loss": 0.0067, "step": 14520 }, { "epoch": 13.9625, "grad_norm": 0.18848615884780884, "learning_rate": 8.606898390072403e-06, "loss": 0.0009, "step": 14521 }, { "epoch": 13.963461538461539, "grad_norm": 0.41860777139663696, "learning_rate": 8.605664872595188e-06, "loss": 0.0036, "step": 14522 }, { "epoch": 13.964423076923078, "grad_norm": 0.3497195541858673, "learning_rate": 8.604431376753961e-06, "loss": 0.0015, "step": 14523 }, { "epoch": 13.965384615384615, "grad_norm": 0.5825095176696777, "learning_rate": 8.603197902567862e-06, "loss": 0.0031, "step": 14524 }, { "epoch": 13.966346153846153, "grad_norm": 0.38217994570732117, "learning_rate": 8.601964450056033e-06, "loss": 0.004, "step": 14525 }, { "epoch": 13.967307692307692, "grad_norm": 0.08123040199279785, "learning_rate": 8.600731019237614e-06, "loss": 0.0008, "step": 14526 }, { "epoch": 13.96826923076923, "grad_norm": 0.23252931237220764, "learning_rate": 8.59949761013174e-06, "loss": 0.0012, "step": 14527 }, { "epoch": 13.96923076923077, "grad_norm": 4.258718967437744, "learning_rate": 8.598264222757554e-06, "loss": 0.0373, "step": 14528 }, { "epoch": 13.970192307692308, "grad_norm": 0.5348007678985596, "learning_rate": 8.597030857134196e-06, "loss": 0.0029, "step": 14529 }, { "epoch": 13.971153846153847, "grad_norm": 0.48277339339256287, "learning_rate": 8.595797513280799e-06, "loss": 0.0053, "step": 14530 }, { "epoch": 13.972115384615385, "grad_norm": 0.02934499830007553, "learning_rate": 8.594564191216503e-06, "loss": 0.0003, "step": 14531 }, { "epoch": 13.973076923076922, "grad_norm": 0.21804554760456085, "learning_rate": 8.593330890960447e-06, "loss": 0.0011, "step": 14532 }, { "epoch": 13.974038461538461, "grad_norm": 0.01617562025785446, "learning_rate": 8.592097612531766e-06, "loss": 0.0002, "step": 14533 }, { "epoch": 13.975, "grad_norm": 0.019730467349290848, "learning_rate": 8.590864355949598e-06, "loss": 0.0001, "step": 14534 }, { "epoch": 13.975961538461538, "grad_norm": 0.020835578441619873, "learning_rate": 8.58963112123308e-06, "loss": 0.0001, "step": 14535 }, { "epoch": 13.976923076923077, "grad_norm": 0.10302809625864029, "learning_rate": 8.588397908401347e-06, "loss": 0.0005, "step": 14536 }, { "epoch": 13.977884615384616, "grad_norm": 1.4211828708648682, "learning_rate": 8.587164717473537e-06, "loss": 0.0089, "step": 14537 }, { "epoch": 13.978846153846154, "grad_norm": 2.036999464035034, "learning_rate": 8.58593154846878e-06, "loss": 0.0173, "step": 14538 }, { "epoch": 13.979807692307693, "grad_norm": 2.5194153785705566, "learning_rate": 8.584698401406219e-06, "loss": 0.0183, "step": 14539 }, { "epoch": 13.98076923076923, "grad_norm": 0.22082534432411194, "learning_rate": 8.583465276304984e-06, "loss": 0.0007, "step": 14540 }, { "epoch": 13.981730769230769, "grad_norm": 2.8560903072357178, "learning_rate": 8.58223217318421e-06, "loss": 0.058, "step": 14541 }, { "epoch": 13.982692307692307, "grad_norm": 0.03827543556690216, "learning_rate": 8.58099909206303e-06, "loss": 0.0003, "step": 14542 }, { "epoch": 13.983653846153846, "grad_norm": 0.41897743940353394, "learning_rate": 8.579766032960582e-06, "loss": 0.0019, "step": 14543 }, { "epoch": 13.984615384615385, "grad_norm": 2.6028926372528076, "learning_rate": 8.578532995895998e-06, "loss": 0.0101, "step": 14544 }, { "epoch": 13.985576923076923, "grad_norm": 1.9864473342895508, "learning_rate": 8.577299980888407e-06, "loss": 0.0155, "step": 14545 }, { "epoch": 13.986538461538462, "grad_norm": 3.3791966438293457, "learning_rate": 8.576066987956945e-06, "loss": 0.0695, "step": 14546 }, { "epoch": 13.9875, "grad_norm": 0.07531405240297318, "learning_rate": 8.574834017120746e-06, "loss": 0.0004, "step": 14547 }, { "epoch": 13.98846153846154, "grad_norm": 0.026875611394643784, "learning_rate": 8.573601068398939e-06, "loss": 0.0004, "step": 14548 }, { "epoch": 13.989423076923076, "grad_norm": 0.10645809024572372, "learning_rate": 8.572368141810656e-06, "loss": 0.0008, "step": 14549 }, { "epoch": 13.990384615384615, "grad_norm": 1.489888310432434, "learning_rate": 8.57113523737503e-06, "loss": 0.0062, "step": 14550 }, { "epoch": 13.991346153846154, "grad_norm": 0.333356648683548, "learning_rate": 8.569902355111192e-06, "loss": 0.0016, "step": 14551 }, { "epoch": 13.992307692307692, "grad_norm": 1.7227938175201416, "learning_rate": 8.568669495038274e-06, "loss": 0.0096, "step": 14552 }, { "epoch": 13.993269230769231, "grad_norm": 1.5456440448760986, "learning_rate": 8.5674366571754e-06, "loss": 0.0064, "step": 14553 }, { "epoch": 13.99423076923077, "grad_norm": 0.41251519322395325, "learning_rate": 8.566203841541706e-06, "loss": 0.0017, "step": 14554 }, { "epoch": 13.995192307692308, "grad_norm": 1.0003000497817993, "learning_rate": 8.564971048156323e-06, "loss": 0.0255, "step": 14555 }, { "epoch": 13.996153846153845, "grad_norm": 0.03094249777495861, "learning_rate": 8.563738277038376e-06, "loss": 0.0004, "step": 14556 }, { "epoch": 13.997115384615384, "grad_norm": 0.7003090381622314, "learning_rate": 8.562505528206995e-06, "loss": 0.0028, "step": 14557 }, { "epoch": 13.998076923076923, "grad_norm": 0.6028164029121399, "learning_rate": 8.561272801681311e-06, "loss": 0.0026, "step": 14558 }, { "epoch": 13.999038461538461, "grad_norm": 2.230459451675415, "learning_rate": 8.560040097480448e-06, "loss": 0.0089, "step": 14559 }, { "epoch": 14.0, "grad_norm": 0.058638643473386765, "learning_rate": 8.558807415623537e-06, "loss": 0.0003, "step": 14560 }, { "epoch": 14.000961538461539, "grad_norm": 0.07605984061956406, "learning_rate": 8.557574756129707e-06, "loss": 0.0004, "step": 14561 }, { "epoch": 14.001923076923077, "grad_norm": 0.051802799105644226, "learning_rate": 8.556342119018081e-06, "loss": 0.0005, "step": 14562 }, { "epoch": 14.002884615384616, "grad_norm": 0.5893211960792542, "learning_rate": 8.55510950430779e-06, "loss": 0.0029, "step": 14563 }, { "epoch": 14.003846153846155, "grad_norm": 0.022920966148376465, "learning_rate": 8.553876912017958e-06, "loss": 0.0002, "step": 14564 }, { "epoch": 14.004807692307692, "grad_norm": 0.0038089831359684467, "learning_rate": 8.552644342167713e-06, "loss": 0.0, "step": 14565 }, { "epoch": 14.00576923076923, "grad_norm": 2.6065595149993896, "learning_rate": 8.551411794776179e-06, "loss": 0.009, "step": 14566 }, { "epoch": 14.006730769230769, "grad_norm": 0.2123727947473526, "learning_rate": 8.55017926986248e-06, "loss": 0.0012, "step": 14567 }, { "epoch": 14.007692307692308, "grad_norm": 1.0197666883468628, "learning_rate": 8.548946767445746e-06, "loss": 0.0039, "step": 14568 }, { "epoch": 14.008653846153846, "grad_norm": 0.36035293340682983, "learning_rate": 8.5477142875451e-06, "loss": 0.0024, "step": 14569 }, { "epoch": 14.009615384615385, "grad_norm": 0.08885684609413147, "learning_rate": 8.546481830179666e-06, "loss": 0.0004, "step": 14570 }, { "epoch": 14.010576923076924, "grad_norm": 0.3566613495349884, "learning_rate": 8.545249395368566e-06, "loss": 0.0011, "step": 14571 }, { "epoch": 14.011538461538462, "grad_norm": 0.03781329095363617, "learning_rate": 8.544016983130927e-06, "loss": 0.0002, "step": 14572 }, { "epoch": 14.0125, "grad_norm": 0.03987499698996544, "learning_rate": 8.542784593485871e-06, "loss": 0.0002, "step": 14573 }, { "epoch": 14.013461538461538, "grad_norm": 2.661036491394043, "learning_rate": 8.541552226452522e-06, "loss": 0.0392, "step": 14574 }, { "epoch": 14.014423076923077, "grad_norm": 0.06251098215579987, "learning_rate": 8.54031988205e-06, "loss": 0.0004, "step": 14575 }, { "epoch": 14.015384615384615, "grad_norm": 0.0798042044043541, "learning_rate": 8.539087560297431e-06, "loss": 0.0005, "step": 14576 }, { "epoch": 14.016346153846154, "grad_norm": 0.013865160755813122, "learning_rate": 8.537855261213936e-06, "loss": 0.0001, "step": 14577 }, { "epoch": 14.017307692307693, "grad_norm": 0.01982671208679676, "learning_rate": 8.536622984818632e-06, "loss": 0.0002, "step": 14578 }, { "epoch": 14.018269230769231, "grad_norm": 0.04739496484398842, "learning_rate": 8.535390731130648e-06, "loss": 0.0002, "step": 14579 }, { "epoch": 14.01923076923077, "grad_norm": 0.018276827409863472, "learning_rate": 8.5341585001691e-06, "loss": 0.0002, "step": 14580 }, { "epoch": 14.020192307692307, "grad_norm": 0.07732770591974258, "learning_rate": 8.532926291953113e-06, "loss": 0.0007, "step": 14581 }, { "epoch": 14.021153846153846, "grad_norm": 0.3909798264503479, "learning_rate": 8.531694106501796e-06, "loss": 0.002, "step": 14582 }, { "epoch": 14.022115384615384, "grad_norm": 0.41702204942703247, "learning_rate": 8.530461943834284e-06, "loss": 0.0014, "step": 14583 }, { "epoch": 14.023076923076923, "grad_norm": 0.016665440052747726, "learning_rate": 8.529229803969686e-06, "loss": 0.0003, "step": 14584 }, { "epoch": 14.024038461538462, "grad_norm": 0.9332520365715027, "learning_rate": 8.527997686927127e-06, "loss": 0.0028, "step": 14585 }, { "epoch": 14.025, "grad_norm": 1.4053435325622559, "learning_rate": 8.526765592725725e-06, "loss": 0.0053, "step": 14586 }, { "epoch": 14.025961538461539, "grad_norm": 3.4889001846313477, "learning_rate": 8.525533521384594e-06, "loss": 0.0186, "step": 14587 }, { "epoch": 14.026923076923078, "grad_norm": 0.02013404481112957, "learning_rate": 8.524301472922857e-06, "loss": 0.0002, "step": 14588 }, { "epoch": 14.027884615384615, "grad_norm": 0.13234739005565643, "learning_rate": 8.523069447359631e-06, "loss": 0.0007, "step": 14589 }, { "epoch": 14.028846153846153, "grad_norm": 0.3821597993373871, "learning_rate": 8.521837444714032e-06, "loss": 0.0013, "step": 14590 }, { "epoch": 14.029807692307692, "grad_norm": 0.0044577536173164845, "learning_rate": 8.520605465005174e-06, "loss": 0.0001, "step": 14591 }, { "epoch": 14.03076923076923, "grad_norm": 2.6968984603881836, "learning_rate": 8.51937350825218e-06, "loss": 0.0105, "step": 14592 }, { "epoch": 14.03173076923077, "grad_norm": 0.9616196155548096, "learning_rate": 8.518141574474167e-06, "loss": 0.0357, "step": 14593 }, { "epoch": 14.032692307692308, "grad_norm": 0.1831914782524109, "learning_rate": 8.516909663690243e-06, "loss": 0.0007, "step": 14594 }, { "epoch": 14.033653846153847, "grad_norm": 0.011940417811274529, "learning_rate": 8.515677775919528e-06, "loss": 0.0001, "step": 14595 }, { "epoch": 14.034615384615385, "grad_norm": 0.023088889196515083, "learning_rate": 8.51444591118114e-06, "loss": 0.0002, "step": 14596 }, { "epoch": 14.035576923076922, "grad_norm": 0.1646447479724884, "learning_rate": 8.51321406949419e-06, "loss": 0.0007, "step": 14597 }, { "epoch": 14.036538461538461, "grad_norm": 0.17747241258621216, "learning_rate": 8.511982250877793e-06, "loss": 0.0007, "step": 14598 }, { "epoch": 14.0375, "grad_norm": 0.5647518634796143, "learning_rate": 8.510750455351065e-06, "loss": 0.0019, "step": 14599 }, { "epoch": 14.038461538461538, "grad_norm": 0.04598020389676094, "learning_rate": 8.50951868293312e-06, "loss": 0.0005, "step": 14600 }, { "epoch": 14.039423076923077, "grad_norm": 0.11585493385791779, "learning_rate": 8.50828693364307e-06, "loss": 0.0005, "step": 14601 }, { "epoch": 14.040384615384616, "grad_norm": 0.2698688805103302, "learning_rate": 8.507055207500028e-06, "loss": 0.0012, "step": 14602 }, { "epoch": 14.041346153846154, "grad_norm": 2.4323227405548096, "learning_rate": 8.505823504523109e-06, "loss": 0.0133, "step": 14603 }, { "epoch": 14.042307692307693, "grad_norm": 1.0006130933761597, "learning_rate": 8.504591824731424e-06, "loss": 0.0028, "step": 14604 }, { "epoch": 14.04326923076923, "grad_norm": 0.030916444957256317, "learning_rate": 8.503360168144081e-06, "loss": 0.0003, "step": 14605 }, { "epoch": 14.044230769230769, "grad_norm": 1.2666175365447998, "learning_rate": 8.502128534780198e-06, "loss": 0.0032, "step": 14606 }, { "epoch": 14.045192307692307, "grad_norm": 0.135100319981575, "learning_rate": 8.500896924658886e-06, "loss": 0.0005, "step": 14607 }, { "epoch": 14.046153846153846, "grad_norm": 1.897634744644165, "learning_rate": 8.499665337799254e-06, "loss": 0.0163, "step": 14608 }, { "epoch": 14.047115384615385, "grad_norm": 0.01280476339161396, "learning_rate": 8.498433774220408e-06, "loss": 0.0001, "step": 14609 }, { "epoch": 14.048076923076923, "grad_norm": 0.01698732189834118, "learning_rate": 8.497202233941467e-06, "loss": 0.0001, "step": 14610 }, { "epoch": 14.049038461538462, "grad_norm": 0.5375607013702393, "learning_rate": 8.495970716981536e-06, "loss": 0.0034, "step": 14611 }, { "epoch": 14.05, "grad_norm": 1.8693732023239136, "learning_rate": 8.494739223359725e-06, "loss": 0.0113, "step": 14612 }, { "epoch": 14.050961538461538, "grad_norm": 0.7410258054733276, "learning_rate": 8.49350775309514e-06, "loss": 0.0038, "step": 14613 }, { "epoch": 14.051923076923076, "grad_norm": 0.03442681208252907, "learning_rate": 8.492276306206897e-06, "loss": 0.0002, "step": 14614 }, { "epoch": 14.052884615384615, "grad_norm": 0.5837999582290649, "learning_rate": 8.491044882714101e-06, "loss": 0.0035, "step": 14615 }, { "epoch": 14.053846153846154, "grad_norm": 0.25051936507225037, "learning_rate": 8.489813482635857e-06, "loss": 0.0008, "step": 14616 }, { "epoch": 14.054807692307692, "grad_norm": 0.013665243051946163, "learning_rate": 8.488582105991277e-06, "loss": 0.0002, "step": 14617 }, { "epoch": 14.055769230769231, "grad_norm": 1.4675793647766113, "learning_rate": 8.487350752799466e-06, "loss": 0.0169, "step": 14618 }, { "epoch": 14.05673076923077, "grad_norm": 0.07706652581691742, "learning_rate": 8.486119423079534e-06, "loss": 0.0003, "step": 14619 }, { "epoch": 14.057692307692308, "grad_norm": 0.04946570098400116, "learning_rate": 8.48488811685058e-06, "loss": 0.0003, "step": 14620 }, { "epoch": 14.058653846153845, "grad_norm": 0.17478372156620026, "learning_rate": 8.48365683413172e-06, "loss": 0.001, "step": 14621 }, { "epoch": 14.059615384615384, "grad_norm": 0.01808471418917179, "learning_rate": 8.482425574942057e-06, "loss": 0.0001, "step": 14622 }, { "epoch": 14.060576923076923, "grad_norm": 3.9779117107391357, "learning_rate": 8.481194339300692e-06, "loss": 0.0387, "step": 14623 }, { "epoch": 14.061538461538461, "grad_norm": 2.128997802734375, "learning_rate": 8.479963127226733e-06, "loss": 0.0124, "step": 14624 }, { "epoch": 14.0625, "grad_norm": 0.8559554815292358, "learning_rate": 8.478731938739286e-06, "loss": 0.002, "step": 14625 }, { "epoch": 14.063461538461539, "grad_norm": 1.2123528718948364, "learning_rate": 8.477500773857456e-06, "loss": 0.0084, "step": 14626 }, { "epoch": 14.064423076923077, "grad_norm": 0.1160661056637764, "learning_rate": 8.476269632600342e-06, "loss": 0.0009, "step": 14627 }, { "epoch": 14.065384615384616, "grad_norm": 1.2018966674804688, "learning_rate": 8.475038514987053e-06, "loss": 0.0036, "step": 14628 }, { "epoch": 14.066346153846155, "grad_norm": 0.10350339114665985, "learning_rate": 8.473807421036692e-06, "loss": 0.0006, "step": 14629 }, { "epoch": 14.067307692307692, "grad_norm": 0.9600966572761536, "learning_rate": 8.47257635076836e-06, "loss": 0.0043, "step": 14630 }, { "epoch": 14.06826923076923, "grad_norm": 0.023556949570775032, "learning_rate": 8.471345304201157e-06, "loss": 0.0002, "step": 14631 }, { "epoch": 14.069230769230769, "grad_norm": 0.28707846999168396, "learning_rate": 8.470114281354191e-06, "loss": 0.0007, "step": 14632 }, { "epoch": 14.070192307692308, "grad_norm": 0.0774630531668663, "learning_rate": 8.468883282246562e-06, "loss": 0.0003, "step": 14633 }, { "epoch": 14.071153846153846, "grad_norm": 1.3682444095611572, "learning_rate": 8.46765230689737e-06, "loss": 0.0034, "step": 14634 }, { "epoch": 14.072115384615385, "grad_norm": 0.003950971644371748, "learning_rate": 8.466421355325715e-06, "loss": 0.0, "step": 14635 }, { "epoch": 14.073076923076924, "grad_norm": 0.03046107478439808, "learning_rate": 8.465190427550701e-06, "loss": 0.0003, "step": 14636 }, { "epoch": 14.074038461538462, "grad_norm": 1.344416618347168, "learning_rate": 8.463959523591428e-06, "loss": 0.0089, "step": 14637 }, { "epoch": 14.075, "grad_norm": 0.05966425687074661, "learning_rate": 8.46272864346699e-06, "loss": 0.0003, "step": 14638 }, { "epoch": 14.075961538461538, "grad_norm": 3.113602638244629, "learning_rate": 8.461497787196497e-06, "loss": 0.0265, "step": 14639 }, { "epoch": 14.076923076923077, "grad_norm": 0.02389691211283207, "learning_rate": 8.460266954799043e-06, "loss": 0.0002, "step": 14640 }, { "epoch": 14.077884615384615, "grad_norm": 0.44240647554397583, "learning_rate": 8.459036146293725e-06, "loss": 0.0011, "step": 14641 }, { "epoch": 14.078846153846154, "grad_norm": 0.005364356562495232, "learning_rate": 8.457805361699642e-06, "loss": 0.0001, "step": 14642 }, { "epoch": 14.079807692307693, "grad_norm": 0.11119257658720016, "learning_rate": 8.456574601035896e-06, "loss": 0.0011, "step": 14643 }, { "epoch": 14.080769230769231, "grad_norm": 1.5378053188323975, "learning_rate": 8.455343864321582e-06, "loss": 0.0068, "step": 14644 }, { "epoch": 14.08173076923077, "grad_norm": 1.5805490016937256, "learning_rate": 8.454113151575795e-06, "loss": 0.0138, "step": 14645 }, { "epoch": 14.082692307692307, "grad_norm": 0.402759313583374, "learning_rate": 8.452882462817636e-06, "loss": 0.0014, "step": 14646 }, { "epoch": 14.083653846153846, "grad_norm": 0.07567250728607178, "learning_rate": 8.451651798066203e-06, "loss": 0.0003, "step": 14647 }, { "epoch": 14.084615384615384, "grad_norm": 1.7715003490447998, "learning_rate": 8.450421157340589e-06, "loss": 0.0187, "step": 14648 }, { "epoch": 14.085576923076923, "grad_norm": 0.026685459539294243, "learning_rate": 8.449190540659888e-06, "loss": 0.0002, "step": 14649 }, { "epoch": 14.086538461538462, "grad_norm": 0.015983864665031433, "learning_rate": 8.447959948043199e-06, "loss": 0.0002, "step": 14650 }, { "epoch": 14.0875, "grad_norm": 0.914327085018158, "learning_rate": 8.446729379509618e-06, "loss": 0.0028, "step": 14651 }, { "epoch": 14.088461538461539, "grad_norm": 0.06447873264551163, "learning_rate": 8.445498835078238e-06, "loss": 0.0003, "step": 14652 }, { "epoch": 14.089423076923078, "grad_norm": 0.009850447066128254, "learning_rate": 8.444268314768152e-06, "loss": 0.0001, "step": 14653 }, { "epoch": 14.090384615384615, "grad_norm": 1.6992555856704712, "learning_rate": 8.443037818598458e-06, "loss": 0.0075, "step": 14654 }, { "epoch": 14.091346153846153, "grad_norm": 0.007758943364024162, "learning_rate": 8.441807346588247e-06, "loss": 0.0001, "step": 14655 }, { "epoch": 14.092307692307692, "grad_norm": 0.010141371749341488, "learning_rate": 8.440576898756609e-06, "loss": 0.0001, "step": 14656 }, { "epoch": 14.09326923076923, "grad_norm": 0.11449139565229416, "learning_rate": 8.439346475122645e-06, "loss": 0.0008, "step": 14657 }, { "epoch": 14.09423076923077, "grad_norm": 0.05455591529607773, "learning_rate": 8.438116075705442e-06, "loss": 0.0004, "step": 14658 }, { "epoch": 14.095192307692308, "grad_norm": 0.4070793390274048, "learning_rate": 8.436885700524094e-06, "loss": 0.001, "step": 14659 }, { "epoch": 14.096153846153847, "grad_norm": 0.7135907411575317, "learning_rate": 8.43565534959769e-06, "loss": 0.0018, "step": 14660 }, { "epoch": 14.097115384615385, "grad_norm": 0.773044764995575, "learning_rate": 8.434425022945326e-06, "loss": 0.0019, "step": 14661 }, { "epoch": 14.098076923076922, "grad_norm": 1.4769941568374634, "learning_rate": 8.433194720586091e-06, "loss": 0.0073, "step": 14662 }, { "epoch": 14.099038461538461, "grad_norm": 1.8145142793655396, "learning_rate": 8.431964442539075e-06, "loss": 0.0101, "step": 14663 }, { "epoch": 14.1, "grad_norm": 0.5763346552848816, "learning_rate": 8.430734188823366e-06, "loss": 0.0021, "step": 14664 }, { "epoch": 14.100961538461538, "grad_norm": 0.02099200151860714, "learning_rate": 8.429503959458061e-06, "loss": 0.0002, "step": 14665 }, { "epoch": 14.101923076923077, "grad_norm": 0.2624181807041168, "learning_rate": 8.428273754462243e-06, "loss": 0.0008, "step": 14666 }, { "epoch": 14.102884615384616, "grad_norm": 4.100768089294434, "learning_rate": 8.427043573855001e-06, "loss": 0.0412, "step": 14667 }, { "epoch": 14.103846153846154, "grad_norm": 2.3729872703552246, "learning_rate": 8.42581341765543e-06, "loss": 0.0248, "step": 14668 }, { "epoch": 14.104807692307693, "grad_norm": 1.9774246215820312, "learning_rate": 8.424583285882614e-06, "loss": 0.0098, "step": 14669 }, { "epoch": 14.10576923076923, "grad_norm": 0.013063528575003147, "learning_rate": 8.42335317855564e-06, "loss": 0.0001, "step": 14670 }, { "epoch": 14.106730769230769, "grad_norm": 1.8351397514343262, "learning_rate": 8.422123095693598e-06, "loss": 0.0063, "step": 14671 }, { "epoch": 14.107692307692307, "grad_norm": 0.026149481534957886, "learning_rate": 8.420893037315574e-06, "loss": 0.0001, "step": 14672 }, { "epoch": 14.108653846153846, "grad_norm": 0.33639636635780334, "learning_rate": 8.419663003440657e-06, "loss": 0.0011, "step": 14673 }, { "epoch": 14.109615384615385, "grad_norm": 0.05532630905508995, "learning_rate": 8.418432994087931e-06, "loss": 0.0005, "step": 14674 }, { "epoch": 14.110576923076923, "grad_norm": 0.12470905482769012, "learning_rate": 8.417203009276482e-06, "loss": 0.0006, "step": 14675 }, { "epoch": 14.111538461538462, "grad_norm": 0.030080599710345268, "learning_rate": 8.4159730490254e-06, "loss": 0.0002, "step": 14676 }, { "epoch": 14.1125, "grad_norm": 0.03633251413702965, "learning_rate": 8.414743113353764e-06, "loss": 0.0001, "step": 14677 }, { "epoch": 14.113461538461538, "grad_norm": 0.7473471760749817, "learning_rate": 8.413513202280662e-06, "loss": 0.0023, "step": 14678 }, { "epoch": 14.114423076923076, "grad_norm": 2.4597010612487793, "learning_rate": 8.41228331582518e-06, "loss": 0.0075, "step": 14679 }, { "epoch": 14.115384615384615, "grad_norm": 0.09128788858652115, "learning_rate": 8.411053454006403e-06, "loss": 0.0004, "step": 14680 }, { "epoch": 14.116346153846154, "grad_norm": 0.11862370371818542, "learning_rate": 8.40982361684341e-06, "loss": 0.0007, "step": 14681 }, { "epoch": 14.117307692307692, "grad_norm": 2.090505361557007, "learning_rate": 8.408593804355287e-06, "loss": 0.008, "step": 14682 }, { "epoch": 14.118269230769231, "grad_norm": 1.2176425457000732, "learning_rate": 8.40736401656112e-06, "loss": 0.0063, "step": 14683 }, { "epoch": 14.11923076923077, "grad_norm": 0.022173387929797173, "learning_rate": 8.40613425347999e-06, "loss": 0.0003, "step": 14684 }, { "epoch": 14.120192307692308, "grad_norm": 1.2403396368026733, "learning_rate": 8.404904515130977e-06, "loss": 0.0085, "step": 14685 }, { "epoch": 14.121153846153845, "grad_norm": 1.0693286657333374, "learning_rate": 8.40367480153316e-06, "loss": 0.0345, "step": 14686 }, { "epoch": 14.122115384615384, "grad_norm": 1.3104751110076904, "learning_rate": 8.40244511270563e-06, "loss": 0.0038, "step": 14687 }, { "epoch": 14.123076923076923, "grad_norm": 0.04078163579106331, "learning_rate": 8.401215448667463e-06, "loss": 0.0003, "step": 14688 }, { "epoch": 14.124038461538461, "grad_norm": 0.8550576567649841, "learning_rate": 8.399985809437737e-06, "loss": 0.0036, "step": 14689 }, { "epoch": 14.125, "grad_norm": 0.01094095129519701, "learning_rate": 8.398756195035538e-06, "loss": 0.0001, "step": 14690 }, { "epoch": 14.125961538461539, "grad_norm": 0.023482322692871094, "learning_rate": 8.397526605479942e-06, "loss": 0.0002, "step": 14691 }, { "epoch": 14.126923076923077, "grad_norm": 0.08206463605165482, "learning_rate": 8.396297040790032e-06, "loss": 0.0004, "step": 14692 }, { "epoch": 14.127884615384616, "grad_norm": 0.12861239910125732, "learning_rate": 8.39506750098488e-06, "loss": 0.0006, "step": 14693 }, { "epoch": 14.128846153846155, "grad_norm": 1.6613677740097046, "learning_rate": 8.393837986083574e-06, "loss": 0.0109, "step": 14694 }, { "epoch": 14.129807692307692, "grad_norm": 0.028805186972022057, "learning_rate": 8.392608496105188e-06, "loss": 0.0003, "step": 14695 }, { "epoch": 14.13076923076923, "grad_norm": 0.8536558151245117, "learning_rate": 8.391379031068797e-06, "loss": 0.0021, "step": 14696 }, { "epoch": 14.131730769230769, "grad_norm": 0.013013253919780254, "learning_rate": 8.390149590993488e-06, "loss": 0.0002, "step": 14697 }, { "epoch": 14.132692307692308, "grad_norm": 0.340128630399704, "learning_rate": 8.38892017589833e-06, "loss": 0.0009, "step": 14698 }, { "epoch": 14.133653846153846, "grad_norm": 0.03452317789196968, "learning_rate": 8.387690785802403e-06, "loss": 0.0003, "step": 14699 }, { "epoch": 14.134615384615385, "grad_norm": 0.015040209516882896, "learning_rate": 8.38646142072478e-06, "loss": 0.0001, "step": 14700 }, { "epoch": 14.135576923076924, "grad_norm": 0.10517764836549759, "learning_rate": 8.385232080684543e-06, "loss": 0.0003, "step": 14701 }, { "epoch": 14.136538461538462, "grad_norm": 0.07888926565647125, "learning_rate": 8.384002765700764e-06, "loss": 0.0004, "step": 14702 }, { "epoch": 14.1375, "grad_norm": 0.015536051243543625, "learning_rate": 8.382773475792521e-06, "loss": 0.0001, "step": 14703 }, { "epoch": 14.138461538461538, "grad_norm": 2.397156238555908, "learning_rate": 8.381544210978883e-06, "loss": 0.0088, "step": 14704 }, { "epoch": 14.139423076923077, "grad_norm": 0.07377180457115173, "learning_rate": 8.380314971278932e-06, "loss": 0.0003, "step": 14705 }, { "epoch": 14.140384615384615, "grad_norm": 0.6729604005813599, "learning_rate": 8.379085756711738e-06, "loss": 0.0034, "step": 14706 }, { "epoch": 14.141346153846154, "grad_norm": 0.383428692817688, "learning_rate": 8.377856567296374e-06, "loss": 0.002, "step": 14707 }, { "epoch": 14.142307692307693, "grad_norm": 1.8599131107330322, "learning_rate": 8.376627403051918e-06, "loss": 0.0625, "step": 14708 }, { "epoch": 14.143269230769231, "grad_norm": 0.05180003121495247, "learning_rate": 8.37539826399744e-06, "loss": 0.0002, "step": 14709 }, { "epoch": 14.14423076923077, "grad_norm": 0.9289339184761047, "learning_rate": 8.374169150152011e-06, "loss": 0.0036, "step": 14710 }, { "epoch": 14.145192307692307, "grad_norm": 0.053835172206163406, "learning_rate": 8.372940061534704e-06, "loss": 0.0002, "step": 14711 }, { "epoch": 14.146153846153846, "grad_norm": 0.013841167092323303, "learning_rate": 8.371710998164595e-06, "loss": 0.0002, "step": 14712 }, { "epoch": 14.147115384615384, "grad_norm": 0.02309282310307026, "learning_rate": 8.370481960060753e-06, "loss": 0.0002, "step": 14713 }, { "epoch": 14.148076923076923, "grad_norm": 0.16849371790885925, "learning_rate": 8.369252947242246e-06, "loss": 0.0011, "step": 14714 }, { "epoch": 14.149038461538462, "grad_norm": 0.2476625293493271, "learning_rate": 8.368023959728146e-06, "loss": 0.0009, "step": 14715 }, { "epoch": 14.15, "grad_norm": 0.021398503333330154, "learning_rate": 8.366794997537527e-06, "loss": 0.0002, "step": 14716 }, { "epoch": 14.150961538461539, "grad_norm": 0.018000751733779907, "learning_rate": 8.365566060689455e-06, "loss": 0.0002, "step": 14717 }, { "epoch": 14.151923076923078, "grad_norm": 0.05846427381038666, "learning_rate": 8.364337149202999e-06, "loss": 0.0003, "step": 14718 }, { "epoch": 14.152884615384615, "grad_norm": 0.08541589975357056, "learning_rate": 8.363108263097232e-06, "loss": 0.0003, "step": 14719 }, { "epoch": 14.153846153846153, "grad_norm": 0.13902319967746735, "learning_rate": 8.361879402391222e-06, "loss": 0.0006, "step": 14720 }, { "epoch": 14.154807692307692, "grad_norm": 0.014390411786735058, "learning_rate": 8.360650567104034e-06, "loss": 0.0001, "step": 14721 }, { "epoch": 14.15576923076923, "grad_norm": 0.0456085205078125, "learning_rate": 8.359421757254737e-06, "loss": 0.0006, "step": 14722 }, { "epoch": 14.15673076923077, "grad_norm": 0.040456607937812805, "learning_rate": 8.358192972862401e-06, "loss": 0.0002, "step": 14723 }, { "epoch": 14.157692307692308, "grad_norm": 0.03258537873625755, "learning_rate": 8.35696421394609e-06, "loss": 0.0002, "step": 14724 }, { "epoch": 14.158653846153847, "grad_norm": 0.12261222302913666, "learning_rate": 8.355735480524874e-06, "loss": 0.0004, "step": 14725 }, { "epoch": 14.159615384615385, "grad_norm": 0.03705846145749092, "learning_rate": 8.354506772617814e-06, "loss": 0.0002, "step": 14726 }, { "epoch": 14.160576923076922, "grad_norm": 0.21538227796554565, "learning_rate": 8.353278090243983e-06, "loss": 0.0007, "step": 14727 }, { "epoch": 14.161538461538461, "grad_norm": 0.05135558918118477, "learning_rate": 8.352049433422442e-06, "loss": 0.0004, "step": 14728 }, { "epoch": 14.1625, "grad_norm": 1.072664499282837, "learning_rate": 8.350820802172256e-06, "loss": 0.0137, "step": 14729 }, { "epoch": 14.163461538461538, "grad_norm": 0.03513398393988609, "learning_rate": 8.349592196512491e-06, "loss": 0.0001, "step": 14730 }, { "epoch": 14.164423076923077, "grad_norm": 0.007609700318425894, "learning_rate": 8.348363616462212e-06, "loss": 0.0001, "step": 14731 }, { "epoch": 14.165384615384616, "grad_norm": 0.0419040285050869, "learning_rate": 8.347135062040484e-06, "loss": 0.0002, "step": 14732 }, { "epoch": 14.166346153846154, "grad_norm": 0.011411811225116253, "learning_rate": 8.345906533266364e-06, "loss": 0.0001, "step": 14733 }, { "epoch": 14.167307692307693, "grad_norm": 0.16025035083293915, "learning_rate": 8.344678030158923e-06, "loss": 0.0006, "step": 14734 }, { "epoch": 14.16826923076923, "grad_norm": 0.022880900651216507, "learning_rate": 8.34344955273722e-06, "loss": 0.0001, "step": 14735 }, { "epoch": 14.169230769230769, "grad_norm": 0.2886158227920532, "learning_rate": 8.342221101020317e-06, "loss": 0.0024, "step": 14736 }, { "epoch": 14.170192307692307, "grad_norm": 0.042522408068180084, "learning_rate": 8.34099267502728e-06, "loss": 0.0003, "step": 14737 }, { "epoch": 14.171153846153846, "grad_norm": 0.05377912148833275, "learning_rate": 8.339764274777165e-06, "loss": 0.0006, "step": 14738 }, { "epoch": 14.172115384615385, "grad_norm": 0.010951709933578968, "learning_rate": 8.338535900289038e-06, "loss": 0.0001, "step": 14739 }, { "epoch": 14.173076923076923, "grad_norm": 1.213399052619934, "learning_rate": 8.337307551581955e-06, "loss": 0.0693, "step": 14740 }, { "epoch": 14.174038461538462, "grad_norm": 1.828489899635315, "learning_rate": 8.33607922867498e-06, "loss": 0.012, "step": 14741 }, { "epoch": 14.175, "grad_norm": 1.472525715827942, "learning_rate": 8.334850931587173e-06, "loss": 0.0085, "step": 14742 }, { "epoch": 14.175961538461538, "grad_norm": 0.8450919985771179, "learning_rate": 8.333622660337592e-06, "loss": 0.0041, "step": 14743 }, { "epoch": 14.176923076923076, "grad_norm": 1.0580183267593384, "learning_rate": 8.332394414945293e-06, "loss": 0.0065, "step": 14744 }, { "epoch": 14.177884615384615, "grad_norm": 0.04879273101687431, "learning_rate": 8.331166195429342e-06, "loss": 0.0003, "step": 14745 }, { "epoch": 14.178846153846154, "grad_norm": 3.973668336868286, "learning_rate": 8.329938001808793e-06, "loss": 0.0199, "step": 14746 }, { "epoch": 14.179807692307692, "grad_norm": 0.4350896179676056, "learning_rate": 8.328709834102703e-06, "loss": 0.0013, "step": 14747 }, { "epoch": 14.180769230769231, "grad_norm": 1.7174879312515259, "learning_rate": 8.327481692330132e-06, "loss": 0.0088, "step": 14748 }, { "epoch": 14.18173076923077, "grad_norm": 0.016151636838912964, "learning_rate": 8.326253576510138e-06, "loss": 0.0002, "step": 14749 }, { "epoch": 14.182692307692308, "grad_norm": 0.22499722242355347, "learning_rate": 8.325025486661776e-06, "loss": 0.0014, "step": 14750 }, { "epoch": 14.183653846153845, "grad_norm": 0.055001407861709595, "learning_rate": 8.3237974228041e-06, "loss": 0.0006, "step": 14751 }, { "epoch": 14.184615384615384, "grad_norm": 0.06770603358745575, "learning_rate": 8.32256938495617e-06, "loss": 0.0004, "step": 14752 }, { "epoch": 14.185576923076923, "grad_norm": 0.027172010391950607, "learning_rate": 8.32134137313704e-06, "loss": 0.0002, "step": 14753 }, { "epoch": 14.186538461538461, "grad_norm": 0.016515318304300308, "learning_rate": 8.320113387365765e-06, "loss": 0.0002, "step": 14754 }, { "epoch": 14.1875, "grad_norm": 0.5742295980453491, "learning_rate": 8.318885427661399e-06, "loss": 0.0017, "step": 14755 }, { "epoch": 14.188461538461539, "grad_norm": 0.039635829627513885, "learning_rate": 8.317657494042997e-06, "loss": 0.0002, "step": 14756 }, { "epoch": 14.189423076923077, "grad_norm": 0.04457349702715874, "learning_rate": 8.316429586529616e-06, "loss": 0.0003, "step": 14757 }, { "epoch": 14.190384615384616, "grad_norm": 1.410850167274475, "learning_rate": 8.315201705140303e-06, "loss": 0.0067, "step": 14758 }, { "epoch": 14.191346153846155, "grad_norm": 0.9270100593566895, "learning_rate": 8.313973849894116e-06, "loss": 0.0064, "step": 14759 }, { "epoch": 14.192307692307692, "grad_norm": 3.5152089595794678, "learning_rate": 8.312746020810107e-06, "loss": 0.0515, "step": 14760 }, { "epoch": 14.19326923076923, "grad_norm": 0.7470545768737793, "learning_rate": 8.311518217907328e-06, "loss": 0.0009, "step": 14761 }, { "epoch": 14.194230769230769, "grad_norm": 0.04224860295653343, "learning_rate": 8.310290441204829e-06, "loss": 0.0002, "step": 14762 }, { "epoch": 14.195192307692308, "grad_norm": 1.4835736751556396, "learning_rate": 8.309062690721664e-06, "loss": 0.0082, "step": 14763 }, { "epoch": 14.196153846153846, "grad_norm": 0.08958730101585388, "learning_rate": 8.307834966476885e-06, "loss": 0.0006, "step": 14764 }, { "epoch": 14.197115384615385, "grad_norm": 0.13218101859092712, "learning_rate": 8.30660726848954e-06, "loss": 0.0008, "step": 14765 }, { "epoch": 14.198076923076924, "grad_norm": 1.4361019134521484, "learning_rate": 8.305379596778677e-06, "loss": 0.0053, "step": 14766 }, { "epoch": 14.199038461538462, "grad_norm": 0.07739367336034775, "learning_rate": 8.304151951363352e-06, "loss": 0.0004, "step": 14767 }, { "epoch": 14.2, "grad_norm": 0.02736051380634308, "learning_rate": 8.302924332262611e-06, "loss": 0.0003, "step": 14768 }, { "epoch": 14.200961538461538, "grad_norm": 0.0364251434803009, "learning_rate": 8.301696739495502e-06, "loss": 0.0002, "step": 14769 }, { "epoch": 14.201923076923077, "grad_norm": 0.1798412948846817, "learning_rate": 8.300469173081076e-06, "loss": 0.0005, "step": 14770 }, { "epoch": 14.202884615384615, "grad_norm": 0.271334707736969, "learning_rate": 8.299241633038381e-06, "loss": 0.0013, "step": 14771 }, { "epoch": 14.203846153846154, "grad_norm": 0.012630094774067402, "learning_rate": 8.298014119386466e-06, "loss": 0.0001, "step": 14772 }, { "epoch": 14.204807692307693, "grad_norm": 0.24116133153438568, "learning_rate": 8.296786632144373e-06, "loss": 0.0005, "step": 14773 }, { "epoch": 14.205769230769231, "grad_norm": 0.01399214193224907, "learning_rate": 8.295559171331155e-06, "loss": 0.0001, "step": 14774 }, { "epoch": 14.20673076923077, "grad_norm": 1.0616860389709473, "learning_rate": 8.294331736965855e-06, "loss": 0.0077, "step": 14775 }, { "epoch": 14.207692307692307, "grad_norm": 0.024054517969489098, "learning_rate": 8.293104329067519e-06, "loss": 0.0002, "step": 14776 }, { "epoch": 14.208653846153846, "grad_norm": 0.10847080498933792, "learning_rate": 8.291876947655197e-06, "loss": 0.0003, "step": 14777 }, { "epoch": 14.209615384615384, "grad_norm": 0.2551976442337036, "learning_rate": 8.29064959274793e-06, "loss": 0.0016, "step": 14778 }, { "epoch": 14.210576923076923, "grad_norm": 0.008328154683113098, "learning_rate": 8.289422264364765e-06, "loss": 0.0001, "step": 14779 }, { "epoch": 14.211538461538462, "grad_norm": 0.0953812226653099, "learning_rate": 8.288194962524744e-06, "loss": 0.0003, "step": 14780 }, { "epoch": 14.2125, "grad_norm": 0.08415637910366058, "learning_rate": 8.286967687246916e-06, "loss": 0.0004, "step": 14781 }, { "epoch": 14.213461538461539, "grad_norm": 0.01794569566845894, "learning_rate": 8.285740438550321e-06, "loss": 0.0001, "step": 14782 }, { "epoch": 14.214423076923078, "grad_norm": 0.0212288498878479, "learning_rate": 8.284513216454005e-06, "loss": 0.0002, "step": 14783 }, { "epoch": 14.215384615384615, "grad_norm": 0.08953847736120224, "learning_rate": 8.283286020977004e-06, "loss": 0.0008, "step": 14784 }, { "epoch": 14.216346153846153, "grad_norm": 0.2424810528755188, "learning_rate": 8.282058852138369e-06, "loss": 0.0006, "step": 14785 }, { "epoch": 14.217307692307692, "grad_norm": 0.07665721327066422, "learning_rate": 8.280831709957138e-06, "loss": 0.0004, "step": 14786 }, { "epoch": 14.21826923076923, "grad_norm": 0.04640624299645424, "learning_rate": 8.279604594452351e-06, "loss": 0.0003, "step": 14787 }, { "epoch": 14.21923076923077, "grad_norm": 0.031625114381313324, "learning_rate": 8.278377505643054e-06, "loss": 0.0002, "step": 14788 }, { "epoch": 14.220192307692308, "grad_norm": 0.8765597939491272, "learning_rate": 8.277150443548285e-06, "loss": 0.0017, "step": 14789 }, { "epoch": 14.221153846153847, "grad_norm": 0.013657393865287304, "learning_rate": 8.275923408187086e-06, "loss": 0.0001, "step": 14790 }, { "epoch": 14.222115384615385, "grad_norm": 0.20393440127372742, "learning_rate": 8.274696399578492e-06, "loss": 0.0009, "step": 14791 }, { "epoch": 14.223076923076922, "grad_norm": 0.04971400648355484, "learning_rate": 8.273469417741547e-06, "loss": 0.0002, "step": 14792 }, { "epoch": 14.224038461538461, "grad_norm": 0.01441140752285719, "learning_rate": 8.272242462695291e-06, "loss": 0.0002, "step": 14793 }, { "epoch": 14.225, "grad_norm": 0.9794310331344604, "learning_rate": 8.271015534458762e-06, "loss": 0.0057, "step": 14794 }, { "epoch": 14.225961538461538, "grad_norm": 2.0842294692993164, "learning_rate": 8.269788633050994e-06, "loss": 0.0271, "step": 14795 }, { "epoch": 14.226923076923077, "grad_norm": 0.19119644165039062, "learning_rate": 8.26856175849103e-06, "loss": 0.0005, "step": 14796 }, { "epoch": 14.227884615384616, "grad_norm": 0.6223405599594116, "learning_rate": 8.267334910797907e-06, "loss": 0.0031, "step": 14797 }, { "epoch": 14.228846153846154, "grad_norm": 0.2220667004585266, "learning_rate": 8.266108089990658e-06, "loss": 0.0006, "step": 14798 }, { "epoch": 14.229807692307693, "grad_norm": 0.7054035067558289, "learning_rate": 8.264881296088325e-06, "loss": 0.0015, "step": 14799 }, { "epoch": 14.23076923076923, "grad_norm": 0.01965978369116783, "learning_rate": 8.263654529109941e-06, "loss": 0.0001, "step": 14800 }, { "epoch": 14.231730769230769, "grad_norm": 0.03921674191951752, "learning_rate": 8.262427789074543e-06, "loss": 0.0002, "step": 14801 }, { "epoch": 14.232692307692307, "grad_norm": 0.1079614982008934, "learning_rate": 8.261201076001164e-06, "loss": 0.0004, "step": 14802 }, { "epoch": 14.233653846153846, "grad_norm": 3.5417332649230957, "learning_rate": 8.259974389908842e-06, "loss": 0.0181, "step": 14803 }, { "epoch": 14.234615384615385, "grad_norm": 1.9624736309051514, "learning_rate": 8.258747730816613e-06, "loss": 0.01, "step": 14804 }, { "epoch": 14.235576923076923, "grad_norm": 0.04173190891742706, "learning_rate": 8.257521098743507e-06, "loss": 0.0002, "step": 14805 }, { "epoch": 14.236538461538462, "grad_norm": 0.02541053295135498, "learning_rate": 8.256294493708557e-06, "loss": 0.0002, "step": 14806 }, { "epoch": 14.2375, "grad_norm": 0.06854762881994247, "learning_rate": 8.255067915730802e-06, "loss": 0.0005, "step": 14807 }, { "epoch": 14.238461538461538, "grad_norm": 0.07571201026439667, "learning_rate": 8.253841364829272e-06, "loss": 0.0003, "step": 14808 }, { "epoch": 14.239423076923076, "grad_norm": 0.2983695864677429, "learning_rate": 8.252614841022996e-06, "loss": 0.0007, "step": 14809 }, { "epoch": 14.240384615384615, "grad_norm": 0.006082604639232159, "learning_rate": 8.25138834433101e-06, "loss": 0.0001, "step": 14810 }, { "epoch": 14.241346153846154, "grad_norm": 0.19059906899929047, "learning_rate": 8.250161874772347e-06, "loss": 0.0011, "step": 14811 }, { "epoch": 14.242307692307692, "grad_norm": 0.9121437668800354, "learning_rate": 8.248935432366036e-06, "loss": 0.0035, "step": 14812 }, { "epoch": 14.243269230769231, "grad_norm": 0.35937684774398804, "learning_rate": 8.247709017131104e-06, "loss": 0.0008, "step": 14813 }, { "epoch": 14.24423076923077, "grad_norm": 0.9021456241607666, "learning_rate": 8.246482629086588e-06, "loss": 0.0017, "step": 14814 }, { "epoch": 14.245192307692308, "grad_norm": 0.03336828202009201, "learning_rate": 8.245256268251516e-06, "loss": 0.0003, "step": 14815 }, { "epoch": 14.246153846153845, "grad_norm": 0.02514388971030712, "learning_rate": 8.244029934644916e-06, "loss": 0.0003, "step": 14816 }, { "epoch": 14.247115384615384, "grad_norm": 0.025730380788445473, "learning_rate": 8.242803628285814e-06, "loss": 0.0002, "step": 14817 }, { "epoch": 14.248076923076923, "grad_norm": 0.7780828475952148, "learning_rate": 8.241577349193247e-06, "loss": 0.004, "step": 14818 }, { "epoch": 14.249038461538461, "grad_norm": 0.024194011464715004, "learning_rate": 8.240351097386238e-06, "loss": 0.0002, "step": 14819 }, { "epoch": 14.25, "grad_norm": 0.19122974574565887, "learning_rate": 8.239124872883813e-06, "loss": 0.0004, "step": 14820 }, { "epoch": 14.250961538461539, "grad_norm": 0.07246549427509308, "learning_rate": 8.237898675705003e-06, "loss": 0.0003, "step": 14821 }, { "epoch": 14.251923076923077, "grad_norm": 0.08527448028326035, "learning_rate": 8.236672505868834e-06, "loss": 0.0003, "step": 14822 }, { "epoch": 14.252884615384616, "grad_norm": 1.2607784271240234, "learning_rate": 8.235446363394334e-06, "loss": 0.0079, "step": 14823 }, { "epoch": 14.253846153846155, "grad_norm": 3.0221824645996094, "learning_rate": 8.234220248300524e-06, "loss": 0.0734, "step": 14824 }, { "epoch": 14.254807692307692, "grad_norm": 0.023203197866678238, "learning_rate": 8.232994160606436e-06, "loss": 0.0001, "step": 14825 }, { "epoch": 14.25576923076923, "grad_norm": 0.5979776978492737, "learning_rate": 8.231768100331092e-06, "loss": 0.0029, "step": 14826 }, { "epoch": 14.256730769230769, "grad_norm": 0.023768533021211624, "learning_rate": 8.230542067493517e-06, "loss": 0.0002, "step": 14827 }, { "epoch": 14.257692307692308, "grad_norm": 0.028564538806676865, "learning_rate": 8.229316062112736e-06, "loss": 0.0002, "step": 14828 }, { "epoch": 14.258653846153846, "grad_norm": 0.9090267419815063, "learning_rate": 8.228090084207773e-06, "loss": 0.0042, "step": 14829 }, { "epoch": 14.259615384615385, "grad_norm": 0.07655847072601318, "learning_rate": 8.226864133797652e-06, "loss": 0.0002, "step": 14830 }, { "epoch": 14.260576923076924, "grad_norm": 0.738818883895874, "learning_rate": 8.225638210901394e-06, "loss": 0.0028, "step": 14831 }, { "epoch": 14.261538461538462, "grad_norm": 0.04835710674524307, "learning_rate": 8.224412315538025e-06, "loss": 0.0003, "step": 14832 }, { "epoch": 14.2625, "grad_norm": 0.04430685564875603, "learning_rate": 8.223186447726564e-06, "loss": 0.0003, "step": 14833 }, { "epoch": 14.263461538461538, "grad_norm": 0.011859585531055927, "learning_rate": 8.221960607486035e-06, "loss": 0.0002, "step": 14834 }, { "epoch": 14.264423076923077, "grad_norm": 0.09669563174247742, "learning_rate": 8.220734794835459e-06, "loss": 0.0004, "step": 14835 }, { "epoch": 14.265384615384615, "grad_norm": 0.37724876403808594, "learning_rate": 8.219509009793857e-06, "loss": 0.0015, "step": 14836 }, { "epoch": 14.266346153846154, "grad_norm": 0.01406086515635252, "learning_rate": 8.21828325238025e-06, "loss": 0.0001, "step": 14837 }, { "epoch": 14.267307692307693, "grad_norm": 0.04548076167702675, "learning_rate": 8.217057522613655e-06, "loss": 0.0003, "step": 14838 }, { "epoch": 14.268269230769231, "grad_norm": 0.05622022971510887, "learning_rate": 8.215831820513098e-06, "loss": 0.0004, "step": 14839 }, { "epoch": 14.26923076923077, "grad_norm": 2.811319351196289, "learning_rate": 8.214606146097594e-06, "loss": 0.0301, "step": 14840 }, { "epoch": 14.270192307692307, "grad_norm": 0.022076399996876717, "learning_rate": 8.21338049938616e-06, "loss": 0.0001, "step": 14841 }, { "epoch": 14.271153846153846, "grad_norm": 0.006595437414944172, "learning_rate": 8.212154880397817e-06, "loss": 0.0001, "step": 14842 }, { "epoch": 14.272115384615384, "grad_norm": 0.047546423971652985, "learning_rate": 8.210929289151587e-06, "loss": 0.0002, "step": 14843 }, { "epoch": 14.273076923076923, "grad_norm": 0.0489959754049778, "learning_rate": 8.209703725666482e-06, "loss": 0.0001, "step": 14844 }, { "epoch": 14.274038461538462, "grad_norm": 0.05243153125047684, "learning_rate": 8.20847818996152e-06, "loss": 0.0004, "step": 14845 }, { "epoch": 14.275, "grad_norm": 0.04118429124355316, "learning_rate": 8.207252682055716e-06, "loss": 0.0003, "step": 14846 }, { "epoch": 14.275961538461539, "grad_norm": 0.23419351875782013, "learning_rate": 8.206027201968092e-06, "loss": 0.0014, "step": 14847 }, { "epoch": 14.276923076923078, "grad_norm": 0.029762068763375282, "learning_rate": 8.204801749717661e-06, "loss": 0.0002, "step": 14848 }, { "epoch": 14.277884615384615, "grad_norm": 1.091647982597351, "learning_rate": 8.203576325323435e-06, "loss": 0.0037, "step": 14849 }, { "epoch": 14.278846153846153, "grad_norm": 0.39294829964637756, "learning_rate": 8.202350928804435e-06, "loss": 0.0038, "step": 14850 }, { "epoch": 14.279807692307692, "grad_norm": 0.8543919324874878, "learning_rate": 8.20112556017967e-06, "loss": 0.0032, "step": 14851 }, { "epoch": 14.28076923076923, "grad_norm": 0.599780797958374, "learning_rate": 8.19990021946816e-06, "loss": 0.002, "step": 14852 }, { "epoch": 14.28173076923077, "grad_norm": 0.03147837147116661, "learning_rate": 8.19867490668891e-06, "loss": 0.0002, "step": 14853 }, { "epoch": 14.282692307692308, "grad_norm": 0.004900314379483461, "learning_rate": 8.197449621860944e-06, "loss": 0.0, "step": 14854 }, { "epoch": 14.283653846153847, "grad_norm": 2.405457019805908, "learning_rate": 8.196224365003267e-06, "loss": 0.0091, "step": 14855 }, { "epoch": 14.284615384615385, "grad_norm": 0.037836793810129166, "learning_rate": 8.194999136134894e-06, "loss": 0.0003, "step": 14856 }, { "epoch": 14.285576923076922, "grad_norm": 1.0018359422683716, "learning_rate": 8.193773935274836e-06, "loss": 0.0042, "step": 14857 }, { "epoch": 14.286538461538461, "grad_norm": 0.010469241067767143, "learning_rate": 8.192548762442108e-06, "loss": 0.0001, "step": 14858 }, { "epoch": 14.2875, "grad_norm": 0.8337635397911072, "learning_rate": 8.191323617655717e-06, "loss": 0.003, "step": 14859 }, { "epoch": 14.288461538461538, "grad_norm": 1.9681940078735352, "learning_rate": 8.190098500934673e-06, "loss": 0.0253, "step": 14860 }, { "epoch": 14.289423076923077, "grad_norm": 0.07594864815473557, "learning_rate": 8.18887341229799e-06, "loss": 0.0004, "step": 14861 }, { "epoch": 14.290384615384616, "grad_norm": 0.10416281223297119, "learning_rate": 8.187648351764676e-06, "loss": 0.0003, "step": 14862 }, { "epoch": 14.291346153846154, "grad_norm": 0.2314247041940689, "learning_rate": 8.186423319353742e-06, "loss": 0.0006, "step": 14863 }, { "epoch": 14.292307692307693, "grad_norm": 0.3883597254753113, "learning_rate": 8.185198315084192e-06, "loss": 0.0011, "step": 14864 }, { "epoch": 14.29326923076923, "grad_norm": 0.03047819808125496, "learning_rate": 8.18397333897504e-06, "loss": 0.0003, "step": 14865 }, { "epoch": 14.294230769230769, "grad_norm": 1.8219300508499146, "learning_rate": 8.182748391045293e-06, "loss": 0.0115, "step": 14866 }, { "epoch": 14.295192307692307, "grad_norm": 0.06956397742033005, "learning_rate": 8.181523471313952e-06, "loss": 0.0002, "step": 14867 }, { "epoch": 14.296153846153846, "grad_norm": 0.009437797591090202, "learning_rate": 8.180298579800034e-06, "loss": 0.0001, "step": 14868 }, { "epoch": 14.297115384615385, "grad_norm": 1.0802603960037231, "learning_rate": 8.179073716522542e-06, "loss": 0.0046, "step": 14869 }, { "epoch": 14.298076923076923, "grad_norm": 0.062429264187812805, "learning_rate": 8.177848881500481e-06, "loss": 0.0003, "step": 14870 }, { "epoch": 14.299038461538462, "grad_norm": 0.01883852481842041, "learning_rate": 8.176624074752854e-06, "loss": 0.0001, "step": 14871 }, { "epoch": 14.3, "grad_norm": 2.392728090286255, "learning_rate": 8.175399296298673e-06, "loss": 0.0301, "step": 14872 }, { "epoch": 14.300961538461538, "grad_norm": 0.2498096376657486, "learning_rate": 8.17417454615694e-06, "loss": 0.0006, "step": 14873 }, { "epoch": 14.301923076923076, "grad_norm": 0.044955335557460785, "learning_rate": 8.172949824346659e-06, "loss": 0.0002, "step": 14874 }, { "epoch": 14.302884615384615, "grad_norm": 0.0913430005311966, "learning_rate": 8.171725130886831e-06, "loss": 0.0007, "step": 14875 }, { "epoch": 14.303846153846154, "grad_norm": 1.0786272287368774, "learning_rate": 8.170500465796468e-06, "loss": 0.006, "step": 14876 }, { "epoch": 14.304807692307692, "grad_norm": 0.22804543375968933, "learning_rate": 8.169275829094566e-06, "loss": 0.0006, "step": 14877 }, { "epoch": 14.305769230769231, "grad_norm": 0.026277678087353706, "learning_rate": 8.168051220800129e-06, "loss": 0.0002, "step": 14878 }, { "epoch": 14.30673076923077, "grad_norm": 0.033386703580617905, "learning_rate": 8.166826640932161e-06, "loss": 0.0003, "step": 14879 }, { "epoch": 14.307692307692308, "grad_norm": 0.03580905869603157, "learning_rate": 8.165602089509665e-06, "loss": 0.0002, "step": 14880 }, { "epoch": 14.308653846153845, "grad_norm": 0.023851480334997177, "learning_rate": 8.16437756655164e-06, "loss": 0.0001, "step": 14881 }, { "epoch": 14.309615384615384, "grad_norm": 0.04171600937843323, "learning_rate": 8.163153072077086e-06, "loss": 0.0002, "step": 14882 }, { "epoch": 14.310576923076923, "grad_norm": 0.5091095566749573, "learning_rate": 8.161928606105007e-06, "loss": 0.003, "step": 14883 }, { "epoch": 14.311538461538461, "grad_norm": 0.6889264583587646, "learning_rate": 8.160704168654401e-06, "loss": 0.0014, "step": 14884 }, { "epoch": 14.3125, "grad_norm": 2.0788309574127197, "learning_rate": 8.159479759744267e-06, "loss": 0.0639, "step": 14885 }, { "epoch": 14.313461538461539, "grad_norm": 4.361705780029297, "learning_rate": 8.158255379393606e-06, "loss": 0.0885, "step": 14886 }, { "epoch": 14.314423076923077, "grad_norm": 1.3326613903045654, "learning_rate": 8.157031027621416e-06, "loss": 0.0067, "step": 14887 }, { "epoch": 14.315384615384616, "grad_norm": 0.020334217697381973, "learning_rate": 8.155806704446696e-06, "loss": 0.0001, "step": 14888 }, { "epoch": 14.316346153846155, "grad_norm": 0.0604076012969017, "learning_rate": 8.154582409888441e-06, "loss": 0.0004, "step": 14889 }, { "epoch": 14.317307692307692, "grad_norm": 1.541367769241333, "learning_rate": 8.153358143965652e-06, "loss": 0.0056, "step": 14890 }, { "epoch": 14.31826923076923, "grad_norm": 0.01745831035077572, "learning_rate": 8.152133906697328e-06, "loss": 0.0002, "step": 14891 }, { "epoch": 14.319230769230769, "grad_norm": 0.008995299227535725, "learning_rate": 8.15090969810246e-06, "loss": 0.0001, "step": 14892 }, { "epoch": 14.320192307692308, "grad_norm": 0.04398178681731224, "learning_rate": 8.149685518200044e-06, "loss": 0.0003, "step": 14893 }, { "epoch": 14.321153846153846, "grad_norm": 0.06530982255935669, "learning_rate": 8.148461367009081e-06, "loss": 0.0005, "step": 14894 }, { "epoch": 14.322115384615385, "grad_norm": 0.1017819494009018, "learning_rate": 8.147237244548563e-06, "loss": 0.0004, "step": 14895 }, { "epoch": 14.323076923076924, "grad_norm": 1.318725347518921, "learning_rate": 8.146013150837484e-06, "loss": 0.0059, "step": 14896 }, { "epoch": 14.324038461538462, "grad_norm": 0.44345369935035706, "learning_rate": 8.14478908589484e-06, "loss": 0.0016, "step": 14897 }, { "epoch": 14.325, "grad_norm": 1.897047996520996, "learning_rate": 8.143565049739625e-06, "loss": 0.0045, "step": 14898 }, { "epoch": 14.325961538461538, "grad_norm": 0.1658174991607666, "learning_rate": 8.142341042390832e-06, "loss": 0.0008, "step": 14899 }, { "epoch": 14.326923076923077, "grad_norm": 0.17288686335086823, "learning_rate": 8.141117063867452e-06, "loss": 0.001, "step": 14900 }, { "epoch": 14.327884615384615, "grad_norm": 0.024882515892386436, "learning_rate": 8.139893114188482e-06, "loss": 0.0001, "step": 14901 }, { "epoch": 14.328846153846154, "grad_norm": 0.06609050184488297, "learning_rate": 8.13866919337291e-06, "loss": 0.0003, "step": 14902 }, { "epoch": 14.329807692307693, "grad_norm": 0.10020368546247482, "learning_rate": 8.13744530143973e-06, "loss": 0.0006, "step": 14903 }, { "epoch": 14.330769230769231, "grad_norm": 1.815438151359558, "learning_rate": 8.13622143840793e-06, "loss": 0.0381, "step": 14904 }, { "epoch": 14.33173076923077, "grad_norm": 0.14445078372955322, "learning_rate": 8.134997604296506e-06, "loss": 0.001, "step": 14905 }, { "epoch": 14.332692307692307, "grad_norm": 3.3726792335510254, "learning_rate": 8.133773799124446e-06, "loss": 0.0924, "step": 14906 }, { "epoch": 14.333653846153846, "grad_norm": 0.18822291493415833, "learning_rate": 8.132550022910737e-06, "loss": 0.0006, "step": 14907 }, { "epoch": 14.334615384615384, "grad_norm": 0.6081699728965759, "learning_rate": 8.131326275674371e-06, "loss": 0.0021, "step": 14908 }, { "epoch": 14.335576923076923, "grad_norm": 0.01358020305633545, "learning_rate": 8.130102557434338e-06, "loss": 0.0001, "step": 14909 }, { "epoch": 14.336538461538462, "grad_norm": 0.6577103137969971, "learning_rate": 8.128878868209626e-06, "loss": 0.0031, "step": 14910 }, { "epoch": 14.3375, "grad_norm": 1.2094838619232178, "learning_rate": 8.127655208019219e-06, "loss": 0.0044, "step": 14911 }, { "epoch": 14.338461538461539, "grad_norm": 0.035033684223890305, "learning_rate": 8.12643157688211e-06, "loss": 0.0004, "step": 14912 }, { "epoch": 14.339423076923078, "grad_norm": 0.05421667918562889, "learning_rate": 8.125207974817284e-06, "loss": 0.0006, "step": 14913 }, { "epoch": 14.340384615384615, "grad_norm": 0.011719781905412674, "learning_rate": 8.123984401843728e-06, "loss": 0.0002, "step": 14914 }, { "epoch": 14.341346153846153, "grad_norm": 0.40090373158454895, "learning_rate": 8.122760857980426e-06, "loss": 0.0019, "step": 14915 }, { "epoch": 14.342307692307692, "grad_norm": 0.03934239596128464, "learning_rate": 8.121537343246368e-06, "loss": 0.0004, "step": 14916 }, { "epoch": 14.34326923076923, "grad_norm": 0.08603620529174805, "learning_rate": 8.120313857660537e-06, "loss": 0.0004, "step": 14917 }, { "epoch": 14.34423076923077, "grad_norm": 0.17206116020679474, "learning_rate": 8.119090401241917e-06, "loss": 0.0007, "step": 14918 }, { "epoch": 14.345192307692308, "grad_norm": 1.2191671133041382, "learning_rate": 8.117866974009496e-06, "loss": 0.0411, "step": 14919 }, { "epoch": 14.346153846153847, "grad_norm": 0.035055410116910934, "learning_rate": 8.116643575982254e-06, "loss": 0.0003, "step": 14920 }, { "epoch": 14.347115384615385, "grad_norm": 0.08609174937009811, "learning_rate": 8.115420207179177e-06, "loss": 0.0006, "step": 14921 }, { "epoch": 14.348076923076922, "grad_norm": 0.5990473031997681, "learning_rate": 8.114196867619245e-06, "loss": 0.002, "step": 14922 }, { "epoch": 14.349038461538461, "grad_norm": 0.03148910775780678, "learning_rate": 8.112973557321445e-06, "loss": 0.0004, "step": 14923 }, { "epoch": 14.35, "grad_norm": 0.08993968367576599, "learning_rate": 8.111750276304755e-06, "loss": 0.0005, "step": 14924 }, { "epoch": 14.350961538461538, "grad_norm": 0.09375298768281937, "learning_rate": 8.110527024588162e-06, "loss": 0.0006, "step": 14925 }, { "epoch": 14.351923076923077, "grad_norm": 2.852668046951294, "learning_rate": 8.10930380219064e-06, "loss": 0.0484, "step": 14926 }, { "epoch": 14.352884615384616, "grad_norm": 0.3809249699115753, "learning_rate": 8.108080609131176e-06, "loss": 0.0009, "step": 14927 }, { "epoch": 14.353846153846154, "grad_norm": 0.342816561460495, "learning_rate": 8.106857445428748e-06, "loss": 0.0021, "step": 14928 }, { "epoch": 14.354807692307693, "grad_norm": 0.14100560545921326, "learning_rate": 8.105634311102336e-06, "loss": 0.0006, "step": 14929 }, { "epoch": 14.35576923076923, "grad_norm": 1.7311145067214966, "learning_rate": 8.10441120617092e-06, "loss": 0.0228, "step": 14930 }, { "epoch": 14.356730769230769, "grad_norm": 0.48659244179725647, "learning_rate": 8.103188130653478e-06, "loss": 0.0016, "step": 14931 }, { "epoch": 14.357692307692307, "grad_norm": 0.01089208759367466, "learning_rate": 8.10196508456899e-06, "loss": 0.0001, "step": 14932 }, { "epoch": 14.358653846153846, "grad_norm": 0.014464487321674824, "learning_rate": 8.100742067936432e-06, "loss": 0.0001, "step": 14933 }, { "epoch": 14.359615384615385, "grad_norm": 0.050350625067949295, "learning_rate": 8.099519080774784e-06, "loss": 0.0004, "step": 14934 }, { "epoch": 14.360576923076923, "grad_norm": 0.16717429459095, "learning_rate": 8.098296123103023e-06, "loss": 0.0012, "step": 14935 }, { "epoch": 14.361538461538462, "grad_norm": 1.3651275634765625, "learning_rate": 8.097073194940123e-06, "loss": 0.0182, "step": 14936 }, { "epoch": 14.3625, "grad_norm": 0.7341782450675964, "learning_rate": 8.09585029630506e-06, "loss": 0.011, "step": 14937 }, { "epoch": 14.363461538461538, "grad_norm": 0.023520631715655327, "learning_rate": 8.094627427216816e-06, "loss": 0.0002, "step": 14938 }, { "epoch": 14.364423076923076, "grad_norm": 0.013829009607434273, "learning_rate": 8.09340458769436e-06, "loss": 0.0001, "step": 14939 }, { "epoch": 14.365384615384615, "grad_norm": 0.04019162058830261, "learning_rate": 8.092181777756669e-06, "loss": 0.0002, "step": 14940 }, { "epoch": 14.366346153846154, "grad_norm": 0.030612334609031677, "learning_rate": 8.090958997422718e-06, "loss": 0.0004, "step": 14941 }, { "epoch": 14.367307692307692, "grad_norm": 0.030304117128252983, "learning_rate": 8.08973624671148e-06, "loss": 0.0002, "step": 14942 }, { "epoch": 14.368269230769231, "grad_norm": 0.14908447861671448, "learning_rate": 8.08851352564193e-06, "loss": 0.0011, "step": 14943 }, { "epoch": 14.36923076923077, "grad_norm": 0.5422585010528564, "learning_rate": 8.087290834233037e-06, "loss": 0.0027, "step": 14944 }, { "epoch": 14.370192307692308, "grad_norm": 1.927594780921936, "learning_rate": 8.086068172503778e-06, "loss": 0.0885, "step": 14945 }, { "epoch": 14.371153846153845, "grad_norm": 0.20579814910888672, "learning_rate": 8.084845540473127e-06, "loss": 0.001, "step": 14946 }, { "epoch": 14.372115384615384, "grad_norm": 6.731496810913086, "learning_rate": 8.08362293816005e-06, "loss": 0.0731, "step": 14947 }, { "epoch": 14.373076923076923, "grad_norm": 0.9591445326805115, "learning_rate": 8.082400365583517e-06, "loss": 0.0046, "step": 14948 }, { "epoch": 14.374038461538461, "grad_norm": 0.17614863812923431, "learning_rate": 8.081177822762506e-06, "loss": 0.0007, "step": 14949 }, { "epoch": 14.375, "grad_norm": 0.01052328385412693, "learning_rate": 8.079955309715984e-06, "loss": 0.0001, "step": 14950 }, { "epoch": 14.375961538461539, "grad_norm": 0.05064154416322708, "learning_rate": 8.078732826462917e-06, "loss": 0.0003, "step": 14951 }, { "epoch": 14.376923076923077, "grad_norm": 0.9357632994651794, "learning_rate": 8.077510373022279e-06, "loss": 0.0027, "step": 14952 }, { "epoch": 14.377884615384616, "grad_norm": 0.22974824905395508, "learning_rate": 8.076287949413039e-06, "loss": 0.0007, "step": 14953 }, { "epoch": 14.378846153846155, "grad_norm": 0.06338430196046829, "learning_rate": 8.075065555654165e-06, "loss": 0.0003, "step": 14954 }, { "epoch": 14.379807692307692, "grad_norm": 0.45988729596138, "learning_rate": 8.07384319176462e-06, "loss": 0.0024, "step": 14955 }, { "epoch": 14.38076923076923, "grad_norm": 1.9604120254516602, "learning_rate": 8.072620857763378e-06, "loss": 0.0683, "step": 14956 }, { "epoch": 14.381730769230769, "grad_norm": 0.02404763735830784, "learning_rate": 8.071398553669405e-06, "loss": 0.0002, "step": 14957 }, { "epoch": 14.382692307692308, "grad_norm": 1.0739014148712158, "learning_rate": 8.07017627950166e-06, "loss": 0.0023, "step": 14958 }, { "epoch": 14.383653846153846, "grad_norm": 4.125621318817139, "learning_rate": 8.068954035279121e-06, "loss": 0.0554, "step": 14959 }, { "epoch": 14.384615384615385, "grad_norm": 1.2572211027145386, "learning_rate": 8.067731821020747e-06, "loss": 0.0014, "step": 14960 }, { "epoch": 14.385576923076924, "grad_norm": 2.919454574584961, "learning_rate": 8.066509636745505e-06, "loss": 0.0434, "step": 14961 }, { "epoch": 14.386538461538462, "grad_norm": 0.019667014479637146, "learning_rate": 8.065287482472355e-06, "loss": 0.0002, "step": 14962 }, { "epoch": 14.3875, "grad_norm": 0.036896176636219025, "learning_rate": 8.064065358220268e-06, "loss": 0.0002, "step": 14963 }, { "epoch": 14.388461538461538, "grad_norm": 0.039075933396816254, "learning_rate": 8.062843264008204e-06, "loss": 0.0004, "step": 14964 }, { "epoch": 14.389423076923077, "grad_norm": 0.016810784116387367, "learning_rate": 8.061621199855127e-06, "loss": 0.0002, "step": 14965 }, { "epoch": 14.390384615384615, "grad_norm": 2.3209822177886963, "learning_rate": 8.060399165779998e-06, "loss": 0.013, "step": 14966 }, { "epoch": 14.391346153846154, "grad_norm": 0.049380309879779816, "learning_rate": 8.059177161801783e-06, "loss": 0.0003, "step": 14967 }, { "epoch": 14.392307692307693, "grad_norm": 0.1868765652179718, "learning_rate": 8.057955187939442e-06, "loss": 0.0017, "step": 14968 }, { "epoch": 14.393269230769231, "grad_norm": 1.2489265203475952, "learning_rate": 8.056733244211935e-06, "loss": 0.0043, "step": 14969 }, { "epoch": 14.39423076923077, "grad_norm": 0.04720579460263252, "learning_rate": 8.055511330638228e-06, "loss": 0.0004, "step": 14970 }, { "epoch": 14.395192307692307, "grad_norm": 0.277210533618927, "learning_rate": 8.054289447237277e-06, "loss": 0.0013, "step": 14971 }, { "epoch": 14.396153846153846, "grad_norm": 0.7451940774917603, "learning_rate": 8.053067594028044e-06, "loss": 0.0049, "step": 14972 }, { "epoch": 14.397115384615384, "grad_norm": 0.41830289363861084, "learning_rate": 8.051845771029483e-06, "loss": 0.0016, "step": 14973 }, { "epoch": 14.398076923076923, "grad_norm": 0.6450973749160767, "learning_rate": 8.050623978260563e-06, "loss": 0.0021, "step": 14974 }, { "epoch": 14.399038461538462, "grad_norm": 1.4642033576965332, "learning_rate": 8.049402215740237e-06, "loss": 0.019, "step": 14975 }, { "epoch": 14.4, "grad_norm": 0.027964752167463303, "learning_rate": 8.048180483487462e-06, "loss": 0.0002, "step": 14976 }, { "epoch": 14.400961538461539, "grad_norm": 0.07125674933195114, "learning_rate": 8.046958781521198e-06, "loss": 0.0003, "step": 14977 }, { "epoch": 14.401923076923078, "grad_norm": 0.02155226469039917, "learning_rate": 8.045737109860401e-06, "loss": 0.0001, "step": 14978 }, { "epoch": 14.402884615384615, "grad_norm": 1.3422491550445557, "learning_rate": 8.044515468524028e-06, "loss": 0.0053, "step": 14979 }, { "epoch": 14.403846153846153, "grad_norm": 0.7428433895111084, "learning_rate": 8.043293857531035e-06, "loss": 0.0039, "step": 14980 }, { "epoch": 14.404807692307692, "grad_norm": 0.12346939742565155, "learning_rate": 8.042072276900382e-06, "loss": 0.0008, "step": 14981 }, { "epoch": 14.40576923076923, "grad_norm": 0.09377648681402206, "learning_rate": 8.04085072665102e-06, "loss": 0.0008, "step": 14982 }, { "epoch": 14.40673076923077, "grad_norm": 0.004131286405026913, "learning_rate": 8.039629206801903e-06, "loss": 0.0, "step": 14983 }, { "epoch": 14.407692307692308, "grad_norm": 0.027282441034913063, "learning_rate": 8.038407717371985e-06, "loss": 0.0004, "step": 14984 }, { "epoch": 14.408653846153847, "grad_norm": 0.08037400245666504, "learning_rate": 8.037186258380226e-06, "loss": 0.0006, "step": 14985 }, { "epoch": 14.409615384615385, "grad_norm": 0.3922627568244934, "learning_rate": 8.035964829845575e-06, "loss": 0.0014, "step": 14986 }, { "epoch": 14.410576923076922, "grad_norm": 0.24280321598052979, "learning_rate": 8.034743431786985e-06, "loss": 0.0014, "step": 14987 }, { "epoch": 14.411538461538461, "grad_norm": 0.08453304320573807, "learning_rate": 8.033522064223407e-06, "loss": 0.0006, "step": 14988 }, { "epoch": 14.4125, "grad_norm": 0.26416829228401184, "learning_rate": 8.032300727173797e-06, "loss": 0.0018, "step": 14989 }, { "epoch": 14.413461538461538, "grad_norm": 0.8297101855278015, "learning_rate": 8.031079420657105e-06, "loss": 0.0041, "step": 14990 }, { "epoch": 14.414423076923077, "grad_norm": 0.024647263810038567, "learning_rate": 8.029858144692278e-06, "loss": 0.0002, "step": 14991 }, { "epoch": 14.415384615384616, "grad_norm": 1.0787147283554077, "learning_rate": 8.028636899298275e-06, "loss": 0.032, "step": 14992 }, { "epoch": 14.416346153846154, "grad_norm": 0.018785113468766212, "learning_rate": 8.02741568449404e-06, "loss": 0.0002, "step": 14993 }, { "epoch": 14.417307692307693, "grad_norm": 0.23013095557689667, "learning_rate": 8.026194500298524e-06, "loss": 0.0021, "step": 14994 }, { "epoch": 14.41826923076923, "grad_norm": 0.6242040991783142, "learning_rate": 8.024973346730675e-06, "loss": 0.0029, "step": 14995 }, { "epoch": 14.419230769230769, "grad_norm": 0.05692059174180031, "learning_rate": 8.023752223809443e-06, "loss": 0.0005, "step": 14996 }, { "epoch": 14.420192307692307, "grad_norm": 0.16010558605194092, "learning_rate": 8.02253113155378e-06, "loss": 0.0007, "step": 14997 }, { "epoch": 14.421153846153846, "grad_norm": 0.9053471684455872, "learning_rate": 8.021310069982624e-06, "loss": 0.0062, "step": 14998 }, { "epoch": 14.422115384615385, "grad_norm": 0.05223258584737778, "learning_rate": 8.020089039114932e-06, "loss": 0.0005, "step": 14999 }, { "epoch": 14.423076923076923, "grad_norm": 0.9325821995735168, "learning_rate": 8.018868038969648e-06, "loss": 0.0256, "step": 15000 }, { "epoch": 14.424038461538462, "grad_norm": 1.8493860960006714, "learning_rate": 8.017647069565718e-06, "loss": 0.022, "step": 15001 }, { "epoch": 14.425, "grad_norm": 0.04781123995780945, "learning_rate": 8.016426130922082e-06, "loss": 0.0005, "step": 15002 }, { "epoch": 14.425961538461538, "grad_norm": 0.0528222993016243, "learning_rate": 8.015205223057696e-06, "loss": 0.0004, "step": 15003 }, { "epoch": 14.426923076923076, "grad_norm": 0.3941092789173126, "learning_rate": 8.013984345991498e-06, "loss": 0.0013, "step": 15004 }, { "epoch": 14.427884615384615, "grad_norm": 2.652986526489258, "learning_rate": 8.012763499742435e-06, "loss": 0.0166, "step": 15005 }, { "epoch": 14.428846153846154, "grad_norm": 0.22260497510433197, "learning_rate": 8.011542684329447e-06, "loss": 0.0009, "step": 15006 }, { "epoch": 14.429807692307692, "grad_norm": 0.08585081994533539, "learning_rate": 8.010321899771483e-06, "loss": 0.0006, "step": 15007 }, { "epoch": 14.430769230769231, "grad_norm": 0.4354079067707062, "learning_rate": 8.009101146087483e-06, "loss": 0.0092, "step": 15008 }, { "epoch": 14.43173076923077, "grad_norm": 1.3735973834991455, "learning_rate": 8.00788042329639e-06, "loss": 0.0133, "step": 15009 }, { "epoch": 14.432692307692308, "grad_norm": 0.26191434264183044, "learning_rate": 8.006659731417146e-06, "loss": 0.0008, "step": 15010 }, { "epoch": 14.433653846153845, "grad_norm": 0.028958488255739212, "learning_rate": 8.005439070468692e-06, "loss": 0.0003, "step": 15011 }, { "epoch": 14.434615384615384, "grad_norm": 0.6830450892448425, "learning_rate": 8.004218440469972e-06, "loss": 0.003, "step": 15012 }, { "epoch": 14.435576923076923, "grad_norm": 0.27735403180122375, "learning_rate": 8.00299784143992e-06, "loss": 0.0011, "step": 15013 }, { "epoch": 14.436538461538461, "grad_norm": 0.5186381936073303, "learning_rate": 8.001777273397484e-06, "loss": 0.0017, "step": 15014 }, { "epoch": 14.4375, "grad_norm": 0.06849360466003418, "learning_rate": 8.0005567363616e-06, "loss": 0.0003, "step": 15015 }, { "epoch": 14.438461538461539, "grad_norm": 1.2764794826507568, "learning_rate": 7.999336230351206e-06, "loss": 0.0051, "step": 15016 }, { "epoch": 14.439423076923077, "grad_norm": 0.048173610121011734, "learning_rate": 7.998115755385241e-06, "loss": 0.0006, "step": 15017 }, { "epoch": 14.440384615384616, "grad_norm": 5.014532566070557, "learning_rate": 7.996895311482644e-06, "loss": 0.0296, "step": 15018 }, { "epoch": 14.441346153846155, "grad_norm": 1.2502501010894775, "learning_rate": 7.995674898662356e-06, "loss": 0.0054, "step": 15019 }, { "epoch": 14.442307692307692, "grad_norm": 0.08983270078897476, "learning_rate": 7.994454516943307e-06, "loss": 0.0004, "step": 15020 }, { "epoch": 14.44326923076923, "grad_norm": 0.2627038061618805, "learning_rate": 7.99323416634444e-06, "loss": 0.0008, "step": 15021 }, { "epoch": 14.444230769230769, "grad_norm": 0.028318585827946663, "learning_rate": 7.992013846884687e-06, "loss": 0.0002, "step": 15022 }, { "epoch": 14.445192307692308, "grad_norm": 2.478235960006714, "learning_rate": 7.990793558582988e-06, "loss": 0.0196, "step": 15023 }, { "epoch": 14.446153846153846, "grad_norm": 0.028843732550740242, "learning_rate": 7.989573301458274e-06, "loss": 0.0002, "step": 15024 }, { "epoch": 14.447115384615385, "grad_norm": 0.2384025603532791, "learning_rate": 7.988353075529482e-06, "loss": 0.0012, "step": 15025 }, { "epoch": 14.448076923076924, "grad_norm": 0.9051646590232849, "learning_rate": 7.987132880815548e-06, "loss": 0.0046, "step": 15026 }, { "epoch": 14.449038461538462, "grad_norm": 0.37886369228363037, "learning_rate": 7.985912717335405e-06, "loss": 0.0015, "step": 15027 }, { "epoch": 14.45, "grad_norm": 0.2343125343322754, "learning_rate": 7.984692585107981e-06, "loss": 0.0015, "step": 15028 }, { "epoch": 14.450961538461538, "grad_norm": 4.66849946975708, "learning_rate": 7.983472484152216e-06, "loss": 0.053, "step": 15029 }, { "epoch": 14.451923076923077, "grad_norm": 0.6594165563583374, "learning_rate": 7.98225241448704e-06, "loss": 0.0021, "step": 15030 }, { "epoch": 14.452884615384615, "grad_norm": 4.507673740386963, "learning_rate": 7.981032376131382e-06, "loss": 0.0305, "step": 15031 }, { "epoch": 14.453846153846154, "grad_norm": 1.8444828987121582, "learning_rate": 7.979812369104177e-06, "loss": 0.0314, "step": 15032 }, { "epoch": 14.454807692307693, "grad_norm": 0.17280195653438568, "learning_rate": 7.978592393424356e-06, "loss": 0.0011, "step": 15033 }, { "epoch": 14.455769230769231, "grad_norm": 0.023596955463290215, "learning_rate": 7.977372449110849e-06, "loss": 0.0002, "step": 15034 }, { "epoch": 14.45673076923077, "grad_norm": 0.16920717060565948, "learning_rate": 7.976152536182583e-06, "loss": 0.0013, "step": 15035 }, { "epoch": 14.457692307692307, "grad_norm": 2.7860805988311768, "learning_rate": 7.97493265465849e-06, "loss": 0.0594, "step": 15036 }, { "epoch": 14.458653846153846, "grad_norm": 0.008924957364797592, "learning_rate": 7.9737128045575e-06, "loss": 0.0001, "step": 15037 }, { "epoch": 14.459615384615384, "grad_norm": 0.03208795189857483, "learning_rate": 7.97249298589854e-06, "loss": 0.0002, "step": 15038 }, { "epoch": 14.460576923076923, "grad_norm": 0.35057133436203003, "learning_rate": 7.971273198700537e-06, "loss": 0.0022, "step": 15039 }, { "epoch": 14.461538461538462, "grad_norm": 4.938731670379639, "learning_rate": 7.97005344298242e-06, "loss": 0.0427, "step": 15040 }, { "epoch": 14.4625, "grad_norm": 0.13023000955581665, "learning_rate": 7.968833718763117e-06, "loss": 0.0005, "step": 15041 }, { "epoch": 14.463461538461539, "grad_norm": 1.605729341506958, "learning_rate": 7.96761402606155e-06, "loss": 0.008, "step": 15042 }, { "epoch": 14.464423076923078, "grad_norm": 0.7447648644447327, "learning_rate": 7.966394364896653e-06, "loss": 0.0022, "step": 15043 }, { "epoch": 14.465384615384615, "grad_norm": 0.07227256149053574, "learning_rate": 7.965174735287345e-06, "loss": 0.0004, "step": 15044 }, { "epoch": 14.466346153846153, "grad_norm": 0.12368880212306976, "learning_rate": 7.963955137252552e-06, "loss": 0.0006, "step": 15045 }, { "epoch": 14.467307692307692, "grad_norm": 0.16605094075202942, "learning_rate": 7.9627355708112e-06, "loss": 0.0007, "step": 15046 }, { "epoch": 14.46826923076923, "grad_norm": 0.005109521094709635, "learning_rate": 7.961516035982212e-06, "loss": 0.0001, "step": 15047 }, { "epoch": 14.46923076923077, "grad_norm": 0.43903404474258423, "learning_rate": 7.960296532784515e-06, "loss": 0.0016, "step": 15048 }, { "epoch": 14.470192307692308, "grad_norm": 0.0052483342587947845, "learning_rate": 7.959077061237026e-06, "loss": 0.0001, "step": 15049 }, { "epoch": 14.471153846153847, "grad_norm": 0.29558587074279785, "learning_rate": 7.957857621358674e-06, "loss": 0.0014, "step": 15050 }, { "epoch": 14.472115384615385, "grad_norm": 0.12356258183717728, "learning_rate": 7.956638213168376e-06, "loss": 0.0006, "step": 15051 }, { "epoch": 14.473076923076922, "grad_norm": 2.1287777423858643, "learning_rate": 7.955418836685057e-06, "loss": 0.0113, "step": 15052 }, { "epoch": 14.474038461538461, "grad_norm": 0.6983804702758789, "learning_rate": 7.954199491927635e-06, "loss": 0.0034, "step": 15053 }, { "epoch": 14.475, "grad_norm": 0.20916475355625153, "learning_rate": 7.952980178915034e-06, "loss": 0.0011, "step": 15054 }, { "epoch": 14.475961538461538, "grad_norm": 0.15791717171669006, "learning_rate": 7.951760897666173e-06, "loss": 0.0013, "step": 15055 }, { "epoch": 14.476923076923077, "grad_norm": 0.28223174810409546, "learning_rate": 7.95054164819997e-06, "loss": 0.0011, "step": 15056 }, { "epoch": 14.477884615384616, "grad_norm": 0.6316908001899719, "learning_rate": 7.949322430535345e-06, "loss": 0.0026, "step": 15057 }, { "epoch": 14.478846153846154, "grad_norm": 0.026913311332464218, "learning_rate": 7.94810324469122e-06, "loss": 0.0003, "step": 15058 }, { "epoch": 14.479807692307693, "grad_norm": 0.0843866690993309, "learning_rate": 7.946884090686508e-06, "loss": 0.0004, "step": 15059 }, { "epoch": 14.48076923076923, "grad_norm": 1.259893536567688, "learning_rate": 7.94566496854013e-06, "loss": 0.0036, "step": 15060 }, { "epoch": 14.481730769230769, "grad_norm": 0.32502275705337524, "learning_rate": 7.944445878271002e-06, "loss": 0.001, "step": 15061 }, { "epoch": 14.482692307692307, "grad_norm": 1.2116081714630127, "learning_rate": 7.943226819898042e-06, "loss": 0.0428, "step": 15062 }, { "epoch": 14.483653846153846, "grad_norm": 0.12900809943675995, "learning_rate": 7.942007793440165e-06, "loss": 0.0008, "step": 15063 }, { "epoch": 14.484615384615385, "grad_norm": 0.047430917620658875, "learning_rate": 7.940788798916286e-06, "loss": 0.0003, "step": 15064 }, { "epoch": 14.485576923076923, "grad_norm": 0.10135535150766373, "learning_rate": 7.93956983634532e-06, "loss": 0.0009, "step": 15065 }, { "epoch": 14.486538461538462, "grad_norm": 0.16151128709316254, "learning_rate": 7.938350905746186e-06, "loss": 0.0009, "step": 15066 }, { "epoch": 14.4875, "grad_norm": 1.685257911682129, "learning_rate": 7.937132007137794e-06, "loss": 0.0108, "step": 15067 }, { "epoch": 14.488461538461538, "grad_norm": 4.205721855163574, "learning_rate": 7.935913140539056e-06, "loss": 0.0419, "step": 15068 }, { "epoch": 14.489423076923076, "grad_norm": 0.014305047690868378, "learning_rate": 7.934694305968893e-06, "loss": 0.0002, "step": 15069 }, { "epoch": 14.490384615384615, "grad_norm": 0.17003318667411804, "learning_rate": 7.933475503446209e-06, "loss": 0.0005, "step": 15070 }, { "epoch": 14.491346153846154, "grad_norm": 0.7387310266494751, "learning_rate": 7.932256732989919e-06, "loss": 0.0043, "step": 15071 }, { "epoch": 14.492307692307692, "grad_norm": 0.6331326365470886, "learning_rate": 7.931037994618938e-06, "loss": 0.0025, "step": 15072 }, { "epoch": 14.493269230769231, "grad_norm": 0.14860662817955017, "learning_rate": 7.929819288352176e-06, "loss": 0.0007, "step": 15073 }, { "epoch": 14.49423076923077, "grad_norm": 0.2198205143213272, "learning_rate": 7.928600614208543e-06, "loss": 0.001, "step": 15074 }, { "epoch": 14.495192307692308, "grad_norm": 0.36587172746658325, "learning_rate": 7.927381972206945e-06, "loss": 0.0012, "step": 15075 }, { "epoch": 14.496153846153845, "grad_norm": 0.2591412365436554, "learning_rate": 7.9261633623663e-06, "loss": 0.0007, "step": 15076 }, { "epoch": 14.497115384615384, "grad_norm": 0.03727758675813675, "learning_rate": 7.92494478470551e-06, "loss": 0.0002, "step": 15077 }, { "epoch": 14.498076923076923, "grad_norm": 1.001654028892517, "learning_rate": 7.92372623924349e-06, "loss": 0.0054, "step": 15078 }, { "epoch": 14.499038461538461, "grad_norm": 1.2233116626739502, "learning_rate": 7.92250772599914e-06, "loss": 0.0058, "step": 15079 }, { "epoch": 14.5, "grad_norm": 0.14601103961467743, "learning_rate": 7.921289244991378e-06, "loss": 0.0006, "step": 15080 }, { "epoch": 14.500961538461539, "grad_norm": 0.013331365771591663, "learning_rate": 7.920070796239104e-06, "loss": 0.0001, "step": 15081 }, { "epoch": 14.501923076923077, "grad_norm": 0.040341511368751526, "learning_rate": 7.918852379761223e-06, "loss": 0.0004, "step": 15082 }, { "epoch": 14.502884615384616, "grad_norm": 0.7042840123176575, "learning_rate": 7.91763399557665e-06, "loss": 0.0025, "step": 15083 }, { "epoch": 14.503846153846155, "grad_norm": 0.3064146041870117, "learning_rate": 7.916415643704284e-06, "loss": 0.0019, "step": 15084 }, { "epoch": 14.504807692307692, "grad_norm": 1.090153694152832, "learning_rate": 7.915197324163033e-06, "loss": 0.0044, "step": 15085 }, { "epoch": 14.50576923076923, "grad_norm": 0.09123318642377853, "learning_rate": 7.913979036971799e-06, "loss": 0.0005, "step": 15086 }, { "epoch": 14.506730769230769, "grad_norm": 0.8373637795448303, "learning_rate": 7.912760782149489e-06, "loss": 0.0041, "step": 15087 }, { "epoch": 14.507692307692308, "grad_norm": 1.2971527576446533, "learning_rate": 7.911542559715005e-06, "loss": 0.0196, "step": 15088 }, { "epoch": 14.508653846153846, "grad_norm": 1.3952913284301758, "learning_rate": 7.91032436968725e-06, "loss": 0.0241, "step": 15089 }, { "epoch": 14.509615384615385, "grad_norm": 0.03539007902145386, "learning_rate": 7.909106212085129e-06, "loss": 0.0003, "step": 15090 }, { "epoch": 14.510576923076924, "grad_norm": 0.07355067878961563, "learning_rate": 7.907888086927544e-06, "loss": 0.0004, "step": 15091 }, { "epoch": 14.51153846153846, "grad_norm": 1.1600375175476074, "learning_rate": 7.906669994233394e-06, "loss": 0.0067, "step": 15092 }, { "epoch": 14.5125, "grad_norm": 0.014999684877693653, "learning_rate": 7.905451934021581e-06, "loss": 0.0002, "step": 15093 }, { "epoch": 14.513461538461538, "grad_norm": 0.15786013007164001, "learning_rate": 7.904233906311008e-06, "loss": 0.001, "step": 15094 }, { "epoch": 14.514423076923077, "grad_norm": 1.4465997219085693, "learning_rate": 7.903015911120574e-06, "loss": 0.0197, "step": 15095 }, { "epoch": 14.515384615384615, "grad_norm": 0.11413069069385529, "learning_rate": 7.90179794846918e-06, "loss": 0.0008, "step": 15096 }, { "epoch": 14.516346153846154, "grad_norm": 0.23703168332576752, "learning_rate": 7.90058001837572e-06, "loss": 0.0007, "step": 15097 }, { "epoch": 14.517307692307693, "grad_norm": 1.8617501258850098, "learning_rate": 7.899362120859098e-06, "loss": 0.0063, "step": 15098 }, { "epoch": 14.518269230769231, "grad_norm": 2.317898988723755, "learning_rate": 7.898144255938212e-06, "loss": 0.054, "step": 15099 }, { "epoch": 14.51923076923077, "grad_norm": 0.08284622430801392, "learning_rate": 7.896926423631955e-06, "loss": 0.0004, "step": 15100 }, { "epoch": 14.520192307692307, "grad_norm": 0.1990087777376175, "learning_rate": 7.895708623959231e-06, "loss": 0.0014, "step": 15101 }, { "epoch": 14.521153846153846, "grad_norm": 1.9909337759017944, "learning_rate": 7.894490856938931e-06, "loss": 0.0085, "step": 15102 }, { "epoch": 14.522115384615384, "grad_norm": 0.09261948615312576, "learning_rate": 7.893273122589954e-06, "loss": 0.0006, "step": 15103 }, { "epoch": 14.523076923076923, "grad_norm": 0.21994268894195557, "learning_rate": 7.892055420931196e-06, "loss": 0.001, "step": 15104 }, { "epoch": 14.524038461538462, "grad_norm": 0.8170866370201111, "learning_rate": 7.89083775198155e-06, "loss": 0.0043, "step": 15105 }, { "epoch": 14.525, "grad_norm": 0.03549729287624359, "learning_rate": 7.889620115759911e-06, "loss": 0.0003, "step": 15106 }, { "epoch": 14.525961538461539, "grad_norm": 0.10533512383699417, "learning_rate": 7.888402512285175e-06, "loss": 0.0007, "step": 15107 }, { "epoch": 14.526923076923078, "grad_norm": 0.011470063589513302, "learning_rate": 7.887184941576233e-06, "loss": 0.0002, "step": 15108 }, { "epoch": 14.527884615384615, "grad_norm": 1.0458260774612427, "learning_rate": 7.885967403651982e-06, "loss": 0.0038, "step": 15109 }, { "epoch": 14.528846153846153, "grad_norm": 0.1612692028284073, "learning_rate": 7.884749898531312e-06, "loss": 0.0009, "step": 15110 }, { "epoch": 14.529807692307692, "grad_norm": 0.019497480243444443, "learning_rate": 7.883532426233112e-06, "loss": 0.0002, "step": 15111 }, { "epoch": 14.53076923076923, "grad_norm": 0.5458737015724182, "learning_rate": 7.882314986776281e-06, "loss": 0.0017, "step": 15112 }, { "epoch": 14.53173076923077, "grad_norm": 0.18135356903076172, "learning_rate": 7.881097580179705e-06, "loss": 0.0007, "step": 15113 }, { "epoch": 14.532692307692308, "grad_norm": 0.3698427081108093, "learning_rate": 7.879880206462276e-06, "loss": 0.0016, "step": 15114 }, { "epoch": 14.533653846153847, "grad_norm": 0.07052616029977798, "learning_rate": 7.87866286564288e-06, "loss": 0.0006, "step": 15115 }, { "epoch": 14.534615384615385, "grad_norm": 2.6502983570098877, "learning_rate": 7.877445557740415e-06, "loss": 0.0244, "step": 15116 }, { "epoch": 14.535576923076922, "grad_norm": 0.01309713814407587, "learning_rate": 7.876228282773765e-06, "loss": 0.0002, "step": 15117 }, { "epoch": 14.536538461538461, "grad_norm": 0.4295318126678467, "learning_rate": 7.875011040761817e-06, "loss": 0.002, "step": 15118 }, { "epoch": 14.5375, "grad_norm": 1.1968581676483154, "learning_rate": 7.87379383172346e-06, "loss": 0.013, "step": 15119 }, { "epoch": 14.538461538461538, "grad_norm": 1.9827030897140503, "learning_rate": 7.872576655677584e-06, "loss": 0.0032, "step": 15120 }, { "epoch": 14.539423076923077, "grad_norm": 0.017793746665120125, "learning_rate": 7.871359512643076e-06, "loss": 0.0001, "step": 15121 }, { "epoch": 14.540384615384616, "grad_norm": 0.08782446384429932, "learning_rate": 7.870142402638817e-06, "loss": 0.0006, "step": 15122 }, { "epoch": 14.541346153846154, "grad_norm": 0.17661912739276886, "learning_rate": 7.8689253256837e-06, "loss": 0.0014, "step": 15123 }, { "epoch": 14.542307692307693, "grad_norm": 0.06242845579981804, "learning_rate": 7.867708281796606e-06, "loss": 0.0004, "step": 15124 }, { "epoch": 14.54326923076923, "grad_norm": 0.5983259677886963, "learning_rate": 7.866491270996421e-06, "loss": 0.0016, "step": 15125 }, { "epoch": 14.544230769230769, "grad_norm": 0.07954760640859604, "learning_rate": 7.86527429330203e-06, "loss": 0.0005, "step": 15126 }, { "epoch": 14.545192307692307, "grad_norm": 0.6091139316558838, "learning_rate": 7.864057348732316e-06, "loss": 0.0021, "step": 15127 }, { "epoch": 14.546153846153846, "grad_norm": 0.19512733817100525, "learning_rate": 7.862840437306165e-06, "loss": 0.001, "step": 15128 }, { "epoch": 14.547115384615385, "grad_norm": 1.7975493669509888, "learning_rate": 7.861623559042457e-06, "loss": 0.0064, "step": 15129 }, { "epoch": 14.548076923076923, "grad_norm": 0.17251712083816528, "learning_rate": 7.860406713960073e-06, "loss": 0.0006, "step": 15130 }, { "epoch": 14.549038461538462, "grad_norm": 0.08801320940256119, "learning_rate": 7.859189902077901e-06, "loss": 0.0005, "step": 15131 }, { "epoch": 14.55, "grad_norm": 1.400290608406067, "learning_rate": 7.857973123414817e-06, "loss": 0.006, "step": 15132 }, { "epoch": 14.55096153846154, "grad_norm": 0.014620020985603333, "learning_rate": 7.856756377989702e-06, "loss": 0.0002, "step": 15133 }, { "epoch": 14.551923076923076, "grad_norm": 0.17735011875629425, "learning_rate": 7.855539665821439e-06, "loss": 0.0005, "step": 15134 }, { "epoch": 14.552884615384615, "grad_norm": 0.060043033212423325, "learning_rate": 7.854322986928906e-06, "loss": 0.0006, "step": 15135 }, { "epoch": 14.553846153846154, "grad_norm": 0.04583629220724106, "learning_rate": 7.853106341330983e-06, "loss": 0.0003, "step": 15136 }, { "epoch": 14.554807692307692, "grad_norm": 0.011088146828114986, "learning_rate": 7.851889729046548e-06, "loss": 0.0002, "step": 15137 }, { "epoch": 14.555769230769231, "grad_norm": 0.04706217348575592, "learning_rate": 7.850673150094481e-06, "loss": 0.0002, "step": 15138 }, { "epoch": 14.55673076923077, "grad_norm": 1.5072568655014038, "learning_rate": 7.849456604493658e-06, "loss": 0.0186, "step": 15139 }, { "epoch": 14.557692307692308, "grad_norm": 0.49865731596946716, "learning_rate": 7.848240092262955e-06, "loss": 0.0027, "step": 15140 }, { "epoch": 14.558653846153845, "grad_norm": 0.2160465270280838, "learning_rate": 7.847023613421251e-06, "loss": 0.0006, "step": 15141 }, { "epoch": 14.559615384615384, "grad_norm": 0.011113284155726433, "learning_rate": 7.845807167987424e-06, "loss": 0.0002, "step": 15142 }, { "epoch": 14.560576923076923, "grad_norm": 0.15633435547351837, "learning_rate": 7.844590755980346e-06, "loss": 0.001, "step": 15143 }, { "epoch": 14.561538461538461, "grad_norm": 0.126379132270813, "learning_rate": 7.843374377418893e-06, "loss": 0.0005, "step": 15144 }, { "epoch": 14.5625, "grad_norm": 0.12252609431743622, "learning_rate": 7.84215803232194e-06, "loss": 0.0005, "step": 15145 }, { "epoch": 14.563461538461539, "grad_norm": 0.010432644747197628, "learning_rate": 7.840941720708363e-06, "loss": 0.0001, "step": 15146 }, { "epoch": 14.564423076923077, "grad_norm": 0.3761143982410431, "learning_rate": 7.839725442597033e-06, "loss": 0.001, "step": 15147 }, { "epoch": 14.565384615384616, "grad_norm": 0.035153523087501526, "learning_rate": 7.83850919800682e-06, "loss": 0.0003, "step": 15148 }, { "epoch": 14.566346153846155, "grad_norm": 2.9152755737304688, "learning_rate": 7.837292986956604e-06, "loss": 0.0139, "step": 15149 }, { "epoch": 14.567307692307692, "grad_norm": 0.07111337035894394, "learning_rate": 7.836076809465254e-06, "loss": 0.0003, "step": 15150 }, { "epoch": 14.56826923076923, "grad_norm": 0.7397220134735107, "learning_rate": 7.834860665551638e-06, "loss": 0.0062, "step": 15151 }, { "epoch": 14.569230769230769, "grad_norm": 0.01684647984802723, "learning_rate": 7.833644555234631e-06, "loss": 0.0001, "step": 15152 }, { "epoch": 14.570192307692308, "grad_norm": 0.9679173231124878, "learning_rate": 7.832428478533105e-06, "loss": 0.0014, "step": 15153 }, { "epoch": 14.571153846153846, "grad_norm": 4.395599365234375, "learning_rate": 7.831212435465925e-06, "loss": 0.0345, "step": 15154 }, { "epoch": 14.572115384615385, "grad_norm": 0.09862364828586578, "learning_rate": 7.82999642605196e-06, "loss": 0.0009, "step": 15155 }, { "epoch": 14.573076923076924, "grad_norm": 0.01630808785557747, "learning_rate": 7.828780450310085e-06, "loss": 0.0002, "step": 15156 }, { "epoch": 14.57403846153846, "grad_norm": 0.04342992231249809, "learning_rate": 7.827564508259165e-06, "loss": 0.0002, "step": 15157 }, { "epoch": 14.575, "grad_norm": 0.050880689173936844, "learning_rate": 7.826348599918067e-06, "loss": 0.0003, "step": 15158 }, { "epoch": 14.575961538461538, "grad_norm": 0.458915650844574, "learning_rate": 7.82513272530566e-06, "loss": 0.0014, "step": 15159 }, { "epoch": 14.576923076923077, "grad_norm": 0.009626772254705429, "learning_rate": 7.823916884440809e-06, "loss": 0.0001, "step": 15160 }, { "epoch": 14.577884615384615, "grad_norm": 0.9841489195823669, "learning_rate": 7.82270107734238e-06, "loss": 0.0041, "step": 15161 }, { "epoch": 14.578846153846154, "grad_norm": 0.030058052390813828, "learning_rate": 7.821485304029241e-06, "loss": 0.0003, "step": 15162 }, { "epoch": 14.579807692307693, "grad_norm": 0.030377978459000587, "learning_rate": 7.820269564520258e-06, "loss": 0.0001, "step": 15163 }, { "epoch": 14.580769230769231, "grad_norm": 0.03367701917886734, "learning_rate": 7.819053858834292e-06, "loss": 0.0003, "step": 15164 }, { "epoch": 14.58173076923077, "grad_norm": 0.0246147271245718, "learning_rate": 7.81783818699021e-06, "loss": 0.0001, "step": 15165 }, { "epoch": 14.582692307692307, "grad_norm": 2.440007448196411, "learning_rate": 7.816622549006874e-06, "loss": 0.0088, "step": 15166 }, { "epoch": 14.583653846153846, "grad_norm": 0.09236372262239456, "learning_rate": 7.815406944903148e-06, "loss": 0.0007, "step": 15167 }, { "epoch": 14.584615384615384, "grad_norm": 0.5585734248161316, "learning_rate": 7.814191374697895e-06, "loss": 0.0018, "step": 15168 }, { "epoch": 14.585576923076923, "grad_norm": 1.582842469215393, "learning_rate": 7.812975838409977e-06, "loss": 0.0038, "step": 15169 }, { "epoch": 14.586538461538462, "grad_norm": 0.03423664718866348, "learning_rate": 7.811760336058254e-06, "loss": 0.0003, "step": 15170 }, { "epoch": 14.5875, "grad_norm": 2.8082902431488037, "learning_rate": 7.810544867661589e-06, "loss": 0.0077, "step": 15171 }, { "epoch": 14.588461538461539, "grad_norm": 0.6572167873382568, "learning_rate": 7.80932943323884e-06, "loss": 0.0022, "step": 15172 }, { "epoch": 14.589423076923078, "grad_norm": 0.01879630610346794, "learning_rate": 7.80811403280887e-06, "loss": 0.0001, "step": 15173 }, { "epoch": 14.590384615384615, "grad_norm": 1.3505780696868896, "learning_rate": 7.806898666390538e-06, "loss": 0.0055, "step": 15174 }, { "epoch": 14.591346153846153, "grad_norm": 0.03288910537958145, "learning_rate": 7.805683334002702e-06, "loss": 0.0002, "step": 15175 }, { "epoch": 14.592307692307692, "grad_norm": 0.0651896595954895, "learning_rate": 7.80446803566422e-06, "loss": 0.0004, "step": 15176 }, { "epoch": 14.59326923076923, "grad_norm": 2.2818427085876465, "learning_rate": 7.803252771393948e-06, "loss": 0.0194, "step": 15177 }, { "epoch": 14.59423076923077, "grad_norm": 0.13575875759124756, "learning_rate": 7.802037541210747e-06, "loss": 0.0006, "step": 15178 }, { "epoch": 14.595192307692308, "grad_norm": 2.906672239303589, "learning_rate": 7.800822345133475e-06, "loss": 0.0329, "step": 15179 }, { "epoch": 14.596153846153847, "grad_norm": 2.2839090824127197, "learning_rate": 7.799607183180981e-06, "loss": 0.0062, "step": 15180 }, { "epoch": 14.597115384615385, "grad_norm": 0.007723584771156311, "learning_rate": 7.798392055372129e-06, "loss": 0.0001, "step": 15181 }, { "epoch": 14.598076923076922, "grad_norm": 0.020252002403140068, "learning_rate": 7.79717696172577e-06, "loss": 0.0001, "step": 15182 }, { "epoch": 14.599038461538461, "grad_norm": 0.03560478985309601, "learning_rate": 7.79596190226076e-06, "loss": 0.0002, "step": 15183 }, { "epoch": 14.6, "grad_norm": 1.5902416706085205, "learning_rate": 7.79474687699595e-06, "loss": 0.0056, "step": 15184 }, { "epoch": 14.600961538461538, "grad_norm": 0.04503827542066574, "learning_rate": 7.793531885950199e-06, "loss": 0.0005, "step": 15185 }, { "epoch": 14.601923076923077, "grad_norm": 2.2360317707061768, "learning_rate": 7.792316929142355e-06, "loss": 0.02, "step": 15186 }, { "epoch": 14.602884615384616, "grad_norm": 1.245842695236206, "learning_rate": 7.791102006591274e-06, "loss": 0.0034, "step": 15187 }, { "epoch": 14.603846153846154, "grad_norm": 1.8087979555130005, "learning_rate": 7.789887118315806e-06, "loss": 0.0257, "step": 15188 }, { "epoch": 14.604807692307693, "grad_norm": 0.030861707404255867, "learning_rate": 7.788672264334804e-06, "loss": 0.0004, "step": 15189 }, { "epoch": 14.60576923076923, "grad_norm": 0.5105446577072144, "learning_rate": 7.787457444667119e-06, "loss": 0.0016, "step": 15190 }, { "epoch": 14.606730769230769, "grad_norm": 0.8291911482810974, "learning_rate": 7.786242659331598e-06, "loss": 0.0016, "step": 15191 }, { "epoch": 14.607692307692307, "grad_norm": 0.019978078082203865, "learning_rate": 7.785027908347095e-06, "loss": 0.0002, "step": 15192 }, { "epoch": 14.608653846153846, "grad_norm": 0.013140471652150154, "learning_rate": 7.78381319173246e-06, "loss": 0.0002, "step": 15193 }, { "epoch": 14.609615384615385, "grad_norm": 3.94885516166687, "learning_rate": 7.782598509506539e-06, "loss": 0.0161, "step": 15194 }, { "epoch": 14.610576923076923, "grad_norm": 0.0909646600484848, "learning_rate": 7.781383861688179e-06, "loss": 0.0005, "step": 15195 }, { "epoch": 14.611538461538462, "grad_norm": 1.6048204898834229, "learning_rate": 7.780169248296232e-06, "loss": 0.0079, "step": 15196 }, { "epoch": 14.6125, "grad_norm": 1.6753661632537842, "learning_rate": 7.778954669349544e-06, "loss": 0.0073, "step": 15197 }, { "epoch": 14.61346153846154, "grad_norm": 0.5988690853118896, "learning_rate": 7.777740124866959e-06, "loss": 0.0025, "step": 15198 }, { "epoch": 14.614423076923076, "grad_norm": 0.06567136943340302, "learning_rate": 7.776525614867323e-06, "loss": 0.0003, "step": 15199 }, { "epoch": 14.615384615384615, "grad_norm": 0.2691442370414734, "learning_rate": 7.775311139369487e-06, "loss": 0.0006, "step": 15200 }, { "epoch": 14.616346153846154, "grad_norm": 0.10838022828102112, "learning_rate": 7.774096698392292e-06, "loss": 0.0006, "step": 15201 }, { "epoch": 14.617307692307692, "grad_norm": 0.03976651281118393, "learning_rate": 7.772882291954582e-06, "loss": 0.0004, "step": 15202 }, { "epoch": 14.618269230769231, "grad_norm": 0.06820831447839737, "learning_rate": 7.771667920075204e-06, "loss": 0.0004, "step": 15203 }, { "epoch": 14.61923076923077, "grad_norm": 1.6233934164047241, "learning_rate": 7.770453582772999e-06, "loss": 0.0075, "step": 15204 }, { "epoch": 14.620192307692308, "grad_norm": 0.042519837617874146, "learning_rate": 7.76923928006681e-06, "loss": 0.0003, "step": 15205 }, { "epoch": 14.621153846153845, "grad_norm": 0.09813106805086136, "learning_rate": 7.768025011975481e-06, "loss": 0.0005, "step": 15206 }, { "epoch": 14.622115384615384, "grad_norm": 0.5379394888877869, "learning_rate": 7.766810778517854e-06, "loss": 0.0018, "step": 15207 }, { "epoch": 14.623076923076923, "grad_norm": 0.006139469798654318, "learning_rate": 7.765596579712769e-06, "loss": 0.0, "step": 15208 }, { "epoch": 14.624038461538461, "grad_norm": 0.027693206444382668, "learning_rate": 7.764382415579067e-06, "loss": 0.0001, "step": 15209 }, { "epoch": 14.625, "grad_norm": 0.6037219762802124, "learning_rate": 7.763168286135586e-06, "loss": 0.0027, "step": 15210 }, { "epoch": 14.625961538461539, "grad_norm": 0.07681600004434586, "learning_rate": 7.76195419140117e-06, "loss": 0.0004, "step": 15211 }, { "epoch": 14.626923076923077, "grad_norm": 0.026766279712319374, "learning_rate": 7.760740131394659e-06, "loss": 0.0002, "step": 15212 }, { "epoch": 14.627884615384616, "grad_norm": 0.01978779211640358, "learning_rate": 7.759526106134885e-06, "loss": 0.0001, "step": 15213 }, { "epoch": 14.628846153846155, "grad_norm": 1.7419739961624146, "learning_rate": 7.758312115640691e-06, "loss": 0.0094, "step": 15214 }, { "epoch": 14.629807692307692, "grad_norm": 0.022380977869033813, "learning_rate": 7.757098159930915e-06, "loss": 0.0002, "step": 15215 }, { "epoch": 14.63076923076923, "grad_norm": 1.7701749801635742, "learning_rate": 7.755884239024394e-06, "loss": 0.0366, "step": 15216 }, { "epoch": 14.631730769230769, "grad_norm": 0.201250821352005, "learning_rate": 7.754670352939959e-06, "loss": 0.0012, "step": 15217 }, { "epoch": 14.632692307692308, "grad_norm": 0.41432228684425354, "learning_rate": 7.753456501696453e-06, "loss": 0.0013, "step": 15218 }, { "epoch": 14.633653846153846, "grad_norm": 2.34794282913208, "learning_rate": 7.752242685312709e-06, "loss": 0.0166, "step": 15219 }, { "epoch": 14.634615384615385, "grad_norm": 1.4037199020385742, "learning_rate": 7.751028903807559e-06, "loss": 0.0072, "step": 15220 }, { "epoch": 14.635576923076924, "grad_norm": 0.7966345548629761, "learning_rate": 7.749815157199843e-06, "loss": 0.0107, "step": 15221 }, { "epoch": 14.63653846153846, "grad_norm": 0.22692374885082245, "learning_rate": 7.74860144550839e-06, "loss": 0.0006, "step": 15222 }, { "epoch": 14.6375, "grad_norm": 2.4065093994140625, "learning_rate": 7.747387768752035e-06, "loss": 0.0165, "step": 15223 }, { "epoch": 14.638461538461538, "grad_norm": 1.452152132987976, "learning_rate": 7.746174126949608e-06, "loss": 0.0064, "step": 15224 }, { "epoch": 14.639423076923077, "grad_norm": 0.02188342995941639, "learning_rate": 7.744960520119946e-06, "loss": 0.0001, "step": 15225 }, { "epoch": 14.640384615384615, "grad_norm": 1.0552847385406494, "learning_rate": 7.74374694828188e-06, "loss": 0.0526, "step": 15226 }, { "epoch": 14.641346153846154, "grad_norm": 3.7685341835021973, "learning_rate": 7.742533411454238e-06, "loss": 0.0156, "step": 15227 }, { "epoch": 14.642307692307693, "grad_norm": 1.0957330465316772, "learning_rate": 7.741319909655848e-06, "loss": 0.0026, "step": 15228 }, { "epoch": 14.643269230769231, "grad_norm": 1.5149343013763428, "learning_rate": 7.740106442905548e-06, "loss": 0.0076, "step": 15229 }, { "epoch": 14.64423076923077, "grad_norm": 0.003554752329364419, "learning_rate": 7.738893011222164e-06, "loss": 0.0, "step": 15230 }, { "epoch": 14.645192307692307, "grad_norm": 2.1219186782836914, "learning_rate": 7.73767961462452e-06, "loss": 0.0438, "step": 15231 }, { "epoch": 14.646153846153846, "grad_norm": 0.5154216289520264, "learning_rate": 7.736466253131451e-06, "loss": 0.0019, "step": 15232 }, { "epoch": 14.647115384615384, "grad_norm": 0.5394242405891418, "learning_rate": 7.735252926761784e-06, "loss": 0.0024, "step": 15233 }, { "epoch": 14.648076923076923, "grad_norm": 1.4386892318725586, "learning_rate": 7.734039635534344e-06, "loss": 0.0039, "step": 15234 }, { "epoch": 14.649038461538462, "grad_norm": 0.13591063022613525, "learning_rate": 7.732826379467956e-06, "loss": 0.0006, "step": 15235 }, { "epoch": 14.65, "grad_norm": 3.086270570755005, "learning_rate": 7.73161315858145e-06, "loss": 0.0534, "step": 15236 }, { "epoch": 14.650961538461539, "grad_norm": 0.07308939099311829, "learning_rate": 7.730399972893651e-06, "loss": 0.0003, "step": 15237 }, { "epoch": 14.651923076923078, "grad_norm": 0.057952284812927246, "learning_rate": 7.729186822423383e-06, "loss": 0.0004, "step": 15238 }, { "epoch": 14.652884615384615, "grad_norm": 0.3089328408241272, "learning_rate": 7.727973707189469e-06, "loss": 0.0018, "step": 15239 }, { "epoch": 14.653846153846153, "grad_norm": 0.006651970557868481, "learning_rate": 7.726760627210736e-06, "loss": 0.0001, "step": 15240 }, { "epoch": 14.654807692307692, "grad_norm": 2.201761245727539, "learning_rate": 7.725547582506007e-06, "loss": 0.0145, "step": 15241 }, { "epoch": 14.65576923076923, "grad_norm": 0.11276646703481674, "learning_rate": 7.724334573094101e-06, "loss": 0.001, "step": 15242 }, { "epoch": 14.65673076923077, "grad_norm": 0.029277930036187172, "learning_rate": 7.723121598993846e-06, "loss": 0.0002, "step": 15243 }, { "epoch": 14.657692307692308, "grad_norm": 1.992067575454712, "learning_rate": 7.721908660224062e-06, "loss": 0.0345, "step": 15244 }, { "epoch": 14.658653846153847, "grad_norm": 2.408252000808716, "learning_rate": 7.720695756803569e-06, "loss": 0.053, "step": 15245 }, { "epoch": 14.659615384615385, "grad_norm": 0.055765267461538315, "learning_rate": 7.719482888751187e-06, "loss": 0.0004, "step": 15246 }, { "epoch": 14.660576923076922, "grad_norm": 0.017532622441649437, "learning_rate": 7.718270056085738e-06, "loss": 0.0002, "step": 15247 }, { "epoch": 14.661538461538461, "grad_norm": 1.0614627599716187, "learning_rate": 7.71705725882604e-06, "loss": 0.0077, "step": 15248 }, { "epoch": 14.6625, "grad_norm": 0.6931411027908325, "learning_rate": 7.715844496990914e-06, "loss": 0.0022, "step": 15249 }, { "epoch": 14.663461538461538, "grad_norm": 0.046566110104322433, "learning_rate": 7.714631770599176e-06, "loss": 0.0004, "step": 15250 }, { "epoch": 14.664423076923077, "grad_norm": 2.6833832263946533, "learning_rate": 7.713419079669647e-06, "loss": 0.0219, "step": 15251 }, { "epoch": 14.665384615384616, "grad_norm": 0.023543640971183777, "learning_rate": 7.712206424221144e-06, "loss": 0.0002, "step": 15252 }, { "epoch": 14.666346153846154, "grad_norm": 0.13559789955615997, "learning_rate": 7.71099380427248e-06, "loss": 0.0007, "step": 15253 }, { "epoch": 14.667307692307693, "grad_norm": 0.04965757951140404, "learning_rate": 7.709781219842475e-06, "loss": 0.0003, "step": 15254 }, { "epoch": 14.66826923076923, "grad_norm": 2.2025773525238037, "learning_rate": 7.708568670949944e-06, "loss": 0.0244, "step": 15255 }, { "epoch": 14.669230769230769, "grad_norm": 0.01896541193127632, "learning_rate": 7.707356157613704e-06, "loss": 0.0001, "step": 15256 }, { "epoch": 14.670192307692307, "grad_norm": 0.08103808760643005, "learning_rate": 7.706143679852562e-06, "loss": 0.0003, "step": 15257 }, { "epoch": 14.671153846153846, "grad_norm": 0.038115955889225006, "learning_rate": 7.704931237685342e-06, "loss": 0.0003, "step": 15258 }, { "epoch": 14.672115384615385, "grad_norm": 0.1804807037115097, "learning_rate": 7.703718831130854e-06, "loss": 0.0005, "step": 15259 }, { "epoch": 14.673076923076923, "grad_norm": 0.03255943953990936, "learning_rate": 7.702506460207909e-06, "loss": 0.0002, "step": 15260 }, { "epoch": 14.674038461538462, "grad_norm": 0.39366406202316284, "learning_rate": 7.701294124935319e-06, "loss": 0.0018, "step": 15261 }, { "epoch": 14.675, "grad_norm": 0.13090907037258148, "learning_rate": 7.7000818253319e-06, "loss": 0.001, "step": 15262 }, { "epoch": 14.67596153846154, "grad_norm": 0.3504805564880371, "learning_rate": 7.69886956141646e-06, "loss": 0.0011, "step": 15263 }, { "epoch": 14.676923076923076, "grad_norm": 0.515987753868103, "learning_rate": 7.697657333207808e-06, "loss": 0.0019, "step": 15264 }, { "epoch": 14.677884615384615, "grad_norm": 0.10642644017934799, "learning_rate": 7.69644514072476e-06, "loss": 0.0004, "step": 15265 }, { "epoch": 14.678846153846154, "grad_norm": 0.11963590979576111, "learning_rate": 7.695232983986123e-06, "loss": 0.0005, "step": 15266 }, { "epoch": 14.679807692307692, "grad_norm": 0.10658629983663559, "learning_rate": 7.694020863010705e-06, "loss": 0.0005, "step": 15267 }, { "epoch": 14.680769230769231, "grad_norm": 0.2142503559589386, "learning_rate": 7.692808777817313e-06, "loss": 0.001, "step": 15268 }, { "epoch": 14.68173076923077, "grad_norm": 1.7097245454788208, "learning_rate": 7.69159672842476e-06, "loss": 0.0746, "step": 15269 }, { "epoch": 14.682692307692308, "grad_norm": 0.06320471316576004, "learning_rate": 7.690384714851851e-06, "loss": 0.0004, "step": 15270 }, { "epoch": 14.683653846153845, "grad_norm": 2.522193431854248, "learning_rate": 7.689172737117389e-06, "loss": 0.0277, "step": 15271 }, { "epoch": 14.684615384615384, "grad_norm": 4.687250137329102, "learning_rate": 7.687960795240188e-06, "loss": 0.0454, "step": 15272 }, { "epoch": 14.685576923076923, "grad_norm": 0.07751351594924927, "learning_rate": 7.686748889239049e-06, "loss": 0.0005, "step": 15273 }, { "epoch": 14.686538461538461, "grad_norm": 0.1598251461982727, "learning_rate": 7.685537019132776e-06, "loss": 0.0005, "step": 15274 }, { "epoch": 14.6875, "grad_norm": 2.0678956508636475, "learning_rate": 7.684325184940176e-06, "loss": 0.0126, "step": 15275 }, { "epoch": 14.688461538461539, "grad_norm": 2.3414297103881836, "learning_rate": 7.683113386680053e-06, "loss": 0.026, "step": 15276 }, { "epoch": 14.689423076923077, "grad_norm": 0.7406747341156006, "learning_rate": 7.68190162437121e-06, "loss": 0.0035, "step": 15277 }, { "epoch": 14.690384615384616, "grad_norm": 0.14353519678115845, "learning_rate": 7.680689898032453e-06, "loss": 0.0006, "step": 15278 }, { "epoch": 14.691346153846155, "grad_norm": 0.08793634921312332, "learning_rate": 7.679478207682578e-06, "loss": 0.0008, "step": 15279 }, { "epoch": 14.692307692307692, "grad_norm": 3.715805768966675, "learning_rate": 7.678266553340393e-06, "loss": 0.0405, "step": 15280 }, { "epoch": 14.69326923076923, "grad_norm": 0.016884762793779373, "learning_rate": 7.677054935024696e-06, "loss": 0.0001, "step": 15281 }, { "epoch": 14.694230769230769, "grad_norm": 2.015239715576172, "learning_rate": 7.675843352754287e-06, "loss": 0.011, "step": 15282 }, { "epoch": 14.695192307692308, "grad_norm": 0.1609313189983368, "learning_rate": 7.67463180654797e-06, "loss": 0.0009, "step": 15283 }, { "epoch": 14.696153846153846, "grad_norm": 0.16196668148040771, "learning_rate": 7.673420296424541e-06, "loss": 0.0007, "step": 15284 }, { "epoch": 14.697115384615385, "grad_norm": 0.026867354288697243, "learning_rate": 7.672208822402802e-06, "loss": 0.0003, "step": 15285 }, { "epoch": 14.698076923076924, "grad_norm": 0.024912888184189796, "learning_rate": 7.670997384501548e-06, "loss": 0.0002, "step": 15286 }, { "epoch": 14.69903846153846, "grad_norm": 0.05130990222096443, "learning_rate": 7.669785982739582e-06, "loss": 0.0003, "step": 15287 }, { "epoch": 14.7, "grad_norm": 0.12054839730262756, "learning_rate": 7.668574617135698e-06, "loss": 0.0006, "step": 15288 }, { "epoch": 14.700961538461538, "grad_norm": 0.051461223512887955, "learning_rate": 7.667363287708692e-06, "loss": 0.0006, "step": 15289 }, { "epoch": 14.701923076923077, "grad_norm": 0.16984418034553528, "learning_rate": 7.66615199447736e-06, "loss": 0.0013, "step": 15290 }, { "epoch": 14.702884615384615, "grad_norm": 0.0500100702047348, "learning_rate": 7.664940737460502e-06, "loss": 0.0006, "step": 15291 }, { "epoch": 14.703846153846154, "grad_norm": 0.30584102869033813, "learning_rate": 7.663729516676911e-06, "loss": 0.001, "step": 15292 }, { "epoch": 14.704807692307693, "grad_norm": 1.6247478723526, "learning_rate": 7.662518332145377e-06, "loss": 0.0047, "step": 15293 }, { "epoch": 14.705769230769231, "grad_norm": 0.06058521196246147, "learning_rate": 7.661307183884701e-06, "loss": 0.0003, "step": 15294 }, { "epoch": 14.70673076923077, "grad_norm": 2.177191972732544, "learning_rate": 7.660096071913675e-06, "loss": 0.0239, "step": 15295 }, { "epoch": 14.707692307692307, "grad_norm": 1.7860885858535767, "learning_rate": 7.65888499625109e-06, "loss": 0.0217, "step": 15296 }, { "epoch": 14.708653846153846, "grad_norm": 2.7185418605804443, "learning_rate": 7.657673956915735e-06, "loss": 0.0127, "step": 15297 }, { "epoch": 14.709615384615384, "grad_norm": 0.02325497940182686, "learning_rate": 7.65646295392641e-06, "loss": 0.0003, "step": 15298 }, { "epoch": 14.710576923076923, "grad_norm": 0.024033136665821075, "learning_rate": 7.6552519873019e-06, "loss": 0.0002, "step": 15299 }, { "epoch": 14.711538461538462, "grad_norm": 2.4158706665039062, "learning_rate": 7.654041057060997e-06, "loss": 0.0163, "step": 15300 }, { "epoch": 14.7125, "grad_norm": 2.0865695476531982, "learning_rate": 7.65283016322249e-06, "loss": 0.0077, "step": 15301 }, { "epoch": 14.713461538461539, "grad_norm": 0.2247742861509323, "learning_rate": 7.651619305805172e-06, "loss": 0.0011, "step": 15302 }, { "epoch": 14.714423076923078, "grad_norm": 1.7506060600280762, "learning_rate": 7.650408484827831e-06, "loss": 0.0137, "step": 15303 }, { "epoch": 14.715384615384615, "grad_norm": 0.09737815707921982, "learning_rate": 7.649197700309251e-06, "loss": 0.0004, "step": 15304 }, { "epoch": 14.716346153846153, "grad_norm": 0.22813673317432404, "learning_rate": 7.647986952268224e-06, "loss": 0.0011, "step": 15305 }, { "epoch": 14.717307692307692, "grad_norm": 0.031749922782182693, "learning_rate": 7.646776240723539e-06, "loss": 0.0005, "step": 15306 }, { "epoch": 14.71826923076923, "grad_norm": 0.005795577075332403, "learning_rate": 7.64556556569398e-06, "loss": 0.0001, "step": 15307 }, { "epoch": 14.71923076923077, "grad_norm": 0.07759494334459305, "learning_rate": 7.644354927198329e-06, "loss": 0.0004, "step": 15308 }, { "epoch": 14.720192307692308, "grad_norm": 0.7499900460243225, "learning_rate": 7.64314432525538e-06, "loss": 0.0024, "step": 15309 }, { "epoch": 14.721153846153847, "grad_norm": 1.0810911655426025, "learning_rate": 7.641933759883913e-06, "loss": 0.0034, "step": 15310 }, { "epoch": 14.722115384615385, "grad_norm": 0.04817213863134384, "learning_rate": 7.640723231102712e-06, "loss": 0.0005, "step": 15311 }, { "epoch": 14.723076923076922, "grad_norm": 1.0582892894744873, "learning_rate": 7.639512738930562e-06, "loss": 0.0043, "step": 15312 }, { "epoch": 14.724038461538461, "grad_norm": 0.02063063532114029, "learning_rate": 7.638302283386248e-06, "loss": 0.0003, "step": 15313 }, { "epoch": 14.725, "grad_norm": 0.9507997035980225, "learning_rate": 7.63709186448855e-06, "loss": 0.0036, "step": 15314 }, { "epoch": 14.725961538461538, "grad_norm": 0.38102656602859497, "learning_rate": 7.63588148225625e-06, "loss": 0.0016, "step": 15315 }, { "epoch": 14.726923076923077, "grad_norm": 0.018697038292884827, "learning_rate": 7.634671136708133e-06, "loss": 0.0002, "step": 15316 }, { "epoch": 14.727884615384616, "grad_norm": 2.7213211059570312, "learning_rate": 7.633460827862977e-06, "loss": 0.013, "step": 15317 }, { "epoch": 14.728846153846154, "grad_norm": 0.008908652700483799, "learning_rate": 7.632250555739565e-06, "loss": 0.0001, "step": 15318 }, { "epoch": 14.729807692307693, "grad_norm": 0.01592911221086979, "learning_rate": 7.631040320356671e-06, "loss": 0.0002, "step": 15319 }, { "epoch": 14.73076923076923, "grad_norm": 3.1833441257476807, "learning_rate": 7.629830121733081e-06, "loss": 0.0221, "step": 15320 }, { "epoch": 14.731730769230769, "grad_norm": 0.23120446503162384, "learning_rate": 7.628619959887572e-06, "loss": 0.0012, "step": 15321 }, { "epoch": 14.732692307692307, "grad_norm": 0.025688352063298225, "learning_rate": 7.627409834838919e-06, "loss": 0.0002, "step": 15322 }, { "epoch": 14.733653846153846, "grad_norm": 0.03569621592760086, "learning_rate": 7.6261997466059035e-06, "loss": 0.0001, "step": 15323 }, { "epoch": 14.734615384615385, "grad_norm": 0.012828183360397816, "learning_rate": 7.624989695207301e-06, "loss": 0.0001, "step": 15324 }, { "epoch": 14.735576923076923, "grad_norm": 0.21916674077510834, "learning_rate": 7.623779680661888e-06, "loss": 0.0013, "step": 15325 }, { "epoch": 14.736538461538462, "grad_norm": 0.017261844128370285, "learning_rate": 7.622569702988438e-06, "loss": 0.0002, "step": 15326 }, { "epoch": 14.7375, "grad_norm": 1.0221326351165771, "learning_rate": 7.6213597622057315e-06, "loss": 0.0056, "step": 15327 }, { "epoch": 14.73846153846154, "grad_norm": 3.079942464828491, "learning_rate": 7.620149858332539e-06, "loss": 0.012, "step": 15328 }, { "epoch": 14.739423076923076, "grad_norm": 0.07190404087305069, "learning_rate": 7.618939991387636e-06, "loss": 0.0004, "step": 15329 }, { "epoch": 14.740384615384615, "grad_norm": 0.07168806344270706, "learning_rate": 7.617730161389794e-06, "loss": 0.0003, "step": 15330 }, { "epoch": 14.741346153846154, "grad_norm": 0.054099880158901215, "learning_rate": 7.61652036835779e-06, "loss": 0.0004, "step": 15331 }, { "epoch": 14.742307692307692, "grad_norm": 0.06822178512811661, "learning_rate": 7.615310612310396e-06, "loss": 0.0006, "step": 15332 }, { "epoch": 14.743269230769231, "grad_norm": 0.31048330664634705, "learning_rate": 7.6141008932663786e-06, "loss": 0.0014, "step": 15333 }, { "epoch": 14.74423076923077, "grad_norm": 0.028185173869132996, "learning_rate": 7.6128912112445155e-06, "loss": 0.0002, "step": 15334 }, { "epoch": 14.745192307692308, "grad_norm": 0.8495181202888489, "learning_rate": 7.6116815662635735e-06, "loss": 0.0022, "step": 15335 }, { "epoch": 14.746153846153845, "grad_norm": 0.3022833466529846, "learning_rate": 7.610471958342326e-06, "loss": 0.0014, "step": 15336 }, { "epoch": 14.747115384615384, "grad_norm": 0.01626870408654213, "learning_rate": 7.609262387499536e-06, "loss": 0.0002, "step": 15337 }, { "epoch": 14.748076923076923, "grad_norm": 0.028852829709649086, "learning_rate": 7.608052853753982e-06, "loss": 0.0003, "step": 15338 }, { "epoch": 14.749038461538461, "grad_norm": 1.0812960863113403, "learning_rate": 7.606843357124426e-06, "loss": 0.0047, "step": 15339 }, { "epoch": 14.75, "grad_norm": 0.022525373846292496, "learning_rate": 7.605633897629636e-06, "loss": 0.0002, "step": 15340 }, { "epoch": 14.750961538461539, "grad_norm": 0.1751413643360138, "learning_rate": 7.604424475288381e-06, "loss": 0.0006, "step": 15341 }, { "epoch": 14.751923076923077, "grad_norm": 0.9903155565261841, "learning_rate": 7.603215090119428e-06, "loss": 0.0073, "step": 15342 }, { "epoch": 14.752884615384616, "grad_norm": 1.6758840084075928, "learning_rate": 7.602005742141542e-06, "loss": 0.0135, "step": 15343 }, { "epoch": 14.753846153846155, "grad_norm": 0.01170576736330986, "learning_rate": 7.600796431373487e-06, "loss": 0.0002, "step": 15344 }, { "epoch": 14.754807692307692, "grad_norm": 4.0674028396606445, "learning_rate": 7.599587157834032e-06, "loss": 0.0718, "step": 15345 }, { "epoch": 14.75576923076923, "grad_norm": 1.9158703088760376, "learning_rate": 7.59837792154194e-06, "loss": 0.0108, "step": 15346 }, { "epoch": 14.756730769230769, "grad_norm": 0.04420952871441841, "learning_rate": 7.597168722515973e-06, "loss": 0.0002, "step": 15347 }, { "epoch": 14.757692307692308, "grad_norm": 0.33929651975631714, "learning_rate": 7.5959595607748925e-06, "loss": 0.0014, "step": 15348 }, { "epoch": 14.758653846153846, "grad_norm": 2.6545004844665527, "learning_rate": 7.594750436337467e-06, "loss": 0.0149, "step": 15349 }, { "epoch": 14.759615384615385, "grad_norm": 0.19823595881462097, "learning_rate": 7.593541349222456e-06, "loss": 0.001, "step": 15350 }, { "epoch": 14.760576923076924, "grad_norm": 0.12622541189193726, "learning_rate": 7.592332299448618e-06, "loss": 0.0004, "step": 15351 }, { "epoch": 14.76153846153846, "grad_norm": 1.023036241531372, "learning_rate": 7.591123287034716e-06, "loss": 0.0037, "step": 15352 }, { "epoch": 14.7625, "grad_norm": 0.11665015667676926, "learning_rate": 7.589914311999512e-06, "loss": 0.0008, "step": 15353 }, { "epoch": 14.763461538461538, "grad_norm": 0.8941678404808044, "learning_rate": 7.588705374361765e-06, "loss": 0.0034, "step": 15354 }, { "epoch": 14.764423076923077, "grad_norm": 0.061822179704904556, "learning_rate": 7.587496474140231e-06, "loss": 0.0003, "step": 15355 }, { "epoch": 14.765384615384615, "grad_norm": 0.2306499481201172, "learning_rate": 7.586287611353673e-06, "loss": 0.0011, "step": 15356 }, { "epoch": 14.766346153846154, "grad_norm": 0.052256979048252106, "learning_rate": 7.585078786020848e-06, "loss": 0.0004, "step": 15357 }, { "epoch": 14.767307692307693, "grad_norm": 2.8184635639190674, "learning_rate": 7.583869998160512e-06, "loss": 0.0326, "step": 15358 }, { "epoch": 14.768269230769231, "grad_norm": 0.009731294587254524, "learning_rate": 7.582661247791421e-06, "loss": 0.0001, "step": 15359 }, { "epoch": 14.76923076923077, "grad_norm": 0.10212405025959015, "learning_rate": 7.581452534932334e-06, "loss": 0.0006, "step": 15360 }, { "epoch": 14.770192307692307, "grad_norm": 0.123593769967556, "learning_rate": 7.580243859602006e-06, "loss": 0.0004, "step": 15361 }, { "epoch": 14.771153846153846, "grad_norm": 0.16297736763954163, "learning_rate": 7.579035221819188e-06, "loss": 0.0009, "step": 15362 }, { "epoch": 14.772115384615384, "grad_norm": 2.6998488903045654, "learning_rate": 7.57782662160264e-06, "loss": 0.0284, "step": 15363 }, { "epoch": 14.773076923076923, "grad_norm": 1.1309623718261719, "learning_rate": 7.5766180589711145e-06, "loss": 0.0068, "step": 15364 }, { "epoch": 14.774038461538462, "grad_norm": 0.15065954625606537, "learning_rate": 7.575409533943364e-06, "loss": 0.0009, "step": 15365 }, { "epoch": 14.775, "grad_norm": 1.7690157890319824, "learning_rate": 7.5742010465381395e-06, "loss": 0.0097, "step": 15366 }, { "epoch": 14.775961538461539, "grad_norm": 0.12836655974388123, "learning_rate": 7.572992596774198e-06, "loss": 0.0006, "step": 15367 }, { "epoch": 14.776923076923078, "grad_norm": 0.03639602288603783, "learning_rate": 7.571784184670288e-06, "loss": 0.0004, "step": 15368 }, { "epoch": 14.777884615384615, "grad_norm": 0.25991010665893555, "learning_rate": 7.57057581024516e-06, "loss": 0.0014, "step": 15369 }, { "epoch": 14.778846153846153, "grad_norm": 0.006360659375786781, "learning_rate": 7.569367473517561e-06, "loss": 0.0001, "step": 15370 }, { "epoch": 14.779807692307692, "grad_norm": 0.04295528307557106, "learning_rate": 7.568159174506246e-06, "loss": 0.0005, "step": 15371 }, { "epoch": 14.78076923076923, "grad_norm": 0.10193105787038803, "learning_rate": 7.566950913229967e-06, "loss": 0.0008, "step": 15372 }, { "epoch": 14.78173076923077, "grad_norm": 0.12561999261379242, "learning_rate": 7.5657426897074694e-06, "loss": 0.0008, "step": 15373 }, { "epoch": 14.782692307692308, "grad_norm": 0.06471864134073257, "learning_rate": 7.564534503957499e-06, "loss": 0.0005, "step": 15374 }, { "epoch": 14.783653846153847, "grad_norm": 0.5876480340957642, "learning_rate": 7.5633263559988035e-06, "loss": 0.0021, "step": 15375 }, { "epoch": 14.784615384615385, "grad_norm": 0.07693678140640259, "learning_rate": 7.562118245850134e-06, "loss": 0.0004, "step": 15376 }, { "epoch": 14.785576923076922, "grad_norm": 1.093519687652588, "learning_rate": 7.5609101735302344e-06, "loss": 0.0143, "step": 15377 }, { "epoch": 14.786538461538461, "grad_norm": 1.369428038597107, "learning_rate": 7.55970213905785e-06, "loss": 0.0057, "step": 15378 }, { "epoch": 14.7875, "grad_norm": 0.3032158613204956, "learning_rate": 7.558494142451724e-06, "loss": 0.0013, "step": 15379 }, { "epoch": 14.788461538461538, "grad_norm": 0.2649591863155365, "learning_rate": 7.557286183730606e-06, "loss": 0.0007, "step": 15380 }, { "epoch": 14.789423076923077, "grad_norm": 0.0768246129155159, "learning_rate": 7.556078262913236e-06, "loss": 0.0004, "step": 15381 }, { "epoch": 14.790384615384616, "grad_norm": 4.165951728820801, "learning_rate": 7.554870380018358e-06, "loss": 0.058, "step": 15382 }, { "epoch": 14.791346153846154, "grad_norm": 0.039924923330545425, "learning_rate": 7.553662535064715e-06, "loss": 0.0004, "step": 15383 }, { "epoch": 14.792307692307693, "grad_norm": 0.04527609422802925, "learning_rate": 7.552454728071051e-06, "loss": 0.0003, "step": 15384 }, { "epoch": 14.79326923076923, "grad_norm": 0.07242438942193985, "learning_rate": 7.551246959056106e-06, "loss": 0.0004, "step": 15385 }, { "epoch": 14.794230769230769, "grad_norm": 0.08918042480945587, "learning_rate": 7.550039228038619e-06, "loss": 0.0003, "step": 15386 }, { "epoch": 14.795192307692307, "grad_norm": 2.535348892211914, "learning_rate": 7.548831535037336e-06, "loss": 0.0417, "step": 15387 }, { "epoch": 14.796153846153846, "grad_norm": 0.07690232247114182, "learning_rate": 7.547623880070992e-06, "loss": 0.0004, "step": 15388 }, { "epoch": 14.797115384615385, "grad_norm": 0.08281567692756653, "learning_rate": 7.546416263158326e-06, "loss": 0.0003, "step": 15389 }, { "epoch": 14.798076923076923, "grad_norm": 1.2902085781097412, "learning_rate": 7.54520868431808e-06, "loss": 0.0101, "step": 15390 }, { "epoch": 14.799038461538462, "grad_norm": 0.015409918501973152, "learning_rate": 7.5440011435689915e-06, "loss": 0.0002, "step": 15391 }, { "epoch": 14.8, "grad_norm": 0.5400558114051819, "learning_rate": 7.542793640929796e-06, "loss": 0.0048, "step": 15392 }, { "epoch": 14.80096153846154, "grad_norm": 0.03950592875480652, "learning_rate": 7.541586176419231e-06, "loss": 0.0002, "step": 15393 }, { "epoch": 14.801923076923076, "grad_norm": 1.4752346277236938, "learning_rate": 7.540378750056033e-06, "loss": 0.0041, "step": 15394 }, { "epoch": 14.802884615384615, "grad_norm": 0.5055122375488281, "learning_rate": 7.539171361858939e-06, "loss": 0.0013, "step": 15395 }, { "epoch": 14.803846153846154, "grad_norm": 1.1135181188583374, "learning_rate": 7.537964011846684e-06, "loss": 0.0046, "step": 15396 }, { "epoch": 14.804807692307692, "grad_norm": 0.6387578845024109, "learning_rate": 7.536756700037998e-06, "loss": 0.0022, "step": 15397 }, { "epoch": 14.805769230769231, "grad_norm": 1.0478692054748535, "learning_rate": 7.535549426451621e-06, "loss": 0.0056, "step": 15398 }, { "epoch": 14.80673076923077, "grad_norm": 0.010300341062247753, "learning_rate": 7.534342191106284e-06, "loss": 0.0001, "step": 15399 }, { "epoch": 14.807692307692308, "grad_norm": 0.2708057165145874, "learning_rate": 7.533134994020716e-06, "loss": 0.0014, "step": 15400 }, { "epoch": 14.808653846153845, "grad_norm": 3.7591774463653564, "learning_rate": 7.531927835213657e-06, "loss": 0.054, "step": 15401 }, { "epoch": 14.809615384615384, "grad_norm": 0.7100303173065186, "learning_rate": 7.5307207147038345e-06, "loss": 0.0027, "step": 15402 }, { "epoch": 14.810576923076923, "grad_norm": 1.1640223264694214, "learning_rate": 7.529513632509977e-06, "loss": 0.0075, "step": 15403 }, { "epoch": 14.811538461538461, "grad_norm": 0.9841777086257935, "learning_rate": 7.528306588650816e-06, "loss": 0.0035, "step": 15404 }, { "epoch": 14.8125, "grad_norm": 2.7447469234466553, "learning_rate": 7.527099583145085e-06, "loss": 0.0126, "step": 15405 }, { "epoch": 14.813461538461539, "grad_norm": 1.9260534048080444, "learning_rate": 7.525892616011511e-06, "loss": 0.0082, "step": 15406 }, { "epoch": 14.814423076923077, "grad_norm": 0.00886554829776287, "learning_rate": 7.5246856872688216e-06, "loss": 0.0001, "step": 15407 }, { "epoch": 14.815384615384616, "grad_norm": 0.24248549342155457, "learning_rate": 7.5234787969357425e-06, "loss": 0.0012, "step": 15408 }, { "epoch": 14.816346153846155, "grad_norm": 0.013986732810735703, "learning_rate": 7.5222719450310075e-06, "loss": 0.0001, "step": 15409 }, { "epoch": 14.817307692307692, "grad_norm": 0.0628315731883049, "learning_rate": 7.5210651315733385e-06, "loss": 0.0005, "step": 15410 }, { "epoch": 14.81826923076923, "grad_norm": 2.1899003982543945, "learning_rate": 7.519858356581462e-06, "loss": 0.0216, "step": 15411 }, { "epoch": 14.819230769230769, "grad_norm": 0.013851160183548927, "learning_rate": 7.518651620074107e-06, "loss": 0.0001, "step": 15412 }, { "epoch": 14.820192307692308, "grad_norm": 2.4771034717559814, "learning_rate": 7.517444922069995e-06, "loss": 0.0256, "step": 15413 }, { "epoch": 14.821153846153846, "grad_norm": 0.03055937960743904, "learning_rate": 7.516238262587851e-06, "loss": 0.0002, "step": 15414 }, { "epoch": 14.822115384615385, "grad_norm": 0.015261114574968815, "learning_rate": 7.5150316416464e-06, "loss": 0.0002, "step": 15415 }, { "epoch": 14.823076923076924, "grad_norm": 1.1624842882156372, "learning_rate": 7.513825059264364e-06, "loss": 0.0038, "step": 15416 }, { "epoch": 14.82403846153846, "grad_norm": 0.01336741354316473, "learning_rate": 7.512618515460468e-06, "loss": 0.0001, "step": 15417 }, { "epoch": 14.825, "grad_norm": 0.5969736576080322, "learning_rate": 7.511412010253431e-06, "loss": 0.0027, "step": 15418 }, { "epoch": 14.825961538461538, "grad_norm": 0.24852880835533142, "learning_rate": 7.510205543661975e-06, "loss": 0.001, "step": 15419 }, { "epoch": 14.826923076923077, "grad_norm": 0.17406001687049866, "learning_rate": 7.508999115704824e-06, "loss": 0.0006, "step": 15420 }, { "epoch": 14.827884615384615, "grad_norm": 0.04776105657219887, "learning_rate": 7.507792726400694e-06, "loss": 0.0001, "step": 15421 }, { "epoch": 14.828846153846154, "grad_norm": 0.42613133788108826, "learning_rate": 7.506586375768304e-06, "loss": 0.0013, "step": 15422 }, { "epoch": 14.829807692307693, "grad_norm": 0.01820221170783043, "learning_rate": 7.505380063826379e-06, "loss": 0.0002, "step": 15423 }, { "epoch": 14.830769230769231, "grad_norm": 0.30453765392303467, "learning_rate": 7.504173790593633e-06, "loss": 0.0009, "step": 15424 }, { "epoch": 14.83173076923077, "grad_norm": 0.43590813875198364, "learning_rate": 7.502967556088785e-06, "loss": 0.0015, "step": 15425 }, { "epoch": 14.832692307692307, "grad_norm": 0.05449475347995758, "learning_rate": 7.50176136033055e-06, "loss": 0.0003, "step": 15426 }, { "epoch": 14.833653846153846, "grad_norm": 0.04100242629647255, "learning_rate": 7.500555203337647e-06, "loss": 0.0002, "step": 15427 }, { "epoch": 14.834615384615384, "grad_norm": 0.04653966799378395, "learning_rate": 7.4993490851287935e-06, "loss": 0.0002, "step": 15428 }, { "epoch": 14.835576923076923, "grad_norm": 0.10218023508787155, "learning_rate": 7.4981430057227015e-06, "loss": 0.0004, "step": 15429 }, { "epoch": 14.836538461538462, "grad_norm": 0.050658125430345535, "learning_rate": 7.496936965138085e-06, "loss": 0.0003, "step": 15430 }, { "epoch": 14.8375, "grad_norm": 0.015548123978078365, "learning_rate": 7.4957309633936635e-06, "loss": 0.0002, "step": 15431 }, { "epoch": 14.838461538461539, "grad_norm": 2.961777687072754, "learning_rate": 7.494525000508146e-06, "loss": 0.0322, "step": 15432 }, { "epoch": 14.839423076923078, "grad_norm": 0.02122524380683899, "learning_rate": 7.4933190765002476e-06, "loss": 0.0003, "step": 15433 }, { "epoch": 14.840384615384615, "grad_norm": 0.01169962901622057, "learning_rate": 7.492113191388679e-06, "loss": 0.0001, "step": 15434 }, { "epoch": 14.841346153846153, "grad_norm": 2.2480111122131348, "learning_rate": 7.490907345192156e-06, "loss": 0.0271, "step": 15435 }, { "epoch": 14.842307692307692, "grad_norm": 0.8109039068222046, "learning_rate": 7.489701537929384e-06, "loss": 0.0035, "step": 15436 }, { "epoch": 14.84326923076923, "grad_norm": 0.04992932826280594, "learning_rate": 7.488495769619077e-06, "loss": 0.0004, "step": 15437 }, { "epoch": 14.84423076923077, "grad_norm": 2.3629837036132812, "learning_rate": 7.487290040279944e-06, "loss": 0.0049, "step": 15438 }, { "epoch": 14.845192307692308, "grad_norm": 0.036530252546072006, "learning_rate": 7.486084349930698e-06, "loss": 0.0002, "step": 15439 }, { "epoch": 14.846153846153847, "grad_norm": 0.7996399998664856, "learning_rate": 7.48487869859004e-06, "loss": 0.0031, "step": 15440 }, { "epoch": 14.847115384615385, "grad_norm": 0.26242324709892273, "learning_rate": 7.483673086276686e-06, "loss": 0.0007, "step": 15441 }, { "epoch": 14.848076923076922, "grad_norm": 0.02758721075952053, "learning_rate": 7.4824675130093395e-06, "loss": 0.0002, "step": 15442 }, { "epoch": 14.849038461538461, "grad_norm": 0.05185622721910477, "learning_rate": 7.481261978806709e-06, "loss": 0.0003, "step": 15443 }, { "epoch": 14.85, "grad_norm": 1.2267770767211914, "learning_rate": 7.480056483687499e-06, "loss": 0.0027, "step": 15444 }, { "epoch": 14.850961538461538, "grad_norm": 0.23855601251125336, "learning_rate": 7.478851027670417e-06, "loss": 0.001, "step": 15445 }, { "epoch": 14.851923076923077, "grad_norm": 1.8807344436645508, "learning_rate": 7.477645610774168e-06, "loss": 0.0477, "step": 15446 }, { "epoch": 14.852884615384616, "grad_norm": 0.12805186212062836, "learning_rate": 7.476440233017457e-06, "loss": 0.0005, "step": 15447 }, { "epoch": 14.853846153846154, "grad_norm": 0.2673596441745758, "learning_rate": 7.475234894418984e-06, "loss": 0.0012, "step": 15448 }, { "epoch": 14.854807692307693, "grad_norm": 0.2539585530757904, "learning_rate": 7.474029594997456e-06, "loss": 0.0008, "step": 15449 }, { "epoch": 14.85576923076923, "grad_norm": 1.4543790817260742, "learning_rate": 7.472824334771578e-06, "loss": 0.0068, "step": 15450 }, { "epoch": 14.856730769230769, "grad_norm": 0.007481106091290712, "learning_rate": 7.471619113760044e-06, "loss": 0.0001, "step": 15451 }, { "epoch": 14.857692307692307, "grad_norm": 0.02718365006148815, "learning_rate": 7.470413931981566e-06, "loss": 0.0002, "step": 15452 }, { "epoch": 14.858653846153846, "grad_norm": 1.6753606796264648, "learning_rate": 7.469208789454838e-06, "loss": 0.0073, "step": 15453 }, { "epoch": 14.859615384615385, "grad_norm": 0.3319774568080902, "learning_rate": 7.468003686198562e-06, "loss": 0.0014, "step": 15454 }, { "epoch": 14.860576923076923, "grad_norm": 2.8233563899993896, "learning_rate": 7.4667986222314345e-06, "loss": 0.0105, "step": 15455 }, { "epoch": 14.861538461538462, "grad_norm": 1.918471336364746, "learning_rate": 7.46559359757216e-06, "loss": 0.0267, "step": 15456 }, { "epoch": 14.8625, "grad_norm": 0.054705798625946045, "learning_rate": 7.464388612239435e-06, "loss": 0.0003, "step": 15457 }, { "epoch": 14.86346153846154, "grad_norm": 2.172555923461914, "learning_rate": 7.4631836662519565e-06, "loss": 0.0449, "step": 15458 }, { "epoch": 14.864423076923076, "grad_norm": 1.8297125101089478, "learning_rate": 7.461978759628421e-06, "loss": 0.0077, "step": 15459 }, { "epoch": 14.865384615384615, "grad_norm": 0.8341392874717712, "learning_rate": 7.460773892387527e-06, "loss": 0.0052, "step": 15460 }, { "epoch": 14.866346153846154, "grad_norm": 1.939589500427246, "learning_rate": 7.459569064547969e-06, "loss": 0.0192, "step": 15461 }, { "epoch": 14.867307692307692, "grad_norm": 0.09284273535013199, "learning_rate": 7.458364276128442e-06, "loss": 0.0005, "step": 15462 }, { "epoch": 14.868269230769231, "grad_norm": 0.03853989392518997, "learning_rate": 7.4571595271476435e-06, "loss": 0.0003, "step": 15463 }, { "epoch": 14.86923076923077, "grad_norm": 1.903023362159729, "learning_rate": 7.455954817624267e-06, "loss": 0.0417, "step": 15464 }, { "epoch": 14.870192307692308, "grad_norm": 0.7677045464515686, "learning_rate": 7.454750147577004e-06, "loss": 0.0024, "step": 15465 }, { "epoch": 14.871153846153845, "grad_norm": 0.30486851930618286, "learning_rate": 7.4535455170245476e-06, "loss": 0.0012, "step": 15466 }, { "epoch": 14.872115384615384, "grad_norm": 0.24135378003120422, "learning_rate": 7.4523409259855905e-06, "loss": 0.0011, "step": 15467 }, { "epoch": 14.873076923076923, "grad_norm": 0.0783299058675766, "learning_rate": 7.451136374478827e-06, "loss": 0.0004, "step": 15468 }, { "epoch": 14.874038461538461, "grad_norm": 3.680246591567993, "learning_rate": 7.449931862522946e-06, "loss": 0.0528, "step": 15469 }, { "epoch": 14.875, "grad_norm": 3.4223389625549316, "learning_rate": 7.4487273901366366e-06, "loss": 0.0459, "step": 15470 }, { "epoch": 14.875961538461539, "grad_norm": 1.7014259099960327, "learning_rate": 7.447522957338591e-06, "loss": 0.0355, "step": 15471 }, { "epoch": 14.876923076923077, "grad_norm": 0.0773756355047226, "learning_rate": 7.446318564147499e-06, "loss": 0.0006, "step": 15472 }, { "epoch": 14.877884615384616, "grad_norm": 0.03261391445994377, "learning_rate": 7.445114210582044e-06, "loss": 0.0002, "step": 15473 }, { "epoch": 14.878846153846155, "grad_norm": 0.10002963989973068, "learning_rate": 7.4439098966609215e-06, "loss": 0.0005, "step": 15474 }, { "epoch": 14.879807692307692, "grad_norm": 0.05611233040690422, "learning_rate": 7.442705622402815e-06, "loss": 0.0005, "step": 15475 }, { "epoch": 14.88076923076923, "grad_norm": 0.02352171763777733, "learning_rate": 7.441501387826411e-06, "loss": 0.0003, "step": 15476 }, { "epoch": 14.881730769230769, "grad_norm": 0.8433123826980591, "learning_rate": 7.440297192950395e-06, "loss": 0.0042, "step": 15477 }, { "epoch": 14.882692307692308, "grad_norm": 0.07761072367429733, "learning_rate": 7.439093037793457e-06, "loss": 0.0003, "step": 15478 }, { "epoch": 14.883653846153846, "grad_norm": 0.10682833194732666, "learning_rate": 7.4378889223742766e-06, "loss": 0.0003, "step": 15479 }, { "epoch": 14.884615384615385, "grad_norm": 0.1422346830368042, "learning_rate": 7.43668484671154e-06, "loss": 0.0007, "step": 15480 }, { "epoch": 14.885576923076924, "grad_norm": 0.5343791246414185, "learning_rate": 7.4354808108239325e-06, "loss": 0.0019, "step": 15481 }, { "epoch": 14.88653846153846, "grad_norm": 0.07318461686372757, "learning_rate": 7.4342768147301366e-06, "loss": 0.0008, "step": 15482 }, { "epoch": 14.8875, "grad_norm": 1.8957406282424927, "learning_rate": 7.433072858448834e-06, "loss": 0.0101, "step": 15483 }, { "epoch": 14.888461538461538, "grad_norm": 1.71107816696167, "learning_rate": 7.431868941998706e-06, "loss": 0.0064, "step": 15484 }, { "epoch": 14.889423076923077, "grad_norm": 0.10718691349029541, "learning_rate": 7.430665065398435e-06, "loss": 0.0004, "step": 15485 }, { "epoch": 14.890384615384615, "grad_norm": 0.018178589642047882, "learning_rate": 7.4294612286667015e-06, "loss": 0.0002, "step": 15486 }, { "epoch": 14.891346153846154, "grad_norm": 0.0347016379237175, "learning_rate": 7.428257431822186e-06, "loss": 0.0003, "step": 15487 }, { "epoch": 14.892307692307693, "grad_norm": 0.0959462895989418, "learning_rate": 7.4270536748835645e-06, "loss": 0.0007, "step": 15488 }, { "epoch": 14.893269230769231, "grad_norm": 0.031380005180835724, "learning_rate": 7.42584995786952e-06, "loss": 0.0003, "step": 15489 }, { "epoch": 14.89423076923077, "grad_norm": 0.09443632513284683, "learning_rate": 7.424646280798731e-06, "loss": 0.0004, "step": 15490 }, { "epoch": 14.895192307692307, "grad_norm": 0.025556791573762894, "learning_rate": 7.423442643689869e-06, "loss": 0.0002, "step": 15491 }, { "epoch": 14.896153846153846, "grad_norm": 2.4538164138793945, "learning_rate": 7.422239046561619e-06, "loss": 0.0092, "step": 15492 }, { "epoch": 14.897115384615384, "grad_norm": 1.0465333461761475, "learning_rate": 7.421035489432652e-06, "loss": 0.0041, "step": 15493 }, { "epoch": 14.898076923076923, "grad_norm": 0.9116073250770569, "learning_rate": 7.419831972321645e-06, "loss": 0.0076, "step": 15494 }, { "epoch": 14.899038461538462, "grad_norm": 0.13196444511413574, "learning_rate": 7.418628495247271e-06, "loss": 0.0009, "step": 15495 }, { "epoch": 14.9, "grad_norm": 4.060264587402344, "learning_rate": 7.417425058228209e-06, "loss": 0.1136, "step": 15496 }, { "epoch": 14.900961538461539, "grad_norm": 0.05516175925731659, "learning_rate": 7.416221661283132e-06, "loss": 0.0005, "step": 15497 }, { "epoch": 14.901923076923078, "grad_norm": 0.7542485594749451, "learning_rate": 7.415018304430709e-06, "loss": 0.0055, "step": 15498 }, { "epoch": 14.902884615384615, "grad_norm": 0.6388163566589355, "learning_rate": 7.413814987689614e-06, "loss": 0.0015, "step": 15499 }, { "epoch": 14.903846153846153, "grad_norm": 0.014293487183749676, "learning_rate": 7.412611711078522e-06, "loss": 0.0002, "step": 15500 }, { "epoch": 14.904807692307692, "grad_norm": 0.08597806096076965, "learning_rate": 7.411408474616102e-06, "loss": 0.0005, "step": 15501 }, { "epoch": 14.90576923076923, "grad_norm": 0.051293641328811646, "learning_rate": 7.4102052783210235e-06, "loss": 0.0003, "step": 15502 }, { "epoch": 14.90673076923077, "grad_norm": 0.07348904013633728, "learning_rate": 7.409002122211961e-06, "loss": 0.0005, "step": 15503 }, { "epoch": 14.907692307692308, "grad_norm": 1.5249574184417725, "learning_rate": 7.4077990063075784e-06, "loss": 0.0127, "step": 15504 }, { "epoch": 14.908653846153847, "grad_norm": 0.022155623883008957, "learning_rate": 7.40659593062655e-06, "loss": 0.0002, "step": 15505 }, { "epoch": 14.909615384615385, "grad_norm": 0.02962328866124153, "learning_rate": 7.405392895187538e-06, "loss": 0.0002, "step": 15506 }, { "epoch": 14.910576923076922, "grad_norm": 0.38826337456703186, "learning_rate": 7.404189900009215e-06, "loss": 0.0016, "step": 15507 }, { "epoch": 14.911538461538461, "grad_norm": 0.22486697137355804, "learning_rate": 7.402986945110247e-06, "loss": 0.001, "step": 15508 }, { "epoch": 14.9125, "grad_norm": 0.8109511733055115, "learning_rate": 7.401784030509299e-06, "loss": 0.0028, "step": 15509 }, { "epoch": 14.913461538461538, "grad_norm": 0.1448570042848587, "learning_rate": 7.400581156225035e-06, "loss": 0.0006, "step": 15510 }, { "epoch": 14.914423076923077, "grad_norm": 1.324652910232544, "learning_rate": 7.399378322276125e-06, "loss": 0.0055, "step": 15511 }, { "epoch": 14.915384615384616, "grad_norm": 0.12413021177053452, "learning_rate": 7.3981755286812295e-06, "loss": 0.0006, "step": 15512 }, { "epoch": 14.916346153846154, "grad_norm": 0.34716275334358215, "learning_rate": 7.396972775459013e-06, "loss": 0.0017, "step": 15513 }, { "epoch": 14.917307692307693, "grad_norm": 1.79049551486969, "learning_rate": 7.395770062628139e-06, "loss": 0.009, "step": 15514 }, { "epoch": 14.91826923076923, "grad_norm": 0.026505732908844948, "learning_rate": 7.394567390207272e-06, "loss": 0.0003, "step": 15515 }, { "epoch": 14.919230769230769, "grad_norm": 0.019946640357375145, "learning_rate": 7.3933647582150715e-06, "loss": 0.0002, "step": 15516 }, { "epoch": 14.920192307692307, "grad_norm": 0.03949151560664177, "learning_rate": 7.392162166670198e-06, "loss": 0.0003, "step": 15517 }, { "epoch": 14.921153846153846, "grad_norm": 1.6719969511032104, "learning_rate": 7.390959615591315e-06, "loss": 0.0092, "step": 15518 }, { "epoch": 14.922115384615385, "grad_norm": 0.8491096496582031, "learning_rate": 7.389757104997084e-06, "loss": 0.0027, "step": 15519 }, { "epoch": 14.923076923076923, "grad_norm": 0.7619355916976929, "learning_rate": 7.388554634906158e-06, "loss": 0.0015, "step": 15520 }, { "epoch": 14.924038461538462, "grad_norm": 2.196641206741333, "learning_rate": 7.3873522053372e-06, "loss": 0.008, "step": 15521 }, { "epoch": 14.925, "grad_norm": 1.0335103273391724, "learning_rate": 7.3861498163088695e-06, "loss": 0.0035, "step": 15522 }, { "epoch": 14.92596153846154, "grad_norm": 0.3282398283481598, "learning_rate": 7.384947467839823e-06, "loss": 0.0023, "step": 15523 }, { "epoch": 14.926923076923076, "grad_norm": 0.021130573004484177, "learning_rate": 7.383745159948712e-06, "loss": 0.0002, "step": 15524 }, { "epoch": 14.927884615384615, "grad_norm": 0.045049652457237244, "learning_rate": 7.382542892654201e-06, "loss": 0.0003, "step": 15525 }, { "epoch": 14.928846153846154, "grad_norm": 0.028666311874985695, "learning_rate": 7.381340665974943e-06, "loss": 0.0003, "step": 15526 }, { "epoch": 14.929807692307692, "grad_norm": 1.9568464756011963, "learning_rate": 7.380138479929591e-06, "loss": 0.0188, "step": 15527 }, { "epoch": 14.930769230769231, "grad_norm": 0.328797310590744, "learning_rate": 7.378936334536799e-06, "loss": 0.0019, "step": 15528 }, { "epoch": 14.93173076923077, "grad_norm": 0.9537805318832397, "learning_rate": 7.377734229815225e-06, "loss": 0.0048, "step": 15529 }, { "epoch": 14.932692307692308, "grad_norm": 1.745723009109497, "learning_rate": 7.376532165783518e-06, "loss": 0.0062, "step": 15530 }, { "epoch": 14.933653846153845, "grad_norm": 0.0334174819290638, "learning_rate": 7.375330142460331e-06, "loss": 0.0003, "step": 15531 }, { "epoch": 14.934615384615384, "grad_norm": 0.05449386313557625, "learning_rate": 7.37412815986432e-06, "loss": 0.0003, "step": 15532 }, { "epoch": 14.935576923076923, "grad_norm": 1.3219050168991089, "learning_rate": 7.372926218014131e-06, "loss": 0.0057, "step": 15533 }, { "epoch": 14.936538461538461, "grad_norm": 0.04918617755174637, "learning_rate": 7.371724316928418e-06, "loss": 0.0005, "step": 15534 }, { "epoch": 14.9375, "grad_norm": 0.2964963912963867, "learning_rate": 7.370522456625827e-06, "loss": 0.0009, "step": 15535 }, { "epoch": 14.938461538461539, "grad_norm": 0.07021350413560867, "learning_rate": 7.369320637125013e-06, "loss": 0.0003, "step": 15536 }, { "epoch": 14.939423076923077, "grad_norm": 0.2610829174518585, "learning_rate": 7.3681188584446205e-06, "loss": 0.0005, "step": 15537 }, { "epoch": 14.940384615384616, "grad_norm": 1.2680326700210571, "learning_rate": 7.3669171206033e-06, "loss": 0.0048, "step": 15538 }, { "epoch": 14.941346153846155, "grad_norm": 0.19671013951301575, "learning_rate": 7.365715423619695e-06, "loss": 0.0009, "step": 15539 }, { "epoch": 14.942307692307692, "grad_norm": 0.03655929118394852, "learning_rate": 7.3645137675124575e-06, "loss": 0.0004, "step": 15540 }, { "epoch": 14.94326923076923, "grad_norm": 3.635246992111206, "learning_rate": 7.36331215230023e-06, "loss": 0.0291, "step": 15541 }, { "epoch": 14.944230769230769, "grad_norm": 0.05651745945215225, "learning_rate": 7.36211057800166e-06, "loss": 0.0004, "step": 15542 }, { "epoch": 14.945192307692308, "grad_norm": 1.9556089639663696, "learning_rate": 7.360909044635391e-06, "loss": 0.0599, "step": 15543 }, { "epoch": 14.946153846153846, "grad_norm": 0.07772467285394669, "learning_rate": 7.35970755222007e-06, "loss": 0.0004, "step": 15544 }, { "epoch": 14.947115384615385, "grad_norm": 1.4484021663665771, "learning_rate": 7.358506100774338e-06, "loss": 0.006, "step": 15545 }, { "epoch": 14.948076923076924, "grad_norm": 0.022321920841932297, "learning_rate": 7.357304690316836e-06, "loss": 0.0002, "step": 15546 }, { "epoch": 14.94903846153846, "grad_norm": 0.5789583325386047, "learning_rate": 7.356103320866211e-06, "loss": 0.002, "step": 15547 }, { "epoch": 14.95, "grad_norm": 0.08031051605939865, "learning_rate": 7.3549019924411035e-06, "loss": 0.0007, "step": 15548 }, { "epoch": 14.950961538461538, "grad_norm": 0.2123134881258011, "learning_rate": 7.353700705060154e-06, "loss": 0.0006, "step": 15549 }, { "epoch": 14.951923076923077, "grad_norm": 2.415670394897461, "learning_rate": 7.352499458742e-06, "loss": 0.0179, "step": 15550 }, { "epoch": 14.952884615384615, "grad_norm": 0.02075769007205963, "learning_rate": 7.351298253505285e-06, "loss": 0.0003, "step": 15551 }, { "epoch": 14.953846153846154, "grad_norm": 3.7261219024658203, "learning_rate": 7.350097089368648e-06, "loss": 0.0983, "step": 15552 }, { "epoch": 14.954807692307693, "grad_norm": 0.014278585091233253, "learning_rate": 7.348895966350727e-06, "loss": 0.0002, "step": 15553 }, { "epoch": 14.955769230769231, "grad_norm": 0.2711350619792938, "learning_rate": 7.3476948844701605e-06, "loss": 0.001, "step": 15554 }, { "epoch": 14.95673076923077, "grad_norm": 0.013822714798152447, "learning_rate": 7.3464938437455835e-06, "loss": 0.0001, "step": 15555 }, { "epoch": 14.957692307692307, "grad_norm": 0.06109710782766342, "learning_rate": 7.345292844195635e-06, "loss": 0.0006, "step": 15556 }, { "epoch": 14.958653846153846, "grad_norm": 0.6690741777420044, "learning_rate": 7.344091885838949e-06, "loss": 0.002, "step": 15557 }, { "epoch": 14.959615384615384, "grad_norm": 1.5327961444854736, "learning_rate": 7.342890968694163e-06, "loss": 0.0049, "step": 15558 }, { "epoch": 14.960576923076923, "grad_norm": 1.2721554040908813, "learning_rate": 7.341690092779909e-06, "loss": 0.0055, "step": 15559 }, { "epoch": 14.961538461538462, "grad_norm": 0.8315612077713013, "learning_rate": 7.3404892581148255e-06, "loss": 0.0033, "step": 15560 }, { "epoch": 14.9625, "grad_norm": 0.30150869488716125, "learning_rate": 7.33928846471754e-06, "loss": 0.0018, "step": 15561 }, { "epoch": 14.963461538461539, "grad_norm": 1.4356589317321777, "learning_rate": 7.33808771260669e-06, "loss": 0.0079, "step": 15562 }, { "epoch": 14.964423076923078, "grad_norm": 0.04005742445588112, "learning_rate": 7.3368870018009075e-06, "loss": 0.0003, "step": 15563 }, { "epoch": 14.965384615384615, "grad_norm": 2.133457899093628, "learning_rate": 7.335686332318819e-06, "loss": 0.0467, "step": 15564 }, { "epoch": 14.966346153846153, "grad_norm": 0.31922370195388794, "learning_rate": 7.3344857041790616e-06, "loss": 0.0008, "step": 15565 }, { "epoch": 14.967307692307692, "grad_norm": 0.7690572738647461, "learning_rate": 7.333285117400264e-06, "loss": 0.0026, "step": 15566 }, { "epoch": 14.96826923076923, "grad_norm": 0.030283192172646523, "learning_rate": 7.332084572001053e-06, "loss": 0.0003, "step": 15567 }, { "epoch": 14.96923076923077, "grad_norm": 0.28288987278938293, "learning_rate": 7.330884068000057e-06, "loss": 0.0009, "step": 15568 }, { "epoch": 14.970192307692308, "grad_norm": 0.08727619051933289, "learning_rate": 7.32968360541591e-06, "loss": 0.0004, "step": 15569 }, { "epoch": 14.971153846153847, "grad_norm": 0.014324881136417389, "learning_rate": 7.328483184267236e-06, "loss": 0.0001, "step": 15570 }, { "epoch": 14.972115384615385, "grad_norm": 2.5414764881134033, "learning_rate": 7.3272828045726595e-06, "loss": 0.0338, "step": 15571 }, { "epoch": 14.973076923076922, "grad_norm": 0.04693729802966118, "learning_rate": 7.326082466350811e-06, "loss": 0.0002, "step": 15572 }, { "epoch": 14.974038461538461, "grad_norm": 0.16627833247184753, "learning_rate": 7.324882169620315e-06, "loss": 0.0008, "step": 15573 }, { "epoch": 14.975, "grad_norm": 0.21653467416763306, "learning_rate": 7.323681914399798e-06, "loss": 0.0008, "step": 15574 }, { "epoch": 14.975961538461538, "grad_norm": 0.029377035796642303, "learning_rate": 7.32248170070788e-06, "loss": 0.0002, "step": 15575 }, { "epoch": 14.976923076923077, "grad_norm": 2.2101240158081055, "learning_rate": 7.321281528563189e-06, "loss": 0.0226, "step": 15576 }, { "epoch": 14.977884615384616, "grad_norm": 0.2480216771364212, "learning_rate": 7.320081397984347e-06, "loss": 0.001, "step": 15577 }, { "epoch": 14.978846153846154, "grad_norm": 0.024096619337797165, "learning_rate": 7.3188813089899756e-06, "loss": 0.0003, "step": 15578 }, { "epoch": 14.979807692307693, "grad_norm": 0.28429940342903137, "learning_rate": 7.3176812615986955e-06, "loss": 0.0017, "step": 15579 }, { "epoch": 14.98076923076923, "grad_norm": 0.24730494618415833, "learning_rate": 7.316481255829133e-06, "loss": 0.0012, "step": 15580 }, { "epoch": 14.981730769230769, "grad_norm": 0.3924543559551239, "learning_rate": 7.315281291699904e-06, "loss": 0.0041, "step": 15581 }, { "epoch": 14.982692307692307, "grad_norm": 0.08651728928089142, "learning_rate": 7.3140813692296286e-06, "loss": 0.0005, "step": 15582 }, { "epoch": 14.983653846153846, "grad_norm": 0.015625599771738052, "learning_rate": 7.312881488436928e-06, "loss": 0.0002, "step": 15583 }, { "epoch": 14.984615384615385, "grad_norm": 0.03740144520998001, "learning_rate": 7.3116816493404205e-06, "loss": 0.0003, "step": 15584 }, { "epoch": 14.985576923076923, "grad_norm": 0.04170966520905495, "learning_rate": 7.310481851958725e-06, "loss": 0.0003, "step": 15585 }, { "epoch": 14.986538461538462, "grad_norm": 0.1511983871459961, "learning_rate": 7.309282096310455e-06, "loss": 0.0008, "step": 15586 }, { "epoch": 14.9875, "grad_norm": 0.5502722859382629, "learning_rate": 7.308082382414231e-06, "loss": 0.0074, "step": 15587 }, { "epoch": 14.98846153846154, "grad_norm": 0.09763404726982117, "learning_rate": 7.306882710288668e-06, "loss": 0.0007, "step": 15588 }, { "epoch": 14.989423076923076, "grad_norm": 0.08783520013093948, "learning_rate": 7.305683079952381e-06, "loss": 0.0006, "step": 15589 }, { "epoch": 14.990384615384615, "grad_norm": 0.025176146999001503, "learning_rate": 7.304483491423984e-06, "loss": 0.0002, "step": 15590 }, { "epoch": 14.991346153846154, "grad_norm": 0.3812280595302582, "learning_rate": 7.3032839447220924e-06, "loss": 0.0012, "step": 15591 }, { "epoch": 14.992307692307692, "grad_norm": 0.1525849848985672, "learning_rate": 7.30208443986532e-06, "loss": 0.0008, "step": 15592 }, { "epoch": 14.993269230769231, "grad_norm": 0.09307863563299179, "learning_rate": 7.300884976872277e-06, "loss": 0.0003, "step": 15593 }, { "epoch": 14.99423076923077, "grad_norm": 0.019771823659539223, "learning_rate": 7.29968555576158e-06, "loss": 0.0002, "step": 15594 }, { "epoch": 14.995192307692308, "grad_norm": 2.3920488357543945, "learning_rate": 7.2984861765518355e-06, "loss": 0.0098, "step": 15595 }, { "epoch": 14.996153846153845, "grad_norm": 0.27659890055656433, "learning_rate": 7.297286839261659e-06, "loss": 0.0012, "step": 15596 }, { "epoch": 14.997115384615384, "grad_norm": 0.4793160855770111, "learning_rate": 7.2960875439096555e-06, "loss": 0.0008, "step": 15597 }, { "epoch": 14.998076923076923, "grad_norm": 0.09305481612682343, "learning_rate": 7.2948882905144394e-06, "loss": 0.0006, "step": 15598 }, { "epoch": 14.999038461538461, "grad_norm": 1.7712239027023315, "learning_rate": 7.293689079094617e-06, "loss": 0.0049, "step": 15599 }, { "epoch": 15.0, "grad_norm": 0.21824365854263306, "learning_rate": 7.292489909668798e-06, "loss": 0.0007, "step": 15600 }, { "epoch": 15.000961538461539, "grad_norm": 0.1490514725446701, "learning_rate": 7.291290782255587e-06, "loss": 0.0005, "step": 15601 }, { "epoch": 15.001923076923077, "grad_norm": 0.9181676506996155, "learning_rate": 7.290091696873596e-06, "loss": 0.0036, "step": 15602 }, { "epoch": 15.002884615384616, "grad_norm": 0.247524231672287, "learning_rate": 7.288892653541426e-06, "loss": 0.0014, "step": 15603 }, { "epoch": 15.003846153846155, "grad_norm": 0.038754746317863464, "learning_rate": 7.287693652277685e-06, "loss": 0.0002, "step": 15604 }, { "epoch": 15.004807692307692, "grad_norm": 1.5370033979415894, "learning_rate": 7.28649469310098e-06, "loss": 0.0091, "step": 15605 }, { "epoch": 15.00576923076923, "grad_norm": 0.0995100885629654, "learning_rate": 7.285295776029912e-06, "loss": 0.0005, "step": 15606 }, { "epoch": 15.006730769230769, "grad_norm": 0.10920707881450653, "learning_rate": 7.284096901083086e-06, "loss": 0.0007, "step": 15607 }, { "epoch": 15.007692307692308, "grad_norm": 0.026590047404170036, "learning_rate": 7.282898068279105e-06, "loss": 0.0003, "step": 15608 }, { "epoch": 15.008653846153846, "grad_norm": 1.3672854900360107, "learning_rate": 7.2816992776365714e-06, "loss": 0.0029, "step": 15609 }, { "epoch": 15.009615384615385, "grad_norm": 0.021007470786571503, "learning_rate": 7.280500529174088e-06, "loss": 0.0002, "step": 15610 }, { "epoch": 15.010576923076924, "grad_norm": 0.6873984932899475, "learning_rate": 7.279301822910253e-06, "loss": 0.0025, "step": 15611 }, { "epoch": 15.011538461538462, "grad_norm": 0.009994889609515667, "learning_rate": 7.27810315886367e-06, "loss": 0.0001, "step": 15612 }, { "epoch": 15.0125, "grad_norm": 0.7610943913459778, "learning_rate": 7.276904537052938e-06, "loss": 0.0045, "step": 15613 }, { "epoch": 15.013461538461538, "grad_norm": 0.013872439041733742, "learning_rate": 7.2757059574966535e-06, "loss": 0.0001, "step": 15614 }, { "epoch": 15.014423076923077, "grad_norm": 0.008225485682487488, "learning_rate": 7.2745074202134165e-06, "loss": 0.0001, "step": 15615 }, { "epoch": 15.015384615384615, "grad_norm": 0.015607627108693123, "learning_rate": 7.273308925221827e-06, "loss": 0.0002, "step": 15616 }, { "epoch": 15.016346153846154, "grad_norm": 1.5729219913482666, "learning_rate": 7.272110472540479e-06, "loss": 0.0311, "step": 15617 }, { "epoch": 15.017307692307693, "grad_norm": 0.014394889585673809, "learning_rate": 7.270912062187971e-06, "loss": 0.0001, "step": 15618 }, { "epoch": 15.018269230769231, "grad_norm": 0.03730722516775131, "learning_rate": 7.269713694182896e-06, "loss": 0.0002, "step": 15619 }, { "epoch": 15.01923076923077, "grad_norm": 0.029187237843871117, "learning_rate": 7.268515368543853e-06, "loss": 0.0003, "step": 15620 }, { "epoch": 15.020192307692307, "grad_norm": 0.04153851419687271, "learning_rate": 7.267317085289436e-06, "loss": 0.0002, "step": 15621 }, { "epoch": 15.021153846153846, "grad_norm": 0.15956556797027588, "learning_rate": 7.2661188444382345e-06, "loss": 0.0006, "step": 15622 }, { "epoch": 15.022115384615384, "grad_norm": 0.0223060492426157, "learning_rate": 7.264920646008846e-06, "loss": 0.0002, "step": 15623 }, { "epoch": 15.023076923076923, "grad_norm": 0.009381449781358242, "learning_rate": 7.2637224900198626e-06, "loss": 0.0001, "step": 15624 }, { "epoch": 15.024038461538462, "grad_norm": 0.020980851724743843, "learning_rate": 7.2625243764898746e-06, "loss": 0.0002, "step": 15625 }, { "epoch": 15.025, "grad_norm": 0.04640781134366989, "learning_rate": 7.2613263054374725e-06, "loss": 0.0003, "step": 15626 }, { "epoch": 15.025961538461539, "grad_norm": 0.06225711852312088, "learning_rate": 7.2601282768812495e-06, "loss": 0.0003, "step": 15627 }, { "epoch": 15.026923076923078, "grad_norm": 0.013242916204035282, "learning_rate": 7.2589302908397955e-06, "loss": 0.0001, "step": 15628 }, { "epoch": 15.027884615384615, "grad_norm": 0.01358302403241396, "learning_rate": 7.257732347331698e-06, "loss": 0.0001, "step": 15629 }, { "epoch": 15.028846153846153, "grad_norm": 1.0671247243881226, "learning_rate": 7.256534446375543e-06, "loss": 0.0052, "step": 15630 }, { "epoch": 15.029807692307692, "grad_norm": 0.2104010432958603, "learning_rate": 7.255336587989924e-06, "loss": 0.0004, "step": 15631 }, { "epoch": 15.03076923076923, "grad_norm": 0.013617056421935558, "learning_rate": 7.254138772193426e-06, "loss": 0.0001, "step": 15632 }, { "epoch": 15.03173076923077, "grad_norm": 0.12710651755332947, "learning_rate": 7.252940999004633e-06, "loss": 0.0006, "step": 15633 }, { "epoch": 15.032692307692308, "grad_norm": 0.20657561719417572, "learning_rate": 7.251743268442135e-06, "loss": 0.0011, "step": 15634 }, { "epoch": 15.033653846153847, "grad_norm": 0.0552142895758152, "learning_rate": 7.250545580524515e-06, "loss": 0.0003, "step": 15635 }, { "epoch": 15.034615384615385, "grad_norm": 0.018630750477313995, "learning_rate": 7.249347935270359e-06, "loss": 0.0001, "step": 15636 }, { "epoch": 15.035576923076922, "grad_norm": 0.046348389238119125, "learning_rate": 7.248150332698248e-06, "loss": 0.0004, "step": 15637 }, { "epoch": 15.036538461538461, "grad_norm": 0.04515988379716873, "learning_rate": 7.24695277282677e-06, "loss": 0.0002, "step": 15638 }, { "epoch": 15.0375, "grad_norm": 0.4302893579006195, "learning_rate": 7.245755255674503e-06, "loss": 0.001, "step": 15639 }, { "epoch": 15.038461538461538, "grad_norm": 0.011237493716180325, "learning_rate": 7.244557781260031e-06, "loss": 0.0001, "step": 15640 }, { "epoch": 15.039423076923077, "grad_norm": 0.02860238216817379, "learning_rate": 7.243360349601933e-06, "loss": 0.0002, "step": 15641 }, { "epoch": 15.040384615384616, "grad_norm": 0.6099168658256531, "learning_rate": 7.242162960718795e-06, "loss": 0.0013, "step": 15642 }, { "epoch": 15.041346153846154, "grad_norm": 0.04550248011946678, "learning_rate": 7.240965614629194e-06, "loss": 0.0003, "step": 15643 }, { "epoch": 15.042307692307693, "grad_norm": 0.010137481614947319, "learning_rate": 7.2397683113517055e-06, "loss": 0.0001, "step": 15644 }, { "epoch": 15.04326923076923, "grad_norm": 0.03318403288722038, "learning_rate": 7.238571050904914e-06, "loss": 0.0003, "step": 15645 }, { "epoch": 15.044230769230769, "grad_norm": 0.11641232669353485, "learning_rate": 7.2373738333073965e-06, "loss": 0.0005, "step": 15646 }, { "epoch": 15.045192307692307, "grad_norm": 0.0059193773195147514, "learning_rate": 7.236176658577727e-06, "loss": 0.0001, "step": 15647 }, { "epoch": 15.046153846153846, "grad_norm": 0.016475938260555267, "learning_rate": 7.234979526734482e-06, "loss": 0.0002, "step": 15648 }, { "epoch": 15.047115384615385, "grad_norm": 0.009492941200733185, "learning_rate": 7.233782437796243e-06, "loss": 0.0001, "step": 15649 }, { "epoch": 15.048076923076923, "grad_norm": 1.4163392782211304, "learning_rate": 7.232585391781581e-06, "loss": 0.0066, "step": 15650 }, { "epoch": 15.049038461538462, "grad_norm": 0.05757558345794678, "learning_rate": 7.231388388709072e-06, "loss": 0.0002, "step": 15651 }, { "epoch": 15.05, "grad_norm": 1.7498270273208618, "learning_rate": 7.230191428597287e-06, "loss": 0.0202, "step": 15652 }, { "epoch": 15.050961538461538, "grad_norm": 0.008055145852267742, "learning_rate": 7.228994511464804e-06, "loss": 0.0001, "step": 15653 }, { "epoch": 15.051923076923076, "grad_norm": 0.18414780497550964, "learning_rate": 7.227797637330193e-06, "loss": 0.0006, "step": 15654 }, { "epoch": 15.052884615384615, "grad_norm": 0.021973075345158577, "learning_rate": 7.226600806212024e-06, "loss": 0.0002, "step": 15655 }, { "epoch": 15.053846153846154, "grad_norm": 0.00906667672097683, "learning_rate": 7.225404018128873e-06, "loss": 0.0001, "step": 15656 }, { "epoch": 15.054807692307692, "grad_norm": 0.014674828387796879, "learning_rate": 7.2242072730993085e-06, "loss": 0.0001, "step": 15657 }, { "epoch": 15.055769230769231, "grad_norm": 0.010263524949550629, "learning_rate": 7.2230105711419e-06, "loss": 0.0001, "step": 15658 }, { "epoch": 15.05673076923077, "grad_norm": 0.008272574283182621, "learning_rate": 7.221813912275215e-06, "loss": 0.0001, "step": 15659 }, { "epoch": 15.057692307692308, "grad_norm": 0.005341912154108286, "learning_rate": 7.220617296517827e-06, "loss": 0.0001, "step": 15660 }, { "epoch": 15.058653846153845, "grad_norm": 0.02798738330602646, "learning_rate": 7.219420723888301e-06, "loss": 0.0002, "step": 15661 }, { "epoch": 15.059615384615384, "grad_norm": 0.013870816677808762, "learning_rate": 7.218224194405201e-06, "loss": 0.0002, "step": 15662 }, { "epoch": 15.060576923076923, "grad_norm": 0.07929203659296036, "learning_rate": 7.217027708087101e-06, "loss": 0.0003, "step": 15663 }, { "epoch": 15.061538461538461, "grad_norm": 0.12640921771526337, "learning_rate": 7.215831264952562e-06, "loss": 0.0004, "step": 15664 }, { "epoch": 15.0625, "grad_norm": 0.041829030960798264, "learning_rate": 7.21463486502015e-06, "loss": 0.0002, "step": 15665 }, { "epoch": 15.063461538461539, "grad_norm": 0.46471163630485535, "learning_rate": 7.213438508308429e-06, "loss": 0.0011, "step": 15666 }, { "epoch": 15.064423076923077, "grad_norm": 1.4861360788345337, "learning_rate": 7.212242194835965e-06, "loss": 0.0116, "step": 15667 }, { "epoch": 15.065384615384616, "grad_norm": 0.009002650156617165, "learning_rate": 7.21104592462132e-06, "loss": 0.0001, "step": 15668 }, { "epoch": 15.066346153846155, "grad_norm": 0.004519413225352764, "learning_rate": 7.209849697683058e-06, "loss": 0.0001, "step": 15669 }, { "epoch": 15.067307692307692, "grad_norm": 0.016222795471549034, "learning_rate": 7.2086535140397375e-06, "loss": 0.0001, "step": 15670 }, { "epoch": 15.06826923076923, "grad_norm": 0.02361381985247135, "learning_rate": 7.207457373709923e-06, "loss": 0.0002, "step": 15671 }, { "epoch": 15.069230769230769, "grad_norm": 0.11521802097558975, "learning_rate": 7.206261276712174e-06, "loss": 0.0006, "step": 15672 }, { "epoch": 15.070192307692308, "grad_norm": 0.011644218116998672, "learning_rate": 7.20506522306505e-06, "loss": 0.0001, "step": 15673 }, { "epoch": 15.071153846153846, "grad_norm": 0.05181960389018059, "learning_rate": 7.203869212787112e-06, "loss": 0.0005, "step": 15674 }, { "epoch": 15.072115384615385, "grad_norm": 0.3790765106678009, "learning_rate": 7.202673245896916e-06, "loss": 0.0016, "step": 15675 }, { "epoch": 15.073076923076924, "grad_norm": 0.8068870306015015, "learning_rate": 7.201477322413022e-06, "loss": 0.0215, "step": 15676 }, { "epoch": 15.074038461538462, "grad_norm": 0.01654251292347908, "learning_rate": 7.200281442353985e-06, "loss": 0.0001, "step": 15677 }, { "epoch": 15.075, "grad_norm": 3.3566689491271973, "learning_rate": 7.199085605738364e-06, "loss": 0.0263, "step": 15678 }, { "epoch": 15.075961538461538, "grad_norm": 1.1595524549484253, "learning_rate": 7.197889812584715e-06, "loss": 0.0168, "step": 15679 }, { "epoch": 15.076923076923077, "grad_norm": 0.12530817091464996, "learning_rate": 7.196694062911591e-06, "loss": 0.0004, "step": 15680 }, { "epoch": 15.077884615384615, "grad_norm": 0.007702053990215063, "learning_rate": 7.195498356737545e-06, "loss": 0.0001, "step": 15681 }, { "epoch": 15.078846153846154, "grad_norm": 2.200836658477783, "learning_rate": 7.194302694081137e-06, "loss": 0.0147, "step": 15682 }, { "epoch": 15.079807692307693, "grad_norm": 0.027058962732553482, "learning_rate": 7.1931070749609155e-06, "loss": 0.0002, "step": 15683 }, { "epoch": 15.080769230769231, "grad_norm": 0.03562741354107857, "learning_rate": 7.191911499395432e-06, "loss": 0.0002, "step": 15684 }, { "epoch": 15.08173076923077, "grad_norm": 0.007205648813396692, "learning_rate": 7.190715967403243e-06, "loss": 0.0001, "step": 15685 }, { "epoch": 15.082692307692307, "grad_norm": 0.009627239778637886, "learning_rate": 7.189520479002896e-06, "loss": 0.0001, "step": 15686 }, { "epoch": 15.083653846153846, "grad_norm": 0.03136923536658287, "learning_rate": 7.188325034212944e-06, "loss": 0.0001, "step": 15687 }, { "epoch": 15.084615384615384, "grad_norm": 1.8874380588531494, "learning_rate": 7.187129633051933e-06, "loss": 0.0074, "step": 15688 }, { "epoch": 15.085576923076923, "grad_norm": 0.08545934408903122, "learning_rate": 7.185934275538416e-06, "loss": 0.0003, "step": 15689 }, { "epoch": 15.086538461538462, "grad_norm": 0.028721431270241737, "learning_rate": 7.18473896169094e-06, "loss": 0.0002, "step": 15690 }, { "epoch": 15.0875, "grad_norm": 0.013050843961536884, "learning_rate": 7.183543691528052e-06, "loss": 0.0001, "step": 15691 }, { "epoch": 15.088461538461539, "grad_norm": 0.019631464034318924, "learning_rate": 7.1823484650682986e-06, "loss": 0.0002, "step": 15692 }, { "epoch": 15.089423076923078, "grad_norm": 0.03567332774400711, "learning_rate": 7.181153282330229e-06, "loss": 0.0002, "step": 15693 }, { "epoch": 15.090384615384615, "grad_norm": 3.4825737476348877, "learning_rate": 7.179958143332386e-06, "loss": 0.0593, "step": 15694 }, { "epoch": 15.091346153846153, "grad_norm": 0.04194783419370651, "learning_rate": 7.1787630480933155e-06, "loss": 0.0003, "step": 15695 }, { "epoch": 15.092307692307692, "grad_norm": 0.06376152485609055, "learning_rate": 7.177567996631564e-06, "loss": 0.0003, "step": 15696 }, { "epoch": 15.09326923076923, "grad_norm": 0.016500873491168022, "learning_rate": 7.176372988965673e-06, "loss": 0.0002, "step": 15697 }, { "epoch": 15.09423076923077, "grad_norm": 0.007234973832964897, "learning_rate": 7.1751780251141865e-06, "loss": 0.0001, "step": 15698 }, { "epoch": 15.095192307692308, "grad_norm": 0.020618850365281105, "learning_rate": 7.173983105095643e-06, "loss": 0.0002, "step": 15699 }, { "epoch": 15.096153846153847, "grad_norm": 0.017636077478528023, "learning_rate": 7.1727882289285915e-06, "loss": 0.0002, "step": 15700 }, { "epoch": 15.097115384615385, "grad_norm": 0.22672392427921295, "learning_rate": 7.1715933966315675e-06, "loss": 0.0006, "step": 15701 }, { "epoch": 15.098076923076922, "grad_norm": 0.012008086778223515, "learning_rate": 7.17039860822311e-06, "loss": 0.0001, "step": 15702 }, { "epoch": 15.099038461538461, "grad_norm": 0.009041210636496544, "learning_rate": 7.169203863721765e-06, "loss": 0.0001, "step": 15703 }, { "epoch": 15.1, "grad_norm": 0.031809549778699875, "learning_rate": 7.1680091631460676e-06, "loss": 0.0002, "step": 15704 }, { "epoch": 15.100961538461538, "grad_norm": 0.035396069288253784, "learning_rate": 7.166814506514555e-06, "loss": 0.0002, "step": 15705 }, { "epoch": 15.101923076923077, "grad_norm": 0.04369840398430824, "learning_rate": 7.1656198938457655e-06, "loss": 0.0002, "step": 15706 }, { "epoch": 15.102884615384616, "grad_norm": 0.022785793989896774, "learning_rate": 7.1644253251582374e-06, "loss": 0.0002, "step": 15707 }, { "epoch": 15.103846153846154, "grad_norm": 2.2976086139678955, "learning_rate": 7.163230800470507e-06, "loss": 0.0554, "step": 15708 }, { "epoch": 15.104807692307693, "grad_norm": 0.028741583228111267, "learning_rate": 7.162036319801106e-06, "loss": 0.0002, "step": 15709 }, { "epoch": 15.10576923076923, "grad_norm": 0.042985763400793076, "learning_rate": 7.160841883168574e-06, "loss": 0.0004, "step": 15710 }, { "epoch": 15.106730769230769, "grad_norm": 0.2008475959300995, "learning_rate": 7.159647490591443e-06, "loss": 0.0009, "step": 15711 }, { "epoch": 15.107692307692307, "grad_norm": 0.01997470296919346, "learning_rate": 7.158453142088246e-06, "loss": 0.0001, "step": 15712 }, { "epoch": 15.108653846153846, "grad_norm": 0.012083955109119415, "learning_rate": 7.157258837677514e-06, "loss": 0.0001, "step": 15713 }, { "epoch": 15.109615384615385, "grad_norm": 0.1610361635684967, "learning_rate": 7.156064577377786e-06, "loss": 0.0006, "step": 15714 }, { "epoch": 15.110576923076923, "grad_norm": 0.05806930363178253, "learning_rate": 7.154870361207587e-06, "loss": 0.0003, "step": 15715 }, { "epoch": 15.111538461538462, "grad_norm": 0.04309207573533058, "learning_rate": 7.15367618918545e-06, "loss": 0.0002, "step": 15716 }, { "epoch": 15.1125, "grad_norm": 0.022184092551469803, "learning_rate": 7.152482061329903e-06, "loss": 0.0004, "step": 15717 }, { "epoch": 15.113461538461538, "grad_norm": 0.045670561492443085, "learning_rate": 7.151287977659479e-06, "loss": 0.0002, "step": 15718 }, { "epoch": 15.114423076923076, "grad_norm": 0.2102755606174469, "learning_rate": 7.1500939381927044e-06, "loss": 0.0011, "step": 15719 }, { "epoch": 15.115384615384615, "grad_norm": 0.014992466196417809, "learning_rate": 7.148899942948108e-06, "loss": 0.0001, "step": 15720 }, { "epoch": 15.116346153846154, "grad_norm": 0.05555359274148941, "learning_rate": 7.1477059919442135e-06, "loss": 0.0003, "step": 15721 }, { "epoch": 15.117307692307692, "grad_norm": 0.016086002811789513, "learning_rate": 7.146512085199554e-06, "loss": 0.0001, "step": 15722 }, { "epoch": 15.118269230769231, "grad_norm": 3.7117323875427246, "learning_rate": 7.1453182227326516e-06, "loss": 0.0274, "step": 15723 }, { "epoch": 15.11923076923077, "grad_norm": 0.011488431133329868, "learning_rate": 7.144124404562029e-06, "loss": 0.0001, "step": 15724 }, { "epoch": 15.120192307692308, "grad_norm": 0.4576835632324219, "learning_rate": 7.142930630706217e-06, "loss": 0.0011, "step": 15725 }, { "epoch": 15.121153846153845, "grad_norm": 0.007884186692535877, "learning_rate": 7.1417369011837355e-06, "loss": 0.0001, "step": 15726 }, { "epoch": 15.122115384615384, "grad_norm": 0.05736108124256134, "learning_rate": 7.140543216013109e-06, "loss": 0.0002, "step": 15727 }, { "epoch": 15.123076923076923, "grad_norm": 1.5715173482894897, "learning_rate": 7.139349575212857e-06, "loss": 0.0043, "step": 15728 }, { "epoch": 15.124038461538461, "grad_norm": 0.023949094116687775, "learning_rate": 7.1381559788015065e-06, "loss": 0.0002, "step": 15729 }, { "epoch": 15.125, "grad_norm": 0.11545062065124512, "learning_rate": 7.136962426797574e-06, "loss": 0.0004, "step": 15730 }, { "epoch": 15.125961538461539, "grad_norm": 0.06088058650493622, "learning_rate": 7.135768919219583e-06, "loss": 0.0005, "step": 15731 }, { "epoch": 15.126923076923077, "grad_norm": 1.1399967670440674, "learning_rate": 7.13457545608605e-06, "loss": 0.0062, "step": 15732 }, { "epoch": 15.127884615384616, "grad_norm": 0.010057582519948483, "learning_rate": 7.133382037415497e-06, "loss": 0.0001, "step": 15733 }, { "epoch": 15.128846153846155, "grad_norm": 1.6175670623779297, "learning_rate": 7.1321886632264425e-06, "loss": 0.0062, "step": 15734 }, { "epoch": 15.129807692307692, "grad_norm": 0.010041228495538235, "learning_rate": 7.130995333537401e-06, "loss": 0.0001, "step": 15735 }, { "epoch": 15.13076923076923, "grad_norm": 0.004853381775319576, "learning_rate": 7.129802048366894e-06, "loss": 0.0001, "step": 15736 }, { "epoch": 15.131730769230769, "grad_norm": 1.077465534210205, "learning_rate": 7.128608807733434e-06, "loss": 0.0052, "step": 15737 }, { "epoch": 15.132692307692308, "grad_norm": 0.026740891858935356, "learning_rate": 7.127415611655538e-06, "loss": 0.0002, "step": 15738 }, { "epoch": 15.133653846153846, "grad_norm": 0.7163619995117188, "learning_rate": 7.126222460151719e-06, "loss": 0.0018, "step": 15739 }, { "epoch": 15.134615384615385, "grad_norm": 0.041559167206287384, "learning_rate": 7.125029353240496e-06, "loss": 0.0002, "step": 15740 }, { "epoch": 15.135576923076924, "grad_norm": 3.344637870788574, "learning_rate": 7.123836290940378e-06, "loss": 0.0832, "step": 15741 }, { "epoch": 15.136538461538462, "grad_norm": 0.0555526502430439, "learning_rate": 7.122643273269878e-06, "loss": 0.0002, "step": 15742 }, { "epoch": 15.1375, "grad_norm": 0.27958646416664124, "learning_rate": 7.12145030024751e-06, "loss": 0.0009, "step": 15743 }, { "epoch": 15.138461538461538, "grad_norm": 0.8895354866981506, "learning_rate": 7.120257371891787e-06, "loss": 0.0026, "step": 15744 }, { "epoch": 15.139423076923077, "grad_norm": 0.03829162195324898, "learning_rate": 7.119064488221217e-06, "loss": 0.0003, "step": 15745 }, { "epoch": 15.140384615384615, "grad_norm": 0.3916740119457245, "learning_rate": 7.117871649254309e-06, "loss": 0.0013, "step": 15746 }, { "epoch": 15.141346153846154, "grad_norm": 0.02067578211426735, "learning_rate": 7.116678855009576e-06, "loss": 0.0002, "step": 15747 }, { "epoch": 15.142307692307693, "grad_norm": 0.017734302207827568, "learning_rate": 7.115486105505523e-06, "loss": 0.0002, "step": 15748 }, { "epoch": 15.143269230769231, "grad_norm": 0.040483515709638596, "learning_rate": 7.114293400760661e-06, "loss": 0.0004, "step": 15749 }, { "epoch": 15.14423076923077, "grad_norm": 0.07756611704826355, "learning_rate": 7.113100740793495e-06, "loss": 0.0005, "step": 15750 }, { "epoch": 15.145192307692307, "grad_norm": 0.6519725918769836, "learning_rate": 7.111908125622533e-06, "loss": 0.0017, "step": 15751 }, { "epoch": 15.146153846153846, "grad_norm": 4.49077844619751, "learning_rate": 7.110715555266281e-06, "loss": 0.0385, "step": 15752 }, { "epoch": 15.147115384615384, "grad_norm": 0.039012134075164795, "learning_rate": 7.109523029743242e-06, "loss": 0.0002, "step": 15753 }, { "epoch": 15.148076923076923, "grad_norm": 0.10075010359287262, "learning_rate": 7.108330549071922e-06, "loss": 0.0005, "step": 15754 }, { "epoch": 15.149038461538462, "grad_norm": 0.025713426992297173, "learning_rate": 7.107138113270828e-06, "loss": 0.0003, "step": 15755 }, { "epoch": 15.15, "grad_norm": 0.1212630346417427, "learning_rate": 7.105945722358458e-06, "loss": 0.0005, "step": 15756 }, { "epoch": 15.150961538461539, "grad_norm": 0.21777255833148956, "learning_rate": 7.104753376353315e-06, "loss": 0.001, "step": 15757 }, { "epoch": 15.151923076923078, "grad_norm": 0.03216901049017906, "learning_rate": 7.103561075273905e-06, "loss": 0.0002, "step": 15758 }, { "epoch": 15.152884615384615, "grad_norm": 0.7434823513031006, "learning_rate": 7.102368819138725e-06, "loss": 0.0031, "step": 15759 }, { "epoch": 15.153846153846153, "grad_norm": 0.5844968557357788, "learning_rate": 7.101176607966278e-06, "loss": 0.0022, "step": 15760 }, { "epoch": 15.154807692307692, "grad_norm": 1.0233834981918335, "learning_rate": 7.0999844417750584e-06, "loss": 0.0132, "step": 15761 }, { "epoch": 15.15576923076923, "grad_norm": 0.07902760803699493, "learning_rate": 7.098792320583572e-06, "loss": 0.0005, "step": 15762 }, { "epoch": 15.15673076923077, "grad_norm": 0.033618777990341187, "learning_rate": 7.097600244410313e-06, "loss": 0.0003, "step": 15763 }, { "epoch": 15.157692307692308, "grad_norm": 0.1090148538351059, "learning_rate": 7.0964082132737775e-06, "loss": 0.0007, "step": 15764 }, { "epoch": 15.158653846153847, "grad_norm": 0.9171453714370728, "learning_rate": 7.095216227192467e-06, "loss": 0.0047, "step": 15765 }, { "epoch": 15.159615384615385, "grad_norm": 0.05067302659153938, "learning_rate": 7.094024286184876e-06, "loss": 0.0004, "step": 15766 }, { "epoch": 15.160576923076922, "grad_norm": 0.057574715465307236, "learning_rate": 7.0928323902695e-06, "loss": 0.0003, "step": 15767 }, { "epoch": 15.161538461538461, "grad_norm": 0.022749820724129677, "learning_rate": 7.091640539464828e-06, "loss": 0.0001, "step": 15768 }, { "epoch": 15.1625, "grad_norm": 0.01663735881447792, "learning_rate": 7.090448733789362e-06, "loss": 0.0001, "step": 15769 }, { "epoch": 15.163461538461538, "grad_norm": 0.016187623143196106, "learning_rate": 7.089256973261593e-06, "loss": 0.0001, "step": 15770 }, { "epoch": 15.164423076923077, "grad_norm": 0.23930290341377258, "learning_rate": 7.088065257900012e-06, "loss": 0.0008, "step": 15771 }, { "epoch": 15.165384615384616, "grad_norm": 0.1159038096666336, "learning_rate": 7.086873587723111e-06, "loss": 0.0006, "step": 15772 }, { "epoch": 15.166346153846154, "grad_norm": 0.013053059577941895, "learning_rate": 7.085681962749382e-06, "loss": 0.0002, "step": 15773 }, { "epoch": 15.167307692307693, "grad_norm": 0.4922848641872406, "learning_rate": 7.0844903829973175e-06, "loss": 0.0015, "step": 15774 }, { "epoch": 15.16826923076923, "grad_norm": 0.019817933440208435, "learning_rate": 7.083298848485403e-06, "loss": 0.0001, "step": 15775 }, { "epoch": 15.169230769230769, "grad_norm": 0.05331306532025337, "learning_rate": 7.082107359232131e-06, "loss": 0.0005, "step": 15776 }, { "epoch": 15.170192307692307, "grad_norm": 0.6161525845527649, "learning_rate": 7.080915915255989e-06, "loss": 0.0022, "step": 15777 }, { "epoch": 15.171153846153846, "grad_norm": 0.009557821787893772, "learning_rate": 7.0797245165754654e-06, "loss": 0.0001, "step": 15778 }, { "epoch": 15.172115384615385, "grad_norm": 0.013197208754718304, "learning_rate": 7.078533163209043e-06, "loss": 0.0001, "step": 15779 }, { "epoch": 15.173076923076923, "grad_norm": 0.042282018810510635, "learning_rate": 7.0773418551752146e-06, "loss": 0.0004, "step": 15780 }, { "epoch": 15.174038461538462, "grad_norm": 1.0456281900405884, "learning_rate": 7.076150592492464e-06, "loss": 0.0024, "step": 15781 }, { "epoch": 15.175, "grad_norm": 0.03393922746181488, "learning_rate": 7.0749593751792726e-06, "loss": 0.0003, "step": 15782 }, { "epoch": 15.175961538461538, "grad_norm": 0.3832018971443176, "learning_rate": 7.073768203254126e-06, "loss": 0.002, "step": 15783 }, { "epoch": 15.176923076923076, "grad_norm": 0.13489702343940735, "learning_rate": 7.072577076735509e-06, "loss": 0.0004, "step": 15784 }, { "epoch": 15.177884615384615, "grad_norm": 1.219714879989624, "learning_rate": 7.071385995641905e-06, "loss": 0.0049, "step": 15785 }, { "epoch": 15.178846153846154, "grad_norm": 1.1456937789916992, "learning_rate": 7.070194959991793e-06, "loss": 0.0651, "step": 15786 }, { "epoch": 15.179807692307692, "grad_norm": 0.1985624134540558, "learning_rate": 7.069003969803658e-06, "loss": 0.0007, "step": 15787 }, { "epoch": 15.180769230769231, "grad_norm": 0.20671942830085754, "learning_rate": 7.0678130250959795e-06, "loss": 0.0005, "step": 15788 }, { "epoch": 15.18173076923077, "grad_norm": 0.26603424549102783, "learning_rate": 7.066622125887237e-06, "loss": 0.0006, "step": 15789 }, { "epoch": 15.182692307692308, "grad_norm": 0.017647573724389076, "learning_rate": 7.065431272195906e-06, "loss": 0.0001, "step": 15790 }, { "epoch": 15.183653846153845, "grad_norm": 0.016743870452046394, "learning_rate": 7.064240464040472e-06, "loss": 0.0002, "step": 15791 }, { "epoch": 15.184615384615384, "grad_norm": 0.08519043773412704, "learning_rate": 7.063049701439411e-06, "loss": 0.0004, "step": 15792 }, { "epoch": 15.185576923076923, "grad_norm": 1.399357795715332, "learning_rate": 7.061858984411196e-06, "loss": 0.0461, "step": 15793 }, { "epoch": 15.186538461538461, "grad_norm": 0.21319986879825592, "learning_rate": 7.060668312974308e-06, "loss": 0.0021, "step": 15794 }, { "epoch": 15.1875, "grad_norm": 0.02591804228723049, "learning_rate": 7.05947768714722e-06, "loss": 0.0002, "step": 15795 }, { "epoch": 15.188461538461539, "grad_norm": 0.024891434237360954, "learning_rate": 7.0582871069484095e-06, "loss": 0.0002, "step": 15796 }, { "epoch": 15.189423076923077, "grad_norm": 0.5316495299339294, "learning_rate": 7.057096572396347e-06, "loss": 0.002, "step": 15797 }, { "epoch": 15.190384615384616, "grad_norm": 1.5915772914886475, "learning_rate": 7.05590608350951e-06, "loss": 0.0605, "step": 15798 }, { "epoch": 15.191346153846155, "grad_norm": 0.010731881484389305, "learning_rate": 7.054715640306369e-06, "loss": 0.0001, "step": 15799 }, { "epoch": 15.192307692307692, "grad_norm": 0.04987318441271782, "learning_rate": 7.053525242805399e-06, "loss": 0.0002, "step": 15800 }, { "epoch": 15.19326923076923, "grad_norm": 2.1160714626312256, "learning_rate": 7.052334891025065e-06, "loss": 0.0095, "step": 15801 }, { "epoch": 15.194230769230769, "grad_norm": 0.7863094806671143, "learning_rate": 7.051144584983846e-06, "loss": 0.0067, "step": 15802 }, { "epoch": 15.195192307692308, "grad_norm": 0.01612604595720768, "learning_rate": 7.049954324700208e-06, "loss": 0.0001, "step": 15803 }, { "epoch": 15.196153846153846, "grad_norm": 0.01472235843539238, "learning_rate": 7.048764110192618e-06, "loss": 0.0002, "step": 15804 }, { "epoch": 15.197115384615385, "grad_norm": 0.026275211945176125, "learning_rate": 7.047573941479549e-06, "loss": 0.0003, "step": 15805 }, { "epoch": 15.198076923076924, "grad_norm": 2.18276047706604, "learning_rate": 7.0463838185794675e-06, "loss": 0.0055, "step": 15806 }, { "epoch": 15.199038461538462, "grad_norm": 0.007095732726156712, "learning_rate": 7.0451937415108405e-06, "loss": 0.0001, "step": 15807 }, { "epoch": 15.2, "grad_norm": 0.010159862227737904, "learning_rate": 7.044003710292133e-06, "loss": 0.0001, "step": 15808 }, { "epoch": 15.200961538461538, "grad_norm": 0.01900048740208149, "learning_rate": 7.0428137249418125e-06, "loss": 0.0002, "step": 15809 }, { "epoch": 15.201923076923077, "grad_norm": 0.009115418419241905, "learning_rate": 7.041623785478346e-06, "loss": 0.0001, "step": 15810 }, { "epoch": 15.202884615384615, "grad_norm": 0.3906722366809845, "learning_rate": 7.0404338919201935e-06, "loss": 0.0016, "step": 15811 }, { "epoch": 15.203846153846154, "grad_norm": 3.3656668663024902, "learning_rate": 7.03924404428582e-06, "loss": 0.0809, "step": 15812 }, { "epoch": 15.204807692307693, "grad_norm": 0.01662263832986355, "learning_rate": 7.038054242593691e-06, "loss": 0.0001, "step": 15813 }, { "epoch": 15.205769230769231, "grad_norm": 0.4513927400112152, "learning_rate": 7.036864486862267e-06, "loss": 0.0019, "step": 15814 }, { "epoch": 15.20673076923077, "grad_norm": 1.5690605640411377, "learning_rate": 7.035674777110006e-06, "loss": 0.034, "step": 15815 }, { "epoch": 15.207692307692307, "grad_norm": 0.022446421906352043, "learning_rate": 7.034485113355376e-06, "loss": 0.0002, "step": 15816 }, { "epoch": 15.208653846153846, "grad_norm": 0.03323753550648689, "learning_rate": 7.033295495616834e-06, "loss": 0.0003, "step": 15817 }, { "epoch": 15.209615384615384, "grad_norm": 0.004096910357475281, "learning_rate": 7.032105923912838e-06, "loss": 0.0, "step": 15818 }, { "epoch": 15.210576923076923, "grad_norm": 1.491722583770752, "learning_rate": 7.0309163982618444e-06, "loss": 0.0047, "step": 15819 }, { "epoch": 15.211538461538462, "grad_norm": 0.023069629445672035, "learning_rate": 7.029726918682317e-06, "loss": 0.0003, "step": 15820 }, { "epoch": 15.2125, "grad_norm": 2.318140745162964, "learning_rate": 7.02853748519271e-06, "loss": 0.0219, "step": 15821 }, { "epoch": 15.213461538461539, "grad_norm": 0.027663325890898705, "learning_rate": 7.0273480978114795e-06, "loss": 0.0002, "step": 15822 }, { "epoch": 15.214423076923078, "grad_norm": 0.018190287053585052, "learning_rate": 7.026158756557081e-06, "loss": 0.0002, "step": 15823 }, { "epoch": 15.215384615384615, "grad_norm": 0.02973189949989319, "learning_rate": 7.024969461447973e-06, "loss": 0.0002, "step": 15824 }, { "epoch": 15.216346153846153, "grad_norm": 0.12060870230197906, "learning_rate": 7.023780212502604e-06, "loss": 0.0004, "step": 15825 }, { "epoch": 15.217307692307692, "grad_norm": 2.3487961292266846, "learning_rate": 7.022591009739432e-06, "loss": 0.0096, "step": 15826 }, { "epoch": 15.21826923076923, "grad_norm": 0.01797773316502571, "learning_rate": 7.0214018531769105e-06, "loss": 0.0002, "step": 15827 }, { "epoch": 15.21923076923077, "grad_norm": 0.20295213162899017, "learning_rate": 7.02021274283349e-06, "loss": 0.0005, "step": 15828 }, { "epoch": 15.220192307692308, "grad_norm": 0.021972937509417534, "learning_rate": 7.019023678727622e-06, "loss": 0.0002, "step": 15829 }, { "epoch": 15.221153846153847, "grad_norm": 0.01369753759354353, "learning_rate": 7.017834660877756e-06, "loss": 0.0001, "step": 15830 }, { "epoch": 15.222115384615385, "grad_norm": 3.2825942039489746, "learning_rate": 7.016645689302345e-06, "loss": 0.018, "step": 15831 }, { "epoch": 15.223076923076922, "grad_norm": 0.017374644055962563, "learning_rate": 7.015456764019837e-06, "loss": 0.0001, "step": 15832 }, { "epoch": 15.224038461538461, "grad_norm": 0.03976559266448021, "learning_rate": 7.0142678850486775e-06, "loss": 0.0003, "step": 15833 }, { "epoch": 15.225, "grad_norm": 0.009453367441892624, "learning_rate": 7.013079052407321e-06, "loss": 0.0001, "step": 15834 }, { "epoch": 15.225961538461538, "grad_norm": 0.05054066702723503, "learning_rate": 7.0118902661142095e-06, "loss": 0.0003, "step": 15835 }, { "epoch": 15.226923076923077, "grad_norm": 0.010279602371156216, "learning_rate": 7.0107015261877934e-06, "loss": 0.0001, "step": 15836 }, { "epoch": 15.227884615384616, "grad_norm": 0.022092290222644806, "learning_rate": 7.009512832646513e-06, "loss": 0.0002, "step": 15837 }, { "epoch": 15.228846153846154, "grad_norm": 3.152758836746216, "learning_rate": 7.008324185508817e-06, "loss": 0.0283, "step": 15838 }, { "epoch": 15.229807692307693, "grad_norm": 0.1831555962562561, "learning_rate": 7.00713558479315e-06, "loss": 0.0007, "step": 15839 }, { "epoch": 15.23076923076923, "grad_norm": 0.014165569096803665, "learning_rate": 7.005947030517955e-06, "loss": 0.0001, "step": 15840 }, { "epoch": 15.231730769230769, "grad_norm": 0.037876904010772705, "learning_rate": 7.004758522701673e-06, "loss": 0.0003, "step": 15841 }, { "epoch": 15.232692307692307, "grad_norm": 0.29451367259025574, "learning_rate": 7.00357006136275e-06, "loss": 0.001, "step": 15842 }, { "epoch": 15.233653846153846, "grad_norm": 0.19828911125659943, "learning_rate": 7.002381646519625e-06, "loss": 0.0007, "step": 15843 }, { "epoch": 15.234615384615385, "grad_norm": 0.005287838168442249, "learning_rate": 7.001193278190737e-06, "loss": 0.0001, "step": 15844 }, { "epoch": 15.235576923076923, "grad_norm": 0.02856694906949997, "learning_rate": 7.000004956394528e-06, "loss": 0.0003, "step": 15845 }, { "epoch": 15.236538461538462, "grad_norm": 0.10396500676870346, "learning_rate": 6.99881668114944e-06, "loss": 0.0005, "step": 15846 }, { "epoch": 15.2375, "grad_norm": 0.03399061784148216, "learning_rate": 6.997628452473908e-06, "loss": 0.0004, "step": 15847 }, { "epoch": 15.238461538461538, "grad_norm": 0.04094298556447029, "learning_rate": 6.9964402703863675e-06, "loss": 0.0002, "step": 15848 }, { "epoch": 15.239423076923076, "grad_norm": 0.03351249545812607, "learning_rate": 6.995252134905261e-06, "loss": 0.0004, "step": 15849 }, { "epoch": 15.240384615384615, "grad_norm": 0.03787706047296524, "learning_rate": 6.994064046049023e-06, "loss": 0.0002, "step": 15850 }, { "epoch": 15.241346153846154, "grad_norm": 0.07861359417438507, "learning_rate": 6.992876003836089e-06, "loss": 0.0005, "step": 15851 }, { "epoch": 15.242307692307692, "grad_norm": 0.01914091408252716, "learning_rate": 6.991688008284891e-06, "loss": 0.0002, "step": 15852 }, { "epoch": 15.243269230769231, "grad_norm": 2.9999074935913086, "learning_rate": 6.990500059413868e-06, "loss": 0.0266, "step": 15853 }, { "epoch": 15.24423076923077, "grad_norm": 0.037062518298625946, "learning_rate": 6.989312157241451e-06, "loss": 0.0002, "step": 15854 }, { "epoch": 15.245192307692308, "grad_norm": 0.03495028242468834, "learning_rate": 6.988124301786071e-06, "loss": 0.0002, "step": 15855 }, { "epoch": 15.246153846153845, "grad_norm": 0.04385208711028099, "learning_rate": 6.986936493066165e-06, "loss": 0.0002, "step": 15856 }, { "epoch": 15.247115384615384, "grad_norm": 0.05151663348078728, "learning_rate": 6.9857487311001605e-06, "loss": 0.0002, "step": 15857 }, { "epoch": 15.248076923076923, "grad_norm": 0.009271856397390366, "learning_rate": 6.98456101590649e-06, "loss": 0.0001, "step": 15858 }, { "epoch": 15.249038461538461, "grad_norm": 0.014360932633280754, "learning_rate": 6.98337334750358e-06, "loss": 0.0002, "step": 15859 }, { "epoch": 15.25, "grad_norm": 0.030627189204096794, "learning_rate": 6.982185725909862e-06, "loss": 0.0002, "step": 15860 }, { "epoch": 15.250961538461539, "grad_norm": 2.419832944869995, "learning_rate": 6.980998151143766e-06, "loss": 0.0272, "step": 15861 }, { "epoch": 15.251923076923077, "grad_norm": 0.010965706780552864, "learning_rate": 6.979810623223718e-06, "loss": 0.0001, "step": 15862 }, { "epoch": 15.252884615384616, "grad_norm": 0.030066533014178276, "learning_rate": 6.9786231421681435e-06, "loss": 0.0002, "step": 15863 }, { "epoch": 15.253846153846155, "grad_norm": 0.011555494740605354, "learning_rate": 6.977435707995472e-06, "loss": 0.0002, "step": 15864 }, { "epoch": 15.254807692307692, "grad_norm": 0.07075989991426468, "learning_rate": 6.976248320724126e-06, "loss": 0.0004, "step": 15865 }, { "epoch": 15.25576923076923, "grad_norm": 0.0197286419570446, "learning_rate": 6.97506098037253e-06, "loss": 0.0002, "step": 15866 }, { "epoch": 15.256730769230769, "grad_norm": 0.12431896477937698, "learning_rate": 6.973873686959111e-06, "loss": 0.0006, "step": 15867 }, { "epoch": 15.257692307692308, "grad_norm": 2.1330726146698, "learning_rate": 6.972686440502293e-06, "loss": 0.0203, "step": 15868 }, { "epoch": 15.258653846153846, "grad_norm": 0.025181524455547333, "learning_rate": 6.971499241020495e-06, "loss": 0.0003, "step": 15869 }, { "epoch": 15.259615384615385, "grad_norm": 1.9238474369049072, "learning_rate": 6.970312088532138e-06, "loss": 0.0068, "step": 15870 }, { "epoch": 15.260576923076924, "grad_norm": 0.012073490768671036, "learning_rate": 6.969124983055649e-06, "loss": 0.0001, "step": 15871 }, { "epoch": 15.261538461538462, "grad_norm": 0.03385847434401512, "learning_rate": 6.9679379246094425e-06, "loss": 0.0001, "step": 15872 }, { "epoch": 15.2625, "grad_norm": 2.2852978706359863, "learning_rate": 6.966750913211941e-06, "loss": 0.0277, "step": 15873 }, { "epoch": 15.263461538461538, "grad_norm": 0.01133644301444292, "learning_rate": 6.9655639488815605e-06, "loss": 0.0001, "step": 15874 }, { "epoch": 15.264423076923077, "grad_norm": 0.9739069938659668, "learning_rate": 6.964377031636724e-06, "loss": 0.003, "step": 15875 }, { "epoch": 15.265384615384615, "grad_norm": 0.175826758146286, "learning_rate": 6.963190161495848e-06, "loss": 0.0008, "step": 15876 }, { "epoch": 15.266346153846154, "grad_norm": 0.11473345756530762, "learning_rate": 6.962003338477343e-06, "loss": 0.0008, "step": 15877 }, { "epoch": 15.267307692307693, "grad_norm": 2.300377130508423, "learning_rate": 6.960816562599634e-06, "loss": 0.0124, "step": 15878 }, { "epoch": 15.268269230769231, "grad_norm": 0.018622254952788353, "learning_rate": 6.959629833881131e-06, "loss": 0.0001, "step": 15879 }, { "epoch": 15.26923076923077, "grad_norm": 2.1095097064971924, "learning_rate": 6.95844315234025e-06, "loss": 0.0108, "step": 15880 }, { "epoch": 15.270192307692307, "grad_norm": 0.16269610822200775, "learning_rate": 6.957256517995401e-06, "loss": 0.0008, "step": 15881 }, { "epoch": 15.271153846153846, "grad_norm": 0.7265529036521912, "learning_rate": 6.956069930865005e-06, "loss": 0.0017, "step": 15882 }, { "epoch": 15.272115384615384, "grad_norm": 0.1998617947101593, "learning_rate": 6.9548833909674685e-06, "loss": 0.0007, "step": 15883 }, { "epoch": 15.273076923076923, "grad_norm": 0.009798428043723106, "learning_rate": 6.953696898321203e-06, "loss": 0.0001, "step": 15884 }, { "epoch": 15.274038461538462, "grad_norm": 0.029233191162347794, "learning_rate": 6.952510452944621e-06, "loss": 0.0002, "step": 15885 }, { "epoch": 15.275, "grad_norm": 0.2283153533935547, "learning_rate": 6.951324054856134e-06, "loss": 0.001, "step": 15886 }, { "epoch": 15.275961538461539, "grad_norm": 0.08521570265293121, "learning_rate": 6.95013770407415e-06, "loss": 0.0005, "step": 15887 }, { "epoch": 15.276923076923078, "grad_norm": 0.004690849222242832, "learning_rate": 6.948951400617075e-06, "loss": 0.0, "step": 15888 }, { "epoch": 15.277884615384615, "grad_norm": 0.019274581223726273, "learning_rate": 6.947765144503323e-06, "loss": 0.0001, "step": 15889 }, { "epoch": 15.278846153846153, "grad_norm": 0.6396936774253845, "learning_rate": 6.946578935751296e-06, "loss": 0.0068, "step": 15890 }, { "epoch": 15.279807692307692, "grad_norm": 0.03510962799191475, "learning_rate": 6.945392774379403e-06, "loss": 0.0004, "step": 15891 }, { "epoch": 15.28076923076923, "grad_norm": 0.040945809334516525, "learning_rate": 6.944206660406048e-06, "loss": 0.0005, "step": 15892 }, { "epoch": 15.28173076923077, "grad_norm": 3.57281756401062, "learning_rate": 6.943020593849637e-06, "loss": 0.0348, "step": 15893 }, { "epoch": 15.282692307692308, "grad_norm": 0.49770259857177734, "learning_rate": 6.941834574728575e-06, "loss": 0.0016, "step": 15894 }, { "epoch": 15.283653846153847, "grad_norm": 0.08531459420919418, "learning_rate": 6.940648603061263e-06, "loss": 0.0005, "step": 15895 }, { "epoch": 15.284615384615385, "grad_norm": 0.5722311735153198, "learning_rate": 6.939462678866107e-06, "loss": 0.0027, "step": 15896 }, { "epoch": 15.285576923076922, "grad_norm": 0.03438784182071686, "learning_rate": 6.9382768021615085e-06, "loss": 0.0002, "step": 15897 }, { "epoch": 15.286538461538461, "grad_norm": 0.008368423208594322, "learning_rate": 6.937090972965867e-06, "loss": 0.0001, "step": 15898 }, { "epoch": 15.2875, "grad_norm": 0.01456491369754076, "learning_rate": 6.935905191297584e-06, "loss": 0.0002, "step": 15899 }, { "epoch": 15.288461538461538, "grad_norm": 0.02953503094613552, "learning_rate": 6.93471945717506e-06, "loss": 0.0002, "step": 15900 }, { "epoch": 15.289423076923077, "grad_norm": 2.6117777824401855, "learning_rate": 6.933533770616694e-06, "loss": 0.0314, "step": 15901 }, { "epoch": 15.290384615384616, "grad_norm": 0.021489711478352547, "learning_rate": 6.932348131640883e-06, "loss": 0.0001, "step": 15902 }, { "epoch": 15.291346153846154, "grad_norm": 0.255719929933548, "learning_rate": 6.931162540266024e-06, "loss": 0.0006, "step": 15903 }, { "epoch": 15.292307692307693, "grad_norm": 0.04772955924272537, "learning_rate": 6.929976996510518e-06, "loss": 0.0004, "step": 15904 }, { "epoch": 15.29326923076923, "grad_norm": 0.10237011313438416, "learning_rate": 6.928791500392758e-06, "loss": 0.0007, "step": 15905 }, { "epoch": 15.294230769230769, "grad_norm": 0.01536367554217577, "learning_rate": 6.927606051931139e-06, "loss": 0.0001, "step": 15906 }, { "epoch": 15.295192307692307, "grad_norm": 0.14798618853092194, "learning_rate": 6.9264206511440565e-06, "loss": 0.0004, "step": 15907 }, { "epoch": 15.296153846153846, "grad_norm": 0.038905419409275055, "learning_rate": 6.925235298049906e-06, "loss": 0.0004, "step": 15908 }, { "epoch": 15.297115384615385, "grad_norm": 0.04920610785484314, "learning_rate": 6.92404999266708e-06, "loss": 0.0005, "step": 15909 }, { "epoch": 15.298076923076923, "grad_norm": 0.010898558422923088, "learning_rate": 6.9228647350139675e-06, "loss": 0.0001, "step": 15910 }, { "epoch": 15.299038461538462, "grad_norm": 0.017171049490571022, "learning_rate": 6.921679525108966e-06, "loss": 0.0001, "step": 15911 }, { "epoch": 15.3, "grad_norm": 0.029739325866103172, "learning_rate": 6.920494362970462e-06, "loss": 0.0001, "step": 15912 }, { "epoch": 15.300961538461538, "grad_norm": 0.14456264674663544, "learning_rate": 6.919309248616848e-06, "loss": 0.0005, "step": 15913 }, { "epoch": 15.301923076923076, "grad_norm": 0.02529214695096016, "learning_rate": 6.918124182066511e-06, "loss": 0.0002, "step": 15914 }, { "epoch": 15.302884615384615, "grad_norm": 0.11810655891895294, "learning_rate": 6.916939163337844e-06, "loss": 0.0005, "step": 15915 }, { "epoch": 15.303846153846154, "grad_norm": 0.0027455464005470276, "learning_rate": 6.915754192449231e-06, "loss": 0.0, "step": 15916 }, { "epoch": 15.304807692307692, "grad_norm": 0.04807950556278229, "learning_rate": 6.91456926941906e-06, "loss": 0.0002, "step": 15917 }, { "epoch": 15.305769230769231, "grad_norm": 0.40458250045776367, "learning_rate": 6.91338439426572e-06, "loss": 0.0023, "step": 15918 }, { "epoch": 15.30673076923077, "grad_norm": 0.019632330164313316, "learning_rate": 6.912199567007595e-06, "loss": 0.0001, "step": 15919 }, { "epoch": 15.307692307692308, "grad_norm": 1.1895424127578735, "learning_rate": 6.911014787663069e-06, "loss": 0.0055, "step": 15920 }, { "epoch": 15.308653846153845, "grad_norm": 0.02403140254318714, "learning_rate": 6.909830056250527e-06, "loss": 0.0002, "step": 15921 }, { "epoch": 15.309615384615384, "grad_norm": 0.12220845371484756, "learning_rate": 6.908645372788354e-06, "loss": 0.0006, "step": 15922 }, { "epoch": 15.310576923076923, "grad_norm": 4.014070510864258, "learning_rate": 6.907460737294933e-06, "loss": 0.023, "step": 15923 }, { "epoch": 15.311538461538461, "grad_norm": 0.03497178480029106, "learning_rate": 6.906276149788642e-06, "loss": 0.0001, "step": 15924 }, { "epoch": 15.3125, "grad_norm": 0.01776975765824318, "learning_rate": 6.9050916102878676e-06, "loss": 0.0002, "step": 15925 }, { "epoch": 15.313461538461539, "grad_norm": 0.5914008617401123, "learning_rate": 6.903907118810987e-06, "loss": 0.0031, "step": 15926 }, { "epoch": 15.314423076923077, "grad_norm": 0.1032903790473938, "learning_rate": 6.902722675376382e-06, "loss": 0.0009, "step": 15927 }, { "epoch": 15.315384615384616, "grad_norm": 0.06080983951687813, "learning_rate": 6.901538280002429e-06, "loss": 0.0004, "step": 15928 }, { "epoch": 15.316346153846155, "grad_norm": 0.04858686774969101, "learning_rate": 6.900353932707509e-06, "loss": 0.0004, "step": 15929 }, { "epoch": 15.317307692307692, "grad_norm": 4.648946285247803, "learning_rate": 6.899169633509999e-06, "loss": 0.0504, "step": 15930 }, { "epoch": 15.31826923076923, "grad_norm": 0.652313232421875, "learning_rate": 6.897985382428276e-06, "loss": 0.0042, "step": 15931 }, { "epoch": 15.319230769230769, "grad_norm": 0.07722286880016327, "learning_rate": 6.896801179480712e-06, "loss": 0.0004, "step": 15932 }, { "epoch": 15.320192307692308, "grad_norm": 1.5789518356323242, "learning_rate": 6.895617024685689e-06, "loss": 0.01, "step": 15933 }, { "epoch": 15.321153846153846, "grad_norm": 1.2942548990249634, "learning_rate": 6.894432918061579e-06, "loss": 0.0045, "step": 15934 }, { "epoch": 15.322115384615385, "grad_norm": 0.02523290552198887, "learning_rate": 6.893248859626753e-06, "loss": 0.0002, "step": 15935 }, { "epoch": 15.323076923076924, "grad_norm": 0.055880628526210785, "learning_rate": 6.892064849399588e-06, "loss": 0.0004, "step": 15936 }, { "epoch": 15.324038461538462, "grad_norm": 0.3457375466823578, "learning_rate": 6.890880887398455e-06, "loss": 0.0016, "step": 15937 }, { "epoch": 15.325, "grad_norm": 1.3528375625610352, "learning_rate": 6.889696973641726e-06, "loss": 0.0223, "step": 15938 }, { "epoch": 15.325961538461538, "grad_norm": 0.03495561704039574, "learning_rate": 6.888513108147768e-06, "loss": 0.0003, "step": 15939 }, { "epoch": 15.326923076923077, "grad_norm": 0.12475666403770447, "learning_rate": 6.887329290934959e-06, "loss": 0.0005, "step": 15940 }, { "epoch": 15.327884615384615, "grad_norm": 0.1628272831439972, "learning_rate": 6.886145522021661e-06, "loss": 0.0006, "step": 15941 }, { "epoch": 15.328846153846154, "grad_norm": 0.713941752910614, "learning_rate": 6.8849618014262464e-06, "loss": 0.0049, "step": 15942 }, { "epoch": 15.329807692307693, "grad_norm": 0.038153909146785736, "learning_rate": 6.883778129167081e-06, "loss": 0.0004, "step": 15943 }, { "epoch": 15.330769230769231, "grad_norm": 0.05809003487229347, "learning_rate": 6.882594505262534e-06, "loss": 0.0004, "step": 15944 }, { "epoch": 15.33173076923077, "grad_norm": 1.1552388668060303, "learning_rate": 6.881410929730972e-06, "loss": 0.0103, "step": 15945 }, { "epoch": 15.332692307692307, "grad_norm": 1.6449681520462036, "learning_rate": 6.880227402590757e-06, "loss": 0.0067, "step": 15946 }, { "epoch": 15.333653846153846, "grad_norm": 0.6949828267097473, "learning_rate": 6.8790439238602576e-06, "loss": 0.003, "step": 15947 }, { "epoch": 15.334615384615384, "grad_norm": 0.02789551019668579, "learning_rate": 6.877860493557838e-06, "loss": 0.0002, "step": 15948 }, { "epoch": 15.335576923076923, "grad_norm": 0.089689239859581, "learning_rate": 6.8766771117018585e-06, "loss": 0.0004, "step": 15949 }, { "epoch": 15.336538461538462, "grad_norm": 2.1478824615478516, "learning_rate": 6.875493778310682e-06, "loss": 0.0306, "step": 15950 }, { "epoch": 15.3375, "grad_norm": 2.4545376300811768, "learning_rate": 6.874310493402674e-06, "loss": 0.0416, "step": 15951 }, { "epoch": 15.338461538461539, "grad_norm": 0.1628984808921814, "learning_rate": 6.873127256996193e-06, "loss": 0.0007, "step": 15952 }, { "epoch": 15.339423076923078, "grad_norm": 0.10217049717903137, "learning_rate": 6.871944069109599e-06, "loss": 0.0004, "step": 15953 }, { "epoch": 15.340384615384615, "grad_norm": 0.15582939982414246, "learning_rate": 6.870760929761251e-06, "loss": 0.0004, "step": 15954 }, { "epoch": 15.341346153846153, "grad_norm": 0.05136560648679733, "learning_rate": 6.869577838969512e-06, "loss": 0.0003, "step": 15955 }, { "epoch": 15.342307692307692, "grad_norm": 0.09681311249732971, "learning_rate": 6.868394796752737e-06, "loss": 0.0007, "step": 15956 }, { "epoch": 15.34326923076923, "grad_norm": 0.1196548193693161, "learning_rate": 6.867211803129281e-06, "loss": 0.0003, "step": 15957 }, { "epoch": 15.34423076923077, "grad_norm": 0.13086843490600586, "learning_rate": 6.866028858117506e-06, "loss": 0.0005, "step": 15958 }, { "epoch": 15.345192307692308, "grad_norm": 0.018387816846370697, "learning_rate": 6.864845961735764e-06, "loss": 0.0001, "step": 15959 }, { "epoch": 15.346153846153847, "grad_norm": 0.17627041041851044, "learning_rate": 6.863663114002411e-06, "loss": 0.001, "step": 15960 }, { "epoch": 15.347115384615385, "grad_norm": 0.012015759013593197, "learning_rate": 6.8624803149358e-06, "loss": 0.0001, "step": 15961 }, { "epoch": 15.348076923076922, "grad_norm": 0.030475474894046783, "learning_rate": 6.861297564554288e-06, "loss": 0.0002, "step": 15962 }, { "epoch": 15.349038461538461, "grad_norm": 0.14821761846542358, "learning_rate": 6.860114862876226e-06, "loss": 0.0009, "step": 15963 }, { "epoch": 15.35, "grad_norm": 0.011501346714794636, "learning_rate": 6.858932209919964e-06, "loss": 0.0001, "step": 15964 }, { "epoch": 15.350961538461538, "grad_norm": 0.07408615946769714, "learning_rate": 6.857749605703856e-06, "loss": 0.0004, "step": 15965 }, { "epoch": 15.351923076923077, "grad_norm": 0.378846675157547, "learning_rate": 6.856567050246252e-06, "loss": 0.0012, "step": 15966 }, { "epoch": 15.352884615384616, "grad_norm": 0.45277053117752075, "learning_rate": 6.855384543565503e-06, "loss": 0.0018, "step": 15967 }, { "epoch": 15.353846153846154, "grad_norm": 0.007673757616430521, "learning_rate": 6.8542020856799525e-06, "loss": 0.0001, "step": 15968 }, { "epoch": 15.354807692307693, "grad_norm": 1.3043783903121948, "learning_rate": 6.853019676607957e-06, "loss": 0.0041, "step": 15969 }, { "epoch": 15.35576923076923, "grad_norm": 0.02468184567987919, "learning_rate": 6.85183731636786e-06, "loss": 0.0002, "step": 15970 }, { "epoch": 15.356730769230769, "grad_norm": 1.4591929912567139, "learning_rate": 6.850655004978006e-06, "loss": 0.0349, "step": 15971 }, { "epoch": 15.357692307692307, "grad_norm": 0.09227330982685089, "learning_rate": 6.849472742456742e-06, "loss": 0.0004, "step": 15972 }, { "epoch": 15.358653846153846, "grad_norm": 1.3760658502578735, "learning_rate": 6.848290528822417e-06, "loss": 0.0089, "step": 15973 }, { "epoch": 15.359615384615385, "grad_norm": 0.028007114306092262, "learning_rate": 6.847108364093371e-06, "loss": 0.0002, "step": 15974 }, { "epoch": 15.360576923076923, "grad_norm": 1.0129674673080444, "learning_rate": 6.845926248287948e-06, "loss": 0.0045, "step": 15975 }, { "epoch": 15.361538461538462, "grad_norm": 0.01609119214117527, "learning_rate": 6.8447441814244964e-06, "loss": 0.0001, "step": 15976 }, { "epoch": 15.3625, "grad_norm": 0.23584991693496704, "learning_rate": 6.843562163521353e-06, "loss": 0.0005, "step": 15977 }, { "epoch": 15.363461538461538, "grad_norm": 0.5008253455162048, "learning_rate": 6.842380194596862e-06, "loss": 0.0012, "step": 15978 }, { "epoch": 15.364423076923076, "grad_norm": 0.10264303535223007, "learning_rate": 6.8411982746693594e-06, "loss": 0.0005, "step": 15979 }, { "epoch": 15.365384615384615, "grad_norm": 0.7136655449867249, "learning_rate": 6.840016403757192e-06, "loss": 0.0017, "step": 15980 }, { "epoch": 15.366346153846154, "grad_norm": 0.15332123637199402, "learning_rate": 6.838834581878695e-06, "loss": 0.0012, "step": 15981 }, { "epoch": 15.367307692307692, "grad_norm": 0.03045424446463585, "learning_rate": 6.837652809052207e-06, "loss": 0.0002, "step": 15982 }, { "epoch": 15.368269230769231, "grad_norm": 0.02140590362250805, "learning_rate": 6.836471085296064e-06, "loss": 0.0002, "step": 15983 }, { "epoch": 15.36923076923077, "grad_norm": 0.036451224237680435, "learning_rate": 6.835289410628607e-06, "loss": 0.0003, "step": 15984 }, { "epoch": 15.370192307692308, "grad_norm": 2.080293893814087, "learning_rate": 6.83410778506817e-06, "loss": 0.0249, "step": 15985 }, { "epoch": 15.371153846153845, "grad_norm": 0.11056682467460632, "learning_rate": 6.8329262086330864e-06, "loss": 0.0007, "step": 15986 }, { "epoch": 15.372115384615384, "grad_norm": 2.160737991333008, "learning_rate": 6.8317446813416956e-06, "loss": 0.0073, "step": 15987 }, { "epoch": 15.373076923076923, "grad_norm": 0.5767172574996948, "learning_rate": 6.830563203212328e-06, "loss": 0.0017, "step": 15988 }, { "epoch": 15.374038461538461, "grad_norm": 0.041901227086782455, "learning_rate": 6.8293817742633175e-06, "loss": 0.0002, "step": 15989 }, { "epoch": 15.375, "grad_norm": 1.606494426727295, "learning_rate": 6.828200394512994e-06, "loss": 0.0043, "step": 15990 }, { "epoch": 15.375961538461539, "grad_norm": 3.6638314723968506, "learning_rate": 6.827019063979693e-06, "loss": 0.0835, "step": 15991 }, { "epoch": 15.376923076923077, "grad_norm": 0.04008165001869202, "learning_rate": 6.825837782681744e-06, "loss": 0.0002, "step": 15992 }, { "epoch": 15.377884615384616, "grad_norm": 0.046049583703279495, "learning_rate": 6.824656550637476e-06, "loss": 0.0003, "step": 15993 }, { "epoch": 15.378846153846155, "grad_norm": 0.03928452357649803, "learning_rate": 6.823475367865218e-06, "loss": 0.0002, "step": 15994 }, { "epoch": 15.379807692307692, "grad_norm": 0.5134623050689697, "learning_rate": 6.8222942343833e-06, "loss": 0.0026, "step": 15995 }, { "epoch": 15.38076923076923, "grad_norm": 6.4819231033325195, "learning_rate": 6.82111315021005e-06, "loss": 0.0512, "step": 15996 }, { "epoch": 15.381730769230769, "grad_norm": 0.21459609270095825, "learning_rate": 6.819932115363791e-06, "loss": 0.0009, "step": 15997 }, { "epoch": 15.382692307692308, "grad_norm": 1.6480140686035156, "learning_rate": 6.8187511298628554e-06, "loss": 0.0079, "step": 15998 }, { "epoch": 15.383653846153846, "grad_norm": 0.023939859122037888, "learning_rate": 6.8175701937255645e-06, "loss": 0.0002, "step": 15999 }, { "epoch": 15.384615384615385, "grad_norm": 0.08050612360239029, "learning_rate": 6.816389306970244e-06, "loss": 0.0003, "step": 16000 }, { "epoch": 15.385576923076924, "grad_norm": 3.3196465969085693, "learning_rate": 6.815208469615216e-06, "loss": 0.0225, "step": 16001 }, { "epoch": 15.386538461538462, "grad_norm": 0.14071865379810333, "learning_rate": 6.8140276816788065e-06, "loss": 0.0009, "step": 16002 }, { "epoch": 15.3875, "grad_norm": 0.09333813935518265, "learning_rate": 6.812846943179336e-06, "loss": 0.0005, "step": 16003 }, { "epoch": 15.388461538461538, "grad_norm": 3.0798559188842773, "learning_rate": 6.81166625413513e-06, "loss": 0.0199, "step": 16004 }, { "epoch": 15.389423076923077, "grad_norm": 0.03130466490983963, "learning_rate": 6.810485614564502e-06, "loss": 0.0001, "step": 16005 }, { "epoch": 15.390384615384615, "grad_norm": 2.073671579360962, "learning_rate": 6.809305024485777e-06, "loss": 0.0566, "step": 16006 }, { "epoch": 15.391346153846154, "grad_norm": 1.2323981523513794, "learning_rate": 6.808124483917276e-06, "loss": 0.0028, "step": 16007 }, { "epoch": 15.392307692307693, "grad_norm": 4.209728240966797, "learning_rate": 6.8069439928773105e-06, "loss": 0.011, "step": 16008 }, { "epoch": 15.393269230769231, "grad_norm": 0.0602186918258667, "learning_rate": 6.805763551384205e-06, "loss": 0.0005, "step": 16009 }, { "epoch": 15.39423076923077, "grad_norm": 0.07697085291147232, "learning_rate": 6.804583159456276e-06, "loss": 0.0008, "step": 16010 }, { "epoch": 15.395192307692307, "grad_norm": 0.0815281793475151, "learning_rate": 6.8034028171118375e-06, "loss": 0.0004, "step": 16011 }, { "epoch": 15.396153846153846, "grad_norm": 0.3439697325229645, "learning_rate": 6.802222524369202e-06, "loss": 0.0023, "step": 16012 }, { "epoch": 15.397115384615384, "grad_norm": 0.01666029542684555, "learning_rate": 6.80104228124669e-06, "loss": 0.0002, "step": 16013 }, { "epoch": 15.398076923076923, "grad_norm": 0.2658342123031616, "learning_rate": 6.799862087762614e-06, "loss": 0.0007, "step": 16014 }, { "epoch": 15.399038461538462, "grad_norm": 0.267026424407959, "learning_rate": 6.798681943935284e-06, "loss": 0.0018, "step": 16015 }, { "epoch": 15.4, "grad_norm": 0.18894332647323608, "learning_rate": 6.797501849783016e-06, "loss": 0.0008, "step": 16016 }, { "epoch": 15.400961538461539, "grad_norm": 0.0341053381562233, "learning_rate": 6.7963218053241195e-06, "loss": 0.0002, "step": 16017 }, { "epoch": 15.401923076923078, "grad_norm": 0.020872866734862328, "learning_rate": 6.795141810576906e-06, "loss": 0.0002, "step": 16018 }, { "epoch": 15.402884615384615, "grad_norm": 0.011505122296512127, "learning_rate": 6.793961865559684e-06, "loss": 0.0001, "step": 16019 }, { "epoch": 15.403846153846153, "grad_norm": 0.42409008741378784, "learning_rate": 6.792781970290767e-06, "loss": 0.0015, "step": 16020 }, { "epoch": 15.404807692307692, "grad_norm": 1.69707190990448, "learning_rate": 6.791602124788459e-06, "loss": 0.0198, "step": 16021 }, { "epoch": 15.40576923076923, "grad_norm": 0.08946320414543152, "learning_rate": 6.790422329071071e-06, "loss": 0.0003, "step": 16022 }, { "epoch": 15.40673076923077, "grad_norm": 0.5284649133682251, "learning_rate": 6.789242583156905e-06, "loss": 0.0012, "step": 16023 }, { "epoch": 15.407692307692308, "grad_norm": 0.10767465084791183, "learning_rate": 6.788062887064275e-06, "loss": 0.0006, "step": 16024 }, { "epoch": 15.408653846153847, "grad_norm": 0.14377433061599731, "learning_rate": 6.786883240811479e-06, "loss": 0.0008, "step": 16025 }, { "epoch": 15.409615384615385, "grad_norm": 0.19112224876880646, "learning_rate": 6.785703644416826e-06, "loss": 0.0012, "step": 16026 }, { "epoch": 15.410576923076922, "grad_norm": 3.390148878097534, "learning_rate": 6.784524097898619e-06, "loss": 0.0453, "step": 16027 }, { "epoch": 15.411538461538461, "grad_norm": 0.4100184738636017, "learning_rate": 6.783344601275159e-06, "loss": 0.0026, "step": 16028 }, { "epoch": 15.4125, "grad_norm": 1.83455491065979, "learning_rate": 6.782165154564753e-06, "loss": 0.0107, "step": 16029 }, { "epoch": 15.413461538461538, "grad_norm": 1.0932217836380005, "learning_rate": 6.780985757785695e-06, "loss": 0.0053, "step": 16030 }, { "epoch": 15.414423076923077, "grad_norm": 0.5183426141738892, "learning_rate": 6.7798064109562935e-06, "loss": 0.0026, "step": 16031 }, { "epoch": 15.415384615384616, "grad_norm": 0.3006725609302521, "learning_rate": 6.778627114094845e-06, "loss": 0.0008, "step": 16032 }, { "epoch": 15.416346153846154, "grad_norm": 3.339367389678955, "learning_rate": 6.777447867219651e-06, "loss": 0.0209, "step": 16033 }, { "epoch": 15.417307692307693, "grad_norm": 0.0911560207605362, "learning_rate": 6.776268670349003e-06, "loss": 0.001, "step": 16034 }, { "epoch": 15.41826923076923, "grad_norm": 1.9773380756378174, "learning_rate": 6.775089523501208e-06, "loss": 0.0174, "step": 16035 }, { "epoch": 15.419230769230769, "grad_norm": 3.475484848022461, "learning_rate": 6.773910426694557e-06, "loss": 0.0229, "step": 16036 }, { "epoch": 15.420192307692307, "grad_norm": 0.07746116071939468, "learning_rate": 6.772731379947346e-06, "loss": 0.0006, "step": 16037 }, { "epoch": 15.421153846153846, "grad_norm": 0.0746721401810646, "learning_rate": 6.771552383277875e-06, "loss": 0.0007, "step": 16038 }, { "epoch": 15.422115384615385, "grad_norm": 0.200041726231575, "learning_rate": 6.770373436704436e-06, "loss": 0.0024, "step": 16039 }, { "epoch": 15.423076923076923, "grad_norm": 0.231779545545578, "learning_rate": 6.769194540245321e-06, "loss": 0.0009, "step": 16040 }, { "epoch": 15.424038461538462, "grad_norm": 0.0570443794131279, "learning_rate": 6.7680156939188235e-06, "loss": 0.0005, "step": 16041 }, { "epoch": 15.425, "grad_norm": 0.013238598592579365, "learning_rate": 6.766836897743237e-06, "loss": 0.0001, "step": 16042 }, { "epoch": 15.425961538461538, "grad_norm": 0.2917996048927307, "learning_rate": 6.765658151736853e-06, "loss": 0.0015, "step": 16043 }, { "epoch": 15.426923076923076, "grad_norm": 0.1240357831120491, "learning_rate": 6.764479455917964e-06, "loss": 0.0005, "step": 16044 }, { "epoch": 15.427884615384615, "grad_norm": 0.03322973474860191, "learning_rate": 6.763300810304853e-06, "loss": 0.0002, "step": 16045 }, { "epoch": 15.428846153846154, "grad_norm": 2.0516016483306885, "learning_rate": 6.762122214915818e-06, "loss": 0.0373, "step": 16046 }, { "epoch": 15.429807692307692, "grad_norm": 0.44325724244117737, "learning_rate": 6.760943669769142e-06, "loss": 0.0022, "step": 16047 }, { "epoch": 15.430769230769231, "grad_norm": 0.08626548200845718, "learning_rate": 6.7597651748831125e-06, "loss": 0.0005, "step": 16048 }, { "epoch": 15.43173076923077, "grad_norm": 0.774522066116333, "learning_rate": 6.75858673027602e-06, "loss": 0.0034, "step": 16049 }, { "epoch": 15.432692307692308, "grad_norm": 0.018908977508544922, "learning_rate": 6.757408335966147e-06, "loss": 0.0002, "step": 16050 }, { "epoch": 15.433653846153845, "grad_norm": 1.8760337829589844, "learning_rate": 6.756229991971779e-06, "loss": 0.012, "step": 16051 }, { "epoch": 15.434615384615384, "grad_norm": 0.23101502656936646, "learning_rate": 6.755051698311199e-06, "loss": 0.0015, "step": 16052 }, { "epoch": 15.435576923076923, "grad_norm": 0.063518226146698, "learning_rate": 6.753873455002698e-06, "loss": 0.0004, "step": 16053 }, { "epoch": 15.436538461538461, "grad_norm": 0.029356850311160088, "learning_rate": 6.75269526206455e-06, "loss": 0.0002, "step": 16054 }, { "epoch": 15.4375, "grad_norm": 0.02874636836349964, "learning_rate": 6.7515171195150405e-06, "loss": 0.0002, "step": 16055 }, { "epoch": 15.438461538461539, "grad_norm": 2.2214860916137695, "learning_rate": 6.750339027372452e-06, "loss": 0.0244, "step": 16056 }, { "epoch": 15.439423076923077, "grad_norm": 0.7614261507987976, "learning_rate": 6.7491609856550656e-06, "loss": 0.0027, "step": 16057 }, { "epoch": 15.440384615384616, "grad_norm": 0.7581841945648193, "learning_rate": 6.747982994381159e-06, "loss": 0.0032, "step": 16058 }, { "epoch": 15.441346153846155, "grad_norm": 0.1427909880876541, "learning_rate": 6.7468050535690096e-06, "loss": 0.001, "step": 16059 }, { "epoch": 15.442307692307692, "grad_norm": 0.06657020002603531, "learning_rate": 6.7456271632369005e-06, "loss": 0.0005, "step": 16060 }, { "epoch": 15.44326923076923, "grad_norm": 0.2776162326335907, "learning_rate": 6.744449323403106e-06, "loss": 0.0024, "step": 16061 }, { "epoch": 15.444230769230769, "grad_norm": 0.021535558626055717, "learning_rate": 6.743271534085903e-06, "loss": 0.0002, "step": 16062 }, { "epoch": 15.445192307692308, "grad_norm": 2.025045156478882, "learning_rate": 6.742093795303566e-06, "loss": 0.0118, "step": 16063 }, { "epoch": 15.446153846153846, "grad_norm": 0.7968733906745911, "learning_rate": 6.740916107074372e-06, "loss": 0.0145, "step": 16064 }, { "epoch": 15.447115384615385, "grad_norm": 0.6211865544319153, "learning_rate": 6.739738469416596e-06, "loss": 0.0011, "step": 16065 }, { "epoch": 15.448076923076924, "grad_norm": 0.3997078239917755, "learning_rate": 6.738560882348509e-06, "loss": 0.0014, "step": 16066 }, { "epoch": 15.449038461538462, "grad_norm": 0.022768188267946243, "learning_rate": 6.737383345888384e-06, "loss": 0.0002, "step": 16067 }, { "epoch": 15.45, "grad_norm": 0.01347492728382349, "learning_rate": 6.736205860054495e-06, "loss": 0.0001, "step": 16068 }, { "epoch": 15.450961538461538, "grad_norm": 1.6293696165084839, "learning_rate": 6.735028424865113e-06, "loss": 0.0068, "step": 16069 }, { "epoch": 15.451923076923077, "grad_norm": 0.6981514096260071, "learning_rate": 6.7338510403385035e-06, "loss": 0.0017, "step": 16070 }, { "epoch": 15.452884615384615, "grad_norm": 0.022198719903826714, "learning_rate": 6.7326737064929444e-06, "loss": 0.0003, "step": 16071 }, { "epoch": 15.453846153846154, "grad_norm": 0.1354077011346817, "learning_rate": 6.7314964233466975e-06, "loss": 0.0009, "step": 16072 }, { "epoch": 15.454807692307693, "grad_norm": 0.03835909441113472, "learning_rate": 6.730319190918036e-06, "loss": 0.0003, "step": 16073 }, { "epoch": 15.455769230769231, "grad_norm": 0.043807074427604675, "learning_rate": 6.729142009225219e-06, "loss": 0.0005, "step": 16074 }, { "epoch": 15.45673076923077, "grad_norm": 0.01579171232879162, "learning_rate": 6.727964878286521e-06, "loss": 0.0001, "step": 16075 }, { "epoch": 15.457692307692307, "grad_norm": 0.4469454288482666, "learning_rate": 6.726787798120206e-06, "loss": 0.0023, "step": 16076 }, { "epoch": 15.458653846153846, "grad_norm": 0.02546536736190319, "learning_rate": 6.725610768744535e-06, "loss": 0.0003, "step": 16077 }, { "epoch": 15.459615384615384, "grad_norm": 0.8821563124656677, "learning_rate": 6.724433790177776e-06, "loss": 0.0017, "step": 16078 }, { "epoch": 15.460576923076923, "grad_norm": 0.07532709836959839, "learning_rate": 6.723256862438192e-06, "loss": 0.0006, "step": 16079 }, { "epoch": 15.461538461538462, "grad_norm": 0.3658613860607147, "learning_rate": 6.7220799855440434e-06, "loss": 0.0014, "step": 16080 }, { "epoch": 15.4625, "grad_norm": 2.927123785018921, "learning_rate": 6.720903159513592e-06, "loss": 0.0411, "step": 16081 }, { "epoch": 15.463461538461539, "grad_norm": 0.28798356652259827, "learning_rate": 6.7197263843651e-06, "loss": 0.0011, "step": 16082 }, { "epoch": 15.464423076923078, "grad_norm": 0.21013545989990234, "learning_rate": 6.718549660116829e-06, "loss": 0.001, "step": 16083 }, { "epoch": 15.465384615384615, "grad_norm": 0.04445922374725342, "learning_rate": 6.717372986787034e-06, "loss": 0.0004, "step": 16084 }, { "epoch": 15.466346153846153, "grad_norm": 0.12318408489227295, "learning_rate": 6.7161963643939755e-06, "loss": 0.0006, "step": 16085 }, { "epoch": 15.467307692307692, "grad_norm": 0.05116529390215874, "learning_rate": 6.715019792955912e-06, "loss": 0.0003, "step": 16086 }, { "epoch": 15.46826923076923, "grad_norm": 0.03850903734564781, "learning_rate": 6.713843272491103e-06, "loss": 0.0003, "step": 16087 }, { "epoch": 15.46923076923077, "grad_norm": 2.3382158279418945, "learning_rate": 6.712666803017798e-06, "loss": 0.012, "step": 16088 }, { "epoch": 15.470192307692308, "grad_norm": 0.24362756311893463, "learning_rate": 6.711490384554257e-06, "loss": 0.0014, "step": 16089 }, { "epoch": 15.471153846153847, "grad_norm": 2.8260929584503174, "learning_rate": 6.710314017118734e-06, "loss": 0.0707, "step": 16090 }, { "epoch": 15.472115384615385, "grad_norm": 2.9368927478790283, "learning_rate": 6.709137700729482e-06, "loss": 0.0198, "step": 16091 }, { "epoch": 15.473076923076922, "grad_norm": 0.1503823846578598, "learning_rate": 6.707961435404754e-06, "loss": 0.0008, "step": 16092 }, { "epoch": 15.474038461538461, "grad_norm": 0.1948723942041397, "learning_rate": 6.7067852211628035e-06, "loss": 0.001, "step": 16093 }, { "epoch": 15.475, "grad_norm": 0.15077072381973267, "learning_rate": 6.70560905802188e-06, "loss": 0.0007, "step": 16094 }, { "epoch": 15.475961538461538, "grad_norm": 0.02545667439699173, "learning_rate": 6.704432946000237e-06, "loss": 0.0002, "step": 16095 }, { "epoch": 15.476923076923077, "grad_norm": 0.39323240518569946, "learning_rate": 6.703256885116118e-06, "loss": 0.0014, "step": 16096 }, { "epoch": 15.477884615384616, "grad_norm": 0.7798849940299988, "learning_rate": 6.702080875387781e-06, "loss": 0.0036, "step": 16097 }, { "epoch": 15.478846153846154, "grad_norm": 0.2589607834815979, "learning_rate": 6.700904916833469e-06, "loss": 0.0021, "step": 16098 }, { "epoch": 15.479807692307693, "grad_norm": 0.03065786510705948, "learning_rate": 6.699729009471425e-06, "loss": 0.0004, "step": 16099 }, { "epoch": 15.48076923076923, "grad_norm": 0.3689878284931183, "learning_rate": 6.698553153319905e-06, "loss": 0.0013, "step": 16100 }, { "epoch": 15.481730769230769, "grad_norm": 0.4055362343788147, "learning_rate": 6.697377348397151e-06, "loss": 0.002, "step": 16101 }, { "epoch": 15.482692307692307, "grad_norm": 0.06249373033642769, "learning_rate": 6.696201594721405e-06, "loss": 0.0005, "step": 16102 }, { "epoch": 15.483653846153846, "grad_norm": 0.2045825570821762, "learning_rate": 6.695025892310913e-06, "loss": 0.0007, "step": 16103 }, { "epoch": 15.484615384615385, "grad_norm": 0.03228093311190605, "learning_rate": 6.693850241183924e-06, "loss": 0.0002, "step": 16104 }, { "epoch": 15.485576923076923, "grad_norm": 1.215889811515808, "learning_rate": 6.692674641358672e-06, "loss": 0.0101, "step": 16105 }, { "epoch": 15.486538461538462, "grad_norm": 0.041093725711107254, "learning_rate": 6.691499092853402e-06, "loss": 0.0004, "step": 16106 }, { "epoch": 15.4875, "grad_norm": 0.07439592480659485, "learning_rate": 6.690323595686358e-06, "loss": 0.0005, "step": 16107 }, { "epoch": 15.488461538461538, "grad_norm": 0.08394412696361542, "learning_rate": 6.689148149875777e-06, "loss": 0.0004, "step": 16108 }, { "epoch": 15.489423076923076, "grad_norm": 0.24525709450244904, "learning_rate": 6.6879727554398995e-06, "loss": 0.0009, "step": 16109 }, { "epoch": 15.490384615384615, "grad_norm": 1.767061471939087, "learning_rate": 6.686797412396963e-06, "loss": 0.0158, "step": 16110 }, { "epoch": 15.491346153846154, "grad_norm": 0.04336448386311531, "learning_rate": 6.685622120765209e-06, "loss": 0.0003, "step": 16111 }, { "epoch": 15.492307692307692, "grad_norm": 0.8401045203208923, "learning_rate": 6.684446880562872e-06, "loss": 0.0065, "step": 16112 }, { "epoch": 15.493269230769231, "grad_norm": 0.3638131320476532, "learning_rate": 6.683271691808187e-06, "loss": 0.001, "step": 16113 }, { "epoch": 15.49423076923077, "grad_norm": 3.1987812519073486, "learning_rate": 6.68209655451939e-06, "loss": 0.0082, "step": 16114 }, { "epoch": 15.495192307692308, "grad_norm": 1.8863409757614136, "learning_rate": 6.680921468714718e-06, "loss": 0.0243, "step": 16115 }, { "epoch": 15.496153846153845, "grad_norm": 1.6990796327590942, "learning_rate": 6.6797464344124045e-06, "loss": 0.0585, "step": 16116 }, { "epoch": 15.497115384615384, "grad_norm": 0.01938740722835064, "learning_rate": 6.678571451630678e-06, "loss": 0.0001, "step": 16117 }, { "epoch": 15.498076923076923, "grad_norm": 0.018374091014266014, "learning_rate": 6.6773965203877775e-06, "loss": 0.0002, "step": 16118 }, { "epoch": 15.499038461538461, "grad_norm": 0.152054563164711, "learning_rate": 6.67622164070193e-06, "loss": 0.0008, "step": 16119 }, { "epoch": 15.5, "grad_norm": 0.07392869144678116, "learning_rate": 6.675046812591368e-06, "loss": 0.0004, "step": 16120 }, { "epoch": 15.500961538461539, "grad_norm": 1.2865103483200073, "learning_rate": 6.673872036074321e-06, "loss": 0.0044, "step": 16121 }, { "epoch": 15.501923076923077, "grad_norm": 0.011955130845308304, "learning_rate": 6.672697311169017e-06, "loss": 0.0002, "step": 16122 }, { "epoch": 15.502884615384616, "grad_norm": 0.31601986289024353, "learning_rate": 6.671522637893686e-06, "loss": 0.001, "step": 16123 }, { "epoch": 15.503846153846155, "grad_norm": 0.03387485072016716, "learning_rate": 6.6703480162665545e-06, "loss": 0.0003, "step": 16124 }, { "epoch": 15.504807692307692, "grad_norm": 0.9666019678115845, "learning_rate": 6.6691734463058476e-06, "loss": 0.0057, "step": 16125 }, { "epoch": 15.50576923076923, "grad_norm": 0.10197088122367859, "learning_rate": 6.667998928029795e-06, "loss": 0.0008, "step": 16126 }, { "epoch": 15.506730769230769, "grad_norm": 0.10010288655757904, "learning_rate": 6.66682446145662e-06, "loss": 0.0006, "step": 16127 }, { "epoch": 15.507692307692308, "grad_norm": 1.5342586040496826, "learning_rate": 6.665650046604545e-06, "loss": 0.0142, "step": 16128 }, { "epoch": 15.508653846153846, "grad_norm": 0.1535305231809616, "learning_rate": 6.664475683491797e-06, "loss": 0.0009, "step": 16129 }, { "epoch": 15.509615384615385, "grad_norm": 1.5766592025756836, "learning_rate": 6.663301372136596e-06, "loss": 0.0045, "step": 16130 }, { "epoch": 15.510576923076924, "grad_norm": 0.010731440037488937, "learning_rate": 6.662127112557164e-06, "loss": 0.0001, "step": 16131 }, { "epoch": 15.51153846153846, "grad_norm": 0.07148101180791855, "learning_rate": 6.660952904771723e-06, "loss": 0.0004, "step": 16132 }, { "epoch": 15.5125, "grad_norm": 0.0463373176753521, "learning_rate": 6.6597787487984935e-06, "loss": 0.0004, "step": 16133 }, { "epoch": 15.513461538461538, "grad_norm": 0.02091371826827526, "learning_rate": 6.658604644655694e-06, "loss": 0.0002, "step": 16134 }, { "epoch": 15.514423076923077, "grad_norm": 0.027593204751610756, "learning_rate": 6.657430592361544e-06, "loss": 0.0003, "step": 16135 }, { "epoch": 15.515384615384615, "grad_norm": 2.3228836059570312, "learning_rate": 6.656256591934258e-06, "loss": 0.0791, "step": 16136 }, { "epoch": 15.516346153846154, "grad_norm": 0.10664841532707214, "learning_rate": 6.655082643392059e-06, "loss": 0.0006, "step": 16137 }, { "epoch": 15.517307692307693, "grad_norm": 2.113274335861206, "learning_rate": 6.653908746753161e-06, "loss": 0.035, "step": 16138 }, { "epoch": 15.518269230769231, "grad_norm": 0.10153305530548096, "learning_rate": 6.652734902035775e-06, "loss": 0.0005, "step": 16139 }, { "epoch": 15.51923076923077, "grad_norm": 0.2565053701400757, "learning_rate": 6.651561109258121e-06, "loss": 0.0013, "step": 16140 }, { "epoch": 15.520192307692307, "grad_norm": 0.518149197101593, "learning_rate": 6.650387368438412e-06, "loss": 0.0036, "step": 16141 }, { "epoch": 15.521153846153846, "grad_norm": 0.811100423336029, "learning_rate": 6.649213679594859e-06, "loss": 0.0043, "step": 16142 }, { "epoch": 15.522115384615384, "grad_norm": 0.014805427752435207, "learning_rate": 6.648040042745675e-06, "loss": 0.0002, "step": 16143 }, { "epoch": 15.523076923076923, "grad_norm": 0.047421008348464966, "learning_rate": 6.646866457909073e-06, "loss": 0.0003, "step": 16144 }, { "epoch": 15.524038461538462, "grad_norm": 0.04054097458720207, "learning_rate": 6.645692925103262e-06, "loss": 0.0005, "step": 16145 }, { "epoch": 15.525, "grad_norm": 0.10668893158435822, "learning_rate": 6.644519444346449e-06, "loss": 0.0004, "step": 16146 }, { "epoch": 15.525961538461539, "grad_norm": 1.8300158977508545, "learning_rate": 6.64334601565685e-06, "loss": 0.0383, "step": 16147 }, { "epoch": 15.526923076923078, "grad_norm": 0.03290984034538269, "learning_rate": 6.642172639052668e-06, "loss": 0.0002, "step": 16148 }, { "epoch": 15.527884615384615, "grad_norm": 0.010764263570308685, "learning_rate": 6.640999314552112e-06, "loss": 0.0001, "step": 16149 }, { "epoch": 15.528846153846153, "grad_norm": 0.17315660417079926, "learning_rate": 6.639826042173385e-06, "loss": 0.0009, "step": 16150 }, { "epoch": 15.529807692307692, "grad_norm": 0.019287995994091034, "learning_rate": 6.638652821934699e-06, "loss": 0.0002, "step": 16151 }, { "epoch": 15.53076923076923, "grad_norm": 0.017111530527472496, "learning_rate": 6.637479653854255e-06, "loss": 0.0002, "step": 16152 }, { "epoch": 15.53173076923077, "grad_norm": 0.06849011033773422, "learning_rate": 6.636306537950259e-06, "loss": 0.0006, "step": 16153 }, { "epoch": 15.532692307692308, "grad_norm": 0.01026308536529541, "learning_rate": 6.635133474240909e-06, "loss": 0.0001, "step": 16154 }, { "epoch": 15.533653846153847, "grad_norm": 2.6063220500946045, "learning_rate": 6.633960462744415e-06, "loss": 0.0169, "step": 16155 }, { "epoch": 15.534615384615385, "grad_norm": 0.05954773351550102, "learning_rate": 6.632787503478977e-06, "loss": 0.0004, "step": 16156 }, { "epoch": 15.535576923076922, "grad_norm": 0.545175313949585, "learning_rate": 6.63161459646279e-06, "loss": 0.0044, "step": 16157 }, { "epoch": 15.536538461538461, "grad_norm": 0.06347398459911346, "learning_rate": 6.6304417417140596e-06, "loss": 0.0002, "step": 16158 }, { "epoch": 15.5375, "grad_norm": 0.02015652321279049, "learning_rate": 6.629268939250984e-06, "loss": 0.0002, "step": 16159 }, { "epoch": 15.538461538461538, "grad_norm": 0.021331870928406715, "learning_rate": 6.628096189091763e-06, "loss": 0.0001, "step": 16160 }, { "epoch": 15.539423076923077, "grad_norm": 0.051564209163188934, "learning_rate": 6.6269234912545925e-06, "loss": 0.0004, "step": 16161 }, { "epoch": 15.540384615384616, "grad_norm": 0.012680510990321636, "learning_rate": 6.625750845757671e-06, "loss": 0.0001, "step": 16162 }, { "epoch": 15.541346153846154, "grad_norm": 0.020463034510612488, "learning_rate": 6.624578252619188e-06, "loss": 0.0002, "step": 16163 }, { "epoch": 15.542307692307693, "grad_norm": 0.07079211622476578, "learning_rate": 6.623405711857348e-06, "loss": 0.0003, "step": 16164 }, { "epoch": 15.54326923076923, "grad_norm": 0.02334558591246605, "learning_rate": 6.62223322349034e-06, "loss": 0.0002, "step": 16165 }, { "epoch": 15.544230769230769, "grad_norm": 0.07651042938232422, "learning_rate": 6.621060787536358e-06, "loss": 0.0004, "step": 16166 }, { "epoch": 15.545192307692307, "grad_norm": 0.5343306660652161, "learning_rate": 6.619888404013597e-06, "loss": 0.0032, "step": 16167 }, { "epoch": 15.546153846153846, "grad_norm": 1.6080549955368042, "learning_rate": 6.618716072940248e-06, "loss": 0.0179, "step": 16168 }, { "epoch": 15.547115384615385, "grad_norm": 0.07921085506677628, "learning_rate": 6.617543794334501e-06, "loss": 0.0005, "step": 16169 }, { "epoch": 15.548076923076923, "grad_norm": 0.07254219800233841, "learning_rate": 6.616371568214546e-06, "loss": 0.0004, "step": 16170 }, { "epoch": 15.549038461538462, "grad_norm": 0.02246515266597271, "learning_rate": 6.6151993945985735e-06, "loss": 0.0001, "step": 16171 }, { "epoch": 15.55, "grad_norm": 0.020911717787384987, "learning_rate": 6.614027273504773e-06, "loss": 0.0002, "step": 16172 }, { "epoch": 15.55096153846154, "grad_norm": 0.0979859009385109, "learning_rate": 6.612855204951332e-06, "loss": 0.0007, "step": 16173 }, { "epoch": 15.551923076923076, "grad_norm": 2.17879581451416, "learning_rate": 6.611683188956435e-06, "loss": 0.0067, "step": 16174 }, { "epoch": 15.552884615384615, "grad_norm": 3.1553566455841064, "learning_rate": 6.610511225538272e-06, "loss": 0.022, "step": 16175 }, { "epoch": 15.553846153846154, "grad_norm": 0.05217982828617096, "learning_rate": 6.609339314715027e-06, "loss": 0.0004, "step": 16176 }, { "epoch": 15.554807692307692, "grad_norm": 1.8437808752059937, "learning_rate": 6.608167456504882e-06, "loss": 0.0155, "step": 16177 }, { "epoch": 15.555769230769231, "grad_norm": 0.13421835005283356, "learning_rate": 6.606995650926024e-06, "loss": 0.0006, "step": 16178 }, { "epoch": 15.55673076923077, "grad_norm": 0.06996551901102066, "learning_rate": 6.605823897996637e-06, "loss": 0.0004, "step": 16179 }, { "epoch": 15.557692307692308, "grad_norm": 0.5733255743980408, "learning_rate": 6.6046521977349e-06, "loss": 0.0036, "step": 16180 }, { "epoch": 15.558653846153845, "grad_norm": 0.04736896976828575, "learning_rate": 6.603480550158995e-06, "loss": 0.0002, "step": 16181 }, { "epoch": 15.559615384615384, "grad_norm": 0.028666801750659943, "learning_rate": 6.602308955287104e-06, "loss": 0.0003, "step": 16182 }, { "epoch": 15.560576923076923, "grad_norm": 1.529942512512207, "learning_rate": 6.601137413137406e-06, "loss": 0.0068, "step": 16183 }, { "epoch": 15.561538461538461, "grad_norm": 0.5651549100875854, "learning_rate": 6.59996592372808e-06, "loss": 0.002, "step": 16184 }, { "epoch": 15.5625, "grad_norm": 4.620447158813477, "learning_rate": 6.598794487077304e-06, "loss": 0.0495, "step": 16185 }, { "epoch": 15.563461538461539, "grad_norm": 0.026272490620613098, "learning_rate": 6.597623103203255e-06, "loss": 0.0002, "step": 16186 }, { "epoch": 15.564423076923077, "grad_norm": 0.16939418017864227, "learning_rate": 6.596451772124109e-06, "loss": 0.0004, "step": 16187 }, { "epoch": 15.565384615384616, "grad_norm": 0.2116197645664215, "learning_rate": 6.5952804938580415e-06, "loss": 0.0006, "step": 16188 }, { "epoch": 15.566346153846155, "grad_norm": 1.7527375221252441, "learning_rate": 6.59410926842323e-06, "loss": 0.0652, "step": 16189 }, { "epoch": 15.567307692307692, "grad_norm": 2.4751193523406982, "learning_rate": 6.592938095837847e-06, "loss": 0.0146, "step": 16190 }, { "epoch": 15.56826923076923, "grad_norm": 0.07841310650110245, "learning_rate": 6.591766976120063e-06, "loss": 0.0004, "step": 16191 }, { "epoch": 15.569230769230769, "grad_norm": 1.8922713994979858, "learning_rate": 6.590595909288051e-06, "loss": 0.0094, "step": 16192 }, { "epoch": 15.570192307692308, "grad_norm": 0.6671411991119385, "learning_rate": 6.589424895359988e-06, "loss": 0.0023, "step": 16193 }, { "epoch": 15.571153846153846, "grad_norm": 2.0754761695861816, "learning_rate": 6.588253934354039e-06, "loss": 0.014, "step": 16194 }, { "epoch": 15.572115384615385, "grad_norm": 0.062276799231767654, "learning_rate": 6.587083026288374e-06, "loss": 0.0004, "step": 16195 }, { "epoch": 15.573076923076924, "grad_norm": 0.024298163130879402, "learning_rate": 6.585912171181166e-06, "loss": 0.0002, "step": 16196 }, { "epoch": 15.57403846153846, "grad_norm": 0.03426282852888107, "learning_rate": 6.58474136905058e-06, "loss": 0.0003, "step": 16197 }, { "epoch": 15.575, "grad_norm": 0.268903911113739, "learning_rate": 6.583570619914786e-06, "loss": 0.0015, "step": 16198 }, { "epoch": 15.575961538461538, "grad_norm": 0.07836610078811646, "learning_rate": 6.582399923791944e-06, "loss": 0.0006, "step": 16199 }, { "epoch": 15.576923076923077, "grad_norm": 2.1560285091400146, "learning_rate": 6.5812292807002295e-06, "loss": 0.0646, "step": 16200 }, { "epoch": 15.577884615384615, "grad_norm": 0.14678862690925598, "learning_rate": 6.580058690657801e-06, "loss": 0.0005, "step": 16201 }, { "epoch": 15.578846153846154, "grad_norm": 0.17926537990570068, "learning_rate": 6.578888153682825e-06, "loss": 0.0005, "step": 16202 }, { "epoch": 15.579807692307693, "grad_norm": 0.14162731170654297, "learning_rate": 6.577717669793462e-06, "loss": 0.0007, "step": 16203 }, { "epoch": 15.580769230769231, "grad_norm": 2.1041505336761475, "learning_rate": 6.576547239007878e-06, "loss": 0.01, "step": 16204 }, { "epoch": 15.58173076923077, "grad_norm": 0.23207898437976837, "learning_rate": 6.575376861344233e-06, "loss": 0.002, "step": 16205 }, { "epoch": 15.582692307692307, "grad_norm": 2.7813169956207275, "learning_rate": 6.5742065368206865e-06, "loss": 0.0067, "step": 16206 }, { "epoch": 15.583653846153846, "grad_norm": 0.016478734090924263, "learning_rate": 6.5730362654554015e-06, "loss": 0.0002, "step": 16207 }, { "epoch": 15.584615384615384, "grad_norm": 0.14986826479434967, "learning_rate": 6.571866047266535e-06, "loss": 0.0013, "step": 16208 }, { "epoch": 15.585576923076923, "grad_norm": 0.03267720341682434, "learning_rate": 6.570695882272248e-06, "loss": 0.0002, "step": 16209 }, { "epoch": 15.586538461538462, "grad_norm": 0.018347691744565964, "learning_rate": 6.569525770490693e-06, "loss": 0.0002, "step": 16210 }, { "epoch": 15.5875, "grad_norm": 1.3717117309570312, "learning_rate": 6.568355711940031e-06, "loss": 0.013, "step": 16211 }, { "epoch": 15.588461538461539, "grad_norm": 0.10275296866893768, "learning_rate": 6.567185706638417e-06, "loss": 0.0007, "step": 16212 }, { "epoch": 15.589423076923078, "grad_norm": 0.308764785528183, "learning_rate": 6.566015754604006e-06, "loss": 0.0009, "step": 16213 }, { "epoch": 15.590384615384615, "grad_norm": 0.7046715617179871, "learning_rate": 6.5648458558549496e-06, "loss": 0.0028, "step": 16214 }, { "epoch": 15.591346153846153, "grad_norm": 0.09555256366729736, "learning_rate": 6.563676010409405e-06, "loss": 0.0005, "step": 16215 }, { "epoch": 15.592307692307692, "grad_norm": 4.16945743560791, "learning_rate": 6.562506218285524e-06, "loss": 0.0694, "step": 16216 }, { "epoch": 15.59326923076923, "grad_norm": 0.13265453279018402, "learning_rate": 6.561336479501455e-06, "loss": 0.0006, "step": 16217 }, { "epoch": 15.59423076923077, "grad_norm": 0.39079776406288147, "learning_rate": 6.560166794075355e-06, "loss": 0.0016, "step": 16218 }, { "epoch": 15.595192307692308, "grad_norm": 0.3105420768260956, "learning_rate": 6.558997162025369e-06, "loss": 0.0014, "step": 16219 }, { "epoch": 15.596153846153847, "grad_norm": 0.14042682945728302, "learning_rate": 6.5578275833696485e-06, "loss": 0.0008, "step": 16220 }, { "epoch": 15.597115384615385, "grad_norm": 2.3817310333251953, "learning_rate": 6.5566580581263396e-06, "loss": 0.0419, "step": 16221 }, { "epoch": 15.598076923076922, "grad_norm": 1.437584638595581, "learning_rate": 6.555488586313592e-06, "loss": 0.0039, "step": 16222 }, { "epoch": 15.599038461538461, "grad_norm": 0.29449033737182617, "learning_rate": 6.554319167949553e-06, "loss": 0.0022, "step": 16223 }, { "epoch": 15.6, "grad_norm": 0.11815839260816574, "learning_rate": 6.553149803052366e-06, "loss": 0.0003, "step": 16224 }, { "epoch": 15.600961538461538, "grad_norm": 0.9844404458999634, "learning_rate": 6.551980491640179e-06, "loss": 0.0073, "step": 16225 }, { "epoch": 15.601923076923077, "grad_norm": 2.493659734725952, "learning_rate": 6.550811233731135e-06, "loss": 0.0113, "step": 16226 }, { "epoch": 15.602884615384616, "grad_norm": 0.08955228328704834, "learning_rate": 6.549642029343377e-06, "loss": 0.0005, "step": 16227 }, { "epoch": 15.603846153846154, "grad_norm": 2.3792996406555176, "learning_rate": 6.548472878495047e-06, "loss": 0.0159, "step": 16228 }, { "epoch": 15.604807692307693, "grad_norm": 0.033318739384412766, "learning_rate": 6.547303781204289e-06, "loss": 0.0003, "step": 16229 }, { "epoch": 15.60576923076923, "grad_norm": 0.05013009160757065, "learning_rate": 6.546134737489244e-06, "loss": 0.0003, "step": 16230 }, { "epoch": 15.606730769230769, "grad_norm": 0.030654609203338623, "learning_rate": 6.54496574736805e-06, "loss": 0.0003, "step": 16231 }, { "epoch": 15.607692307692307, "grad_norm": 3.14864444732666, "learning_rate": 6.543796810858845e-06, "loss": 0.0173, "step": 16232 }, { "epoch": 15.608653846153846, "grad_norm": 0.02795989252626896, "learning_rate": 6.542627927979772e-06, "loss": 0.0002, "step": 16233 }, { "epoch": 15.609615384615385, "grad_norm": 0.06487415730953217, "learning_rate": 6.541459098748966e-06, "loss": 0.0004, "step": 16234 }, { "epoch": 15.610576923076923, "grad_norm": 0.37970632314682007, "learning_rate": 6.540290323184561e-06, "loss": 0.0014, "step": 16235 }, { "epoch": 15.611538461538462, "grad_norm": 0.5457009673118591, "learning_rate": 6.539121601304701e-06, "loss": 0.0015, "step": 16236 }, { "epoch": 15.6125, "grad_norm": 0.24219666421413422, "learning_rate": 6.537952933127513e-06, "loss": 0.0017, "step": 16237 }, { "epoch": 15.61346153846154, "grad_norm": 0.08042958378791809, "learning_rate": 6.536784318671136e-06, "loss": 0.0004, "step": 16238 }, { "epoch": 15.614423076923076, "grad_norm": 0.11146029084920883, "learning_rate": 6.5356157579537015e-06, "loss": 0.0007, "step": 16239 }, { "epoch": 15.615384615384615, "grad_norm": 1.1633812189102173, "learning_rate": 6.534447250993343e-06, "loss": 0.0148, "step": 16240 }, { "epoch": 15.616346153846154, "grad_norm": 0.33330947160720825, "learning_rate": 6.533278797808191e-06, "loss": 0.0024, "step": 16241 }, { "epoch": 15.617307692307692, "grad_norm": 1.0337188243865967, "learning_rate": 6.532110398416379e-06, "loss": 0.0066, "step": 16242 }, { "epoch": 15.618269230769231, "grad_norm": 0.2069733887910843, "learning_rate": 6.5309420528360335e-06, "loss": 0.0014, "step": 16243 }, { "epoch": 15.61923076923077, "grad_norm": 0.028332684189081192, "learning_rate": 6.5297737610852875e-06, "loss": 0.0003, "step": 16244 }, { "epoch": 15.620192307692308, "grad_norm": 0.010486639104783535, "learning_rate": 6.5286055231822665e-06, "loss": 0.0002, "step": 16245 }, { "epoch": 15.621153846153845, "grad_norm": 0.11574160307645798, "learning_rate": 6.527437339145097e-06, "loss": 0.0008, "step": 16246 }, { "epoch": 15.622115384615384, "grad_norm": 0.1881566345691681, "learning_rate": 6.5262692089919135e-06, "loss": 0.0007, "step": 16247 }, { "epoch": 15.623076923076923, "grad_norm": 0.860759437084198, "learning_rate": 6.525101132740834e-06, "loss": 0.0044, "step": 16248 }, { "epoch": 15.624038461538461, "grad_norm": 0.03399880230426788, "learning_rate": 6.5239331104099855e-06, "loss": 0.0002, "step": 16249 }, { "epoch": 15.625, "grad_norm": 0.027606839314103127, "learning_rate": 6.522765142017493e-06, "loss": 0.0002, "step": 16250 }, { "epoch": 15.625961538461539, "grad_norm": 0.8616694808006287, "learning_rate": 6.521597227581481e-06, "loss": 0.0039, "step": 16251 }, { "epoch": 15.626923076923077, "grad_norm": 1.349688172340393, "learning_rate": 6.520429367120072e-06, "loss": 0.0162, "step": 16252 }, { "epoch": 15.627884615384616, "grad_norm": 0.012855644337832928, "learning_rate": 6.519261560651384e-06, "loss": 0.0001, "step": 16253 }, { "epoch": 15.628846153846155, "grad_norm": 0.022099629044532776, "learning_rate": 6.518093808193542e-06, "loss": 0.0002, "step": 16254 }, { "epoch": 15.629807692307692, "grad_norm": 0.534523069858551, "learning_rate": 6.516926109764665e-06, "loss": 0.002, "step": 16255 }, { "epoch": 15.63076923076923, "grad_norm": 0.006510390434414148, "learning_rate": 6.515758465382873e-06, "loss": 0.0001, "step": 16256 }, { "epoch": 15.631730769230769, "grad_norm": 0.013381531462073326, "learning_rate": 6.51459087506628e-06, "loss": 0.0002, "step": 16257 }, { "epoch": 15.632692307692308, "grad_norm": 0.0480465404689312, "learning_rate": 6.513423338833011e-06, "loss": 0.0002, "step": 16258 }, { "epoch": 15.633653846153846, "grad_norm": 0.13105495274066925, "learning_rate": 6.5122558567011775e-06, "loss": 0.0007, "step": 16259 }, { "epoch": 15.634615384615385, "grad_norm": 0.05559086427092552, "learning_rate": 6.511088428688896e-06, "loss": 0.0003, "step": 16260 }, { "epoch": 15.635576923076924, "grad_norm": 0.018053598701953888, "learning_rate": 6.509921054814283e-06, "loss": 0.0002, "step": 16261 }, { "epoch": 15.63653846153846, "grad_norm": 1.8381363153457642, "learning_rate": 6.508753735095452e-06, "loss": 0.0107, "step": 16262 }, { "epoch": 15.6375, "grad_norm": 0.2709360718727112, "learning_rate": 6.5075864695505165e-06, "loss": 0.0008, "step": 16263 }, { "epoch": 15.638461538461538, "grad_norm": 0.12837213277816772, "learning_rate": 6.506419258197587e-06, "loss": 0.0007, "step": 16264 }, { "epoch": 15.639423076923077, "grad_norm": 0.14072205126285553, "learning_rate": 6.50525210105478e-06, "loss": 0.0006, "step": 16265 }, { "epoch": 15.640384615384615, "grad_norm": 0.06865418702363968, "learning_rate": 6.504084998140202e-06, "loss": 0.0003, "step": 16266 }, { "epoch": 15.641346153846154, "grad_norm": 0.07338274270296097, "learning_rate": 6.502917949471965e-06, "loss": 0.0002, "step": 16267 }, { "epoch": 15.642307692307693, "grad_norm": 0.01773703843355179, "learning_rate": 6.501750955068175e-06, "loss": 0.0002, "step": 16268 }, { "epoch": 15.643269230769231, "grad_norm": 0.0323738269507885, "learning_rate": 6.500584014946947e-06, "loss": 0.0002, "step": 16269 }, { "epoch": 15.64423076923077, "grad_norm": 0.06811518222093582, "learning_rate": 6.499417129126382e-06, "loss": 0.0005, "step": 16270 }, { "epoch": 15.645192307692307, "grad_norm": 0.023305213078856468, "learning_rate": 6.498250297624591e-06, "loss": 0.0003, "step": 16271 }, { "epoch": 15.646153846153846, "grad_norm": 0.0676390752196312, "learning_rate": 6.497083520459674e-06, "loss": 0.0003, "step": 16272 }, { "epoch": 15.647115384615384, "grad_norm": 0.3818705081939697, "learning_rate": 6.495916797649743e-06, "loss": 0.002, "step": 16273 }, { "epoch": 15.648076923076923, "grad_norm": 0.05254153534770012, "learning_rate": 6.494750129212899e-06, "loss": 0.0003, "step": 16274 }, { "epoch": 15.649038461538462, "grad_norm": 1.0626325607299805, "learning_rate": 6.493583515167242e-06, "loss": 0.0074, "step": 16275 }, { "epoch": 15.65, "grad_norm": 0.06099732220172882, "learning_rate": 6.49241695553088e-06, "loss": 0.0005, "step": 16276 }, { "epoch": 15.650961538461539, "grad_norm": 0.16959373652935028, "learning_rate": 6.491250450321913e-06, "loss": 0.0006, "step": 16277 }, { "epoch": 15.651923076923078, "grad_norm": 0.0300868209451437, "learning_rate": 6.490083999558441e-06, "loss": 0.0003, "step": 16278 }, { "epoch": 15.652884615384615, "grad_norm": 0.01801559515297413, "learning_rate": 6.488917603258562e-06, "loss": 0.0002, "step": 16279 }, { "epoch": 15.653846153846153, "grad_norm": 0.16127832233905792, "learning_rate": 6.4877512614403784e-06, "loss": 0.001, "step": 16280 }, { "epoch": 15.654807692307692, "grad_norm": 0.7079213261604309, "learning_rate": 6.486584974121988e-06, "loss": 0.0059, "step": 16281 }, { "epoch": 15.65576923076923, "grad_norm": 2.630465269088745, "learning_rate": 6.485418741321485e-06, "loss": 0.0532, "step": 16282 }, { "epoch": 15.65673076923077, "grad_norm": 0.03949853405356407, "learning_rate": 6.484252563056967e-06, "loss": 0.0003, "step": 16283 }, { "epoch": 15.657692307692308, "grad_norm": 0.27995213866233826, "learning_rate": 6.483086439346533e-06, "loss": 0.0007, "step": 16284 }, { "epoch": 15.658653846153847, "grad_norm": 0.023953678086400032, "learning_rate": 6.481920370208274e-06, "loss": 0.0002, "step": 16285 }, { "epoch": 15.659615384615385, "grad_norm": 0.051866233348846436, "learning_rate": 6.480754355660285e-06, "loss": 0.0004, "step": 16286 }, { "epoch": 15.660576923076922, "grad_norm": 0.03890872001647949, "learning_rate": 6.479588395720661e-06, "loss": 0.0004, "step": 16287 }, { "epoch": 15.661538461538461, "grad_norm": 0.13675343990325928, "learning_rate": 6.478422490407492e-06, "loss": 0.0009, "step": 16288 }, { "epoch": 15.6625, "grad_norm": 0.013989097438752651, "learning_rate": 6.477256639738872e-06, "loss": 0.0001, "step": 16289 }, { "epoch": 15.663461538461538, "grad_norm": 0.5640013813972473, "learning_rate": 6.476090843732887e-06, "loss": 0.0028, "step": 16290 }, { "epoch": 15.664423076923077, "grad_norm": 1.9458011388778687, "learning_rate": 6.474925102407631e-06, "loss": 0.0137, "step": 16291 }, { "epoch": 15.665384615384616, "grad_norm": 0.025530362501740456, "learning_rate": 6.473759415781191e-06, "loss": 0.0002, "step": 16292 }, { "epoch": 15.666346153846154, "grad_norm": 0.08507127314805984, "learning_rate": 6.472593783871657e-06, "loss": 0.0004, "step": 16293 }, { "epoch": 15.667307692307693, "grad_norm": 4.035634517669678, "learning_rate": 6.471428206697112e-06, "loss": 0.1306, "step": 16294 }, { "epoch": 15.66826923076923, "grad_norm": 0.8826172351837158, "learning_rate": 6.470262684275648e-06, "loss": 0.0036, "step": 16295 }, { "epoch": 15.669230769230769, "grad_norm": 0.019677402451634407, "learning_rate": 6.469097216625346e-06, "loss": 0.0001, "step": 16296 }, { "epoch": 15.670192307692307, "grad_norm": 0.016873715445399284, "learning_rate": 6.46793180376429e-06, "loss": 0.0001, "step": 16297 }, { "epoch": 15.671153846153846, "grad_norm": 0.05003070831298828, "learning_rate": 6.466766445710568e-06, "loss": 0.0005, "step": 16298 }, { "epoch": 15.672115384615385, "grad_norm": 0.6005107164382935, "learning_rate": 6.465601142482263e-06, "loss": 0.0012, "step": 16299 }, { "epoch": 15.673076923076923, "grad_norm": 0.02138453722000122, "learning_rate": 6.4644358940974514e-06, "loss": 0.0002, "step": 16300 }, { "epoch": 15.674038461538462, "grad_norm": 0.02437726780772209, "learning_rate": 6.463270700574219e-06, "loss": 0.0002, "step": 16301 }, { "epoch": 15.675, "grad_norm": 0.032882481813430786, "learning_rate": 6.462105561930646e-06, "loss": 0.0004, "step": 16302 }, { "epoch": 15.67596153846154, "grad_norm": 1.3045644760131836, "learning_rate": 6.4609404781848115e-06, "loss": 0.0098, "step": 16303 }, { "epoch": 15.676923076923076, "grad_norm": 0.02046518586575985, "learning_rate": 6.459775449354792e-06, "loss": 0.0003, "step": 16304 }, { "epoch": 15.677884615384615, "grad_norm": 0.5710269808769226, "learning_rate": 6.458610475458666e-06, "loss": 0.0014, "step": 16305 }, { "epoch": 15.678846153846154, "grad_norm": 0.14098317921161652, "learning_rate": 6.457445556514516e-06, "loss": 0.0005, "step": 16306 }, { "epoch": 15.679807692307692, "grad_norm": 0.0395122766494751, "learning_rate": 6.456280692540411e-06, "loss": 0.0002, "step": 16307 }, { "epoch": 15.680769230769231, "grad_norm": 1.203253149986267, "learning_rate": 6.455115883554428e-06, "loss": 0.0043, "step": 16308 }, { "epoch": 15.68173076923077, "grad_norm": 0.015547358430922031, "learning_rate": 6.453951129574644e-06, "loss": 0.0002, "step": 16309 }, { "epoch": 15.682692307692308, "grad_norm": 0.008698890917003155, "learning_rate": 6.4527864306191315e-06, "loss": 0.0001, "step": 16310 }, { "epoch": 15.683653846153845, "grad_norm": 0.016587426885962486, "learning_rate": 6.4516217867059615e-06, "loss": 0.0001, "step": 16311 }, { "epoch": 15.684615384615384, "grad_norm": 0.027892787009477615, "learning_rate": 6.450457197853206e-06, "loss": 0.0002, "step": 16312 }, { "epoch": 15.685576923076923, "grad_norm": 0.03806029260158539, "learning_rate": 6.449292664078939e-06, "loss": 0.0002, "step": 16313 }, { "epoch": 15.686538461538461, "grad_norm": 0.09730663895606995, "learning_rate": 6.448128185401228e-06, "loss": 0.0006, "step": 16314 }, { "epoch": 15.6875, "grad_norm": 0.02644686959683895, "learning_rate": 6.446963761838141e-06, "loss": 0.0003, "step": 16315 }, { "epoch": 15.688461538461539, "grad_norm": 0.07124269753694534, "learning_rate": 6.44579939340775e-06, "loss": 0.0004, "step": 16316 }, { "epoch": 15.689423076923077, "grad_norm": 1.2621477842330933, "learning_rate": 6.444635080128121e-06, "loss": 0.0068, "step": 16317 }, { "epoch": 15.690384615384616, "grad_norm": 0.0379561185836792, "learning_rate": 6.4434708220173215e-06, "loss": 0.0004, "step": 16318 }, { "epoch": 15.691346153846155, "grad_norm": 0.045670922845602036, "learning_rate": 6.442306619093415e-06, "loss": 0.0003, "step": 16319 }, { "epoch": 15.692307692307692, "grad_norm": 0.027248648926615715, "learning_rate": 6.441142471374469e-06, "loss": 0.0002, "step": 16320 }, { "epoch": 15.69326923076923, "grad_norm": 0.055879928171634674, "learning_rate": 6.439978378878547e-06, "loss": 0.0004, "step": 16321 }, { "epoch": 15.694230769230769, "grad_norm": 0.026003913953900337, "learning_rate": 6.438814341623712e-06, "loss": 0.0003, "step": 16322 }, { "epoch": 15.695192307692308, "grad_norm": 0.28655973076820374, "learning_rate": 6.437650359628025e-06, "loss": 0.0007, "step": 16323 }, { "epoch": 15.696153846153846, "grad_norm": 0.017171679064631462, "learning_rate": 6.43648643290955e-06, "loss": 0.0001, "step": 16324 }, { "epoch": 15.697115384615385, "grad_norm": 0.01615978591144085, "learning_rate": 6.435322561486348e-06, "loss": 0.0002, "step": 16325 }, { "epoch": 15.698076923076924, "grad_norm": 0.01212160661816597, "learning_rate": 6.4341587453764756e-06, "loss": 0.0002, "step": 16326 }, { "epoch": 15.69903846153846, "grad_norm": 1.6967333555221558, "learning_rate": 6.432994984597996e-06, "loss": 0.062, "step": 16327 }, { "epoch": 15.7, "grad_norm": 0.02696262300014496, "learning_rate": 6.431831279168963e-06, "loss": 0.0002, "step": 16328 }, { "epoch": 15.700961538461538, "grad_norm": 0.08296076208353043, "learning_rate": 6.430667629107439e-06, "loss": 0.0005, "step": 16329 }, { "epoch": 15.701923076923077, "grad_norm": 0.13235197961330414, "learning_rate": 6.429504034431473e-06, "loss": 0.0007, "step": 16330 }, { "epoch": 15.702884615384615, "grad_norm": 0.06275055557489395, "learning_rate": 6.428340495159129e-06, "loss": 0.0005, "step": 16331 }, { "epoch": 15.703846153846154, "grad_norm": 2.8673455715179443, "learning_rate": 6.427177011308456e-06, "loss": 0.0062, "step": 16332 }, { "epoch": 15.704807692307693, "grad_norm": 1.0868644714355469, "learning_rate": 6.426013582897511e-06, "loss": 0.0027, "step": 16333 }, { "epoch": 15.705769230769231, "grad_norm": 0.849510908126831, "learning_rate": 6.4248502099443445e-06, "loss": 0.0022, "step": 16334 }, { "epoch": 15.70673076923077, "grad_norm": 0.176293283700943, "learning_rate": 6.423686892467009e-06, "loss": 0.0005, "step": 16335 }, { "epoch": 15.707692307692307, "grad_norm": 0.014860124327242374, "learning_rate": 6.4225236304835595e-06, "loss": 0.0002, "step": 16336 }, { "epoch": 15.708653846153846, "grad_norm": 2.4901020526885986, "learning_rate": 6.421360424012039e-06, "loss": 0.0302, "step": 16337 }, { "epoch": 15.709615384615384, "grad_norm": 0.04046681523323059, "learning_rate": 6.420197273070505e-06, "loss": 0.0003, "step": 16338 }, { "epoch": 15.710576923076923, "grad_norm": 0.010641921311616898, "learning_rate": 6.4190341776770034e-06, "loss": 0.0001, "step": 16339 }, { "epoch": 15.711538461538462, "grad_norm": 0.010278464294970036, "learning_rate": 6.417871137849581e-06, "loss": 0.0001, "step": 16340 }, { "epoch": 15.7125, "grad_norm": 0.10236511379480362, "learning_rate": 6.416708153606282e-06, "loss": 0.0005, "step": 16341 }, { "epoch": 15.713461538461539, "grad_norm": 0.9056462645530701, "learning_rate": 6.415545224965158e-06, "loss": 0.0024, "step": 16342 }, { "epoch": 15.714423076923078, "grad_norm": 0.05021428316831589, "learning_rate": 6.414382351944253e-06, "loss": 0.0003, "step": 16343 }, { "epoch": 15.715384615384615, "grad_norm": 0.03931402042508125, "learning_rate": 6.413219534561609e-06, "loss": 0.0002, "step": 16344 }, { "epoch": 15.716346153846153, "grad_norm": 0.039888590574264526, "learning_rate": 6.412056772835269e-06, "loss": 0.0002, "step": 16345 }, { "epoch": 15.717307692307692, "grad_norm": 0.2612709701061249, "learning_rate": 6.4108940667832795e-06, "loss": 0.001, "step": 16346 }, { "epoch": 15.71826923076923, "grad_norm": 0.017099693417549133, "learning_rate": 6.40973141642368e-06, "loss": 0.0002, "step": 16347 }, { "epoch": 15.71923076923077, "grad_norm": 0.05939260870218277, "learning_rate": 6.40856882177451e-06, "loss": 0.0003, "step": 16348 }, { "epoch": 15.720192307692308, "grad_norm": 1.063602328300476, "learning_rate": 6.407406282853812e-06, "loss": 0.0034, "step": 16349 }, { "epoch": 15.721153846153847, "grad_norm": 2.396071672439575, "learning_rate": 6.406243799679625e-06, "loss": 0.0573, "step": 16350 }, { "epoch": 15.722115384615385, "grad_norm": 0.07199497520923615, "learning_rate": 6.405081372269988e-06, "loss": 0.0004, "step": 16351 }, { "epoch": 15.723076923076922, "grad_norm": 0.0246901698410511, "learning_rate": 6.4039190006429345e-06, "loss": 0.0004, "step": 16352 }, { "epoch": 15.724038461538461, "grad_norm": 0.05724778771400452, "learning_rate": 6.402756684816505e-06, "loss": 0.0003, "step": 16353 }, { "epoch": 15.725, "grad_norm": 0.6527164578437805, "learning_rate": 6.401594424808735e-06, "loss": 0.0034, "step": 16354 }, { "epoch": 15.725961538461538, "grad_norm": 0.17054149508476257, "learning_rate": 6.400432220637654e-06, "loss": 0.0009, "step": 16355 }, { "epoch": 15.726923076923077, "grad_norm": 2.635568380355835, "learning_rate": 6.399270072321304e-06, "loss": 0.0121, "step": 16356 }, { "epoch": 15.727884615384616, "grad_norm": 0.5622742176055908, "learning_rate": 6.398107979877714e-06, "loss": 0.0118, "step": 16357 }, { "epoch": 15.728846153846154, "grad_norm": 0.3614218533039093, "learning_rate": 6.396945943324919e-06, "loss": 0.0029, "step": 16358 }, { "epoch": 15.729807692307693, "grad_norm": 1.4646093845367432, "learning_rate": 6.395783962680944e-06, "loss": 0.0091, "step": 16359 }, { "epoch": 15.73076923076923, "grad_norm": 0.021411433815956116, "learning_rate": 6.394622037963826e-06, "loss": 0.0003, "step": 16360 }, { "epoch": 15.731730769230769, "grad_norm": 0.09202422946691513, "learning_rate": 6.393460169191592e-06, "loss": 0.0003, "step": 16361 }, { "epoch": 15.732692307692307, "grad_norm": 0.053138550370931625, "learning_rate": 6.392298356382271e-06, "loss": 0.0003, "step": 16362 }, { "epoch": 15.733653846153846, "grad_norm": 0.25706174969673157, "learning_rate": 6.39113659955389e-06, "loss": 0.0015, "step": 16363 }, { "epoch": 15.734615384615385, "grad_norm": 0.2640831768512726, "learning_rate": 6.389974898724478e-06, "loss": 0.0016, "step": 16364 }, { "epoch": 15.735576923076923, "grad_norm": 0.025412902235984802, "learning_rate": 6.388813253912061e-06, "loss": 0.0002, "step": 16365 }, { "epoch": 15.736538461538462, "grad_norm": 0.06966324150562286, "learning_rate": 6.387651665134661e-06, "loss": 0.0004, "step": 16366 }, { "epoch": 15.7375, "grad_norm": 0.05608240142464638, "learning_rate": 6.386490132410308e-06, "loss": 0.0005, "step": 16367 }, { "epoch": 15.73846153846154, "grad_norm": 0.029027650132775307, "learning_rate": 6.385328655757021e-06, "loss": 0.0003, "step": 16368 }, { "epoch": 15.739423076923076, "grad_norm": 0.015765702351927757, "learning_rate": 6.384167235192827e-06, "loss": 0.0002, "step": 16369 }, { "epoch": 15.740384615384615, "grad_norm": 0.14427565038204193, "learning_rate": 6.383005870735741e-06, "loss": 0.0005, "step": 16370 }, { "epoch": 15.741346153846154, "grad_norm": 0.11005648225545883, "learning_rate": 6.381844562403789e-06, "loss": 0.0005, "step": 16371 }, { "epoch": 15.742307692307692, "grad_norm": 0.012060522101819515, "learning_rate": 6.380683310214993e-06, "loss": 0.0001, "step": 16372 }, { "epoch": 15.743269230769231, "grad_norm": 0.03814521059393883, "learning_rate": 6.379522114187369e-06, "loss": 0.0003, "step": 16373 }, { "epoch": 15.74423076923077, "grad_norm": 0.09319373220205307, "learning_rate": 6.378360974338933e-06, "loss": 0.0005, "step": 16374 }, { "epoch": 15.745192307692308, "grad_norm": 0.022658245638012886, "learning_rate": 6.377199890687706e-06, "loss": 0.0001, "step": 16375 }, { "epoch": 15.746153846153845, "grad_norm": 0.0395035520195961, "learning_rate": 6.376038863251706e-06, "loss": 0.0003, "step": 16376 }, { "epoch": 15.747115384615384, "grad_norm": 0.00925019383430481, "learning_rate": 6.374877892048944e-06, "loss": 0.0001, "step": 16377 }, { "epoch": 15.748076923076923, "grad_norm": 0.09883835166692734, "learning_rate": 6.37371697709744e-06, "loss": 0.0005, "step": 16378 }, { "epoch": 15.749038461538461, "grad_norm": 0.044112127274274826, "learning_rate": 6.3725561184152054e-06, "loss": 0.0003, "step": 16379 }, { "epoch": 15.75, "grad_norm": 0.010517432354390621, "learning_rate": 6.371395316020253e-06, "loss": 0.0001, "step": 16380 }, { "epoch": 15.750961538461539, "grad_norm": 0.05830251798033714, "learning_rate": 6.3702345699305935e-06, "loss": 0.0005, "step": 16381 }, { "epoch": 15.751923076923077, "grad_norm": 0.06017494946718216, "learning_rate": 6.369073880164241e-06, "loss": 0.0005, "step": 16382 }, { "epoch": 15.752884615384616, "grad_norm": 0.030458781868219376, "learning_rate": 6.367913246739208e-06, "loss": 0.0002, "step": 16383 }, { "epoch": 15.753846153846155, "grad_norm": 2.5457019805908203, "learning_rate": 6.3667526696734995e-06, "loss": 0.0057, "step": 16384 }, { "epoch": 15.754807692307692, "grad_norm": 0.012213052250444889, "learning_rate": 6.365592148985124e-06, "loss": 0.0001, "step": 16385 }, { "epoch": 15.75576923076923, "grad_norm": 3.525083541870117, "learning_rate": 6.364431684692094e-06, "loss": 0.0108, "step": 16386 }, { "epoch": 15.756730769230769, "grad_norm": 0.020898189395666122, "learning_rate": 6.363271276812414e-06, "loss": 0.0002, "step": 16387 }, { "epoch": 15.757692307692308, "grad_norm": 0.050110381096601486, "learning_rate": 6.362110925364088e-06, "loss": 0.0003, "step": 16388 }, { "epoch": 15.758653846153846, "grad_norm": 0.20740680396556854, "learning_rate": 6.360950630365126e-06, "loss": 0.001, "step": 16389 }, { "epoch": 15.759615384615385, "grad_norm": 0.18689441680908203, "learning_rate": 6.359790391833529e-06, "loss": 0.0005, "step": 16390 }, { "epoch": 15.760576923076924, "grad_norm": 0.05726918950676918, "learning_rate": 6.358630209787301e-06, "loss": 0.0005, "step": 16391 }, { "epoch": 15.76153846153846, "grad_norm": 0.028125740587711334, "learning_rate": 6.3574700842444436e-06, "loss": 0.0003, "step": 16392 }, { "epoch": 15.7625, "grad_norm": 0.005534174386411905, "learning_rate": 6.35631001522296e-06, "loss": 0.0001, "step": 16393 }, { "epoch": 15.763461538461538, "grad_norm": 0.7937008142471313, "learning_rate": 6.355150002740853e-06, "loss": 0.0027, "step": 16394 }, { "epoch": 15.764423076923077, "grad_norm": 0.008412343449890614, "learning_rate": 6.35399004681612e-06, "loss": 0.0001, "step": 16395 }, { "epoch": 15.765384615384615, "grad_norm": 1.3284839391708374, "learning_rate": 6.352830147466757e-06, "loss": 0.0108, "step": 16396 }, { "epoch": 15.766346153846154, "grad_norm": 0.04318710044026375, "learning_rate": 6.351670304710771e-06, "loss": 0.0002, "step": 16397 }, { "epoch": 15.767307692307693, "grad_norm": 0.02833116240799427, "learning_rate": 6.350510518566151e-06, "loss": 0.0002, "step": 16398 }, { "epoch": 15.768269230769231, "grad_norm": 0.13500814139842987, "learning_rate": 6.349350789050896e-06, "loss": 0.0005, "step": 16399 }, { "epoch": 15.76923076923077, "grad_norm": 2.527510166168213, "learning_rate": 6.348191116183005e-06, "loss": 0.0242, "step": 16400 }, { "epoch": 15.770192307692307, "grad_norm": 0.06798709183931351, "learning_rate": 6.347031499980468e-06, "loss": 0.0006, "step": 16401 }, { "epoch": 15.771153846153846, "grad_norm": 0.03605853021144867, "learning_rate": 6.345871940461282e-06, "loss": 0.0003, "step": 16402 }, { "epoch": 15.772115384615384, "grad_norm": 0.027499938383698463, "learning_rate": 6.344712437643436e-06, "loss": 0.0002, "step": 16403 }, { "epoch": 15.773076923076923, "grad_norm": 2.5656936168670654, "learning_rate": 6.343552991544928e-06, "loss": 0.008, "step": 16404 }, { "epoch": 15.774038461538462, "grad_norm": 0.021298089995980263, "learning_rate": 6.342393602183745e-06, "loss": 0.0002, "step": 16405 }, { "epoch": 15.775, "grad_norm": 1.8283673524856567, "learning_rate": 6.341234269577878e-06, "loss": 0.0283, "step": 16406 }, { "epoch": 15.775961538461539, "grad_norm": 0.5917606949806213, "learning_rate": 6.340074993745318e-06, "loss": 0.0023, "step": 16407 }, { "epoch": 15.776923076923078, "grad_norm": 0.026637552306056023, "learning_rate": 6.338915774704051e-06, "loss": 0.0002, "step": 16408 }, { "epoch": 15.777884615384615, "grad_norm": 0.051151569932699203, "learning_rate": 6.337756612472067e-06, "loss": 0.0003, "step": 16409 }, { "epoch": 15.778846153846153, "grad_norm": 0.5076414346694946, "learning_rate": 6.33659750706735e-06, "loss": 0.0021, "step": 16410 }, { "epoch": 15.779807692307692, "grad_norm": 0.19105352461338043, "learning_rate": 6.33543845850789e-06, "loss": 0.0006, "step": 16411 }, { "epoch": 15.78076923076923, "grad_norm": 0.6162474751472473, "learning_rate": 6.334279466811668e-06, "loss": 0.0015, "step": 16412 }, { "epoch": 15.78173076923077, "grad_norm": 0.2036101371049881, "learning_rate": 6.333120531996672e-06, "loss": 0.0006, "step": 16413 }, { "epoch": 15.782692307692308, "grad_norm": 0.7408058047294617, "learning_rate": 6.33196165408088e-06, "loss": 0.0025, "step": 16414 }, { "epoch": 15.783653846153847, "grad_norm": 0.009250507690012455, "learning_rate": 6.33080283308228e-06, "loss": 0.0001, "step": 16415 }, { "epoch": 15.784615384615385, "grad_norm": 0.7876720428466797, "learning_rate": 6.329644069018851e-06, "loss": 0.003, "step": 16416 }, { "epoch": 15.785576923076922, "grad_norm": 0.600135326385498, "learning_rate": 6.328485361908572e-06, "loss": 0.0027, "step": 16417 }, { "epoch": 15.786538461538461, "grad_norm": 0.006620578467845917, "learning_rate": 6.327326711769426e-06, "loss": 0.0001, "step": 16418 }, { "epoch": 15.7875, "grad_norm": 0.12536098062992096, "learning_rate": 6.326168118619391e-06, "loss": 0.0005, "step": 16419 }, { "epoch": 15.788461538461538, "grad_norm": 0.024748586118221283, "learning_rate": 6.325009582476442e-06, "loss": 0.0002, "step": 16420 }, { "epoch": 15.789423076923077, "grad_norm": 0.02964501827955246, "learning_rate": 6.323851103358558e-06, "loss": 0.0001, "step": 16421 }, { "epoch": 15.790384615384616, "grad_norm": 0.4596007764339447, "learning_rate": 6.3226926812837176e-06, "loss": 0.0019, "step": 16422 }, { "epoch": 15.791346153846154, "grad_norm": 0.047433074563741684, "learning_rate": 6.321534316269895e-06, "loss": 0.0003, "step": 16423 }, { "epoch": 15.792307692307693, "grad_norm": 0.11942537128925323, "learning_rate": 6.3203760083350615e-06, "loss": 0.0008, "step": 16424 }, { "epoch": 15.79326923076923, "grad_norm": 0.009466059505939484, "learning_rate": 6.319217757497192e-06, "loss": 0.0001, "step": 16425 }, { "epoch": 15.794230769230769, "grad_norm": 0.025159819051623344, "learning_rate": 6.3180595637742615e-06, "loss": 0.0001, "step": 16426 }, { "epoch": 15.795192307692307, "grad_norm": 0.03525875508785248, "learning_rate": 6.316901427184241e-06, "loss": 0.0003, "step": 16427 }, { "epoch": 15.796153846153846, "grad_norm": 0.6872939467430115, "learning_rate": 6.315743347745098e-06, "loss": 0.0045, "step": 16428 }, { "epoch": 15.797115384615385, "grad_norm": 0.6164495944976807, "learning_rate": 6.3145853254748065e-06, "loss": 0.0071, "step": 16429 }, { "epoch": 15.798076923076923, "grad_norm": 0.05330556631088257, "learning_rate": 6.313427360391336e-06, "loss": 0.0003, "step": 16430 }, { "epoch": 15.799038461538462, "grad_norm": 0.765965461730957, "learning_rate": 6.312269452512651e-06, "loss": 0.0003, "step": 16431 }, { "epoch": 15.8, "grad_norm": 2.535240650177002, "learning_rate": 6.311111601856718e-06, "loss": 0.0129, "step": 16432 }, { "epoch": 15.80096153846154, "grad_norm": 0.3977926969528198, "learning_rate": 6.30995380844151e-06, "loss": 0.0012, "step": 16433 }, { "epoch": 15.801923076923076, "grad_norm": 1.3740464448928833, "learning_rate": 6.308796072284988e-06, "loss": 0.0176, "step": 16434 }, { "epoch": 15.802884615384615, "grad_norm": 0.08439559489488602, "learning_rate": 6.3076383934051174e-06, "loss": 0.0006, "step": 16435 }, { "epoch": 15.803846153846154, "grad_norm": 0.2559049725532532, "learning_rate": 6.3064807718198605e-06, "loss": 0.0022, "step": 16436 }, { "epoch": 15.804807692307692, "grad_norm": 0.041740674525499344, "learning_rate": 6.3053232075471846e-06, "loss": 0.0003, "step": 16437 }, { "epoch": 15.805769230769231, "grad_norm": 0.11804547905921936, "learning_rate": 6.304165700605047e-06, "loss": 0.0004, "step": 16438 }, { "epoch": 15.80673076923077, "grad_norm": 0.03228731453418732, "learning_rate": 6.303008251011409e-06, "loss": 0.0003, "step": 16439 }, { "epoch": 15.807692307692308, "grad_norm": 0.0219601821154356, "learning_rate": 6.301850858784236e-06, "loss": 0.0001, "step": 16440 }, { "epoch": 15.808653846153845, "grad_norm": 0.21707898378372192, "learning_rate": 6.300693523941481e-06, "loss": 0.0009, "step": 16441 }, { "epoch": 15.809615384615384, "grad_norm": 0.6199036836624146, "learning_rate": 6.299536246501106e-06, "loss": 0.0019, "step": 16442 }, { "epoch": 15.810576923076923, "grad_norm": 0.3101538121700287, "learning_rate": 6.298379026481068e-06, "loss": 0.0018, "step": 16443 }, { "epoch": 15.811538461538461, "grad_norm": 0.0631188228726387, "learning_rate": 6.2972218638993235e-06, "loss": 0.0004, "step": 16444 }, { "epoch": 15.8125, "grad_norm": 0.025425968691706657, "learning_rate": 6.296064758773828e-06, "loss": 0.0002, "step": 16445 }, { "epoch": 15.813461538461539, "grad_norm": 0.09126988053321838, "learning_rate": 6.2949077111225345e-06, "loss": 0.0003, "step": 16446 }, { "epoch": 15.814423076923077, "grad_norm": 0.017549166455864906, "learning_rate": 6.293750720963401e-06, "loss": 0.0002, "step": 16447 }, { "epoch": 15.815384615384616, "grad_norm": 0.034874092787504196, "learning_rate": 6.292593788314378e-06, "loss": 0.0002, "step": 16448 }, { "epoch": 15.816346153846155, "grad_norm": 0.511132001876831, "learning_rate": 6.291436913193421e-06, "loss": 0.0021, "step": 16449 }, { "epoch": 15.817307692307692, "grad_norm": 0.019345277920365334, "learning_rate": 6.290280095618474e-06, "loss": 0.0001, "step": 16450 }, { "epoch": 15.81826923076923, "grad_norm": 0.01840544119477272, "learning_rate": 6.289123335607495e-06, "loss": 0.0001, "step": 16451 }, { "epoch": 15.819230769230769, "grad_norm": 0.026027262210845947, "learning_rate": 6.28796663317843e-06, "loss": 0.0002, "step": 16452 }, { "epoch": 15.820192307692308, "grad_norm": 2.3036739826202393, "learning_rate": 6.28680998834923e-06, "loss": 0.0142, "step": 16453 }, { "epoch": 15.821153846153846, "grad_norm": 0.31653016805648804, "learning_rate": 6.2856534011378365e-06, "loss": 0.0012, "step": 16454 }, { "epoch": 15.822115384615385, "grad_norm": 0.07646948844194412, "learning_rate": 6.284496871562204e-06, "loss": 0.0005, "step": 16455 }, { "epoch": 15.823076923076924, "grad_norm": 0.00789068453013897, "learning_rate": 6.283340399640274e-06, "loss": 0.0001, "step": 16456 }, { "epoch": 15.82403846153846, "grad_norm": 0.008338579908013344, "learning_rate": 6.282183985389991e-06, "loss": 0.0001, "step": 16457 }, { "epoch": 15.825, "grad_norm": 3.108091354370117, "learning_rate": 6.281027628829304e-06, "loss": 0.0159, "step": 16458 }, { "epoch": 15.825961538461538, "grad_norm": 0.016703002154827118, "learning_rate": 6.279871329976152e-06, "loss": 0.0001, "step": 16459 }, { "epoch": 15.826923076923077, "grad_norm": 0.029230570420622826, "learning_rate": 6.278715088848481e-06, "loss": 0.0003, "step": 16460 }, { "epoch": 15.827884615384615, "grad_norm": 0.01416211761534214, "learning_rate": 6.277558905464225e-06, "loss": 0.0001, "step": 16461 }, { "epoch": 15.828846153846154, "grad_norm": 0.04801690950989723, "learning_rate": 6.276402779841333e-06, "loss": 0.0002, "step": 16462 }, { "epoch": 15.829807692307693, "grad_norm": 3.68137788772583, "learning_rate": 6.2752467119977415e-06, "loss": 0.0765, "step": 16463 }, { "epoch": 15.830769230769231, "grad_norm": 2.8849070072174072, "learning_rate": 6.274090701951387e-06, "loss": 0.0782, "step": 16464 }, { "epoch": 15.83173076923077, "grad_norm": 1.7350080013275146, "learning_rate": 6.2729347497202095e-06, "loss": 0.0314, "step": 16465 }, { "epoch": 15.832692307692307, "grad_norm": 2.5041115283966064, "learning_rate": 6.2717788553221485e-06, "loss": 0.1188, "step": 16466 }, { "epoch": 15.833653846153846, "grad_norm": 0.05469926819205284, "learning_rate": 6.270623018775135e-06, "loss": 0.0003, "step": 16467 }, { "epoch": 15.834615384615384, "grad_norm": 0.20608174800872803, "learning_rate": 6.269467240097106e-06, "loss": 0.0007, "step": 16468 }, { "epoch": 15.835576923076923, "grad_norm": 0.08233229070901871, "learning_rate": 6.268311519305999e-06, "loss": 0.0003, "step": 16469 }, { "epoch": 15.836538461538462, "grad_norm": 0.06270045042037964, "learning_rate": 6.267155856419744e-06, "loss": 0.0005, "step": 16470 }, { "epoch": 15.8375, "grad_norm": 0.11246322095394135, "learning_rate": 6.266000251456275e-06, "loss": 0.0005, "step": 16471 }, { "epoch": 15.838461538461539, "grad_norm": 0.027350516989827156, "learning_rate": 6.264844704433519e-06, "loss": 0.0002, "step": 16472 }, { "epoch": 15.839423076923078, "grad_norm": 0.006067540030926466, "learning_rate": 6.263689215369414e-06, "loss": 0.0001, "step": 16473 }, { "epoch": 15.840384615384615, "grad_norm": 0.5970203876495361, "learning_rate": 6.262533784281887e-06, "loss": 0.0016, "step": 16474 }, { "epoch": 15.841346153846153, "grad_norm": 0.29966017603874207, "learning_rate": 6.261378411188865e-06, "loss": 0.001, "step": 16475 }, { "epoch": 15.842307692307692, "grad_norm": 0.02362690679728985, "learning_rate": 6.260223096108277e-06, "loss": 0.0001, "step": 16476 }, { "epoch": 15.84326923076923, "grad_norm": 0.21581777930259705, "learning_rate": 6.25906783905805e-06, "loss": 0.0009, "step": 16477 }, { "epoch": 15.84423076923077, "grad_norm": 0.7389833331108093, "learning_rate": 6.257912640056114e-06, "loss": 0.0013, "step": 16478 }, { "epoch": 15.845192307692308, "grad_norm": 0.0826878547668457, "learning_rate": 6.256757499120387e-06, "loss": 0.0006, "step": 16479 }, { "epoch": 15.846153846153847, "grad_norm": 0.44882431626319885, "learning_rate": 6.255602416268799e-06, "loss": 0.002, "step": 16480 }, { "epoch": 15.847115384615385, "grad_norm": 0.008826966397464275, "learning_rate": 6.254447391519271e-06, "loss": 0.0001, "step": 16481 }, { "epoch": 15.848076923076922, "grad_norm": 0.005831066053360701, "learning_rate": 6.25329242488973e-06, "loss": 0.0001, "step": 16482 }, { "epoch": 15.849038461538461, "grad_norm": 0.6370909214019775, "learning_rate": 6.252137516398089e-06, "loss": 0.0088, "step": 16483 }, { "epoch": 15.85, "grad_norm": 0.059710659086704254, "learning_rate": 6.250982666062276e-06, "loss": 0.0002, "step": 16484 }, { "epoch": 15.850961538461538, "grad_norm": 0.4055962860584259, "learning_rate": 6.249827873900209e-06, "loss": 0.0011, "step": 16485 }, { "epoch": 15.851923076923077, "grad_norm": 0.04979212209582329, "learning_rate": 6.248673139929806e-06, "loss": 0.0003, "step": 16486 }, { "epoch": 15.852884615384616, "grad_norm": 0.5153416395187378, "learning_rate": 6.247518464168986e-06, "loss": 0.0023, "step": 16487 }, { "epoch": 15.853846153846154, "grad_norm": 1.3354475498199463, "learning_rate": 6.246363846635668e-06, "loss": 0.0073, "step": 16488 }, { "epoch": 15.854807692307693, "grad_norm": 0.11649900674819946, "learning_rate": 6.245209287347767e-06, "loss": 0.0004, "step": 16489 }, { "epoch": 15.85576923076923, "grad_norm": 0.09741923213005066, "learning_rate": 6.244054786323192e-06, "loss": 0.0004, "step": 16490 }, { "epoch": 15.856730769230769, "grad_norm": 0.023265138268470764, "learning_rate": 6.242900343579869e-06, "loss": 0.0002, "step": 16491 }, { "epoch": 15.857692307692307, "grad_norm": 0.9552197456359863, "learning_rate": 6.241745959135703e-06, "loss": 0.0035, "step": 16492 }, { "epoch": 15.858653846153846, "grad_norm": 0.7775916457176208, "learning_rate": 6.2405916330086106e-06, "loss": 0.0029, "step": 16493 }, { "epoch": 15.859615384615385, "grad_norm": 0.15825137495994568, "learning_rate": 6.239437365216501e-06, "loss": 0.0006, "step": 16494 }, { "epoch": 15.860576923076923, "grad_norm": 0.10459783673286438, "learning_rate": 6.2382831557772875e-06, "loss": 0.0005, "step": 16495 }, { "epoch": 15.861538461538462, "grad_norm": 0.015326441265642643, "learning_rate": 6.237129004708878e-06, "loss": 0.0002, "step": 16496 }, { "epoch": 15.8625, "grad_norm": 0.013716925866901875, "learning_rate": 6.235974912029181e-06, "loss": 0.0001, "step": 16497 }, { "epoch": 15.86346153846154, "grad_norm": 1.0888525247573853, "learning_rate": 6.234820877756109e-06, "loss": 0.0016, "step": 16498 }, { "epoch": 15.864423076923076, "grad_norm": 0.2525181174278259, "learning_rate": 6.233666901907567e-06, "loss": 0.0009, "step": 16499 }, { "epoch": 15.865384615384615, "grad_norm": 0.8192707300186157, "learning_rate": 6.2325129845014585e-06, "loss": 0.0034, "step": 16500 }, { "epoch": 15.866346153846154, "grad_norm": 0.08316832035779953, "learning_rate": 6.231359125555689e-06, "loss": 0.0004, "step": 16501 }, { "epoch": 15.867307692307692, "grad_norm": 0.6464484333992004, "learning_rate": 6.230205325088169e-06, "loss": 0.0033, "step": 16502 }, { "epoch": 15.868269230769231, "grad_norm": 0.02087613195180893, "learning_rate": 6.229051583116796e-06, "loss": 0.0003, "step": 16503 }, { "epoch": 15.86923076923077, "grad_norm": 0.8845937252044678, "learning_rate": 6.227897899659476e-06, "loss": 0.0036, "step": 16504 }, { "epoch": 15.870192307692308, "grad_norm": 0.005018607247620821, "learning_rate": 6.2267442747341085e-06, "loss": 0.0, "step": 16505 }, { "epoch": 15.871153846153845, "grad_norm": 1.099068522453308, "learning_rate": 6.225590708358596e-06, "loss": 0.0245, "step": 16506 }, { "epoch": 15.872115384615384, "grad_norm": 0.0906929075717926, "learning_rate": 6.224437200550839e-06, "loss": 0.0006, "step": 16507 }, { "epoch": 15.873076923076923, "grad_norm": 0.025310160592198372, "learning_rate": 6.223283751328733e-06, "loss": 0.0002, "step": 16508 }, { "epoch": 15.874038461538461, "grad_norm": 0.018069123849272728, "learning_rate": 6.222130360710181e-06, "loss": 0.0002, "step": 16509 }, { "epoch": 15.875, "grad_norm": 0.4737822711467743, "learning_rate": 6.2209770287130776e-06, "loss": 0.0017, "step": 16510 }, { "epoch": 15.875961538461539, "grad_norm": 0.028027379885315895, "learning_rate": 6.2198237553553185e-06, "loss": 0.0002, "step": 16511 }, { "epoch": 15.876923076923077, "grad_norm": 1.749815821647644, "learning_rate": 6.2186705406547985e-06, "loss": 0.0902, "step": 16512 }, { "epoch": 15.877884615384616, "grad_norm": 0.04734744504094124, "learning_rate": 6.217517384629417e-06, "loss": 0.0002, "step": 16513 }, { "epoch": 15.878846153846155, "grad_norm": 0.011664221994578838, "learning_rate": 6.216364287297063e-06, "loss": 0.0001, "step": 16514 }, { "epoch": 15.879807692307692, "grad_norm": 2.9923694133758545, "learning_rate": 6.21521124867563e-06, "loss": 0.036, "step": 16515 }, { "epoch": 15.88076923076923, "grad_norm": 0.0039189597591757774, "learning_rate": 6.21405826878301e-06, "loss": 0.0, "step": 16516 }, { "epoch": 15.881730769230769, "grad_norm": 0.849894106388092, "learning_rate": 6.212905347637095e-06, "loss": 0.0061, "step": 16517 }, { "epoch": 15.882692307692308, "grad_norm": 0.040423907339572906, "learning_rate": 6.211752485255773e-06, "loss": 0.0005, "step": 16518 }, { "epoch": 15.883653846153846, "grad_norm": 0.03915480524301529, "learning_rate": 6.210599681656933e-06, "loss": 0.0004, "step": 16519 }, { "epoch": 15.884615384615385, "grad_norm": 0.04487815126776695, "learning_rate": 6.209446936858466e-06, "loss": 0.0003, "step": 16520 }, { "epoch": 15.885576923076924, "grad_norm": 1.7756059169769287, "learning_rate": 6.2082942508782576e-06, "loss": 0.0068, "step": 16521 }, { "epoch": 15.88653846153846, "grad_norm": 0.018949516117572784, "learning_rate": 6.207141623734194e-06, "loss": 0.0002, "step": 16522 }, { "epoch": 15.8875, "grad_norm": 0.018200721591711044, "learning_rate": 6.20598905544416e-06, "loss": 0.0001, "step": 16523 }, { "epoch": 15.888461538461538, "grad_norm": 1.114043951034546, "learning_rate": 6.204836546026039e-06, "loss": 0.0034, "step": 16524 }, { "epoch": 15.889423076923077, "grad_norm": 0.0723641887307167, "learning_rate": 6.20368409549772e-06, "loss": 0.0003, "step": 16525 }, { "epoch": 15.890384615384615, "grad_norm": 0.457819402217865, "learning_rate": 6.2025317038770796e-06, "loss": 0.0011, "step": 16526 }, { "epoch": 15.891346153846154, "grad_norm": 0.010874501429498196, "learning_rate": 6.201379371181999e-06, "loss": 0.0001, "step": 16527 }, { "epoch": 15.892307692307693, "grad_norm": 0.009458182379603386, "learning_rate": 6.200227097430365e-06, "loss": 0.0001, "step": 16528 }, { "epoch": 15.893269230769231, "grad_norm": 0.0532088540494442, "learning_rate": 6.1990748826400535e-06, "loss": 0.0003, "step": 16529 }, { "epoch": 15.89423076923077, "grad_norm": 0.010109013877809048, "learning_rate": 6.197922726828942e-06, "loss": 0.0001, "step": 16530 }, { "epoch": 15.895192307692307, "grad_norm": 2.5229976177215576, "learning_rate": 6.196770630014915e-06, "loss": 0.0784, "step": 16531 }, { "epoch": 15.896153846153846, "grad_norm": 0.14122962951660156, "learning_rate": 6.1956185922158445e-06, "loss": 0.0004, "step": 16532 }, { "epoch": 15.897115384615384, "grad_norm": 0.10929890722036362, "learning_rate": 6.1944666134496056e-06, "loss": 0.0007, "step": 16533 }, { "epoch": 15.898076923076923, "grad_norm": 0.12139289081096649, "learning_rate": 6.193314693734076e-06, "loss": 0.0006, "step": 16534 }, { "epoch": 15.899038461538462, "grad_norm": 1.069912075996399, "learning_rate": 6.192162833087132e-06, "loss": 0.006, "step": 16535 }, { "epoch": 15.9, "grad_norm": 0.02262870781123638, "learning_rate": 6.191011031526644e-06, "loss": 0.0002, "step": 16536 }, { "epoch": 15.900961538461539, "grad_norm": 0.7248636484146118, "learning_rate": 6.189859289070483e-06, "loss": 0.0017, "step": 16537 }, { "epoch": 15.901923076923078, "grad_norm": 0.023716598749160767, "learning_rate": 6.188707605736526e-06, "loss": 0.0003, "step": 16538 }, { "epoch": 15.902884615384615, "grad_norm": 0.15650278329849243, "learning_rate": 6.187555981542641e-06, "loss": 0.0005, "step": 16539 }, { "epoch": 15.903846153846153, "grad_norm": 3.507425308227539, "learning_rate": 6.186404416506697e-06, "loss": 0.0559, "step": 16540 }, { "epoch": 15.904807692307692, "grad_norm": 0.03682202473282814, "learning_rate": 6.1852529106465635e-06, "loss": 0.0004, "step": 16541 }, { "epoch": 15.90576923076923, "grad_norm": 0.7060808539390564, "learning_rate": 6.18410146398011e-06, "loss": 0.0043, "step": 16542 }, { "epoch": 15.90673076923077, "grad_norm": 3.167672634124756, "learning_rate": 6.182950076525201e-06, "loss": 0.0121, "step": 16543 }, { "epoch": 15.907692307692308, "grad_norm": 0.9287256598472595, "learning_rate": 6.181798748299706e-06, "loss": 0.0044, "step": 16544 }, { "epoch": 15.908653846153847, "grad_norm": 0.2083236128091812, "learning_rate": 6.180647479321484e-06, "loss": 0.0006, "step": 16545 }, { "epoch": 15.909615384615385, "grad_norm": 0.05805206298828125, "learning_rate": 6.179496269608406e-06, "loss": 0.0005, "step": 16546 }, { "epoch": 15.910576923076922, "grad_norm": 0.03715852275490761, "learning_rate": 6.178345119178333e-06, "loss": 0.0002, "step": 16547 }, { "epoch": 15.911538461538461, "grad_norm": 0.13078691065311432, "learning_rate": 6.177194028049125e-06, "loss": 0.0008, "step": 16548 }, { "epoch": 15.9125, "grad_norm": 0.3717658817768097, "learning_rate": 6.176042996238649e-06, "loss": 0.0012, "step": 16549 }, { "epoch": 15.913461538461538, "grad_norm": 0.09992965310811996, "learning_rate": 6.174892023764763e-06, "loss": 0.0004, "step": 16550 }, { "epoch": 15.914423076923077, "grad_norm": 0.16271725296974182, "learning_rate": 6.173741110645324e-06, "loss": 0.0011, "step": 16551 }, { "epoch": 15.915384615384616, "grad_norm": 2.172332525253296, "learning_rate": 6.172590256898192e-06, "loss": 0.0162, "step": 16552 }, { "epoch": 15.916346153846154, "grad_norm": 0.012061058543622494, "learning_rate": 6.171439462541229e-06, "loss": 0.0001, "step": 16553 }, { "epoch": 15.917307692307693, "grad_norm": 0.6903177499771118, "learning_rate": 6.170288727592287e-06, "loss": 0.0027, "step": 16554 }, { "epoch": 15.91826923076923, "grad_norm": 0.09522150456905365, "learning_rate": 6.169138052069226e-06, "loss": 0.0006, "step": 16555 }, { "epoch": 15.919230769230769, "grad_norm": 0.03113824501633644, "learning_rate": 6.167987435989895e-06, "loss": 0.0003, "step": 16556 }, { "epoch": 15.920192307692307, "grad_norm": 0.25413480401039124, "learning_rate": 6.1668368793721545e-06, "loss": 0.0011, "step": 16557 }, { "epoch": 15.921153846153846, "grad_norm": 0.2547335624694824, "learning_rate": 6.165686382233856e-06, "loss": 0.0012, "step": 16558 }, { "epoch": 15.922115384615385, "grad_norm": 0.2156147062778473, "learning_rate": 6.1645359445928485e-06, "loss": 0.0006, "step": 16559 }, { "epoch": 15.923076923076923, "grad_norm": 0.008704430423676968, "learning_rate": 6.163385566466989e-06, "loss": 0.0001, "step": 16560 }, { "epoch": 15.924038461538462, "grad_norm": 0.14090624451637268, "learning_rate": 6.162235247874126e-06, "loss": 0.0009, "step": 16561 }, { "epoch": 15.925, "grad_norm": 0.19980333745479584, "learning_rate": 6.161084988832107e-06, "loss": 0.001, "step": 16562 }, { "epoch": 15.92596153846154, "grad_norm": 0.05552319809794426, "learning_rate": 6.15993478935878e-06, "loss": 0.0003, "step": 16563 }, { "epoch": 15.926923076923076, "grad_norm": 2.2346079349517822, "learning_rate": 6.158784649471996e-06, "loss": 0.0324, "step": 16564 }, { "epoch": 15.927884615384615, "grad_norm": 0.03698403760790825, "learning_rate": 6.157634569189601e-06, "loss": 0.0003, "step": 16565 }, { "epoch": 15.928846153846154, "grad_norm": 0.33298972249031067, "learning_rate": 6.156484548529442e-06, "loss": 0.0012, "step": 16566 }, { "epoch": 15.929807692307692, "grad_norm": 1.1557449102401733, "learning_rate": 6.1553345875093586e-06, "loss": 0.0175, "step": 16567 }, { "epoch": 15.930769230769231, "grad_norm": 0.010272851213812828, "learning_rate": 6.1541846861472e-06, "loss": 0.0001, "step": 16568 }, { "epoch": 15.93173076923077, "grad_norm": 0.03848190978169441, "learning_rate": 6.153034844460808e-06, "loss": 0.0002, "step": 16569 }, { "epoch": 15.932692307692308, "grad_norm": 0.31130465865135193, "learning_rate": 6.1518850624680235e-06, "loss": 0.0017, "step": 16570 }, { "epoch": 15.933653846153845, "grad_norm": 0.007295320741832256, "learning_rate": 6.1507353401866896e-06, "loss": 0.0001, "step": 16571 }, { "epoch": 15.934615384615384, "grad_norm": 0.5368949174880981, "learning_rate": 6.149585677634646e-06, "loss": 0.0064, "step": 16572 }, { "epoch": 15.935576923076923, "grad_norm": 0.13744346797466278, "learning_rate": 6.148436074829732e-06, "loss": 0.0006, "step": 16573 }, { "epoch": 15.936538461538461, "grad_norm": 0.2577715516090393, "learning_rate": 6.1472865317897845e-06, "loss": 0.0005, "step": 16574 }, { "epoch": 15.9375, "grad_norm": 0.002893931232392788, "learning_rate": 6.146137048532643e-06, "loss": 0.0, "step": 16575 }, { "epoch": 15.938461538461539, "grad_norm": 1.3742269277572632, "learning_rate": 6.144987625076144e-06, "loss": 0.006, "step": 16576 }, { "epoch": 15.939423076923077, "grad_norm": 0.06749828159809113, "learning_rate": 6.143838261438122e-06, "loss": 0.0004, "step": 16577 }, { "epoch": 15.940384615384616, "grad_norm": 0.05369729548692703, "learning_rate": 6.142688957636413e-06, "loss": 0.0004, "step": 16578 }, { "epoch": 15.941346153846155, "grad_norm": 1.3209857940673828, "learning_rate": 6.141539713688851e-06, "loss": 0.0036, "step": 16579 }, { "epoch": 15.942307692307692, "grad_norm": 0.8629177808761597, "learning_rate": 6.140390529613269e-06, "loss": 0.0033, "step": 16580 }, { "epoch": 15.94326923076923, "grad_norm": 0.15982995927333832, "learning_rate": 6.139241405427495e-06, "loss": 0.0008, "step": 16581 }, { "epoch": 15.944230769230769, "grad_norm": 0.18424488604068756, "learning_rate": 6.1380923411493664e-06, "loss": 0.0007, "step": 16582 }, { "epoch": 15.945192307692308, "grad_norm": 0.008929669857025146, "learning_rate": 6.136943336796708e-06, "loss": 0.0001, "step": 16583 }, { "epoch": 15.946153846153846, "grad_norm": 1.3263362646102905, "learning_rate": 6.135794392387353e-06, "loss": 0.0038, "step": 16584 }, { "epoch": 15.947115384615385, "grad_norm": 0.3845628499984741, "learning_rate": 6.1346455079391254e-06, "loss": 0.0017, "step": 16585 }, { "epoch": 15.948076923076924, "grad_norm": 0.05242852866649628, "learning_rate": 6.1334966834698565e-06, "loss": 0.0004, "step": 16586 }, { "epoch": 15.94903846153846, "grad_norm": 0.09618806838989258, "learning_rate": 6.13234791899737e-06, "loss": 0.0005, "step": 16587 }, { "epoch": 15.95, "grad_norm": 0.025281045585870743, "learning_rate": 6.131199214539492e-06, "loss": 0.0002, "step": 16588 }, { "epoch": 15.950961538461538, "grad_norm": 0.054473813623189926, "learning_rate": 6.130050570114049e-06, "loss": 0.0006, "step": 16589 }, { "epoch": 15.951923076923077, "grad_norm": 0.024293329566717148, "learning_rate": 6.128901985738862e-06, "loss": 0.0002, "step": 16590 }, { "epoch": 15.952884615384615, "grad_norm": 0.011012909933924675, "learning_rate": 6.127753461431756e-06, "loss": 0.0001, "step": 16591 }, { "epoch": 15.953846153846154, "grad_norm": 2.7809336185455322, "learning_rate": 6.126604997210548e-06, "loss": 0.0672, "step": 16592 }, { "epoch": 15.954807692307693, "grad_norm": 1.0248514413833618, "learning_rate": 6.125456593093066e-06, "loss": 0.0053, "step": 16593 }, { "epoch": 15.955769230769231, "grad_norm": 0.0065202610567212105, "learning_rate": 6.124308249097124e-06, "loss": 0.0001, "step": 16594 }, { "epoch": 15.95673076923077, "grad_norm": 0.04433687776327133, "learning_rate": 6.123159965240543e-06, "loss": 0.0003, "step": 16595 }, { "epoch": 15.957692307692307, "grad_norm": 0.41496381163597107, "learning_rate": 6.122011741541141e-06, "loss": 0.0014, "step": 16596 }, { "epoch": 15.958653846153846, "grad_norm": 0.014981917105615139, "learning_rate": 6.120863578016736e-06, "loss": 0.0001, "step": 16597 }, { "epoch": 15.959615384615384, "grad_norm": 0.0666956976056099, "learning_rate": 6.119715474685144e-06, "loss": 0.0003, "step": 16598 }, { "epoch": 15.960576923076923, "grad_norm": 0.005191044881939888, "learning_rate": 6.118567431564175e-06, "loss": 0.0001, "step": 16599 }, { "epoch": 15.961538461538462, "grad_norm": 0.21218900382518768, "learning_rate": 6.117419448671651e-06, "loss": 0.0009, "step": 16600 }, { "epoch": 15.9625, "grad_norm": 0.011407868005335331, "learning_rate": 6.116271526025383e-06, "loss": 0.0001, "step": 16601 }, { "epoch": 15.963461538461539, "grad_norm": 0.008965053595602512, "learning_rate": 6.115123663643181e-06, "loss": 0.0001, "step": 16602 }, { "epoch": 15.964423076923078, "grad_norm": 0.011883902363479137, "learning_rate": 6.113975861542856e-06, "loss": 0.0001, "step": 16603 }, { "epoch": 15.965384615384615, "grad_norm": 0.8267287015914917, "learning_rate": 6.112828119742222e-06, "loss": 0.0014, "step": 16604 }, { "epoch": 15.966346153846153, "grad_norm": 0.6433497071266174, "learning_rate": 6.111680438259087e-06, "loss": 0.0017, "step": 16605 }, { "epoch": 15.967307692307692, "grad_norm": 0.0131873469799757, "learning_rate": 6.110532817111259e-06, "loss": 0.0001, "step": 16606 }, { "epoch": 15.96826923076923, "grad_norm": 0.03883311152458191, "learning_rate": 6.109385256316545e-06, "loss": 0.0004, "step": 16607 }, { "epoch": 15.96923076923077, "grad_norm": 1.3634954690933228, "learning_rate": 6.108237755892755e-06, "loss": 0.0149, "step": 16608 }, { "epoch": 15.970192307692308, "grad_norm": 0.013649923726916313, "learning_rate": 6.107090315857693e-06, "loss": 0.0002, "step": 16609 }, { "epoch": 15.971153846153847, "grad_norm": 0.1964527666568756, "learning_rate": 6.1059429362291615e-06, "loss": 0.0012, "step": 16610 }, { "epoch": 15.972115384615385, "grad_norm": 0.011744458228349686, "learning_rate": 6.104795617024968e-06, "loss": 0.0001, "step": 16611 }, { "epoch": 15.973076923076922, "grad_norm": 0.02236946113407612, "learning_rate": 6.1036483582629146e-06, "loss": 0.0003, "step": 16612 }, { "epoch": 15.974038461538461, "grad_norm": 0.02898966707289219, "learning_rate": 6.102501159960804e-06, "loss": 0.0002, "step": 16613 }, { "epoch": 15.975, "grad_norm": 0.01731916517019272, "learning_rate": 6.101354022136434e-06, "loss": 0.0001, "step": 16614 }, { "epoch": 15.975961538461538, "grad_norm": 0.4056210517883301, "learning_rate": 6.100206944807608e-06, "loss": 0.002, "step": 16615 }, { "epoch": 15.976923076923077, "grad_norm": 0.1400090456008911, "learning_rate": 6.099059927992126e-06, "loss": 0.0006, "step": 16616 }, { "epoch": 15.977884615384616, "grad_norm": 0.15148839354515076, "learning_rate": 6.097912971707784e-06, "loss": 0.0007, "step": 16617 }, { "epoch": 15.978846153846154, "grad_norm": 0.033813826739788055, "learning_rate": 6.096766075972378e-06, "loss": 0.0002, "step": 16618 }, { "epoch": 15.979807692307693, "grad_norm": 0.034744247794151306, "learning_rate": 6.09561924080371e-06, "loss": 0.0003, "step": 16619 }, { "epoch": 15.98076923076923, "grad_norm": 0.031655095517635345, "learning_rate": 6.09447246621957e-06, "loss": 0.0002, "step": 16620 }, { "epoch": 15.981730769230769, "grad_norm": 0.139204740524292, "learning_rate": 6.0933257522377545e-06, "loss": 0.0005, "step": 16621 }, { "epoch": 15.982692307692307, "grad_norm": 0.02722538262605667, "learning_rate": 6.092179098876059e-06, "loss": 0.0002, "step": 16622 }, { "epoch": 15.983653846153846, "grad_norm": 0.03358247131109238, "learning_rate": 6.091032506152274e-06, "loss": 0.0002, "step": 16623 }, { "epoch": 15.984615384615385, "grad_norm": 1.1166677474975586, "learning_rate": 6.089885974084192e-06, "loss": 0.0029, "step": 16624 }, { "epoch": 15.985576923076923, "grad_norm": 0.0713760256767273, "learning_rate": 6.088739502689603e-06, "loss": 0.0003, "step": 16625 }, { "epoch": 15.986538461538462, "grad_norm": 0.14213810861110687, "learning_rate": 6.087593091986297e-06, "loss": 0.001, "step": 16626 }, { "epoch": 15.9875, "grad_norm": 0.023960614576935768, "learning_rate": 6.086446741992064e-06, "loss": 0.0002, "step": 16627 }, { "epoch": 15.98846153846154, "grad_norm": 0.12306369841098785, "learning_rate": 6.08530045272469e-06, "loss": 0.0007, "step": 16628 }, { "epoch": 15.989423076923076, "grad_norm": 0.290058970451355, "learning_rate": 6.084154224201966e-06, "loss": 0.0017, "step": 16629 }, { "epoch": 15.990384615384615, "grad_norm": 0.35432901978492737, "learning_rate": 6.083008056441675e-06, "loss": 0.0019, "step": 16630 }, { "epoch": 15.991346153846154, "grad_norm": 0.691563606262207, "learning_rate": 6.0818619494616026e-06, "loss": 0.0414, "step": 16631 }, { "epoch": 15.992307692307692, "grad_norm": 0.024500148370862007, "learning_rate": 6.080715903279533e-06, "loss": 0.0002, "step": 16632 }, { "epoch": 15.993269230769231, "grad_norm": 0.2116825431585312, "learning_rate": 6.079569917913249e-06, "loss": 0.0007, "step": 16633 }, { "epoch": 15.99423076923077, "grad_norm": 0.027417002245783806, "learning_rate": 6.078423993380536e-06, "loss": 0.0002, "step": 16634 }, { "epoch": 15.995192307692308, "grad_norm": 1.1613329648971558, "learning_rate": 6.0772781296991715e-06, "loss": 0.0039, "step": 16635 }, { "epoch": 15.996153846153845, "grad_norm": 0.023801950737833977, "learning_rate": 6.076132326886934e-06, "loss": 0.0002, "step": 16636 }, { "epoch": 15.997115384615384, "grad_norm": 0.16860760748386383, "learning_rate": 6.074986584961611e-06, "loss": 0.0008, "step": 16637 }, { "epoch": 15.998076923076923, "grad_norm": 0.006870750337839127, "learning_rate": 6.073840903940975e-06, "loss": 0.0001, "step": 16638 }, { "epoch": 15.999038461538461, "grad_norm": 0.028941046446561813, "learning_rate": 6.072695283842802e-06, "loss": 0.0003, "step": 16639 }, { "epoch": 16.0, "grad_norm": 0.04982741177082062, "learning_rate": 6.071549724684875e-06, "loss": 0.0003, "step": 16640 }, { "epoch": 16.00096153846154, "grad_norm": 0.8836484551429749, "learning_rate": 6.070404226484967e-06, "loss": 0.0197, "step": 16641 }, { "epoch": 16.001923076923077, "grad_norm": 3.817098617553711, "learning_rate": 6.069258789260852e-06, "loss": 0.044, "step": 16642 }, { "epoch": 16.002884615384616, "grad_norm": 0.007910394109785557, "learning_rate": 6.068113413030301e-06, "loss": 0.0001, "step": 16643 }, { "epoch": 16.003846153846155, "grad_norm": 0.007879259064793587, "learning_rate": 6.066968097811092e-06, "loss": 0.0001, "step": 16644 }, { "epoch": 16.004807692307693, "grad_norm": 0.14235632121562958, "learning_rate": 6.065822843620997e-06, "loss": 0.0007, "step": 16645 }, { "epoch": 16.005769230769232, "grad_norm": 0.011967133730649948, "learning_rate": 6.064677650477781e-06, "loss": 0.0002, "step": 16646 }, { "epoch": 16.00673076923077, "grad_norm": 0.00909107830375433, "learning_rate": 6.0635325183992185e-06, "loss": 0.0001, "step": 16647 }, { "epoch": 16.00769230769231, "grad_norm": 0.009160606190562248, "learning_rate": 6.06238744740308e-06, "loss": 0.0001, "step": 16648 }, { "epoch": 16.008653846153845, "grad_norm": 0.010387597605586052, "learning_rate": 6.061242437507131e-06, "loss": 0.0001, "step": 16649 }, { "epoch": 16.009615384615383, "grad_norm": 0.019116058945655823, "learning_rate": 6.060097488729136e-06, "loss": 0.0002, "step": 16650 }, { "epoch": 16.010576923076922, "grad_norm": 0.21112293004989624, "learning_rate": 6.058952601086867e-06, "loss": 0.0009, "step": 16651 }, { "epoch": 16.01153846153846, "grad_norm": 0.014772390946745872, "learning_rate": 6.057807774598088e-06, "loss": 0.0002, "step": 16652 }, { "epoch": 16.0125, "grad_norm": 0.009567232802510262, "learning_rate": 6.056663009280561e-06, "loss": 0.0001, "step": 16653 }, { "epoch": 16.013461538461538, "grad_norm": 0.008762230165302753, "learning_rate": 6.0555183051520506e-06, "loss": 0.0001, "step": 16654 }, { "epoch": 16.014423076923077, "grad_norm": 0.03913840278983116, "learning_rate": 6.05437366223032e-06, "loss": 0.0003, "step": 16655 }, { "epoch": 16.015384615384615, "grad_norm": 0.02388632297515869, "learning_rate": 6.053229080533128e-06, "loss": 0.0003, "step": 16656 }, { "epoch": 16.016346153846154, "grad_norm": 0.011679821647703648, "learning_rate": 6.0520845600782395e-06, "loss": 0.0001, "step": 16657 }, { "epoch": 16.017307692307693, "grad_norm": 0.01674359105527401, "learning_rate": 6.050940100883409e-06, "loss": 0.0002, "step": 16658 }, { "epoch": 16.01826923076923, "grad_norm": 0.03384096547961235, "learning_rate": 6.049795702966399e-06, "loss": 0.0002, "step": 16659 }, { "epoch": 16.01923076923077, "grad_norm": 0.010990889742970467, "learning_rate": 6.048651366344966e-06, "loss": 0.0001, "step": 16660 }, { "epoch": 16.02019230769231, "grad_norm": 1.8040761947631836, "learning_rate": 6.047507091036866e-06, "loss": 0.0038, "step": 16661 }, { "epoch": 16.021153846153847, "grad_norm": 0.03663776069879532, "learning_rate": 6.0463628770598574e-06, "loss": 0.0003, "step": 16662 }, { "epoch": 16.022115384615386, "grad_norm": 0.011664263904094696, "learning_rate": 6.045218724431692e-06, "loss": 0.0001, "step": 16663 }, { "epoch": 16.023076923076925, "grad_norm": 0.012972841039299965, "learning_rate": 6.044074633170125e-06, "loss": 0.0002, "step": 16664 }, { "epoch": 16.02403846153846, "grad_norm": 0.029084863141179085, "learning_rate": 6.042930603292907e-06, "loss": 0.0003, "step": 16665 }, { "epoch": 16.025, "grad_norm": 0.024565495550632477, "learning_rate": 6.041786634817794e-06, "loss": 0.0003, "step": 16666 }, { "epoch": 16.025961538461537, "grad_norm": 1.3773329257965088, "learning_rate": 6.040642727762537e-06, "loss": 0.0044, "step": 16667 }, { "epoch": 16.026923076923076, "grad_norm": 1.2302874326705933, "learning_rate": 6.039498882144881e-06, "loss": 0.0069, "step": 16668 }, { "epoch": 16.027884615384615, "grad_norm": 0.12112817913293839, "learning_rate": 6.03835509798258e-06, "loss": 0.0005, "step": 16669 }, { "epoch": 16.028846153846153, "grad_norm": 0.14218074083328247, "learning_rate": 6.0372113752933816e-06, "loss": 0.0009, "step": 16670 }, { "epoch": 16.029807692307692, "grad_norm": 0.6196978688240051, "learning_rate": 6.036067714095032e-06, "loss": 0.0019, "step": 16671 }, { "epoch": 16.03076923076923, "grad_norm": 2.7828550338745117, "learning_rate": 6.034924114405275e-06, "loss": 0.0159, "step": 16672 }, { "epoch": 16.03173076923077, "grad_norm": 0.4743199646472931, "learning_rate": 6.033780576241861e-06, "loss": 0.0013, "step": 16673 }, { "epoch": 16.032692307692308, "grad_norm": 0.028497325256466866, "learning_rate": 6.032637099622532e-06, "loss": 0.0003, "step": 16674 }, { "epoch": 16.033653846153847, "grad_norm": 0.12898261845111847, "learning_rate": 6.0314936845650296e-06, "loss": 0.0009, "step": 16675 }, { "epoch": 16.034615384615385, "grad_norm": 0.0070623839274048805, "learning_rate": 6.0303503310870956e-06, "loss": 0.0001, "step": 16676 }, { "epoch": 16.035576923076924, "grad_norm": 0.04710189625620842, "learning_rate": 6.0292070392064774e-06, "loss": 0.0004, "step": 16677 }, { "epoch": 16.036538461538463, "grad_norm": 0.016631074249744415, "learning_rate": 6.028063808940911e-06, "loss": 0.0002, "step": 16678 }, { "epoch": 16.0375, "grad_norm": 0.004282973241060972, "learning_rate": 6.026920640308135e-06, "loss": 0.0, "step": 16679 }, { "epoch": 16.03846153846154, "grad_norm": 0.9800028204917908, "learning_rate": 6.025777533325891e-06, "loss": 0.0034, "step": 16680 }, { "epoch": 16.039423076923075, "grad_norm": 0.2108236402273178, "learning_rate": 6.0246344880119165e-06, "loss": 0.001, "step": 16681 }, { "epoch": 16.040384615384614, "grad_norm": 0.014483684673905373, "learning_rate": 6.023491504383947e-06, "loss": 0.0002, "step": 16682 }, { "epoch": 16.041346153846153, "grad_norm": 0.00729090953245759, "learning_rate": 6.022348582459716e-06, "loss": 0.0001, "step": 16683 }, { "epoch": 16.04230769230769, "grad_norm": 0.048818834125995636, "learning_rate": 6.021205722256963e-06, "loss": 0.0004, "step": 16684 }, { "epoch": 16.04326923076923, "grad_norm": 0.00689017353579402, "learning_rate": 6.02006292379342e-06, "loss": 0.0001, "step": 16685 }, { "epoch": 16.04423076923077, "grad_norm": 0.010185245424509048, "learning_rate": 6.018920187086819e-06, "loss": 0.0002, "step": 16686 }, { "epoch": 16.045192307692307, "grad_norm": 0.1426655650138855, "learning_rate": 6.01777751215489e-06, "loss": 0.0009, "step": 16687 }, { "epoch": 16.046153846153846, "grad_norm": 0.06148349866271019, "learning_rate": 6.016634899015369e-06, "loss": 0.0003, "step": 16688 }, { "epoch": 16.047115384615385, "grad_norm": 0.019157666712999344, "learning_rate": 6.015492347685983e-06, "loss": 0.0002, "step": 16689 }, { "epoch": 16.048076923076923, "grad_norm": 0.014392899349331856, "learning_rate": 6.01434985818446e-06, "loss": 0.0001, "step": 16690 }, { "epoch": 16.049038461538462, "grad_norm": 0.00905064307153225, "learning_rate": 6.01320743052853e-06, "loss": 0.0001, "step": 16691 }, { "epoch": 16.05, "grad_norm": 0.02015659026801586, "learning_rate": 6.012065064735922e-06, "loss": 0.0001, "step": 16692 }, { "epoch": 16.05096153846154, "grad_norm": 0.009816000238060951, "learning_rate": 6.010922760824357e-06, "loss": 0.0001, "step": 16693 }, { "epoch": 16.051923076923078, "grad_norm": 0.008325856178998947, "learning_rate": 6.009780518811562e-06, "loss": 0.0001, "step": 16694 }, { "epoch": 16.052884615384617, "grad_norm": 0.03570196032524109, "learning_rate": 6.008638338715265e-06, "loss": 0.0003, "step": 16695 }, { "epoch": 16.053846153846155, "grad_norm": 0.006109615322202444, "learning_rate": 6.007496220553186e-06, "loss": 0.0001, "step": 16696 }, { "epoch": 16.05480769230769, "grad_norm": 0.014514338225126266, "learning_rate": 6.006354164343047e-06, "loss": 0.0002, "step": 16697 }, { "epoch": 16.05576923076923, "grad_norm": 0.011145678348839283, "learning_rate": 6.005212170102569e-06, "loss": 0.0001, "step": 16698 }, { "epoch": 16.056730769230768, "grad_norm": 0.07321485131978989, "learning_rate": 6.004070237849475e-06, "loss": 0.0004, "step": 16699 }, { "epoch": 16.057692307692307, "grad_norm": 3.5682921409606934, "learning_rate": 6.002928367601484e-06, "loss": 0.0583, "step": 16700 }, { "epoch": 16.058653846153845, "grad_norm": 0.00411080801859498, "learning_rate": 6.00178655937631e-06, "loss": 0.0001, "step": 16701 }, { "epoch": 16.059615384615384, "grad_norm": 0.08718464523553848, "learning_rate": 6.000644813191677e-06, "loss": 0.0003, "step": 16702 }, { "epoch": 16.060576923076923, "grad_norm": 0.2061980962753296, "learning_rate": 5.999503129065298e-06, "loss": 0.0009, "step": 16703 }, { "epoch": 16.06153846153846, "grad_norm": 0.00483095133677125, "learning_rate": 5.998361507014889e-06, "loss": 0.0, "step": 16704 }, { "epoch": 16.0625, "grad_norm": 0.019040625542402267, "learning_rate": 5.997219947058164e-06, "loss": 0.0002, "step": 16705 }, { "epoch": 16.06346153846154, "grad_norm": 0.09933619201183319, "learning_rate": 5.996078449212839e-06, "loss": 0.0004, "step": 16706 }, { "epoch": 16.064423076923077, "grad_norm": 0.12195286899805069, "learning_rate": 5.994937013496624e-06, "loss": 0.0007, "step": 16707 }, { "epoch": 16.065384615384616, "grad_norm": 0.033858995884656906, "learning_rate": 5.993795639927231e-06, "loss": 0.0002, "step": 16708 }, { "epoch": 16.066346153846155, "grad_norm": 0.1187816634774208, "learning_rate": 5.9926543285223734e-06, "loss": 0.0007, "step": 16709 }, { "epoch": 16.067307692307693, "grad_norm": 0.012219533324241638, "learning_rate": 5.991513079299759e-06, "loss": 0.0001, "step": 16710 }, { "epoch": 16.068269230769232, "grad_norm": 0.009435256943106651, "learning_rate": 5.990371892277096e-06, "loss": 0.0001, "step": 16711 }, { "epoch": 16.06923076923077, "grad_norm": 0.9224720597267151, "learning_rate": 5.989230767472092e-06, "loss": 0.006, "step": 16712 }, { "epoch": 16.07019230769231, "grad_norm": 0.01519798580557108, "learning_rate": 5.988089704902458e-06, "loss": 0.0002, "step": 16713 }, { "epoch": 16.071153846153845, "grad_norm": 0.17750830948352814, "learning_rate": 5.986948704585895e-06, "loss": 0.0011, "step": 16714 }, { "epoch": 16.072115384615383, "grad_norm": 0.022616399452090263, "learning_rate": 5.9858077665401105e-06, "loss": 0.0001, "step": 16715 }, { "epoch": 16.073076923076922, "grad_norm": 0.21968521177768707, "learning_rate": 5.984666890782806e-06, "loss": 0.0011, "step": 16716 }, { "epoch": 16.07403846153846, "grad_norm": 0.005772134754806757, "learning_rate": 5.98352607733169e-06, "loss": 0.0001, "step": 16717 }, { "epoch": 16.075, "grad_norm": 0.6276267766952515, "learning_rate": 5.982385326204459e-06, "loss": 0.0051, "step": 16718 }, { "epoch": 16.075961538461538, "grad_norm": 0.023881230503320694, "learning_rate": 5.981244637418815e-06, "loss": 0.0002, "step": 16719 }, { "epoch": 16.076923076923077, "grad_norm": 0.0074264188297092915, "learning_rate": 5.980104010992462e-06, "loss": 0.0001, "step": 16720 }, { "epoch": 16.077884615384615, "grad_norm": 0.4280511140823364, "learning_rate": 5.978963446943095e-06, "loss": 0.0023, "step": 16721 }, { "epoch": 16.078846153846154, "grad_norm": 0.011698964051902294, "learning_rate": 5.977822945288415e-06, "loss": 0.0001, "step": 16722 }, { "epoch": 16.079807692307693, "grad_norm": 0.011302140541374683, "learning_rate": 5.9766825060461164e-06, "loss": 0.0001, "step": 16723 }, { "epoch": 16.08076923076923, "grad_norm": 0.010386330075562, "learning_rate": 5.975542129233898e-06, "loss": 0.0001, "step": 16724 }, { "epoch": 16.08173076923077, "grad_norm": 1.8524360656738281, "learning_rate": 5.974401814869453e-06, "loss": 0.0125, "step": 16725 }, { "epoch": 16.08269230769231, "grad_norm": 0.2914333641529083, "learning_rate": 5.973261562970478e-06, "loss": 0.001, "step": 16726 }, { "epoch": 16.083653846153847, "grad_norm": 1.7151246070861816, "learning_rate": 5.972121373554665e-06, "loss": 0.1423, "step": 16727 }, { "epoch": 16.084615384615386, "grad_norm": 0.011116205714643002, "learning_rate": 5.9709812466397056e-06, "loss": 0.0001, "step": 16728 }, { "epoch": 16.085576923076925, "grad_norm": 0.05507710203528404, "learning_rate": 5.969841182243293e-06, "loss": 0.0005, "step": 16729 }, { "epoch": 16.08653846153846, "grad_norm": 0.018230775371193886, "learning_rate": 5.968701180383116e-06, "loss": 0.0002, "step": 16730 }, { "epoch": 16.0875, "grad_norm": 0.0016435700235888362, "learning_rate": 5.967561241076867e-06, "loss": 0.0, "step": 16731 }, { "epoch": 16.088461538461537, "grad_norm": 0.007918061688542366, "learning_rate": 5.966421364342231e-06, "loss": 0.0001, "step": 16732 }, { "epoch": 16.089423076923076, "grad_norm": 0.17720253765583038, "learning_rate": 5.9652815501968975e-06, "loss": 0.0005, "step": 16733 }, { "epoch": 16.090384615384615, "grad_norm": 0.013263577595353127, "learning_rate": 5.964141798658552e-06, "loss": 0.0001, "step": 16734 }, { "epoch": 16.091346153846153, "grad_norm": 0.008785368874669075, "learning_rate": 5.963002109744881e-06, "loss": 0.0001, "step": 16735 }, { "epoch": 16.092307692307692, "grad_norm": 0.03595832362771034, "learning_rate": 5.9618624834735685e-06, "loss": 0.0002, "step": 16736 }, { "epoch": 16.09326923076923, "grad_norm": 0.021477505564689636, "learning_rate": 5.960722919862301e-06, "loss": 0.0002, "step": 16737 }, { "epoch": 16.09423076923077, "grad_norm": 0.019429922103881836, "learning_rate": 5.959583418928754e-06, "loss": 0.0002, "step": 16738 }, { "epoch": 16.095192307692308, "grad_norm": 0.03849881514906883, "learning_rate": 5.958443980690617e-06, "loss": 0.0004, "step": 16739 }, { "epoch": 16.096153846153847, "grad_norm": 0.014565303921699524, "learning_rate": 5.957304605165567e-06, "loss": 0.0001, "step": 16740 }, { "epoch": 16.097115384615385, "grad_norm": 0.7066767811775208, "learning_rate": 5.956165292371283e-06, "loss": 0.0012, "step": 16741 }, { "epoch": 16.098076923076924, "grad_norm": 0.01950468309223652, "learning_rate": 5.955026042325447e-06, "loss": 0.0002, "step": 16742 }, { "epoch": 16.099038461538463, "grad_norm": 0.0024503397289663553, "learning_rate": 5.953886855045734e-06, "loss": 0.0, "step": 16743 }, { "epoch": 16.1, "grad_norm": 0.019888920709490776, "learning_rate": 5.952747730549823e-06, "loss": 0.0001, "step": 16744 }, { "epoch": 16.10096153846154, "grad_norm": 1.3104444742202759, "learning_rate": 5.951608668855387e-06, "loss": 0.0065, "step": 16745 }, { "epoch": 16.101923076923075, "grad_norm": 0.18815423548221588, "learning_rate": 5.9504696699801035e-06, "loss": 0.0012, "step": 16746 }, { "epoch": 16.102884615384614, "grad_norm": 0.7961815595626831, "learning_rate": 5.949330733941647e-06, "loss": 0.0024, "step": 16747 }, { "epoch": 16.103846153846153, "grad_norm": 0.009554137475788593, "learning_rate": 5.948191860757687e-06, "loss": 0.0002, "step": 16748 }, { "epoch": 16.10480769230769, "grad_norm": 0.0214694831520319, "learning_rate": 5.947053050445897e-06, "loss": 0.0002, "step": 16749 }, { "epoch": 16.10576923076923, "grad_norm": 0.012517418712377548, "learning_rate": 5.94591430302395e-06, "loss": 0.0002, "step": 16750 }, { "epoch": 16.10673076923077, "grad_norm": 0.348247230052948, "learning_rate": 5.944775618509513e-06, "loss": 0.0016, "step": 16751 }, { "epoch": 16.107692307692307, "grad_norm": 0.01707310415804386, "learning_rate": 5.9436369969202565e-06, "loss": 0.0002, "step": 16752 }, { "epoch": 16.108653846153846, "grad_norm": 0.03627753257751465, "learning_rate": 5.942498438273849e-06, "loss": 0.0002, "step": 16753 }, { "epoch": 16.109615384615385, "grad_norm": 0.007344791665673256, "learning_rate": 5.941359942587959e-06, "loss": 0.0001, "step": 16754 }, { "epoch": 16.110576923076923, "grad_norm": 0.007635911460965872, "learning_rate": 5.940221509880249e-06, "loss": 0.0001, "step": 16755 }, { "epoch": 16.111538461538462, "grad_norm": 0.178920716047287, "learning_rate": 5.939083140168383e-06, "loss": 0.0007, "step": 16756 }, { "epoch": 16.1125, "grad_norm": 0.012243877165019512, "learning_rate": 5.937944833470032e-06, "loss": 0.0001, "step": 16757 }, { "epoch": 16.11346153846154, "grad_norm": 0.013837370090186596, "learning_rate": 5.936806589802855e-06, "loss": 0.0002, "step": 16758 }, { "epoch": 16.114423076923078, "grad_norm": 0.007579963654279709, "learning_rate": 5.9356684091845116e-06, "loss": 0.0001, "step": 16759 }, { "epoch": 16.115384615384617, "grad_norm": 0.06118877977132797, "learning_rate": 5.934530291632667e-06, "loss": 0.0003, "step": 16760 }, { "epoch": 16.116346153846155, "grad_norm": 0.014326685108244419, "learning_rate": 5.933392237164983e-06, "loss": 0.0002, "step": 16761 }, { "epoch": 16.11730769230769, "grad_norm": 0.34244999289512634, "learning_rate": 5.932254245799114e-06, "loss": 0.001, "step": 16762 }, { "epoch": 16.11826923076923, "grad_norm": 0.013330361805856228, "learning_rate": 5.931116317552718e-06, "loss": 0.0001, "step": 16763 }, { "epoch": 16.119230769230768, "grad_norm": 0.8104977011680603, "learning_rate": 5.929978452443459e-06, "loss": 0.0026, "step": 16764 }, { "epoch": 16.120192307692307, "grad_norm": 0.01896839030086994, "learning_rate": 5.928840650488988e-06, "loss": 0.0002, "step": 16765 }, { "epoch": 16.121153846153845, "grad_norm": 0.08802388608455658, "learning_rate": 5.927702911706961e-06, "loss": 0.0003, "step": 16766 }, { "epoch": 16.122115384615384, "grad_norm": 0.012290364131331444, "learning_rate": 5.92656523611503e-06, "loss": 0.0001, "step": 16767 }, { "epoch": 16.123076923076923, "grad_norm": 0.02811221405863762, "learning_rate": 5.925427623730855e-06, "loss": 0.0002, "step": 16768 }, { "epoch": 16.12403846153846, "grad_norm": 0.029894588515162468, "learning_rate": 5.924290074572083e-06, "loss": 0.0001, "step": 16769 }, { "epoch": 16.125, "grad_norm": 0.013140654191374779, "learning_rate": 5.923152588656366e-06, "loss": 0.0002, "step": 16770 }, { "epoch": 16.12596153846154, "grad_norm": 0.017966892570257187, "learning_rate": 5.922015166001356e-06, "loss": 0.0002, "step": 16771 }, { "epoch": 16.126923076923077, "grad_norm": 0.05235743522644043, "learning_rate": 5.920877806624703e-06, "loss": 0.0004, "step": 16772 }, { "epoch": 16.127884615384616, "grad_norm": 0.012374196201562881, "learning_rate": 5.919740510544054e-06, "loss": 0.0001, "step": 16773 }, { "epoch": 16.128846153846155, "grad_norm": 0.07043123245239258, "learning_rate": 5.918603277777056e-06, "loss": 0.0004, "step": 16774 }, { "epoch": 16.129807692307693, "grad_norm": 0.02091626636683941, "learning_rate": 5.917466108341357e-06, "loss": 0.0002, "step": 16775 }, { "epoch": 16.130769230769232, "grad_norm": 0.013666636310517788, "learning_rate": 5.916329002254602e-06, "loss": 0.0001, "step": 16776 }, { "epoch": 16.13173076923077, "grad_norm": 0.4477356970310211, "learning_rate": 5.915191959534436e-06, "loss": 0.0013, "step": 16777 }, { "epoch": 16.13269230769231, "grad_norm": 0.01698899269104004, "learning_rate": 5.9140549801985e-06, "loss": 0.0002, "step": 16778 }, { "epoch": 16.133653846153845, "grad_norm": 0.01026871893554926, "learning_rate": 5.912918064264441e-06, "loss": 0.0001, "step": 16779 }, { "epoch": 16.134615384615383, "grad_norm": 0.024270573630928993, "learning_rate": 5.911781211749897e-06, "loss": 0.0003, "step": 16780 }, { "epoch": 16.135576923076922, "grad_norm": 0.03993099555373192, "learning_rate": 5.910644422672509e-06, "loss": 0.0003, "step": 16781 }, { "epoch": 16.13653846153846, "grad_norm": 0.020895129069685936, "learning_rate": 5.909507697049918e-06, "loss": 0.0002, "step": 16782 }, { "epoch": 16.1375, "grad_norm": 0.24396979808807373, "learning_rate": 5.908371034899764e-06, "loss": 0.0018, "step": 16783 }, { "epoch": 16.138461538461538, "grad_norm": 0.017337575554847717, "learning_rate": 5.9072344362396815e-06, "loss": 0.0002, "step": 16784 }, { "epoch": 16.139423076923077, "grad_norm": 0.12724684178829193, "learning_rate": 5.906097901087306e-06, "loss": 0.0004, "step": 16785 }, { "epoch": 16.140384615384615, "grad_norm": 0.01419150736182928, "learning_rate": 5.9049614294602785e-06, "loss": 0.0002, "step": 16786 }, { "epoch": 16.141346153846154, "grad_norm": 0.5798634886741638, "learning_rate": 5.90382502137623e-06, "loss": 0.0049, "step": 16787 }, { "epoch": 16.142307692307693, "grad_norm": 0.061460305005311966, "learning_rate": 5.902688676852797e-06, "loss": 0.0003, "step": 16788 }, { "epoch": 16.14326923076923, "grad_norm": 0.025334693491458893, "learning_rate": 5.901552395907606e-06, "loss": 0.0002, "step": 16789 }, { "epoch": 16.14423076923077, "grad_norm": 0.03612437844276428, "learning_rate": 5.900416178558297e-06, "loss": 0.0003, "step": 16790 }, { "epoch": 16.14519230769231, "grad_norm": 0.0070153698325157166, "learning_rate": 5.899280024822496e-06, "loss": 0.0001, "step": 16791 }, { "epoch": 16.146153846153847, "grad_norm": 0.00867397803813219, "learning_rate": 5.898143934717831e-06, "loss": 0.0001, "step": 16792 }, { "epoch": 16.147115384615386, "grad_norm": 0.01589502952992916, "learning_rate": 5.897007908261935e-06, "loss": 0.0002, "step": 16793 }, { "epoch": 16.148076923076925, "grad_norm": 0.012022493407130241, "learning_rate": 5.895871945472434e-06, "loss": 0.0001, "step": 16794 }, { "epoch": 16.14903846153846, "grad_norm": 0.009743161499500275, "learning_rate": 5.894736046366956e-06, "loss": 0.0001, "step": 16795 }, { "epoch": 16.15, "grad_norm": 0.007622129749506712, "learning_rate": 5.893600210963124e-06, "loss": 0.0001, "step": 16796 }, { "epoch": 16.150961538461537, "grad_norm": 0.0036770799197256565, "learning_rate": 5.892464439278566e-06, "loss": 0.0, "step": 16797 }, { "epoch": 16.151923076923076, "grad_norm": 0.004259758163243532, "learning_rate": 5.891328731330903e-06, "loss": 0.0001, "step": 16798 }, { "epoch": 16.152884615384615, "grad_norm": 0.006021510809659958, "learning_rate": 5.890193087137759e-06, "loss": 0.0001, "step": 16799 }, { "epoch": 16.153846153846153, "grad_norm": 0.023233823478221893, "learning_rate": 5.8890575067167575e-06, "loss": 0.0001, "step": 16800 }, { "epoch": 16.154807692307692, "grad_norm": 0.011682970449328423, "learning_rate": 5.8879219900855185e-06, "loss": 0.0001, "step": 16801 }, { "epoch": 16.15576923076923, "grad_norm": 0.04668918997049332, "learning_rate": 5.886786537261662e-06, "loss": 0.0002, "step": 16802 }, { "epoch": 16.15673076923077, "grad_norm": 0.03569873422384262, "learning_rate": 5.8856511482628035e-06, "loss": 0.0002, "step": 16803 }, { "epoch": 16.157692307692308, "grad_norm": 0.03233857825398445, "learning_rate": 5.884515823106566e-06, "loss": 0.0002, "step": 16804 }, { "epoch": 16.158653846153847, "grad_norm": 0.042234744876623154, "learning_rate": 5.8833805618105635e-06, "loss": 0.0003, "step": 16805 }, { "epoch": 16.159615384615385, "grad_norm": 0.008608709089457989, "learning_rate": 5.8822453643924134e-06, "loss": 0.0001, "step": 16806 }, { "epoch": 16.160576923076924, "grad_norm": 0.08578520268201828, "learning_rate": 5.881110230869726e-06, "loss": 0.0006, "step": 16807 }, { "epoch": 16.161538461538463, "grad_norm": 0.06959998607635498, "learning_rate": 5.8799751612601234e-06, "loss": 0.0003, "step": 16808 }, { "epoch": 16.1625, "grad_norm": 0.021197378635406494, "learning_rate": 5.878840155581213e-06, "loss": 0.0002, "step": 16809 }, { "epoch": 16.16346153846154, "grad_norm": 0.07109290361404419, "learning_rate": 5.877705213850606e-06, "loss": 0.0002, "step": 16810 }, { "epoch": 16.164423076923075, "grad_norm": 0.011706599034368992, "learning_rate": 5.8765703360859185e-06, "loss": 0.0001, "step": 16811 }, { "epoch": 16.165384615384614, "grad_norm": 0.01186576671898365, "learning_rate": 5.875435522304755e-06, "loss": 0.0001, "step": 16812 }, { "epoch": 16.166346153846153, "grad_norm": 0.01697329245507717, "learning_rate": 5.874300772524728e-06, "loss": 0.0001, "step": 16813 }, { "epoch": 16.16730769230769, "grad_norm": 0.006661831866949797, "learning_rate": 5.873166086763442e-06, "loss": 0.0001, "step": 16814 }, { "epoch": 16.16826923076923, "grad_norm": 0.027395395562052727, "learning_rate": 5.872031465038508e-06, "loss": 0.0002, "step": 16815 }, { "epoch": 16.16923076923077, "grad_norm": 2.1780009269714355, "learning_rate": 5.870896907367532e-06, "loss": 0.0478, "step": 16816 }, { "epoch": 16.170192307692307, "grad_norm": 0.006822156719863415, "learning_rate": 5.869762413768114e-06, "loss": 0.0001, "step": 16817 }, { "epoch": 16.171153846153846, "grad_norm": 0.06597409397363663, "learning_rate": 5.868627984257862e-06, "loss": 0.0002, "step": 16818 }, { "epoch": 16.172115384615385, "grad_norm": 0.0219816192984581, "learning_rate": 5.867493618854379e-06, "loss": 0.0003, "step": 16819 }, { "epoch": 16.173076923076923, "grad_norm": 3.148953676223755, "learning_rate": 5.866359317575267e-06, "loss": 0.0645, "step": 16820 }, { "epoch": 16.174038461538462, "grad_norm": 1.196977138519287, "learning_rate": 5.865225080438124e-06, "loss": 0.0065, "step": 16821 }, { "epoch": 16.175, "grad_norm": 0.006857243366539478, "learning_rate": 5.8640909074605535e-06, "loss": 0.0001, "step": 16822 }, { "epoch": 16.17596153846154, "grad_norm": 0.818489134311676, "learning_rate": 5.862956798660154e-06, "loss": 0.0037, "step": 16823 }, { "epoch": 16.176923076923078, "grad_norm": 1.245192527770996, "learning_rate": 5.861822754054522e-06, "loss": 0.0069, "step": 16824 }, { "epoch": 16.177884615384617, "grad_norm": 0.013797577470541, "learning_rate": 5.860688773661256e-06, "loss": 0.0002, "step": 16825 }, { "epoch": 16.178846153846155, "grad_norm": 0.021959099918603897, "learning_rate": 5.859554857497951e-06, "loss": 0.0001, "step": 16826 }, { "epoch": 16.17980769230769, "grad_norm": 0.004973855800926685, "learning_rate": 5.858421005582204e-06, "loss": 0.0, "step": 16827 }, { "epoch": 16.18076923076923, "grad_norm": 2.63573956489563, "learning_rate": 5.857287217931606e-06, "loss": 0.0161, "step": 16828 }, { "epoch": 16.181730769230768, "grad_norm": 0.024344466626644135, "learning_rate": 5.856153494563752e-06, "loss": 0.0002, "step": 16829 }, { "epoch": 16.182692307692307, "grad_norm": 0.7596604228019714, "learning_rate": 5.855019835496234e-06, "loss": 0.002, "step": 16830 }, { "epoch": 16.183653846153845, "grad_norm": 0.01784900203347206, "learning_rate": 5.853886240746643e-06, "loss": 0.0001, "step": 16831 }, { "epoch": 16.184615384615384, "grad_norm": 0.020320672541856766, "learning_rate": 5.852752710332568e-06, "loss": 0.0001, "step": 16832 }, { "epoch": 16.185576923076923, "grad_norm": 0.013723999261856079, "learning_rate": 5.851619244271599e-06, "loss": 0.0001, "step": 16833 }, { "epoch": 16.18653846153846, "grad_norm": 0.004656053148210049, "learning_rate": 5.850485842581325e-06, "loss": 0.0001, "step": 16834 }, { "epoch": 16.1875, "grad_norm": 0.17597045004367828, "learning_rate": 5.849352505279332e-06, "loss": 0.0005, "step": 16835 }, { "epoch": 16.18846153846154, "grad_norm": 0.009577006101608276, "learning_rate": 5.848219232383203e-06, "loss": 0.0001, "step": 16836 }, { "epoch": 16.189423076923077, "grad_norm": 0.010307584889233112, "learning_rate": 5.847086023910531e-06, "loss": 0.0001, "step": 16837 }, { "epoch": 16.190384615384616, "grad_norm": 0.06242135539650917, "learning_rate": 5.8459528798788935e-06, "loss": 0.0004, "step": 16838 }, { "epoch": 16.191346153846155, "grad_norm": 0.27440062165260315, "learning_rate": 5.844819800305874e-06, "loss": 0.001, "step": 16839 }, { "epoch": 16.192307692307693, "grad_norm": 0.015445977449417114, "learning_rate": 5.8436867852090565e-06, "loss": 0.0002, "step": 16840 }, { "epoch": 16.193269230769232, "grad_norm": 0.44504669308662415, "learning_rate": 5.842553834606021e-06, "loss": 0.0013, "step": 16841 }, { "epoch": 16.19423076923077, "grad_norm": 0.04001321643590927, "learning_rate": 5.841420948514349e-06, "loss": 0.0004, "step": 16842 }, { "epoch": 16.19519230769231, "grad_norm": 2.70516037940979, "learning_rate": 5.840288126951618e-06, "loss": 0.0096, "step": 16843 }, { "epoch": 16.196153846153845, "grad_norm": 0.0037918081507086754, "learning_rate": 5.839155369935407e-06, "loss": 0.0001, "step": 16844 }, { "epoch": 16.197115384615383, "grad_norm": 0.010720605961978436, "learning_rate": 5.838022677483291e-06, "loss": 0.0002, "step": 16845 }, { "epoch": 16.198076923076922, "grad_norm": 0.008479257114231586, "learning_rate": 5.836890049612852e-06, "loss": 0.0001, "step": 16846 }, { "epoch": 16.19903846153846, "grad_norm": 1.9657864570617676, "learning_rate": 5.835757486341658e-06, "loss": 0.012, "step": 16847 }, { "epoch": 16.2, "grad_norm": 0.013613970018923283, "learning_rate": 5.834624987687288e-06, "loss": 0.0002, "step": 16848 }, { "epoch": 16.200961538461538, "grad_norm": 0.009791923686861992, "learning_rate": 5.83349255366731e-06, "loss": 0.0001, "step": 16849 }, { "epoch": 16.201923076923077, "grad_norm": 0.03770241141319275, "learning_rate": 5.832360184299298e-06, "loss": 0.0002, "step": 16850 }, { "epoch": 16.202884615384615, "grad_norm": 0.02076595462858677, "learning_rate": 5.8312278796008295e-06, "loss": 0.0002, "step": 16851 }, { "epoch": 16.203846153846154, "grad_norm": 0.11545006185770035, "learning_rate": 5.830095639589464e-06, "loss": 0.0009, "step": 16852 }, { "epoch": 16.204807692307693, "grad_norm": 0.7095113396644592, "learning_rate": 5.828963464282776e-06, "loss": 0.0128, "step": 16853 }, { "epoch": 16.20576923076923, "grad_norm": 0.012077823281288147, "learning_rate": 5.827831353698336e-06, "loss": 0.0001, "step": 16854 }, { "epoch": 16.20673076923077, "grad_norm": 0.023545749485492706, "learning_rate": 5.826699307853707e-06, "loss": 0.0002, "step": 16855 }, { "epoch": 16.20769230769231, "grad_norm": 0.026524459943175316, "learning_rate": 5.825567326766455e-06, "loss": 0.0001, "step": 16856 }, { "epoch": 16.208653846153847, "grad_norm": 0.019605576992034912, "learning_rate": 5.82443541045415e-06, "loss": 0.0002, "step": 16857 }, { "epoch": 16.209615384615386, "grad_norm": 1.0796878337860107, "learning_rate": 5.823303558934348e-06, "loss": 0.0052, "step": 16858 }, { "epoch": 16.210576923076925, "grad_norm": 0.11286674439907074, "learning_rate": 5.822171772224621e-06, "loss": 0.0006, "step": 16859 }, { "epoch": 16.21153846153846, "grad_norm": 0.018029555678367615, "learning_rate": 5.8210400503425215e-06, "loss": 0.0001, "step": 16860 }, { "epoch": 16.2125, "grad_norm": 0.00976718869060278, "learning_rate": 5.819908393305616e-06, "loss": 0.0001, "step": 16861 }, { "epoch": 16.213461538461537, "grad_norm": 0.09378884732723236, "learning_rate": 5.818776801131467e-06, "loss": 0.0003, "step": 16862 }, { "epoch": 16.214423076923076, "grad_norm": 0.005607571452856064, "learning_rate": 5.817645273837627e-06, "loss": 0.0001, "step": 16863 }, { "epoch": 16.215384615384615, "grad_norm": 0.04789021611213684, "learning_rate": 5.816513811441658e-06, "loss": 0.0002, "step": 16864 }, { "epoch": 16.216346153846153, "grad_norm": 0.013595238327980042, "learning_rate": 5.815382413961119e-06, "loss": 0.0002, "step": 16865 }, { "epoch": 16.217307692307692, "grad_norm": 0.03736991807818413, "learning_rate": 5.81425108141356e-06, "loss": 0.0002, "step": 16866 }, { "epoch": 16.21826923076923, "grad_norm": 0.017077704891562462, "learning_rate": 5.813119813816538e-06, "loss": 0.0002, "step": 16867 }, { "epoch": 16.21923076923077, "grad_norm": 0.05753694847226143, "learning_rate": 5.8119886111876134e-06, "loss": 0.0003, "step": 16868 }, { "epoch": 16.220192307692308, "grad_norm": 0.004565084353089333, "learning_rate": 5.810857473544329e-06, "loss": 0.0, "step": 16869 }, { "epoch": 16.221153846153847, "grad_norm": 0.053716354072093964, "learning_rate": 5.809726400904242e-06, "loss": 0.0004, "step": 16870 }, { "epoch": 16.222115384615385, "grad_norm": 0.03653901442885399, "learning_rate": 5.8085953932849074e-06, "loss": 0.0001, "step": 16871 }, { "epoch": 16.223076923076924, "grad_norm": 0.05918664485216141, "learning_rate": 5.807464450703866e-06, "loss": 0.0002, "step": 16872 }, { "epoch": 16.224038461538463, "grad_norm": 0.020547213032841682, "learning_rate": 5.806333573178675e-06, "loss": 0.0002, "step": 16873 }, { "epoch": 16.225, "grad_norm": 0.009729506447911263, "learning_rate": 5.805202760726876e-06, "loss": 0.0001, "step": 16874 }, { "epoch": 16.22596153846154, "grad_norm": 0.004440879449248314, "learning_rate": 5.804072013366017e-06, "loss": 0.0, "step": 16875 }, { "epoch": 16.226923076923075, "grad_norm": 0.015627143904566765, "learning_rate": 5.8029413311136516e-06, "loss": 0.0001, "step": 16876 }, { "epoch": 16.227884615384614, "grad_norm": 1.873755931854248, "learning_rate": 5.801810713987313e-06, "loss": 0.0198, "step": 16877 }, { "epoch": 16.228846153846153, "grad_norm": 0.5132063627243042, "learning_rate": 5.80068016200455e-06, "loss": 0.0014, "step": 16878 }, { "epoch": 16.22980769230769, "grad_norm": 0.010089610703289509, "learning_rate": 5.799549675182912e-06, "loss": 0.0001, "step": 16879 }, { "epoch": 16.23076923076923, "grad_norm": 0.0056383428163826466, "learning_rate": 5.79841925353993e-06, "loss": 0.0001, "step": 16880 }, { "epoch": 16.23173076923077, "grad_norm": 0.1878233402967453, "learning_rate": 5.797288897093149e-06, "loss": 0.0008, "step": 16881 }, { "epoch": 16.232692307692307, "grad_norm": 0.026025475934147835, "learning_rate": 5.796158605860114e-06, "loss": 0.0001, "step": 16882 }, { "epoch": 16.233653846153846, "grad_norm": 0.02389773726463318, "learning_rate": 5.795028379858355e-06, "loss": 0.0002, "step": 16883 }, { "epoch": 16.234615384615385, "grad_norm": 0.009969868697226048, "learning_rate": 5.7938982191054185e-06, "loss": 0.0001, "step": 16884 }, { "epoch": 16.235576923076923, "grad_norm": 0.5156604051589966, "learning_rate": 5.7927681236188325e-06, "loss": 0.0017, "step": 16885 }, { "epoch": 16.236538461538462, "grad_norm": 0.24723298847675323, "learning_rate": 5.7916380934161386e-06, "loss": 0.0008, "step": 16886 }, { "epoch": 16.2375, "grad_norm": 0.022653888911008835, "learning_rate": 5.790508128514872e-06, "loss": 0.0003, "step": 16887 }, { "epoch": 16.23846153846154, "grad_norm": 0.014164978638291359, "learning_rate": 5.7893782289325615e-06, "loss": 0.0001, "step": 16888 }, { "epoch": 16.239423076923078, "grad_norm": 0.009959488175809383, "learning_rate": 5.788248394686743e-06, "loss": 0.0001, "step": 16889 }, { "epoch": 16.240384615384617, "grad_norm": 0.009964109398424625, "learning_rate": 5.787118625794952e-06, "loss": 0.0001, "step": 16890 }, { "epoch": 16.241346153846155, "grad_norm": 0.030005164444446564, "learning_rate": 5.785988922274711e-06, "loss": 0.0002, "step": 16891 }, { "epoch": 16.24230769230769, "grad_norm": 1.3297098875045776, "learning_rate": 5.784859284143554e-06, "loss": 0.0072, "step": 16892 }, { "epoch": 16.24326923076923, "grad_norm": 0.013798183761537075, "learning_rate": 5.783729711419014e-06, "loss": 0.0001, "step": 16893 }, { "epoch": 16.244230769230768, "grad_norm": 2.269136905670166, "learning_rate": 5.782600204118609e-06, "loss": 0.0218, "step": 16894 }, { "epoch": 16.245192307692307, "grad_norm": 0.039701562374830246, "learning_rate": 5.781470762259877e-06, "loss": 0.0004, "step": 16895 }, { "epoch": 16.246153846153845, "grad_norm": 0.005656097084283829, "learning_rate": 5.780341385860333e-06, "loss": 0.0001, "step": 16896 }, { "epoch": 16.247115384615384, "grad_norm": 0.010320421308279037, "learning_rate": 5.779212074937505e-06, "loss": 0.0001, "step": 16897 }, { "epoch": 16.248076923076923, "grad_norm": 0.02064990997314453, "learning_rate": 5.778082829508922e-06, "loss": 0.0002, "step": 16898 }, { "epoch": 16.24903846153846, "grad_norm": 0.335605651140213, "learning_rate": 5.776953649592099e-06, "loss": 0.0015, "step": 16899 }, { "epoch": 16.25, "grad_norm": 0.0267293993383646, "learning_rate": 5.77582453520456e-06, "loss": 0.0002, "step": 16900 }, { "epoch": 16.25096153846154, "grad_norm": 0.08072855323553085, "learning_rate": 5.77469548636383e-06, "loss": 0.0004, "step": 16901 }, { "epoch": 16.251923076923077, "grad_norm": 0.010099435225129128, "learning_rate": 5.7735665030874225e-06, "loss": 0.0001, "step": 16902 }, { "epoch": 16.252884615384616, "grad_norm": 0.005812125746160746, "learning_rate": 5.772437585392858e-06, "loss": 0.0, "step": 16903 }, { "epoch": 16.253846153846155, "grad_norm": 0.19506153464317322, "learning_rate": 5.771308733297658e-06, "loss": 0.0008, "step": 16904 }, { "epoch": 16.254807692307693, "grad_norm": 0.06295067816972733, "learning_rate": 5.770179946819332e-06, "loss": 0.0003, "step": 16905 }, { "epoch": 16.255769230769232, "grad_norm": 0.01028283592313528, "learning_rate": 5.769051225975402e-06, "loss": 0.0001, "step": 16906 }, { "epoch": 16.25673076923077, "grad_norm": 0.09480089694261551, "learning_rate": 5.7679225707833765e-06, "loss": 0.0004, "step": 16907 }, { "epoch": 16.25769230769231, "grad_norm": 0.017544949427247047, "learning_rate": 5.76679398126077e-06, "loss": 0.0003, "step": 16908 }, { "epoch": 16.258653846153845, "grad_norm": 0.0380285382270813, "learning_rate": 5.765665457425102e-06, "loss": 0.0002, "step": 16909 }, { "epoch": 16.259615384615383, "grad_norm": 0.01876312494277954, "learning_rate": 5.764536999293875e-06, "loss": 0.0001, "step": 16910 }, { "epoch": 16.260576923076922, "grad_norm": 0.006125422194600105, "learning_rate": 5.763408606884602e-06, "loss": 0.0001, "step": 16911 }, { "epoch": 16.26153846153846, "grad_norm": 0.06848073750734329, "learning_rate": 5.762280280214796e-06, "loss": 0.0004, "step": 16912 }, { "epoch": 16.2625, "grad_norm": 0.4269929528236389, "learning_rate": 5.7611520193019585e-06, "loss": 0.0009, "step": 16913 }, { "epoch": 16.263461538461538, "grad_norm": 0.009661756455898285, "learning_rate": 5.760023824163603e-06, "loss": 0.0001, "step": 16914 }, { "epoch": 16.264423076923077, "grad_norm": 0.007186603732407093, "learning_rate": 5.758895694817237e-06, "loss": 0.0001, "step": 16915 }, { "epoch": 16.265384615384615, "grad_norm": 0.005920784547924995, "learning_rate": 5.757767631280358e-06, "loss": 0.0001, "step": 16916 }, { "epoch": 16.266346153846154, "grad_norm": 0.03617635369300842, "learning_rate": 5.756639633570478e-06, "loss": 0.0004, "step": 16917 }, { "epoch": 16.267307692307693, "grad_norm": 0.023156510666012764, "learning_rate": 5.7555117017050924e-06, "loss": 0.0002, "step": 16918 }, { "epoch": 16.26826923076923, "grad_norm": 2.3338725566864014, "learning_rate": 5.754383835701709e-06, "loss": 0.0431, "step": 16919 }, { "epoch": 16.26923076923077, "grad_norm": 0.016021350398659706, "learning_rate": 5.75325603557783e-06, "loss": 0.0001, "step": 16920 }, { "epoch": 16.27019230769231, "grad_norm": 0.0075702485628426075, "learning_rate": 5.752128301350951e-06, "loss": 0.0001, "step": 16921 }, { "epoch": 16.271153846153847, "grad_norm": 0.019638432189822197, "learning_rate": 5.751000633038573e-06, "loss": 0.0002, "step": 16922 }, { "epoch": 16.272115384615386, "grad_norm": 0.032576076686382294, "learning_rate": 5.7498730306581975e-06, "loss": 0.0003, "step": 16923 }, { "epoch": 16.273076923076925, "grad_norm": 0.3922938108444214, "learning_rate": 5.7487454942273155e-06, "loss": 0.0015, "step": 16924 }, { "epoch": 16.27403846153846, "grad_norm": 0.010206921026110649, "learning_rate": 5.747618023763426e-06, "loss": 0.0001, "step": 16925 }, { "epoch": 16.275, "grad_norm": 0.011154319159686565, "learning_rate": 5.746490619284029e-06, "loss": 0.0001, "step": 16926 }, { "epoch": 16.275961538461537, "grad_norm": 0.03430284559726715, "learning_rate": 5.745363280806608e-06, "loss": 0.0003, "step": 16927 }, { "epoch": 16.276923076923076, "grad_norm": 0.02065809816122055, "learning_rate": 5.744236008348667e-06, "loss": 0.0002, "step": 16928 }, { "epoch": 16.277884615384615, "grad_norm": 0.007334703579545021, "learning_rate": 5.7431088019276884e-06, "loss": 0.0001, "step": 16929 }, { "epoch": 16.278846153846153, "grad_norm": 0.14524315297603607, "learning_rate": 5.741981661561167e-06, "loss": 0.0007, "step": 16930 }, { "epoch": 16.279807692307692, "grad_norm": 0.01313048042356968, "learning_rate": 5.740854587266598e-06, "loss": 0.0001, "step": 16931 }, { "epoch": 16.28076923076923, "grad_norm": 0.08381855487823486, "learning_rate": 5.73972757906146e-06, "loss": 0.0005, "step": 16932 }, { "epoch": 16.28173076923077, "grad_norm": 0.009188718162477016, "learning_rate": 5.738600636963247e-06, "loss": 0.0002, "step": 16933 }, { "epoch": 16.282692307692308, "grad_norm": 0.03785703703761101, "learning_rate": 5.737473760989449e-06, "loss": 0.0002, "step": 16934 }, { "epoch": 16.283653846153847, "grad_norm": 0.04286079853773117, "learning_rate": 5.736346951157544e-06, "loss": 0.0003, "step": 16935 }, { "epoch": 16.284615384615385, "grad_norm": 0.006295868661254644, "learning_rate": 5.73522020748502e-06, "loss": 0.0001, "step": 16936 }, { "epoch": 16.285576923076924, "grad_norm": 0.034196414053440094, "learning_rate": 5.734093529989366e-06, "loss": 0.0003, "step": 16937 }, { "epoch": 16.286538461538463, "grad_norm": 0.02836986631155014, "learning_rate": 5.732966918688056e-06, "loss": 0.0002, "step": 16938 }, { "epoch": 16.2875, "grad_norm": 0.008410894311964512, "learning_rate": 5.731840373598581e-06, "loss": 0.0001, "step": 16939 }, { "epoch": 16.28846153846154, "grad_norm": 0.01236643921583891, "learning_rate": 5.730713894738411e-06, "loss": 0.0001, "step": 16940 }, { "epoch": 16.289423076923075, "grad_norm": 0.5985714793205261, "learning_rate": 5.729587482125031e-06, "loss": 0.0074, "step": 16941 }, { "epoch": 16.290384615384614, "grad_norm": 0.011797945946455002, "learning_rate": 5.728461135775924e-06, "loss": 0.0001, "step": 16942 }, { "epoch": 16.291346153846153, "grad_norm": 0.013270556926727295, "learning_rate": 5.727334855708559e-06, "loss": 0.0001, "step": 16943 }, { "epoch": 16.29230769230769, "grad_norm": 0.010057695209980011, "learning_rate": 5.726208641940418e-06, "loss": 0.0001, "step": 16944 }, { "epoch": 16.29326923076923, "grad_norm": 0.0066602714359760284, "learning_rate": 5.725082494488979e-06, "loss": 0.0, "step": 16945 }, { "epoch": 16.29423076923077, "grad_norm": 1.4171172380447388, "learning_rate": 5.723956413371705e-06, "loss": 0.0091, "step": 16946 }, { "epoch": 16.295192307692307, "grad_norm": 0.171571746468544, "learning_rate": 5.722830398606085e-06, "loss": 0.0005, "step": 16947 }, { "epoch": 16.296153846153846, "grad_norm": 1.4809643030166626, "learning_rate": 5.721704450209581e-06, "loss": 0.0059, "step": 16948 }, { "epoch": 16.297115384615385, "grad_norm": 0.010207034647464752, "learning_rate": 5.720578568199666e-06, "loss": 0.0001, "step": 16949 }, { "epoch": 16.298076923076923, "grad_norm": 0.00958987232297659, "learning_rate": 5.719452752593817e-06, "loss": 0.0001, "step": 16950 }, { "epoch": 16.299038461538462, "grad_norm": 0.0036285908427089453, "learning_rate": 5.718327003409492e-06, "loss": 0.0, "step": 16951 }, { "epoch": 16.3, "grad_norm": 0.0034949921537190676, "learning_rate": 5.717201320664168e-06, "loss": 0.0, "step": 16952 }, { "epoch": 16.30096153846154, "grad_norm": 0.005884426645934582, "learning_rate": 5.716075704375311e-06, "loss": 0.0001, "step": 16953 }, { "epoch": 16.301923076923078, "grad_norm": 0.022573290392756462, "learning_rate": 5.714950154560384e-06, "loss": 0.0001, "step": 16954 }, { "epoch": 16.302884615384617, "grad_norm": 0.042971592396497726, "learning_rate": 5.713824671236856e-06, "loss": 0.0003, "step": 16955 }, { "epoch": 16.303846153846155, "grad_norm": 0.008947627618908882, "learning_rate": 5.712699254422187e-06, "loss": 0.0001, "step": 16956 }, { "epoch": 16.30480769230769, "grad_norm": 0.018207555636763573, "learning_rate": 5.711573904133842e-06, "loss": 0.0001, "step": 16957 }, { "epoch": 16.30576923076923, "grad_norm": 0.11950124055147171, "learning_rate": 5.710448620389288e-06, "loss": 0.0006, "step": 16958 }, { "epoch": 16.306730769230768, "grad_norm": 4.056140899658203, "learning_rate": 5.709323403205977e-06, "loss": 0.0357, "step": 16959 }, { "epoch": 16.307692307692307, "grad_norm": 0.8349091410636902, "learning_rate": 5.708198252601374e-06, "loss": 0.0019, "step": 16960 }, { "epoch": 16.308653846153845, "grad_norm": 1.0205004215240479, "learning_rate": 5.707073168592943e-06, "loss": 0.0041, "step": 16961 }, { "epoch": 16.309615384615384, "grad_norm": 0.009986489079892635, "learning_rate": 5.705948151198131e-06, "loss": 0.0001, "step": 16962 }, { "epoch": 16.310576923076923, "grad_norm": 2.147634983062744, "learning_rate": 5.704823200434402e-06, "loss": 0.0117, "step": 16963 }, { "epoch": 16.31153846153846, "grad_norm": 0.3894430100917816, "learning_rate": 5.703698316319215e-06, "loss": 0.0013, "step": 16964 }, { "epoch": 16.3125, "grad_norm": 0.03861850127577782, "learning_rate": 5.702573498870015e-06, "loss": 0.0002, "step": 16965 }, { "epoch": 16.31346153846154, "grad_norm": 0.025811294093728065, "learning_rate": 5.701448748104267e-06, "loss": 0.0002, "step": 16966 }, { "epoch": 16.314423076923077, "grad_norm": 0.0402083620429039, "learning_rate": 5.700324064039413e-06, "loss": 0.0002, "step": 16967 }, { "epoch": 16.315384615384616, "grad_norm": 0.027399642392992973, "learning_rate": 5.699199446692911e-06, "loss": 0.0001, "step": 16968 }, { "epoch": 16.316346153846155, "grad_norm": 0.03563755750656128, "learning_rate": 5.698074896082215e-06, "loss": 0.0003, "step": 16969 }, { "epoch": 16.317307692307693, "grad_norm": 0.010407702997326851, "learning_rate": 5.696950412224767e-06, "loss": 0.0001, "step": 16970 }, { "epoch": 16.318269230769232, "grad_norm": 0.010427674278616905, "learning_rate": 5.695825995138019e-06, "loss": 0.0001, "step": 16971 }, { "epoch": 16.31923076923077, "grad_norm": 0.13329625129699707, "learning_rate": 5.694701644839422e-06, "loss": 0.0006, "step": 16972 }, { "epoch": 16.32019230769231, "grad_norm": 0.006872859783470631, "learning_rate": 5.6935773613464165e-06, "loss": 0.0001, "step": 16973 }, { "epoch": 16.321153846153845, "grad_norm": 0.01882876828312874, "learning_rate": 5.692453144676451e-06, "loss": 0.0002, "step": 16974 }, { "epoch": 16.322115384615383, "grad_norm": 0.012285270728170872, "learning_rate": 5.6913289948469745e-06, "loss": 0.0001, "step": 16975 }, { "epoch": 16.323076923076922, "grad_norm": 0.04548795893788338, "learning_rate": 5.690204911875422e-06, "loss": 0.0002, "step": 16976 }, { "epoch": 16.32403846153846, "grad_norm": 0.0214797705411911, "learning_rate": 5.689080895779244e-06, "loss": 0.0002, "step": 16977 }, { "epoch": 16.325, "grad_norm": 0.0058313170447945595, "learning_rate": 5.687956946575875e-06, "loss": 0.0001, "step": 16978 }, { "epoch": 16.325961538461538, "grad_norm": 0.026791758835315704, "learning_rate": 5.686833064282757e-06, "loss": 0.0002, "step": 16979 }, { "epoch": 16.326923076923077, "grad_norm": 0.08097723871469498, "learning_rate": 5.685709248917335e-06, "loss": 0.0003, "step": 16980 }, { "epoch": 16.327884615384615, "grad_norm": 0.041539859026670456, "learning_rate": 5.68458550049704e-06, "loss": 0.0002, "step": 16981 }, { "epoch": 16.328846153846154, "grad_norm": 0.006616962142288685, "learning_rate": 5.683461819039312e-06, "loss": 0.0001, "step": 16982 }, { "epoch": 16.329807692307693, "grad_norm": 0.04712202772498131, "learning_rate": 5.682338204561593e-06, "loss": 0.0002, "step": 16983 }, { "epoch": 16.33076923076923, "grad_norm": 0.13991956412792206, "learning_rate": 5.681214657081307e-06, "loss": 0.0006, "step": 16984 }, { "epoch": 16.33173076923077, "grad_norm": 0.24861975014209747, "learning_rate": 5.680091176615894e-06, "loss": 0.0012, "step": 16985 }, { "epoch": 16.33269230769231, "grad_norm": 1.0400015115737915, "learning_rate": 5.6789677631827925e-06, "loss": 0.0022, "step": 16986 }, { "epoch": 16.333653846153847, "grad_norm": 0.014837621711194515, "learning_rate": 5.677844416799424e-06, "loss": 0.0002, "step": 16987 }, { "epoch": 16.334615384615386, "grad_norm": 0.0638730376958847, "learning_rate": 5.676721137483226e-06, "loss": 0.0003, "step": 16988 }, { "epoch": 16.335576923076925, "grad_norm": 0.007233378943055868, "learning_rate": 5.67559792525163e-06, "loss": 0.0001, "step": 16989 }, { "epoch": 16.33653846153846, "grad_norm": 1.4545986652374268, "learning_rate": 5.674474780122059e-06, "loss": 0.0309, "step": 16990 }, { "epoch": 16.3375, "grad_norm": 3.0339503288269043, "learning_rate": 5.673351702111949e-06, "loss": 0.0197, "step": 16991 }, { "epoch": 16.338461538461537, "grad_norm": 0.04439434036612511, "learning_rate": 5.672228691238717e-06, "loss": 0.0002, "step": 16992 }, { "epoch": 16.339423076923076, "grad_norm": 0.016385549679398537, "learning_rate": 5.671105747519794e-06, "loss": 0.0002, "step": 16993 }, { "epoch": 16.340384615384615, "grad_norm": 0.013882721774280071, "learning_rate": 5.669982870972608e-06, "loss": 0.0001, "step": 16994 }, { "epoch": 16.341346153846153, "grad_norm": 4.619012355804443, "learning_rate": 5.6688600616145765e-06, "loss": 0.0781, "step": 16995 }, { "epoch": 16.342307692307692, "grad_norm": 0.01886986754834652, "learning_rate": 5.667737319463123e-06, "loss": 0.0002, "step": 16996 }, { "epoch": 16.34326923076923, "grad_norm": 0.012511581182479858, "learning_rate": 5.6666146445356775e-06, "loss": 0.0001, "step": 16997 }, { "epoch": 16.34423076923077, "grad_norm": 0.04066592827439308, "learning_rate": 5.665492036849649e-06, "loss": 0.0004, "step": 16998 }, { "epoch": 16.345192307692308, "grad_norm": 0.0052379434928298, "learning_rate": 5.664369496422463e-06, "loss": 0.0, "step": 16999 }, { "epoch": 16.346153846153847, "grad_norm": 0.03264502435922623, "learning_rate": 5.663247023271543e-06, "loss": 0.0003, "step": 17000 }, { "epoch": 16.347115384615385, "grad_norm": 0.5785736441612244, "learning_rate": 5.662124617414295e-06, "loss": 0.0011, "step": 17001 }, { "epoch": 16.348076923076924, "grad_norm": 0.015579787082970142, "learning_rate": 5.661002278868147e-06, "loss": 0.0002, "step": 17002 }, { "epoch": 16.349038461538463, "grad_norm": 0.009000975638628006, "learning_rate": 5.6598800076505025e-06, "loss": 0.0001, "step": 17003 }, { "epoch": 16.35, "grad_norm": 0.5603449940681458, "learning_rate": 5.658757803778785e-06, "loss": 0.0012, "step": 17004 }, { "epoch": 16.35096153846154, "grad_norm": 0.016339236870408058, "learning_rate": 5.6576356672704065e-06, "loss": 0.0001, "step": 17005 }, { "epoch": 16.351923076923075, "grad_norm": 0.019355103373527527, "learning_rate": 5.6565135981427746e-06, "loss": 0.0001, "step": 17006 }, { "epoch": 16.352884615384614, "grad_norm": 0.0035725480411201715, "learning_rate": 5.655391596413304e-06, "loss": 0.0, "step": 17007 }, { "epoch": 16.353846153846153, "grad_norm": 0.29816800355911255, "learning_rate": 5.654269662099408e-06, "loss": 0.0008, "step": 17008 }, { "epoch": 16.35480769230769, "grad_norm": 0.003744074609130621, "learning_rate": 5.65314779521849e-06, "loss": 0.0, "step": 17009 }, { "epoch": 16.35576923076923, "grad_norm": 0.02201925590634346, "learning_rate": 5.652025995787959e-06, "loss": 0.0002, "step": 17010 }, { "epoch": 16.35673076923077, "grad_norm": 0.011849469505250454, "learning_rate": 5.650904263825227e-06, "loss": 0.0001, "step": 17011 }, { "epoch": 16.357692307692307, "grad_norm": 0.013663611374795437, "learning_rate": 5.649782599347693e-06, "loss": 0.0001, "step": 17012 }, { "epoch": 16.358653846153846, "grad_norm": 0.021771596744656563, "learning_rate": 5.648661002372769e-06, "loss": 0.0001, "step": 17013 }, { "epoch": 16.359615384615385, "grad_norm": 0.019907774403691292, "learning_rate": 5.6475394729178514e-06, "loss": 0.0001, "step": 17014 }, { "epoch": 16.360576923076923, "grad_norm": 1.5730412006378174, "learning_rate": 5.646418011000347e-06, "loss": 0.0188, "step": 17015 }, { "epoch": 16.361538461538462, "grad_norm": 0.47681736946105957, "learning_rate": 5.645296616637661e-06, "loss": 0.0011, "step": 17016 }, { "epoch": 16.3625, "grad_norm": 0.016007062047719955, "learning_rate": 5.644175289847187e-06, "loss": 0.0001, "step": 17017 }, { "epoch": 16.36346153846154, "grad_norm": 0.016348984092473984, "learning_rate": 5.643054030646329e-06, "loss": 0.0001, "step": 17018 }, { "epoch": 16.364423076923078, "grad_norm": 1.16312575340271, "learning_rate": 5.641932839052488e-06, "loss": 0.006, "step": 17019 }, { "epoch": 16.365384615384617, "grad_norm": 0.09751211851835251, "learning_rate": 5.640811715083055e-06, "loss": 0.0003, "step": 17020 }, { "epoch": 16.366346153846155, "grad_norm": 0.008051974698901176, "learning_rate": 5.639690658755429e-06, "loss": 0.0001, "step": 17021 }, { "epoch": 16.36730769230769, "grad_norm": 0.010197886265814304, "learning_rate": 5.6385696700870114e-06, "loss": 0.0001, "step": 17022 }, { "epoch": 16.36826923076923, "grad_norm": 0.11142624914646149, "learning_rate": 5.637448749095189e-06, "loss": 0.0009, "step": 17023 }, { "epoch": 16.369230769230768, "grad_norm": 0.13733166456222534, "learning_rate": 5.63632789579736e-06, "loss": 0.0007, "step": 17024 }, { "epoch": 16.370192307692307, "grad_norm": 0.009911207482218742, "learning_rate": 5.635207110210912e-06, "loss": 0.0001, "step": 17025 }, { "epoch": 16.371153846153845, "grad_norm": 0.01349184662103653, "learning_rate": 5.63408639235324e-06, "loss": 0.0001, "step": 17026 }, { "epoch": 16.372115384615384, "grad_norm": 0.07638324797153473, "learning_rate": 5.632965742241736e-06, "loss": 0.0005, "step": 17027 }, { "epoch": 16.373076923076923, "grad_norm": 0.031105805188417435, "learning_rate": 5.631845159893783e-06, "loss": 0.0004, "step": 17028 }, { "epoch": 16.37403846153846, "grad_norm": 0.3019667863845825, "learning_rate": 5.630724645326773e-06, "loss": 0.0012, "step": 17029 }, { "epoch": 16.375, "grad_norm": 0.046541325747966766, "learning_rate": 5.629604198558096e-06, "loss": 0.0003, "step": 17030 }, { "epoch": 16.37596153846154, "grad_norm": 0.01725473441183567, "learning_rate": 5.628483819605132e-06, "loss": 0.0001, "step": 17031 }, { "epoch": 16.376923076923077, "grad_norm": 0.04569434002041817, "learning_rate": 5.6273635084852684e-06, "loss": 0.0002, "step": 17032 }, { "epoch": 16.377884615384616, "grad_norm": 0.005754122510552406, "learning_rate": 5.626243265215893e-06, "loss": 0.0, "step": 17033 }, { "epoch": 16.378846153846155, "grad_norm": 0.029300343245267868, "learning_rate": 5.6251230898143815e-06, "loss": 0.0001, "step": 17034 }, { "epoch": 16.379807692307693, "grad_norm": 1.5884865522384644, "learning_rate": 5.624002982298125e-06, "loss": 0.0011, "step": 17035 }, { "epoch": 16.380769230769232, "grad_norm": 0.014261763542890549, "learning_rate": 5.622882942684493e-06, "loss": 0.0001, "step": 17036 }, { "epoch": 16.38173076923077, "grad_norm": 0.04249231144785881, "learning_rate": 5.621762970990871e-06, "loss": 0.0002, "step": 17037 }, { "epoch": 16.38269230769231, "grad_norm": 0.023193133994936943, "learning_rate": 5.620643067234644e-06, "loss": 0.0002, "step": 17038 }, { "epoch": 16.383653846153845, "grad_norm": 1.4464292526245117, "learning_rate": 5.619523231433177e-06, "loss": 0.0595, "step": 17039 }, { "epoch": 16.384615384615383, "grad_norm": 0.11193815618753433, "learning_rate": 5.618403463603854e-06, "loss": 0.0003, "step": 17040 }, { "epoch": 16.385576923076922, "grad_norm": 0.026207076385617256, "learning_rate": 5.617283763764052e-06, "loss": 0.0002, "step": 17041 }, { "epoch": 16.38653846153846, "grad_norm": 2.406684637069702, "learning_rate": 5.616164131931141e-06, "loss": 0.027, "step": 17042 }, { "epoch": 16.3875, "grad_norm": 0.03455490246415138, "learning_rate": 5.615044568122495e-06, "loss": 0.0003, "step": 17043 }, { "epoch": 16.388461538461538, "grad_norm": 4.074919700622559, "learning_rate": 5.613925072355491e-06, "loss": 0.0709, "step": 17044 }, { "epoch": 16.389423076923077, "grad_norm": 0.04793989658355713, "learning_rate": 5.6128056446474944e-06, "loss": 0.0003, "step": 17045 }, { "epoch": 16.390384615384615, "grad_norm": 0.015938226133584976, "learning_rate": 5.611686285015881e-06, "loss": 0.0002, "step": 17046 }, { "epoch": 16.391346153846154, "grad_norm": 0.020559461787343025, "learning_rate": 5.610566993478014e-06, "loss": 0.0001, "step": 17047 }, { "epoch": 16.392307692307693, "grad_norm": 0.06363309174776077, "learning_rate": 5.609447770051264e-06, "loss": 0.0002, "step": 17048 }, { "epoch": 16.39326923076923, "grad_norm": 0.007514675613492727, "learning_rate": 5.608328614753001e-06, "loss": 0.0001, "step": 17049 }, { "epoch": 16.39423076923077, "grad_norm": 0.27959170937538147, "learning_rate": 5.607209527600586e-06, "loss": 0.001, "step": 17050 }, { "epoch": 16.39519230769231, "grad_norm": 0.1888311207294464, "learning_rate": 5.606090508611386e-06, "loss": 0.0005, "step": 17051 }, { "epoch": 16.396153846153847, "grad_norm": 0.03472262620925903, "learning_rate": 5.604971557802769e-06, "loss": 0.0002, "step": 17052 }, { "epoch": 16.397115384615386, "grad_norm": 0.02216322347521782, "learning_rate": 5.603852675192092e-06, "loss": 0.0001, "step": 17053 }, { "epoch": 16.398076923076925, "grad_norm": 0.04324183985590935, "learning_rate": 5.602733860796716e-06, "loss": 0.0004, "step": 17054 }, { "epoch": 16.39903846153846, "grad_norm": 0.059210896492004395, "learning_rate": 5.601615114634011e-06, "loss": 0.0003, "step": 17055 }, { "epoch": 16.4, "grad_norm": 0.16749396920204163, "learning_rate": 5.600496436721326e-06, "loss": 0.0007, "step": 17056 }, { "epoch": 16.400961538461537, "grad_norm": 2.3594491481781006, "learning_rate": 5.5993778270760266e-06, "loss": 0.0162, "step": 17057 }, { "epoch": 16.401923076923076, "grad_norm": 0.00964236631989479, "learning_rate": 5.598259285715465e-06, "loss": 0.0001, "step": 17058 }, { "epoch": 16.402884615384615, "grad_norm": 3.522780418395996, "learning_rate": 5.597140812656999e-06, "loss": 0.1018, "step": 17059 }, { "epoch": 16.403846153846153, "grad_norm": 0.210205540060997, "learning_rate": 5.596022407917991e-06, "loss": 0.0006, "step": 17060 }, { "epoch": 16.404807692307692, "grad_norm": 0.026581497862935066, "learning_rate": 5.594904071515784e-06, "loss": 0.0002, "step": 17061 }, { "epoch": 16.40576923076923, "grad_norm": 0.008846703916788101, "learning_rate": 5.593785803467738e-06, "loss": 0.0001, "step": 17062 }, { "epoch": 16.40673076923077, "grad_norm": 0.011588633060455322, "learning_rate": 5.5926676037912086e-06, "loss": 0.0001, "step": 17063 }, { "epoch": 16.407692307692308, "grad_norm": 0.8664278388023376, "learning_rate": 5.591549472503538e-06, "loss": 0.0222, "step": 17064 }, { "epoch": 16.408653846153847, "grad_norm": 0.02428586781024933, "learning_rate": 5.590431409622081e-06, "loss": 0.0002, "step": 17065 }, { "epoch": 16.409615384615385, "grad_norm": 2.715761661529541, "learning_rate": 5.589313415164191e-06, "loss": 0.0214, "step": 17066 }, { "epoch": 16.410576923076924, "grad_norm": 0.8594366312026978, "learning_rate": 5.588195489147208e-06, "loss": 0.0035, "step": 17067 }, { "epoch": 16.411538461538463, "grad_norm": 0.06256029009819031, "learning_rate": 5.587077631588485e-06, "loss": 0.0003, "step": 17068 }, { "epoch": 16.4125, "grad_norm": 0.011739260517060757, "learning_rate": 5.585959842505362e-06, "loss": 0.0001, "step": 17069 }, { "epoch": 16.41346153846154, "grad_norm": 0.2640986442565918, "learning_rate": 5.584842121915187e-06, "loss": 0.001, "step": 17070 }, { "epoch": 16.414423076923075, "grad_norm": 0.09422637522220612, "learning_rate": 5.5837244698353085e-06, "loss": 0.0003, "step": 17071 }, { "epoch": 16.415384615384614, "grad_norm": 0.02591012790799141, "learning_rate": 5.582606886283061e-06, "loss": 0.0002, "step": 17072 }, { "epoch": 16.416346153846153, "grad_norm": 0.3896014094352722, "learning_rate": 5.581489371275789e-06, "loss": 0.0019, "step": 17073 }, { "epoch": 16.41730769230769, "grad_norm": 0.015807705000042915, "learning_rate": 5.580371924830838e-06, "loss": 0.0001, "step": 17074 }, { "epoch": 16.41826923076923, "grad_norm": 0.02933519333600998, "learning_rate": 5.579254546965539e-06, "loss": 0.0003, "step": 17075 }, { "epoch": 16.41923076923077, "grad_norm": 0.09297344088554382, "learning_rate": 5.578137237697235e-06, "loss": 0.0004, "step": 17076 }, { "epoch": 16.420192307692307, "grad_norm": 0.007960799150168896, "learning_rate": 5.577019997043267e-06, "loss": 0.0001, "step": 17077 }, { "epoch": 16.421153846153846, "grad_norm": 0.03619550168514252, "learning_rate": 5.575902825020962e-06, "loss": 0.0002, "step": 17078 }, { "epoch": 16.422115384615385, "grad_norm": 0.9762933850288391, "learning_rate": 5.574785721647663e-06, "loss": 0.0026, "step": 17079 }, { "epoch": 16.423076923076923, "grad_norm": 0.13514161109924316, "learning_rate": 5.573668686940705e-06, "loss": 0.0006, "step": 17080 }, { "epoch": 16.424038461538462, "grad_norm": 0.8589785695075989, "learning_rate": 5.572551720917413e-06, "loss": 0.0028, "step": 17081 }, { "epoch": 16.425, "grad_norm": 0.38983580470085144, "learning_rate": 5.571434823595127e-06, "loss": 0.0016, "step": 17082 }, { "epoch": 16.42596153846154, "grad_norm": 0.9191544055938721, "learning_rate": 5.570317994991173e-06, "loss": 0.0078, "step": 17083 }, { "epoch": 16.426923076923078, "grad_norm": 0.008287187665700912, "learning_rate": 5.569201235122883e-06, "loss": 0.0001, "step": 17084 }, { "epoch": 16.427884615384617, "grad_norm": 1.7247294187545776, "learning_rate": 5.5680845440075885e-06, "loss": 0.008, "step": 17085 }, { "epoch": 16.428846153846155, "grad_norm": 0.008858933113515377, "learning_rate": 5.56696792166261e-06, "loss": 0.0001, "step": 17086 }, { "epoch": 16.42980769230769, "grad_norm": 1.2192268371582031, "learning_rate": 5.5658513681052796e-06, "loss": 0.0061, "step": 17087 }, { "epoch": 16.43076923076923, "grad_norm": 0.015817685052752495, "learning_rate": 5.564734883352925e-06, "loss": 0.0001, "step": 17088 }, { "epoch": 16.431730769230768, "grad_norm": 0.03983565792441368, "learning_rate": 5.563618467422864e-06, "loss": 0.0002, "step": 17089 }, { "epoch": 16.432692307692307, "grad_norm": 0.13053050637245178, "learning_rate": 5.562502120332424e-06, "loss": 0.0004, "step": 17090 }, { "epoch": 16.433653846153845, "grad_norm": 0.004901197738945484, "learning_rate": 5.56138584209893e-06, "loss": 0.0, "step": 17091 }, { "epoch": 16.434615384615384, "grad_norm": 0.004346125293523073, "learning_rate": 5.5602696327396985e-06, "loss": 0.0, "step": 17092 }, { "epoch": 16.435576923076923, "grad_norm": 0.02727665565907955, "learning_rate": 5.559153492272054e-06, "loss": 0.0002, "step": 17093 }, { "epoch": 16.43653846153846, "grad_norm": 0.019492002204060555, "learning_rate": 5.558037420713312e-06, "loss": 0.0002, "step": 17094 }, { "epoch": 16.4375, "grad_norm": 0.023312347009778023, "learning_rate": 5.55692141808079e-06, "loss": 0.0001, "step": 17095 }, { "epoch": 16.43846153846154, "grad_norm": 0.3875751197338104, "learning_rate": 5.555805484391812e-06, "loss": 0.0011, "step": 17096 }, { "epoch": 16.439423076923077, "grad_norm": 0.5275788903236389, "learning_rate": 5.554689619663684e-06, "loss": 0.0016, "step": 17097 }, { "epoch": 16.440384615384616, "grad_norm": 0.023299243301153183, "learning_rate": 5.553573823913728e-06, "loss": 0.0002, "step": 17098 }, { "epoch": 16.441346153846155, "grad_norm": 0.02159319631755352, "learning_rate": 5.55245809715926e-06, "loss": 0.0002, "step": 17099 }, { "epoch": 16.442307692307693, "grad_norm": 0.020333167165517807, "learning_rate": 5.551342439417584e-06, "loss": 0.0003, "step": 17100 }, { "epoch": 16.443269230769232, "grad_norm": 0.02792571671307087, "learning_rate": 5.550226850706018e-06, "loss": 0.0003, "step": 17101 }, { "epoch": 16.44423076923077, "grad_norm": 0.04807204753160477, "learning_rate": 5.549111331041874e-06, "loss": 0.0003, "step": 17102 }, { "epoch": 16.44519230769231, "grad_norm": 0.013306790962815285, "learning_rate": 5.547995880442456e-06, "loss": 0.0001, "step": 17103 }, { "epoch": 16.446153846153845, "grad_norm": 0.004087649751454592, "learning_rate": 5.546880498925079e-06, "loss": 0.0, "step": 17104 }, { "epoch": 16.447115384615383, "grad_norm": 0.025783386081457138, "learning_rate": 5.545765186507044e-06, "loss": 0.0002, "step": 17105 }, { "epoch": 16.448076923076922, "grad_norm": 0.3931719958782196, "learning_rate": 5.544649943205658e-06, "loss": 0.003, "step": 17106 }, { "epoch": 16.44903846153846, "grad_norm": 0.6560001969337463, "learning_rate": 5.543534769038235e-06, "loss": 0.0029, "step": 17107 }, { "epoch": 16.45, "grad_norm": 0.01124825980514288, "learning_rate": 5.542419664022068e-06, "loss": 0.0001, "step": 17108 }, { "epoch": 16.450961538461538, "grad_norm": 0.028396330773830414, "learning_rate": 5.541304628174464e-06, "loss": 0.0001, "step": 17109 }, { "epoch": 16.451923076923077, "grad_norm": 1.028449296951294, "learning_rate": 5.540189661512731e-06, "loss": 0.0035, "step": 17110 }, { "epoch": 16.452884615384615, "grad_norm": 0.015408947132527828, "learning_rate": 5.53907476405416e-06, "loss": 0.0001, "step": 17111 }, { "epoch": 16.453846153846154, "grad_norm": 0.04686248302459717, "learning_rate": 5.537959935816057e-06, "loss": 0.0002, "step": 17112 }, { "epoch": 16.454807692307693, "grad_norm": 0.011205635033547878, "learning_rate": 5.536845176815724e-06, "loss": 0.0001, "step": 17113 }, { "epoch": 16.45576923076923, "grad_norm": 0.05889459326863289, "learning_rate": 5.53573048707045e-06, "loss": 0.0002, "step": 17114 }, { "epoch": 16.45673076923077, "grad_norm": 2.054900646209717, "learning_rate": 5.53461586659754e-06, "loss": 0.0164, "step": 17115 }, { "epoch": 16.45769230769231, "grad_norm": 0.01809990592300892, "learning_rate": 5.533501315414282e-06, "loss": 0.0002, "step": 17116 }, { "epoch": 16.458653846153847, "grad_norm": 0.04292682930827141, "learning_rate": 5.5323868335379775e-06, "loss": 0.0002, "step": 17117 }, { "epoch": 16.459615384615386, "grad_norm": 0.03250822797417641, "learning_rate": 5.531272420985918e-06, "loss": 0.0003, "step": 17118 }, { "epoch": 16.460576923076925, "grad_norm": 0.03619065135717392, "learning_rate": 5.530158077775391e-06, "loss": 0.0003, "step": 17119 }, { "epoch": 16.46153846153846, "grad_norm": 0.006453098729252815, "learning_rate": 5.5290438039236925e-06, "loss": 0.0001, "step": 17120 }, { "epoch": 16.4625, "grad_norm": 0.006011382210999727, "learning_rate": 5.527929599448116e-06, "loss": 0.0001, "step": 17121 }, { "epoch": 16.463461538461537, "grad_norm": 0.011228535324335098, "learning_rate": 5.526815464365943e-06, "loss": 0.0002, "step": 17122 }, { "epoch": 16.464423076923076, "grad_norm": 0.10988736152648926, "learning_rate": 5.525701398694465e-06, "loss": 0.0004, "step": 17123 }, { "epoch": 16.465384615384615, "grad_norm": 0.04524581506848335, "learning_rate": 5.524587402450974e-06, "loss": 0.0002, "step": 17124 }, { "epoch": 16.466346153846153, "grad_norm": 0.010894274339079857, "learning_rate": 5.5234734756527475e-06, "loss": 0.0001, "step": 17125 }, { "epoch": 16.467307692307692, "grad_norm": 0.21773548424243927, "learning_rate": 5.522359618317077e-06, "loss": 0.0005, "step": 17126 }, { "epoch": 16.46826923076923, "grad_norm": 0.009987544268369675, "learning_rate": 5.52124583046124e-06, "loss": 0.0001, "step": 17127 }, { "epoch": 16.46923076923077, "grad_norm": 0.031184174120426178, "learning_rate": 5.5201321121025235e-06, "loss": 0.0002, "step": 17128 }, { "epoch": 16.470192307692308, "grad_norm": 0.0067185875959694386, "learning_rate": 5.519018463258212e-06, "loss": 0.0001, "step": 17129 }, { "epoch": 16.471153846153847, "grad_norm": 0.01694267801940441, "learning_rate": 5.517904883945577e-06, "loss": 0.0001, "step": 17130 }, { "epoch": 16.472115384615385, "grad_norm": 0.05072678253054619, "learning_rate": 5.516791374181906e-06, "loss": 0.0004, "step": 17131 }, { "epoch": 16.473076923076924, "grad_norm": 0.02662796340882778, "learning_rate": 5.515677933984477e-06, "loss": 0.0001, "step": 17132 }, { "epoch": 16.474038461538463, "grad_norm": 0.05364122614264488, "learning_rate": 5.514564563370562e-06, "loss": 0.0003, "step": 17133 }, { "epoch": 16.475, "grad_norm": 1.7382209300994873, "learning_rate": 5.513451262357439e-06, "loss": 0.0084, "step": 17134 }, { "epoch": 16.47596153846154, "grad_norm": 0.0030019229743629694, "learning_rate": 5.512338030962389e-06, "loss": 0.0, "step": 17135 }, { "epoch": 16.476923076923075, "grad_norm": 0.008054367266595364, "learning_rate": 5.511224869202678e-06, "loss": 0.0001, "step": 17136 }, { "epoch": 16.477884615384614, "grad_norm": 0.0052114506252110004, "learning_rate": 5.510111777095588e-06, "loss": 0.0, "step": 17137 }, { "epoch": 16.478846153846153, "grad_norm": 1.447426199913025, "learning_rate": 5.50899875465838e-06, "loss": 0.0261, "step": 17138 }, { "epoch": 16.47980769230769, "grad_norm": 0.981534481048584, "learning_rate": 5.50788580190833e-06, "loss": 0.0018, "step": 17139 }, { "epoch": 16.48076923076923, "grad_norm": 0.012790195643901825, "learning_rate": 5.506772918862713e-06, "loss": 0.0001, "step": 17140 }, { "epoch": 16.48173076923077, "grad_norm": 0.013975747860968113, "learning_rate": 5.505660105538789e-06, "loss": 0.0001, "step": 17141 }, { "epoch": 16.482692307692307, "grad_norm": 0.021668588742613792, "learning_rate": 5.504547361953829e-06, "loss": 0.0002, "step": 17142 }, { "epoch": 16.483653846153846, "grad_norm": 0.2959226071834564, "learning_rate": 5.503434688125104e-06, "loss": 0.0011, "step": 17143 }, { "epoch": 16.484615384615385, "grad_norm": 0.18347640335559845, "learning_rate": 5.502322084069871e-06, "loss": 0.0006, "step": 17144 }, { "epoch": 16.485576923076923, "grad_norm": 0.024314936250448227, "learning_rate": 5.501209549805399e-06, "loss": 0.0002, "step": 17145 }, { "epoch": 16.486538461538462, "grad_norm": 0.002815275453031063, "learning_rate": 5.500097085348957e-06, "loss": 0.0, "step": 17146 }, { "epoch": 16.4875, "grad_norm": 0.018305979669094086, "learning_rate": 5.498984690717795e-06, "loss": 0.0001, "step": 17147 }, { "epoch": 16.48846153846154, "grad_norm": 0.012642137706279755, "learning_rate": 5.497872365929184e-06, "loss": 0.0001, "step": 17148 }, { "epoch": 16.489423076923078, "grad_norm": 0.02120041474699974, "learning_rate": 5.496760111000376e-06, "loss": 0.0002, "step": 17149 }, { "epoch": 16.490384615384617, "grad_norm": 0.6845468282699585, "learning_rate": 5.495647925948636e-06, "loss": 0.0022, "step": 17150 }, { "epoch": 16.491346153846155, "grad_norm": 0.0034644571132957935, "learning_rate": 5.494535810791224e-06, "loss": 0.0, "step": 17151 }, { "epoch": 16.49230769230769, "grad_norm": 0.005637701600790024, "learning_rate": 5.493423765545387e-06, "loss": 0.0001, "step": 17152 }, { "epoch": 16.49326923076923, "grad_norm": 0.5027514100074768, "learning_rate": 5.492311790228389e-06, "loss": 0.0013, "step": 17153 }, { "epoch": 16.494230769230768, "grad_norm": 0.03706071153283119, "learning_rate": 5.491199884857483e-06, "loss": 0.0002, "step": 17154 }, { "epoch": 16.495192307692307, "grad_norm": 5.225658893585205, "learning_rate": 5.49008804944992e-06, "loss": 0.0268, "step": 17155 }, { "epoch": 16.496153846153845, "grad_norm": 1.3661631345748901, "learning_rate": 5.488976284022953e-06, "loss": 0.0181, "step": 17156 }, { "epoch": 16.497115384615384, "grad_norm": 0.020686961710453033, "learning_rate": 5.487864588593839e-06, "loss": 0.0002, "step": 17157 }, { "epoch": 16.498076923076923, "grad_norm": 1.631369948387146, "learning_rate": 5.486752963179819e-06, "loss": 0.0032, "step": 17158 }, { "epoch": 16.49903846153846, "grad_norm": 1.2448807954788208, "learning_rate": 5.485641407798151e-06, "loss": 0.0046, "step": 17159 }, { "epoch": 16.5, "grad_norm": 0.3305225670337677, "learning_rate": 5.484529922466075e-06, "loss": 0.0008, "step": 17160 }, { "epoch": 16.50096153846154, "grad_norm": 0.028568077832460403, "learning_rate": 5.483418507200842e-06, "loss": 0.0001, "step": 17161 }, { "epoch": 16.501923076923077, "grad_norm": 0.05159777030348778, "learning_rate": 5.482307162019702e-06, "loss": 0.0003, "step": 17162 }, { "epoch": 16.502884615384616, "grad_norm": 0.20423927903175354, "learning_rate": 5.4811958869398905e-06, "loss": 0.0008, "step": 17163 }, { "epoch": 16.503846153846155, "grad_norm": 0.030793827027082443, "learning_rate": 5.480084681978657e-06, "loss": 0.0002, "step": 17164 }, { "epoch": 16.504807692307693, "grad_norm": 3.5339341163635254, "learning_rate": 5.478973547153248e-06, "loss": 0.0489, "step": 17165 }, { "epoch": 16.505769230769232, "grad_norm": 0.025651661679148674, "learning_rate": 5.477862482480896e-06, "loss": 0.0002, "step": 17166 }, { "epoch": 16.50673076923077, "grad_norm": 1.182942271232605, "learning_rate": 5.476751487978846e-06, "loss": 0.0029, "step": 17167 }, { "epoch": 16.50769230769231, "grad_norm": 0.09999126195907593, "learning_rate": 5.47564056366434e-06, "loss": 0.0002, "step": 17168 }, { "epoch": 16.508653846153845, "grad_norm": 0.004464823752641678, "learning_rate": 5.4745297095546125e-06, "loss": 0.0, "step": 17169 }, { "epoch": 16.509615384615383, "grad_norm": 0.012328308075666428, "learning_rate": 5.4734189256668994e-06, "loss": 0.0001, "step": 17170 }, { "epoch": 16.510576923076922, "grad_norm": 0.013320354744791985, "learning_rate": 5.472308212018445e-06, "loss": 0.0001, "step": 17171 }, { "epoch": 16.51153846153846, "grad_norm": 0.005839035380631685, "learning_rate": 5.471197568626471e-06, "loss": 0.0001, "step": 17172 }, { "epoch": 16.5125, "grad_norm": 0.027080826461315155, "learning_rate": 5.470086995508226e-06, "loss": 0.0001, "step": 17173 }, { "epoch": 16.513461538461538, "grad_norm": 0.03132379800081253, "learning_rate": 5.468976492680932e-06, "loss": 0.0001, "step": 17174 }, { "epoch": 16.514423076923077, "grad_norm": 0.034910619258880615, "learning_rate": 5.467866060161824e-06, "loss": 0.0001, "step": 17175 }, { "epoch": 16.515384615384615, "grad_norm": 0.10057753324508667, "learning_rate": 5.466755697968135e-06, "loss": 0.0005, "step": 17176 }, { "epoch": 16.516346153846154, "grad_norm": 0.006593339145183563, "learning_rate": 5.465645406117091e-06, "loss": 0.0, "step": 17177 }, { "epoch": 16.517307692307693, "grad_norm": 0.0665864571928978, "learning_rate": 5.464535184625921e-06, "loss": 0.0004, "step": 17178 }, { "epoch": 16.51826923076923, "grad_norm": 0.006341521628201008, "learning_rate": 5.4634250335118575e-06, "loss": 0.0001, "step": 17179 }, { "epoch": 16.51923076923077, "grad_norm": 0.13419996201992035, "learning_rate": 5.46231495279212e-06, "loss": 0.0003, "step": 17180 }, { "epoch": 16.52019230769231, "grad_norm": 0.013578961603343487, "learning_rate": 5.461204942483935e-06, "loss": 0.0002, "step": 17181 }, { "epoch": 16.521153846153847, "grad_norm": 0.28561311960220337, "learning_rate": 5.460095002604533e-06, "loss": 0.0009, "step": 17182 }, { "epoch": 16.522115384615386, "grad_norm": 0.007955665700137615, "learning_rate": 5.4589851331711285e-06, "loss": 0.0, "step": 17183 }, { "epoch": 16.523076923076925, "grad_norm": 0.0265821386128664, "learning_rate": 5.4578753342009505e-06, "loss": 0.0001, "step": 17184 }, { "epoch": 16.52403846153846, "grad_norm": 1.8359475135803223, "learning_rate": 5.456765605711212e-06, "loss": 0.0043, "step": 17185 }, { "epoch": 16.525, "grad_norm": 0.6346433758735657, "learning_rate": 5.4556559477191366e-06, "loss": 0.0014, "step": 17186 }, { "epoch": 16.525961538461537, "grad_norm": 0.02481575682759285, "learning_rate": 5.454546360241948e-06, "loss": 0.0001, "step": 17187 }, { "epoch": 16.526923076923076, "grad_norm": 0.021423857659101486, "learning_rate": 5.4534368432968555e-06, "loss": 0.0002, "step": 17188 }, { "epoch": 16.527884615384615, "grad_norm": 0.01262273732572794, "learning_rate": 5.452327396901078e-06, "loss": 0.0001, "step": 17189 }, { "epoch": 16.528846153846153, "grad_norm": 0.06528973579406738, "learning_rate": 5.451218021071838e-06, "loss": 0.0003, "step": 17190 }, { "epoch": 16.529807692307692, "grad_norm": 0.02981276996433735, "learning_rate": 5.450108715826339e-06, "loss": 0.0002, "step": 17191 }, { "epoch": 16.53076923076923, "grad_norm": 1.2971043586730957, "learning_rate": 5.448999481181798e-06, "loss": 0.0072, "step": 17192 }, { "epoch": 16.53173076923077, "grad_norm": 0.007437979802489281, "learning_rate": 5.447890317155433e-06, "loss": 0.0001, "step": 17193 }, { "epoch": 16.532692307692308, "grad_norm": 0.006810902617871761, "learning_rate": 5.446781223764446e-06, "loss": 0.0001, "step": 17194 }, { "epoch": 16.533653846153847, "grad_norm": 3.8279411792755127, "learning_rate": 5.445672201026054e-06, "loss": 0.036, "step": 17195 }, { "epoch": 16.534615384615385, "grad_norm": 0.012356504797935486, "learning_rate": 5.44456324895746e-06, "loss": 0.0001, "step": 17196 }, { "epoch": 16.535576923076924, "grad_norm": 0.011642483994364738, "learning_rate": 5.443454367575873e-06, "loss": 0.0001, "step": 17197 }, { "epoch": 16.536538461538463, "grad_norm": 0.007148930802941322, "learning_rate": 5.4423455568985055e-06, "loss": 0.0001, "step": 17198 }, { "epoch": 16.5375, "grad_norm": 0.02133963257074356, "learning_rate": 5.441236816942555e-06, "loss": 0.0001, "step": 17199 }, { "epoch": 16.53846153846154, "grad_norm": 0.006485912017524242, "learning_rate": 5.4401281477252274e-06, "loss": 0.0001, "step": 17200 }, { "epoch": 16.539423076923075, "grad_norm": 0.19741690158843994, "learning_rate": 5.4390195492637335e-06, "loss": 0.0005, "step": 17201 }, { "epoch": 16.540384615384614, "grad_norm": 0.05547001585364342, "learning_rate": 5.437911021575266e-06, "loss": 0.0003, "step": 17202 }, { "epoch": 16.541346153846153, "grad_norm": 0.032051458954811096, "learning_rate": 5.436802564677028e-06, "loss": 0.0002, "step": 17203 }, { "epoch": 16.54230769230769, "grad_norm": 0.010795069858431816, "learning_rate": 5.435694178586228e-06, "loss": 0.0001, "step": 17204 }, { "epoch": 16.54326923076923, "grad_norm": 0.011957290582358837, "learning_rate": 5.434585863320052e-06, "loss": 0.0001, "step": 17205 }, { "epoch": 16.54423076923077, "grad_norm": 0.003741552121937275, "learning_rate": 5.433477618895711e-06, "loss": 0.0, "step": 17206 }, { "epoch": 16.545192307692307, "grad_norm": 0.11540702730417252, "learning_rate": 5.432369445330388e-06, "loss": 0.0002, "step": 17207 }, { "epoch": 16.546153846153846, "grad_norm": 0.0024199364706873894, "learning_rate": 5.431261342641287e-06, "loss": 0.0, "step": 17208 }, { "epoch": 16.547115384615385, "grad_norm": 0.006799318362027407, "learning_rate": 5.430153310845603e-06, "loss": 0.0, "step": 17209 }, { "epoch": 16.548076923076923, "grad_norm": 0.0055551547557115555, "learning_rate": 5.4290453499605245e-06, "loss": 0.0, "step": 17210 }, { "epoch": 16.549038461538462, "grad_norm": 0.09527590125799179, "learning_rate": 5.427937460003247e-06, "loss": 0.0004, "step": 17211 }, { "epoch": 16.55, "grad_norm": 0.09986958652734756, "learning_rate": 5.426829640990965e-06, "loss": 0.0006, "step": 17212 }, { "epoch": 16.55096153846154, "grad_norm": 0.008912404999136925, "learning_rate": 5.42572189294086e-06, "loss": 0.0001, "step": 17213 }, { "epoch": 16.551923076923078, "grad_norm": 0.0912511870265007, "learning_rate": 5.424614215870127e-06, "loss": 0.0007, "step": 17214 }, { "epoch": 16.552884615384617, "grad_norm": 1.1809735298156738, "learning_rate": 5.423506609795956e-06, "loss": 0.003, "step": 17215 }, { "epoch": 16.553846153846155, "grad_norm": 0.025411667302250862, "learning_rate": 5.422399074735527e-06, "loss": 0.0002, "step": 17216 }, { "epoch": 16.55480769230769, "grad_norm": 0.007514503784477711, "learning_rate": 5.421291610706032e-06, "loss": 0.0001, "step": 17217 }, { "epoch": 16.55576923076923, "grad_norm": 0.009585502557456493, "learning_rate": 5.420184217724651e-06, "loss": 0.0001, "step": 17218 }, { "epoch": 16.556730769230768, "grad_norm": 4.378973007202148, "learning_rate": 5.419076895808568e-06, "loss": 0.0246, "step": 17219 }, { "epoch": 16.557692307692307, "grad_norm": 0.0046470882371068, "learning_rate": 5.417969644974972e-06, "loss": 0.0001, "step": 17220 }, { "epoch": 16.558653846153845, "grad_norm": 0.009312616661190987, "learning_rate": 5.416862465241033e-06, "loss": 0.0001, "step": 17221 }, { "epoch": 16.559615384615384, "grad_norm": 0.163399800658226, "learning_rate": 5.415755356623938e-06, "loss": 0.0005, "step": 17222 }, { "epoch": 16.560576923076923, "grad_norm": 0.041956499218940735, "learning_rate": 5.4146483191408695e-06, "loss": 0.0001, "step": 17223 }, { "epoch": 16.56153846153846, "grad_norm": 0.017210671678185463, "learning_rate": 5.413541352808998e-06, "loss": 0.0001, "step": 17224 }, { "epoch": 16.5625, "grad_norm": 0.01908264309167862, "learning_rate": 5.412434457645503e-06, "loss": 0.0001, "step": 17225 }, { "epoch": 16.56346153846154, "grad_norm": 0.006947723682969809, "learning_rate": 5.411327633667564e-06, "loss": 0.0001, "step": 17226 }, { "epoch": 16.564423076923077, "grad_norm": 0.004712599329650402, "learning_rate": 5.41022088089235e-06, "loss": 0.0001, "step": 17227 }, { "epoch": 16.565384615384616, "grad_norm": 0.021473491564393044, "learning_rate": 5.40911419933704e-06, "loss": 0.0001, "step": 17228 }, { "epoch": 16.566346153846155, "grad_norm": 0.012115550227463245, "learning_rate": 5.4080075890188e-06, "loss": 0.0001, "step": 17229 }, { "epoch": 16.567307692307693, "grad_norm": 0.00867412332445383, "learning_rate": 5.406901049954805e-06, "loss": 0.0001, "step": 17230 }, { "epoch": 16.568269230769232, "grad_norm": 0.05826394632458687, "learning_rate": 5.40579458216223e-06, "loss": 0.0003, "step": 17231 }, { "epoch": 16.56923076923077, "grad_norm": 0.013794302940368652, "learning_rate": 5.4046881856582335e-06, "loss": 0.0001, "step": 17232 }, { "epoch": 16.57019230769231, "grad_norm": 0.008230178616940975, "learning_rate": 5.40358186045999e-06, "loss": 0.0001, "step": 17233 }, { "epoch": 16.571153846153845, "grad_norm": 0.011436691507697105, "learning_rate": 5.40247560658467e-06, "loss": 0.0001, "step": 17234 }, { "epoch": 16.572115384615383, "grad_norm": 0.022675035521388054, "learning_rate": 5.40136942404943e-06, "loss": 0.0002, "step": 17235 }, { "epoch": 16.573076923076922, "grad_norm": 0.014896683394908905, "learning_rate": 5.40026331287144e-06, "loss": 0.0001, "step": 17236 }, { "epoch": 16.57403846153846, "grad_norm": 0.008739950135350227, "learning_rate": 5.399157273067868e-06, "loss": 0.0001, "step": 17237 }, { "epoch": 16.575, "grad_norm": 0.015134698711335659, "learning_rate": 5.3980513046558665e-06, "loss": 0.0001, "step": 17238 }, { "epoch": 16.575961538461538, "grad_norm": 0.021599056199193, "learning_rate": 5.3969454076526076e-06, "loss": 0.0001, "step": 17239 }, { "epoch": 16.576923076923077, "grad_norm": 0.20253954827785492, "learning_rate": 5.395839582075242e-06, "loss": 0.001, "step": 17240 }, { "epoch": 16.577884615384615, "grad_norm": 3.1153721809387207, "learning_rate": 5.394733827940933e-06, "loss": 0.05, "step": 17241 }, { "epoch": 16.578846153846154, "grad_norm": 0.006160760764032602, "learning_rate": 5.393628145266843e-06, "loss": 0.0001, "step": 17242 }, { "epoch": 16.579807692307693, "grad_norm": 1.2868050336837769, "learning_rate": 5.392522534070119e-06, "loss": 0.0079, "step": 17243 }, { "epoch": 16.58076923076923, "grad_norm": 0.009199311956763268, "learning_rate": 5.391416994367924e-06, "loss": 0.0001, "step": 17244 }, { "epoch": 16.58173076923077, "grad_norm": 0.03597787022590637, "learning_rate": 5.390311526177414e-06, "loss": 0.0001, "step": 17245 }, { "epoch": 16.58269230769231, "grad_norm": 0.07059787213802338, "learning_rate": 5.3892061295157386e-06, "loss": 0.0004, "step": 17246 }, { "epoch": 16.583653846153847, "grad_norm": 0.05098164826631546, "learning_rate": 5.3881008044000495e-06, "loss": 0.0002, "step": 17247 }, { "epoch": 16.584615384615386, "grad_norm": 0.6259329319000244, "learning_rate": 5.386995550847503e-06, "loss": 0.0014, "step": 17248 }, { "epoch": 16.585576923076925, "grad_norm": 0.004146180115640163, "learning_rate": 5.3858903688752436e-06, "loss": 0.0, "step": 17249 }, { "epoch": 16.58653846153846, "grad_norm": 1.6492869853973389, "learning_rate": 5.384785258500429e-06, "loss": 0.0369, "step": 17250 }, { "epoch": 16.5875, "grad_norm": 0.0742514505982399, "learning_rate": 5.383680219740195e-06, "loss": 0.0004, "step": 17251 }, { "epoch": 16.588461538461537, "grad_norm": 1.4291762113571167, "learning_rate": 5.382575252611697e-06, "loss": 0.0152, "step": 17252 }, { "epoch": 16.589423076923076, "grad_norm": 0.06992713361978531, "learning_rate": 5.381470357132084e-06, "loss": 0.0003, "step": 17253 }, { "epoch": 16.590384615384615, "grad_norm": 0.02144530788064003, "learning_rate": 5.38036553331849e-06, "loss": 0.0001, "step": 17254 }, { "epoch": 16.591346153846153, "grad_norm": 0.009706712327897549, "learning_rate": 5.3792607811880644e-06, "loss": 0.0001, "step": 17255 }, { "epoch": 16.592307692307692, "grad_norm": 0.0037017292343080044, "learning_rate": 5.378156100757954e-06, "loss": 0.0, "step": 17256 }, { "epoch": 16.59326923076923, "grad_norm": 0.06022825464606285, "learning_rate": 5.377051492045293e-06, "loss": 0.0004, "step": 17257 }, { "epoch": 16.59423076923077, "grad_norm": 0.012611373327672482, "learning_rate": 5.375946955067224e-06, "loss": 0.0001, "step": 17258 }, { "epoch": 16.595192307692308, "grad_norm": 0.004366251174360514, "learning_rate": 5.37484248984089e-06, "loss": 0.0, "step": 17259 }, { "epoch": 16.596153846153847, "grad_norm": 0.1183764711022377, "learning_rate": 5.373738096383423e-06, "loss": 0.0004, "step": 17260 }, { "epoch": 16.597115384615385, "grad_norm": 1.8277736902236938, "learning_rate": 5.3726337747119625e-06, "loss": 0.0242, "step": 17261 }, { "epoch": 16.598076923076924, "grad_norm": 0.16272875666618347, "learning_rate": 5.371529524843648e-06, "loss": 0.0006, "step": 17262 }, { "epoch": 16.599038461538463, "grad_norm": 0.07949794828891754, "learning_rate": 5.370425346795609e-06, "loss": 0.0003, "step": 17263 }, { "epoch": 16.6, "grad_norm": 0.0063120704144239426, "learning_rate": 5.369321240584983e-06, "loss": 0.0001, "step": 17264 }, { "epoch": 16.60096153846154, "grad_norm": 0.41978389024734497, "learning_rate": 5.368217206228898e-06, "loss": 0.002, "step": 17265 }, { "epoch": 16.601923076923075, "grad_norm": 0.01828410103917122, "learning_rate": 5.367113243744486e-06, "loss": 0.0001, "step": 17266 }, { "epoch": 16.602884615384614, "grad_norm": 0.00678039388731122, "learning_rate": 5.366009353148884e-06, "loss": 0.0001, "step": 17267 }, { "epoch": 16.603846153846153, "grad_norm": 0.03036373481154442, "learning_rate": 5.364905534459213e-06, "loss": 0.0001, "step": 17268 }, { "epoch": 16.60480769230769, "grad_norm": 0.01853492483496666, "learning_rate": 5.363801787692603e-06, "loss": 0.0001, "step": 17269 }, { "epoch": 16.60576923076923, "grad_norm": 3.039184093475342, "learning_rate": 5.362698112866187e-06, "loss": 0.0385, "step": 17270 }, { "epoch": 16.60673076923077, "grad_norm": 0.014138009399175644, "learning_rate": 5.361594509997081e-06, "loss": 0.0001, "step": 17271 }, { "epoch": 16.607692307692307, "grad_norm": 2.334852457046509, "learning_rate": 5.360490979102415e-06, "loss": 0.0085, "step": 17272 }, { "epoch": 16.608653846153846, "grad_norm": 0.008087394759058952, "learning_rate": 5.359387520199317e-06, "loss": 0.0001, "step": 17273 }, { "epoch": 16.609615384615385, "grad_norm": 0.09342994540929794, "learning_rate": 5.3582841333049e-06, "loss": 0.0006, "step": 17274 }, { "epoch": 16.610576923076923, "grad_norm": 0.0667506530880928, "learning_rate": 5.357180818436294e-06, "loss": 0.0002, "step": 17275 }, { "epoch": 16.611538461538462, "grad_norm": 0.1688510626554489, "learning_rate": 5.356077575610611e-06, "loss": 0.0003, "step": 17276 }, { "epoch": 16.6125, "grad_norm": 0.010344727896153927, "learning_rate": 5.354974404844975e-06, "loss": 0.0001, "step": 17277 }, { "epoch": 16.61346153846154, "grad_norm": 0.02223585732281208, "learning_rate": 5.353871306156506e-06, "loss": 0.0002, "step": 17278 }, { "epoch": 16.614423076923078, "grad_norm": 1.154504418373108, "learning_rate": 5.352768279562315e-06, "loss": 0.0044, "step": 17279 }, { "epoch": 16.615384615384617, "grad_norm": 1.0016891956329346, "learning_rate": 5.351665325079521e-06, "loss": 0.0026, "step": 17280 }, { "epoch": 16.616346153846155, "grad_norm": 0.039052873849868774, "learning_rate": 5.350562442725242e-06, "loss": 0.0002, "step": 17281 }, { "epoch": 16.61730769230769, "grad_norm": 0.62320476770401, "learning_rate": 5.3494596325165845e-06, "loss": 0.002, "step": 17282 }, { "epoch": 16.61826923076923, "grad_norm": 0.07778535783290863, "learning_rate": 5.348356894470664e-06, "loss": 0.0006, "step": 17283 }, { "epoch": 16.619230769230768, "grad_norm": 1.5466910600662231, "learning_rate": 5.347254228604596e-06, "loss": 0.0095, "step": 17284 }, { "epoch": 16.620192307692307, "grad_norm": 0.05624131113290787, "learning_rate": 5.346151634935484e-06, "loss": 0.0003, "step": 17285 }, { "epoch": 16.621153846153845, "grad_norm": 0.01296284794807434, "learning_rate": 5.3450491134804416e-06, "loss": 0.0002, "step": 17286 }, { "epoch": 16.622115384615384, "grad_norm": 0.007369127590209246, "learning_rate": 5.343946664256572e-06, "loss": 0.0, "step": 17287 }, { "epoch": 16.623076923076923, "grad_norm": 1.891998291015625, "learning_rate": 5.342844287280984e-06, "loss": 0.0082, "step": 17288 }, { "epoch": 16.62403846153846, "grad_norm": 0.02723763696849346, "learning_rate": 5.341741982570789e-06, "loss": 0.0002, "step": 17289 }, { "epoch": 16.625, "grad_norm": 0.010374156758189201, "learning_rate": 5.340639750143081e-06, "loss": 0.0001, "step": 17290 }, { "epoch": 16.62596153846154, "grad_norm": 0.012930704280734062, "learning_rate": 5.339537590014972e-06, "loss": 0.0001, "step": 17291 }, { "epoch": 16.626923076923077, "grad_norm": 0.01461498811841011, "learning_rate": 5.338435502203563e-06, "loss": 0.0001, "step": 17292 }, { "epoch": 16.627884615384616, "grad_norm": 0.0075506907887756824, "learning_rate": 5.337333486725951e-06, "loss": 0.0001, "step": 17293 }, { "epoch": 16.628846153846155, "grad_norm": 0.006237379740923643, "learning_rate": 5.336231543599237e-06, "loss": 0.0001, "step": 17294 }, { "epoch": 16.629807692307693, "grad_norm": 0.010444605723023415, "learning_rate": 5.335129672840527e-06, "loss": 0.0001, "step": 17295 }, { "epoch": 16.630769230769232, "grad_norm": 0.005741377826780081, "learning_rate": 5.334027874466907e-06, "loss": 0.0001, "step": 17296 }, { "epoch": 16.63173076923077, "grad_norm": 0.568821907043457, "learning_rate": 5.332926148495485e-06, "loss": 0.0015, "step": 17297 }, { "epoch": 16.63269230769231, "grad_norm": 0.006471521221101284, "learning_rate": 5.331824494943349e-06, "loss": 0.0001, "step": 17298 }, { "epoch": 16.633653846153845, "grad_norm": 0.00462473277002573, "learning_rate": 5.330722913827594e-06, "loss": 0.0, "step": 17299 }, { "epoch": 16.634615384615383, "grad_norm": 0.038781821727752686, "learning_rate": 5.329621405165319e-06, "loss": 0.0002, "step": 17300 }, { "epoch": 16.635576923076922, "grad_norm": 0.0038163901772350073, "learning_rate": 5.32851996897361e-06, "loss": 0.0, "step": 17301 }, { "epoch": 16.63653846153846, "grad_norm": 0.0044199759140610695, "learning_rate": 5.327418605269559e-06, "loss": 0.0001, "step": 17302 }, { "epoch": 16.6375, "grad_norm": 0.021167203783988953, "learning_rate": 5.326317314070263e-06, "loss": 0.0001, "step": 17303 }, { "epoch": 16.638461538461538, "grad_norm": 1.8499901294708252, "learning_rate": 5.3252160953928e-06, "loss": 0.0055, "step": 17304 }, { "epoch": 16.639423076923077, "grad_norm": 0.008258859626948833, "learning_rate": 5.3241149492542635e-06, "loss": 0.0001, "step": 17305 }, { "epoch": 16.640384615384615, "grad_norm": 1.2585017681121826, "learning_rate": 5.323013875671743e-06, "loss": 0.0054, "step": 17306 }, { "epoch": 16.641346153846154, "grad_norm": 0.006587280426174402, "learning_rate": 5.321912874662316e-06, "loss": 0.0, "step": 17307 }, { "epoch": 16.642307692307693, "grad_norm": 0.007836799137294292, "learning_rate": 5.320811946243076e-06, "loss": 0.0001, "step": 17308 }, { "epoch": 16.64326923076923, "grad_norm": 0.201777845621109, "learning_rate": 5.319711090431097e-06, "loss": 0.0004, "step": 17309 }, { "epoch": 16.64423076923077, "grad_norm": 0.015560319647192955, "learning_rate": 5.318610307243465e-06, "loss": 0.0001, "step": 17310 }, { "epoch": 16.64519230769231, "grad_norm": 0.016425026580691338, "learning_rate": 5.317509596697266e-06, "loss": 0.0001, "step": 17311 }, { "epoch": 16.646153846153847, "grad_norm": 0.01666436716914177, "learning_rate": 5.3164089588095705e-06, "loss": 0.0001, "step": 17312 }, { "epoch": 16.647115384615386, "grad_norm": 0.1490059792995453, "learning_rate": 5.315308393597462e-06, "loss": 0.0005, "step": 17313 }, { "epoch": 16.648076923076925, "grad_norm": 0.008389945141971111, "learning_rate": 5.3142079010780215e-06, "loss": 0.0001, "step": 17314 }, { "epoch": 16.64903846153846, "grad_norm": 0.5962048768997192, "learning_rate": 5.313107481268318e-06, "loss": 0.0025, "step": 17315 }, { "epoch": 16.65, "grad_norm": 0.10194224864244461, "learning_rate": 5.312007134185431e-06, "loss": 0.0005, "step": 17316 }, { "epoch": 16.650961538461537, "grad_norm": 0.014168468303978443, "learning_rate": 5.310906859846439e-06, "loss": 0.0001, "step": 17317 }, { "epoch": 16.651923076923076, "grad_norm": 2.8637468814849854, "learning_rate": 5.309806658268404e-06, "loss": 0.0097, "step": 17318 }, { "epoch": 16.652884615384615, "grad_norm": 0.07455220818519592, "learning_rate": 5.308706529468408e-06, "loss": 0.0004, "step": 17319 }, { "epoch": 16.653846153846153, "grad_norm": 0.050323233008384705, "learning_rate": 5.307606473463516e-06, "loss": 0.0002, "step": 17320 }, { "epoch": 16.654807692307692, "grad_norm": 2.555560350418091, "learning_rate": 5.3065064902707985e-06, "loss": 0.0109, "step": 17321 }, { "epoch": 16.65576923076923, "grad_norm": 0.07562136650085449, "learning_rate": 5.3054065799073285e-06, "loss": 0.0004, "step": 17322 }, { "epoch": 16.65673076923077, "grad_norm": 0.04746649041771889, "learning_rate": 5.304306742390167e-06, "loss": 0.0002, "step": 17323 }, { "epoch": 16.657692307692308, "grad_norm": 0.003442911198362708, "learning_rate": 5.303206977736382e-06, "loss": 0.0, "step": 17324 }, { "epoch": 16.658653846153847, "grad_norm": 0.027973230928182602, "learning_rate": 5.302107285963045e-06, "loss": 0.0001, "step": 17325 }, { "epoch": 16.659615384615385, "grad_norm": 0.0433090440928936, "learning_rate": 5.30100766708721e-06, "loss": 0.0002, "step": 17326 }, { "epoch": 16.660576923076924, "grad_norm": 0.15057846903800964, "learning_rate": 5.299908121125945e-06, "loss": 0.0004, "step": 17327 }, { "epoch": 16.661538461538463, "grad_norm": 0.047271862626075745, "learning_rate": 5.298808648096315e-06, "loss": 0.0002, "step": 17328 }, { "epoch": 16.6625, "grad_norm": 0.010248265229165554, "learning_rate": 5.2977092480153734e-06, "loss": 0.0001, "step": 17329 }, { "epoch": 16.66346153846154, "grad_norm": 0.01617148332297802, "learning_rate": 5.296609920900187e-06, "loss": 0.0002, "step": 17330 }, { "epoch": 16.664423076923075, "grad_norm": 0.3844059705734253, "learning_rate": 5.295510666767807e-06, "loss": 0.0018, "step": 17331 }, { "epoch": 16.665384615384614, "grad_norm": 0.0057902890257537365, "learning_rate": 5.294411485635294e-06, "loss": 0.0001, "step": 17332 }, { "epoch": 16.666346153846153, "grad_norm": 0.01494276151061058, "learning_rate": 5.293312377519706e-06, "loss": 0.0002, "step": 17333 }, { "epoch": 16.66730769230769, "grad_norm": 0.09151221811771393, "learning_rate": 5.2922133424380925e-06, "loss": 0.0006, "step": 17334 }, { "epoch": 16.66826923076923, "grad_norm": 0.021496104076504707, "learning_rate": 5.291114380407512e-06, "loss": 0.0001, "step": 17335 }, { "epoch": 16.66923076923077, "grad_norm": 2.132779598236084, "learning_rate": 5.29001549144502e-06, "loss": 0.0115, "step": 17336 }, { "epoch": 16.670192307692307, "grad_norm": 0.02964784763753414, "learning_rate": 5.288916675567659e-06, "loss": 0.0002, "step": 17337 }, { "epoch": 16.671153846153846, "grad_norm": 0.003985986113548279, "learning_rate": 5.287817932792485e-06, "loss": 0.0, "step": 17338 }, { "epoch": 16.672115384615385, "grad_norm": 0.027277858927845955, "learning_rate": 5.286719263136549e-06, "loss": 0.0001, "step": 17339 }, { "epoch": 16.673076923076923, "grad_norm": 1.9979296922683716, "learning_rate": 5.285620666616894e-06, "loss": 0.0441, "step": 17340 }, { "epoch": 16.674038461538462, "grad_norm": 0.03359181061387062, "learning_rate": 5.28452214325057e-06, "loss": 0.0002, "step": 17341 }, { "epoch": 16.675, "grad_norm": 0.008761514909565449, "learning_rate": 5.2834236930546255e-06, "loss": 0.0001, "step": 17342 }, { "epoch": 16.67596153846154, "grad_norm": 0.023743128404021263, "learning_rate": 5.2823253160461e-06, "loss": 0.0001, "step": 17343 }, { "epoch": 16.676923076923078, "grad_norm": 0.014298592694103718, "learning_rate": 5.281227012242043e-06, "loss": 0.0001, "step": 17344 }, { "epoch": 16.677884615384617, "grad_norm": 0.032649002969264984, "learning_rate": 5.280128781659488e-06, "loss": 0.0001, "step": 17345 }, { "epoch": 16.678846153846155, "grad_norm": 0.019037378951907158, "learning_rate": 5.279030624315482e-06, "loss": 0.0001, "step": 17346 }, { "epoch": 16.67980769230769, "grad_norm": 0.1126687079668045, "learning_rate": 5.277932540227069e-06, "loss": 0.0004, "step": 17347 }, { "epoch": 16.68076923076923, "grad_norm": 1.731426477432251, "learning_rate": 5.276834529411281e-06, "loss": 0.0462, "step": 17348 }, { "epoch": 16.681730769230768, "grad_norm": 0.0261946152895689, "learning_rate": 5.275736591885157e-06, "loss": 0.0002, "step": 17349 }, { "epoch": 16.682692307692307, "grad_norm": 0.07068212330341339, "learning_rate": 5.274638727665741e-06, "loss": 0.0002, "step": 17350 }, { "epoch": 16.683653846153845, "grad_norm": 0.024393105879426003, "learning_rate": 5.273540936770059e-06, "loss": 0.0001, "step": 17351 }, { "epoch": 16.684615384615384, "grad_norm": 0.39711683988571167, "learning_rate": 5.272443219215149e-06, "loss": 0.0011, "step": 17352 }, { "epoch": 16.685576923076923, "grad_norm": 0.005566152278333902, "learning_rate": 5.271345575018049e-06, "loss": 0.0001, "step": 17353 }, { "epoch": 16.68653846153846, "grad_norm": 1.249886393547058, "learning_rate": 5.270248004195784e-06, "loss": 0.0106, "step": 17354 }, { "epoch": 16.6875, "grad_norm": 1.8474936485290527, "learning_rate": 5.269150506765392e-06, "loss": 0.0061, "step": 17355 }, { "epoch": 16.68846153846154, "grad_norm": 0.009778864681720734, "learning_rate": 5.268053082743893e-06, "loss": 0.0001, "step": 17356 }, { "epoch": 16.689423076923077, "grad_norm": 0.01281436812132597, "learning_rate": 5.266955732148325e-06, "loss": 0.0001, "step": 17357 }, { "epoch": 16.690384615384616, "grad_norm": 0.07638644427061081, "learning_rate": 5.265858454995716e-06, "loss": 0.0005, "step": 17358 }, { "epoch": 16.691346153846155, "grad_norm": 0.0034046084620058537, "learning_rate": 5.2647612513030855e-06, "loss": 0.0001, "step": 17359 }, { "epoch": 16.692307692307693, "grad_norm": 0.008973510004580021, "learning_rate": 5.263664121087462e-06, "loss": 0.0001, "step": 17360 }, { "epoch": 16.693269230769232, "grad_norm": 0.020292719826102257, "learning_rate": 5.262567064365875e-06, "loss": 0.0001, "step": 17361 }, { "epoch": 16.69423076923077, "grad_norm": 0.08278878033161163, "learning_rate": 5.261470081155338e-06, "loss": 0.0005, "step": 17362 }, { "epoch": 16.69519230769231, "grad_norm": 1.4861313104629517, "learning_rate": 5.260373171472879e-06, "loss": 0.0055, "step": 17363 }, { "epoch": 16.696153846153845, "grad_norm": 0.09215814620256424, "learning_rate": 5.259276335335522e-06, "loss": 0.0005, "step": 17364 }, { "epoch": 16.697115384615383, "grad_norm": 0.09528972953557968, "learning_rate": 5.258179572760277e-06, "loss": 0.0004, "step": 17365 }, { "epoch": 16.698076923076922, "grad_norm": 0.002056639175862074, "learning_rate": 5.257082883764173e-06, "loss": 0.0, "step": 17366 }, { "epoch": 16.69903846153846, "grad_norm": 0.10888783633708954, "learning_rate": 5.255986268364217e-06, "loss": 0.0005, "step": 17367 }, { "epoch": 16.7, "grad_norm": 1.3991985321044922, "learning_rate": 5.254889726577432e-06, "loss": 0.0064, "step": 17368 }, { "epoch": 16.700961538461538, "grad_norm": 0.007259547244757414, "learning_rate": 5.253793258420835e-06, "loss": 0.0001, "step": 17369 }, { "epoch": 16.701923076923077, "grad_norm": 0.004659618251025677, "learning_rate": 5.252696863911433e-06, "loss": 0.0, "step": 17370 }, { "epoch": 16.702884615384615, "grad_norm": 0.029775282368063927, "learning_rate": 5.251600543066242e-06, "loss": 0.0002, "step": 17371 }, { "epoch": 16.703846153846154, "grad_norm": 0.38128185272216797, "learning_rate": 5.250504295902279e-06, "loss": 0.0009, "step": 17372 }, { "epoch": 16.704807692307693, "grad_norm": 0.014578992500901222, "learning_rate": 5.249408122436545e-06, "loss": 0.0001, "step": 17373 }, { "epoch": 16.70576923076923, "grad_norm": 0.010588979348540306, "learning_rate": 5.248312022686055e-06, "loss": 0.0001, "step": 17374 }, { "epoch": 16.70673076923077, "grad_norm": 3.664125680923462, "learning_rate": 5.247215996667821e-06, "loss": 0.0543, "step": 17375 }, { "epoch": 16.70769230769231, "grad_norm": 0.07269252836704254, "learning_rate": 5.246120044398839e-06, "loss": 0.0003, "step": 17376 }, { "epoch": 16.708653846153847, "grad_norm": 0.029687460511922836, "learning_rate": 5.245024165896126e-06, "loss": 0.0002, "step": 17377 }, { "epoch": 16.709615384615386, "grad_norm": 0.02163674682378769, "learning_rate": 5.24392836117668e-06, "loss": 0.0001, "step": 17378 }, { "epoch": 16.710576923076925, "grad_norm": 0.20926259458065033, "learning_rate": 5.2428326302575065e-06, "loss": 0.0007, "step": 17379 }, { "epoch": 16.71153846153846, "grad_norm": 0.006149422377347946, "learning_rate": 5.241736973155611e-06, "loss": 0.0, "step": 17380 }, { "epoch": 16.7125, "grad_norm": 0.011727462522685528, "learning_rate": 5.240641389887989e-06, "loss": 0.0001, "step": 17381 }, { "epoch": 16.713461538461537, "grad_norm": 0.22138622403144836, "learning_rate": 5.239545880471645e-06, "loss": 0.0005, "step": 17382 }, { "epoch": 16.714423076923076, "grad_norm": 0.004927904345095158, "learning_rate": 5.2384504449235794e-06, "loss": 0.0, "step": 17383 }, { "epoch": 16.715384615384615, "grad_norm": 0.13696792721748352, "learning_rate": 5.237355083260787e-06, "loss": 0.0005, "step": 17384 }, { "epoch": 16.716346153846153, "grad_norm": 0.007619092706590891, "learning_rate": 5.236259795500263e-06, "loss": 0.0001, "step": 17385 }, { "epoch": 16.717307692307692, "grad_norm": 0.1483810395002365, "learning_rate": 5.235164581659009e-06, "loss": 0.0008, "step": 17386 }, { "epoch": 16.71826923076923, "grad_norm": 0.00977195706218481, "learning_rate": 5.2340694417540135e-06, "loss": 0.0001, "step": 17387 }, { "epoch": 16.71923076923077, "grad_norm": 0.24725353717803955, "learning_rate": 5.232974375802275e-06, "loss": 0.0008, "step": 17388 }, { "epoch": 16.720192307692308, "grad_norm": 0.2401653379201889, "learning_rate": 5.231879383820782e-06, "loss": 0.0013, "step": 17389 }, { "epoch": 16.721153846153847, "grad_norm": 0.0029854411259293556, "learning_rate": 5.2307844658265236e-06, "loss": 0.0, "step": 17390 }, { "epoch": 16.722115384615385, "grad_norm": 0.010193517431616783, "learning_rate": 5.229689621836499e-06, "loss": 0.0001, "step": 17391 }, { "epoch": 16.723076923076924, "grad_norm": 0.013977520167827606, "learning_rate": 5.228594851867686e-06, "loss": 0.0001, "step": 17392 }, { "epoch": 16.724038461538463, "grad_norm": 2.472689628601074, "learning_rate": 5.227500155937078e-06, "loss": 0.0441, "step": 17393 }, { "epoch": 16.725, "grad_norm": 0.0030973737593740225, "learning_rate": 5.226405534061663e-06, "loss": 0.0, "step": 17394 }, { "epoch": 16.72596153846154, "grad_norm": 0.006721260957419872, "learning_rate": 5.225310986258419e-06, "loss": 0.0001, "step": 17395 }, { "epoch": 16.726923076923075, "grad_norm": 0.03087616339325905, "learning_rate": 5.224216512544335e-06, "loss": 0.0003, "step": 17396 }, { "epoch": 16.727884615384614, "grad_norm": 0.6982834935188293, "learning_rate": 5.223122112936401e-06, "loss": 0.0018, "step": 17397 }, { "epoch": 16.728846153846153, "grad_norm": 0.0026684822514653206, "learning_rate": 5.222027787451585e-06, "loss": 0.0, "step": 17398 }, { "epoch": 16.72980769230769, "grad_norm": 0.19435915350914001, "learning_rate": 5.22093353610688e-06, "loss": 0.0004, "step": 17399 }, { "epoch": 16.73076923076923, "grad_norm": 0.9425124526023865, "learning_rate": 5.2198393589192555e-06, "loss": 0.003, "step": 17400 }, { "epoch": 16.73173076923077, "grad_norm": 0.04151686653494835, "learning_rate": 5.218745255905695e-06, "loss": 0.0003, "step": 17401 }, { "epoch": 16.732692307692307, "grad_norm": 0.04826720431447029, "learning_rate": 5.217651227083178e-06, "loss": 0.0004, "step": 17402 }, { "epoch": 16.733653846153846, "grad_norm": 0.017578251659870148, "learning_rate": 5.216557272468675e-06, "loss": 0.0002, "step": 17403 }, { "epoch": 16.734615384615385, "grad_norm": 0.010201480239629745, "learning_rate": 5.215463392079164e-06, "loss": 0.0001, "step": 17404 }, { "epoch": 16.735576923076923, "grad_norm": 0.13269764184951782, "learning_rate": 5.214369585931622e-06, "loss": 0.0005, "step": 17405 }, { "epoch": 16.736538461538462, "grad_norm": 0.0038065786939114332, "learning_rate": 5.213275854043016e-06, "loss": 0.0, "step": 17406 }, { "epoch": 16.7375, "grad_norm": 0.09408412873744965, "learning_rate": 5.212182196430317e-06, "loss": 0.0004, "step": 17407 }, { "epoch": 16.73846153846154, "grad_norm": 0.09519792348146439, "learning_rate": 5.211088613110504e-06, "loss": 0.0004, "step": 17408 }, { "epoch": 16.739423076923078, "grad_norm": 0.006500575691461563, "learning_rate": 5.209995104100538e-06, "loss": 0.0001, "step": 17409 }, { "epoch": 16.740384615384617, "grad_norm": 0.006178575102239847, "learning_rate": 5.208901669417391e-06, "loss": 0.0001, "step": 17410 }, { "epoch": 16.741346153846155, "grad_norm": 0.07903493195772171, "learning_rate": 5.207808309078026e-06, "loss": 0.0004, "step": 17411 }, { "epoch": 16.74230769230769, "grad_norm": 0.003958648070693016, "learning_rate": 5.20671502309941e-06, "loss": 0.0, "step": 17412 }, { "epoch": 16.74326923076923, "grad_norm": 0.009459538385272026, "learning_rate": 5.205621811498514e-06, "loss": 0.0001, "step": 17413 }, { "epoch": 16.744230769230768, "grad_norm": 0.012263886630535126, "learning_rate": 5.204528674292291e-06, "loss": 0.0001, "step": 17414 }, { "epoch": 16.745192307692307, "grad_norm": 2.4542362689971924, "learning_rate": 5.203435611497708e-06, "loss": 0.019, "step": 17415 }, { "epoch": 16.746153846153845, "grad_norm": 0.13100528717041016, "learning_rate": 5.202342623131731e-06, "loss": 0.0007, "step": 17416 }, { "epoch": 16.747115384615384, "grad_norm": 0.00886533036828041, "learning_rate": 5.201249709211312e-06, "loss": 0.0001, "step": 17417 }, { "epoch": 16.748076923076923, "grad_norm": 0.04529910534620285, "learning_rate": 5.200156869753413e-06, "loss": 0.0002, "step": 17418 }, { "epoch": 16.74903846153846, "grad_norm": 0.01763364113867283, "learning_rate": 5.1990641047749955e-06, "loss": 0.0002, "step": 17419 }, { "epoch": 16.75, "grad_norm": 0.050314147025346756, "learning_rate": 5.197971414293011e-06, "loss": 0.0003, "step": 17420 }, { "epoch": 16.75096153846154, "grad_norm": 0.03293550759553909, "learning_rate": 5.196878798324418e-06, "loss": 0.0002, "step": 17421 }, { "epoch": 16.751923076923077, "grad_norm": 3.363508462905884, "learning_rate": 5.195786256886166e-06, "loss": 0.0114, "step": 17422 }, { "epoch": 16.752884615384616, "grad_norm": 1.5912041664123535, "learning_rate": 5.19469378999521e-06, "loss": 0.0221, "step": 17423 }, { "epoch": 16.753846153846155, "grad_norm": 2.0446503162384033, "learning_rate": 5.1936013976685085e-06, "loss": 0.0105, "step": 17424 }, { "epoch": 16.754807692307693, "grad_norm": 0.030055439099669456, "learning_rate": 5.192509079923e-06, "loss": 0.0001, "step": 17425 }, { "epoch": 16.755769230769232, "grad_norm": 1.9451591968536377, "learning_rate": 5.191416836775645e-06, "loss": 0.0577, "step": 17426 }, { "epoch": 16.75673076923077, "grad_norm": 0.09109434485435486, "learning_rate": 5.1903246682433875e-06, "loss": 0.0002, "step": 17427 }, { "epoch": 16.75769230769231, "grad_norm": 2.0687294006347656, "learning_rate": 5.189232574343173e-06, "loss": 0.0144, "step": 17428 }, { "epoch": 16.758653846153845, "grad_norm": 0.008864586241543293, "learning_rate": 5.18814055509195e-06, "loss": 0.0001, "step": 17429 }, { "epoch": 16.759615384615383, "grad_norm": 0.010969954542815685, "learning_rate": 5.187048610506667e-06, "loss": 0.0001, "step": 17430 }, { "epoch": 16.760576923076922, "grad_norm": 0.00510047934949398, "learning_rate": 5.185956740604257e-06, "loss": 0.0001, "step": 17431 }, { "epoch": 16.76153846153846, "grad_norm": 0.0070654843002557755, "learning_rate": 5.184864945401672e-06, "loss": 0.0001, "step": 17432 }, { "epoch": 16.7625, "grad_norm": 0.07213955372571945, "learning_rate": 5.183773224915853e-06, "loss": 0.0004, "step": 17433 }, { "epoch": 16.763461538461538, "grad_norm": 0.07479514926671982, "learning_rate": 5.1826815791637354e-06, "loss": 0.0004, "step": 17434 }, { "epoch": 16.764423076923077, "grad_norm": 0.0676136314868927, "learning_rate": 5.181590008162265e-06, "loss": 0.0007, "step": 17435 }, { "epoch": 16.765384615384615, "grad_norm": 0.024167917668819427, "learning_rate": 5.180498511928371e-06, "loss": 0.0002, "step": 17436 }, { "epoch": 16.766346153846154, "grad_norm": 0.08809638768434525, "learning_rate": 5.179407090478996e-06, "loss": 0.0003, "step": 17437 }, { "epoch": 16.767307692307693, "grad_norm": 0.006933453492820263, "learning_rate": 5.178315743831079e-06, "loss": 0.0001, "step": 17438 }, { "epoch": 16.76826923076923, "grad_norm": 0.006938172969967127, "learning_rate": 5.177224472001545e-06, "loss": 0.0001, "step": 17439 }, { "epoch": 16.76923076923077, "grad_norm": 0.004676396027207375, "learning_rate": 5.176133275007334e-06, "loss": 0.0, "step": 17440 }, { "epoch": 16.77019230769231, "grad_norm": 0.018070168793201447, "learning_rate": 5.17504215286538e-06, "loss": 0.0002, "step": 17441 }, { "epoch": 16.771153846153847, "grad_norm": 0.2373344600200653, "learning_rate": 5.173951105592605e-06, "loss": 0.0008, "step": 17442 }, { "epoch": 16.772115384615386, "grad_norm": 0.5610656142234802, "learning_rate": 5.172860133205946e-06, "loss": 0.0018, "step": 17443 }, { "epoch": 16.773076923076925, "grad_norm": 0.3483792841434479, "learning_rate": 5.171769235722335e-06, "loss": 0.0011, "step": 17444 }, { "epoch": 16.77403846153846, "grad_norm": 0.016813596710562706, "learning_rate": 5.17067841315869e-06, "loss": 0.0001, "step": 17445 }, { "epoch": 16.775, "grad_norm": 0.0024136831052601337, "learning_rate": 5.169587665531945e-06, "loss": 0.0, "step": 17446 }, { "epoch": 16.775961538461537, "grad_norm": 0.3511507213115692, "learning_rate": 5.16849699285902e-06, "loss": 0.0007, "step": 17447 }, { "epoch": 16.776923076923076, "grad_norm": 0.010290575213730335, "learning_rate": 5.167406395156841e-06, "loss": 0.0001, "step": 17448 }, { "epoch": 16.777884615384615, "grad_norm": 0.0242246575653553, "learning_rate": 5.166315872442336e-06, "loss": 0.0002, "step": 17449 }, { "epoch": 16.778846153846153, "grad_norm": 3.458808422088623, "learning_rate": 5.165225424732417e-06, "loss": 0.0125, "step": 17450 }, { "epoch": 16.779807692307692, "grad_norm": 0.07781773060560226, "learning_rate": 5.164135052044008e-06, "loss": 0.0004, "step": 17451 }, { "epoch": 16.78076923076923, "grad_norm": 1.4237338304519653, "learning_rate": 5.163044754394035e-06, "loss": 0.0258, "step": 17452 }, { "epoch": 16.78173076923077, "grad_norm": 0.046889033168554306, "learning_rate": 5.161954531799409e-06, "loss": 0.0001, "step": 17453 }, { "epoch": 16.782692307692308, "grad_norm": 0.013199187815189362, "learning_rate": 5.160864384277047e-06, "loss": 0.0002, "step": 17454 }, { "epoch": 16.783653846153847, "grad_norm": 0.09781040251255035, "learning_rate": 5.1597743118438725e-06, "loss": 0.0006, "step": 17455 }, { "epoch": 16.784615384615385, "grad_norm": 0.0037558472249656916, "learning_rate": 5.15868431451679e-06, "loss": 0.0, "step": 17456 }, { "epoch": 16.785576923076924, "grad_norm": 0.6732990145683289, "learning_rate": 5.1575943923127216e-06, "loss": 0.0017, "step": 17457 }, { "epoch": 16.786538461538463, "grad_norm": 0.4104044437408447, "learning_rate": 5.156504545248573e-06, "loss": 0.0012, "step": 17458 }, { "epoch": 16.7875, "grad_norm": 0.012030594050884247, "learning_rate": 5.155414773341256e-06, "loss": 0.0001, "step": 17459 }, { "epoch": 16.78846153846154, "grad_norm": 0.018378064036369324, "learning_rate": 5.154325076607689e-06, "loss": 0.0001, "step": 17460 }, { "epoch": 16.789423076923075, "grad_norm": 0.011851479299366474, "learning_rate": 5.153235455064771e-06, "loss": 0.0001, "step": 17461 }, { "epoch": 16.790384615384614, "grad_norm": 0.054310623556375504, "learning_rate": 5.152145908729413e-06, "loss": 0.0002, "step": 17462 }, { "epoch": 16.791346153846153, "grad_norm": 0.43795567750930786, "learning_rate": 5.151056437618527e-06, "loss": 0.0016, "step": 17463 }, { "epoch": 16.79230769230769, "grad_norm": 0.015078097581863403, "learning_rate": 5.149967041749009e-06, "loss": 0.0002, "step": 17464 }, { "epoch": 16.79326923076923, "grad_norm": 0.09901577979326248, "learning_rate": 5.148877721137768e-06, "loss": 0.0005, "step": 17465 }, { "epoch": 16.79423076923077, "grad_norm": 0.032895658165216446, "learning_rate": 5.1477884758017115e-06, "loss": 0.0003, "step": 17466 }, { "epoch": 16.795192307692307, "grad_norm": 0.8384411334991455, "learning_rate": 5.146699305757732e-06, "loss": 0.0059, "step": 17467 }, { "epoch": 16.796153846153846, "grad_norm": 0.8441377282142639, "learning_rate": 5.145610211022738e-06, "loss": 0.0025, "step": 17468 }, { "epoch": 16.797115384615385, "grad_norm": 0.03531935438513756, "learning_rate": 5.144521191613624e-06, "loss": 0.0001, "step": 17469 }, { "epoch": 16.798076923076923, "grad_norm": 0.010969053022563457, "learning_rate": 5.1434322475472895e-06, "loss": 0.0001, "step": 17470 }, { "epoch": 16.799038461538462, "grad_norm": 0.02200576849281788, "learning_rate": 5.142343378840637e-06, "loss": 0.0002, "step": 17471 }, { "epoch": 16.8, "grad_norm": 0.09903431683778763, "learning_rate": 5.1412545855105535e-06, "loss": 0.0004, "step": 17472 }, { "epoch": 16.80096153846154, "grad_norm": 0.004392566625028849, "learning_rate": 5.14016586757394e-06, "loss": 0.0, "step": 17473 }, { "epoch": 16.801923076923078, "grad_norm": 0.03783734515309334, "learning_rate": 5.139077225047691e-06, "loss": 0.0002, "step": 17474 }, { "epoch": 16.802884615384617, "grad_norm": 2.27091121673584, "learning_rate": 5.1379886579486934e-06, "loss": 0.0072, "step": 17475 }, { "epoch": 16.803846153846155, "grad_norm": 4.7161712646484375, "learning_rate": 5.136900166293843e-06, "loss": 0.06, "step": 17476 }, { "epoch": 16.80480769230769, "grad_norm": 0.04595354199409485, "learning_rate": 5.1358117501000326e-06, "loss": 0.0004, "step": 17477 }, { "epoch": 16.80576923076923, "grad_norm": 0.05668560788035393, "learning_rate": 5.134723409384144e-06, "loss": 0.0003, "step": 17478 }, { "epoch": 16.806730769230768, "grad_norm": 0.02070404775440693, "learning_rate": 5.1336351441630715e-06, "loss": 0.0001, "step": 17479 }, { "epoch": 16.807692307692307, "grad_norm": 0.019610943272709846, "learning_rate": 5.132546954453694e-06, "loss": 0.0002, "step": 17480 }, { "epoch": 16.808653846153845, "grad_norm": 0.013988276943564415, "learning_rate": 5.131458840272905e-06, "loss": 0.0001, "step": 17481 }, { "epoch": 16.809615384615384, "grad_norm": 0.00754137197509408, "learning_rate": 5.1303708016375874e-06, "loss": 0.0001, "step": 17482 }, { "epoch": 16.810576923076923, "grad_norm": 0.010623045265674591, "learning_rate": 5.129282838564619e-06, "loss": 0.0001, "step": 17483 }, { "epoch": 16.81153846153846, "grad_norm": 0.13274192810058594, "learning_rate": 5.128194951070886e-06, "loss": 0.0004, "step": 17484 }, { "epoch": 16.8125, "grad_norm": 0.017642181366682053, "learning_rate": 5.127107139173273e-06, "loss": 0.0001, "step": 17485 }, { "epoch": 16.81346153846154, "grad_norm": 0.005343315191566944, "learning_rate": 5.1260194028886515e-06, "loss": 0.0, "step": 17486 }, { "epoch": 16.814423076923077, "grad_norm": 0.20111660659313202, "learning_rate": 5.124931742233905e-06, "loss": 0.0008, "step": 17487 }, { "epoch": 16.815384615384616, "grad_norm": 0.04827914759516716, "learning_rate": 5.123844157225912e-06, "loss": 0.0003, "step": 17488 }, { "epoch": 16.816346153846155, "grad_norm": 0.01997876539826393, "learning_rate": 5.122756647881543e-06, "loss": 0.0002, "step": 17489 }, { "epoch": 16.817307692307693, "grad_norm": 0.07965124398469925, "learning_rate": 5.12166921421768e-06, "loss": 0.0003, "step": 17490 }, { "epoch": 16.818269230769232, "grad_norm": 0.017811153084039688, "learning_rate": 5.1205818562511884e-06, "loss": 0.0002, "step": 17491 }, { "epoch": 16.81923076923077, "grad_norm": 0.026282887905836105, "learning_rate": 5.119494573998946e-06, "loss": 0.0001, "step": 17492 }, { "epoch": 16.82019230769231, "grad_norm": 0.01157406996935606, "learning_rate": 5.118407367477829e-06, "loss": 0.0001, "step": 17493 }, { "epoch": 16.821153846153845, "grad_norm": 0.05393725633621216, "learning_rate": 5.117320236704697e-06, "loss": 0.0004, "step": 17494 }, { "epoch": 16.822115384615383, "grad_norm": 0.008079888299107552, "learning_rate": 5.116233181696424e-06, "loss": 0.0001, "step": 17495 }, { "epoch": 16.823076923076922, "grad_norm": 0.08528073132038116, "learning_rate": 5.115146202469883e-06, "loss": 0.0006, "step": 17496 }, { "epoch": 16.82403846153846, "grad_norm": 0.01828809827566147, "learning_rate": 5.114059299041932e-06, "loss": 0.0001, "step": 17497 }, { "epoch": 16.825, "grad_norm": 0.05799412727355957, "learning_rate": 5.112972471429441e-06, "loss": 0.0003, "step": 17498 }, { "epoch": 16.825961538461538, "grad_norm": 2.360949993133545, "learning_rate": 5.1118857196492775e-06, "loss": 0.0237, "step": 17499 }, { "epoch": 16.826923076923077, "grad_norm": 0.05087537690997124, "learning_rate": 5.110799043718298e-06, "loss": 0.0003, "step": 17500 }, { "epoch": 16.827884615384615, "grad_norm": 0.06528327614068985, "learning_rate": 5.109712443653371e-06, "loss": 0.0003, "step": 17501 }, { "epoch": 16.828846153846154, "grad_norm": 0.013004980981349945, "learning_rate": 5.10862591947135e-06, "loss": 0.0001, "step": 17502 }, { "epoch": 16.829807692307693, "grad_norm": 0.2635711133480072, "learning_rate": 5.107539471189099e-06, "loss": 0.0007, "step": 17503 }, { "epoch": 16.83076923076923, "grad_norm": 0.04749952629208565, "learning_rate": 5.10645309882348e-06, "loss": 0.0003, "step": 17504 }, { "epoch": 16.83173076923077, "grad_norm": 0.27073803544044495, "learning_rate": 5.105366802391343e-06, "loss": 0.0008, "step": 17505 }, { "epoch": 16.83269230769231, "grad_norm": 0.006994619965553284, "learning_rate": 5.104280581909547e-06, "loss": 0.0001, "step": 17506 }, { "epoch": 16.833653846153847, "grad_norm": 0.07247303426265717, "learning_rate": 5.103194437394952e-06, "loss": 0.0005, "step": 17507 }, { "epoch": 16.834615384615386, "grad_norm": 0.00869524385780096, "learning_rate": 5.102108368864403e-06, "loss": 0.0001, "step": 17508 }, { "epoch": 16.835576923076925, "grad_norm": 0.019908955320715904, "learning_rate": 5.1010223763347565e-06, "loss": 0.0002, "step": 17509 }, { "epoch": 16.83653846153846, "grad_norm": 0.2486312985420227, "learning_rate": 5.09993645982287e-06, "loss": 0.0007, "step": 17510 }, { "epoch": 16.8375, "grad_norm": 0.018272187560796738, "learning_rate": 5.098850619345584e-06, "loss": 0.0001, "step": 17511 }, { "epoch": 16.838461538461537, "grad_norm": 0.027922432869672775, "learning_rate": 5.097764854919754e-06, "loss": 0.0002, "step": 17512 }, { "epoch": 16.839423076923076, "grad_norm": 0.017526717856526375, "learning_rate": 5.096679166562221e-06, "loss": 0.0002, "step": 17513 }, { "epoch": 16.840384615384615, "grad_norm": 0.015114969573915005, "learning_rate": 5.0955935542898395e-06, "loss": 0.0001, "step": 17514 }, { "epoch": 16.841346153846153, "grad_norm": 0.21644476056098938, "learning_rate": 5.094508018119454e-06, "loss": 0.0013, "step": 17515 }, { "epoch": 16.842307692307692, "grad_norm": 0.005406093783676624, "learning_rate": 5.093422558067902e-06, "loss": 0.0001, "step": 17516 }, { "epoch": 16.84326923076923, "grad_norm": 0.03536785766482353, "learning_rate": 5.092337174152032e-06, "loss": 0.0003, "step": 17517 }, { "epoch": 16.84423076923077, "grad_norm": 0.15008644759655, "learning_rate": 5.0912518663886885e-06, "loss": 0.0007, "step": 17518 }, { "epoch": 16.845192307692308, "grad_norm": 0.17784757912158966, "learning_rate": 5.0901666347947065e-06, "loss": 0.0005, "step": 17519 }, { "epoch": 16.846153846153847, "grad_norm": 0.1500563770532608, "learning_rate": 5.089081479386928e-06, "loss": 0.0005, "step": 17520 }, { "epoch": 16.847115384615385, "grad_norm": 0.06571809947490692, "learning_rate": 5.087996400182193e-06, "loss": 0.0007, "step": 17521 }, { "epoch": 16.848076923076924, "grad_norm": 0.006033468525856733, "learning_rate": 5.086911397197336e-06, "loss": 0.0001, "step": 17522 }, { "epoch": 16.849038461538463, "grad_norm": 0.00865930039435625, "learning_rate": 5.0858264704491944e-06, "loss": 0.0001, "step": 17523 }, { "epoch": 16.85, "grad_norm": 0.014491640031337738, "learning_rate": 5.084741619954605e-06, "loss": 0.0001, "step": 17524 }, { "epoch": 16.85096153846154, "grad_norm": 1.0447250604629517, "learning_rate": 5.083656845730398e-06, "loss": 0.0064, "step": 17525 }, { "epoch": 16.851923076923075, "grad_norm": 3.0336971282958984, "learning_rate": 5.08257214779341e-06, "loss": 0.0582, "step": 17526 }, { "epoch": 16.852884615384614, "grad_norm": 0.013407363556325436, "learning_rate": 5.0814875261604656e-06, "loss": 0.0, "step": 17527 }, { "epoch": 16.853846153846153, "grad_norm": 0.017737535759806633, "learning_rate": 5.0804029808484e-06, "loss": 0.0001, "step": 17528 }, { "epoch": 16.85480769230769, "grad_norm": 3.719531774520874, "learning_rate": 5.079318511874045e-06, "loss": 0.0559, "step": 17529 }, { "epoch": 16.85576923076923, "grad_norm": 0.04315963014960289, "learning_rate": 5.07823411925422e-06, "loss": 0.0002, "step": 17530 }, { "epoch": 16.85673076923077, "grad_norm": 0.02977844513952732, "learning_rate": 5.077149803005757e-06, "loss": 0.0002, "step": 17531 }, { "epoch": 16.857692307692307, "grad_norm": 0.0635797530412674, "learning_rate": 5.076065563145485e-06, "loss": 0.0004, "step": 17532 }, { "epoch": 16.858653846153846, "grad_norm": 1.1571484804153442, "learning_rate": 5.074981399690219e-06, "loss": 0.016, "step": 17533 }, { "epoch": 16.859615384615385, "grad_norm": 0.8275517821311951, "learning_rate": 5.073897312656787e-06, "loss": 0.0033, "step": 17534 }, { "epoch": 16.860576923076923, "grad_norm": 0.39190372824668884, "learning_rate": 5.072813302062015e-06, "loss": 0.0012, "step": 17535 }, { "epoch": 16.861538461538462, "grad_norm": 0.09527644515037537, "learning_rate": 5.071729367922715e-06, "loss": 0.0004, "step": 17536 }, { "epoch": 16.8625, "grad_norm": 0.5012062191963196, "learning_rate": 5.070645510255716e-06, "loss": 0.0099, "step": 17537 }, { "epoch": 16.86346153846154, "grad_norm": 0.02888770028948784, "learning_rate": 5.069561729077826e-06, "loss": 0.0001, "step": 17538 }, { "epoch": 16.864423076923078, "grad_norm": 0.009637726470828056, "learning_rate": 5.068478024405868e-06, "loss": 0.0001, "step": 17539 }, { "epoch": 16.865384615384617, "grad_norm": 0.016850771382451057, "learning_rate": 5.0673943962566615e-06, "loss": 0.0001, "step": 17540 }, { "epoch": 16.866346153846155, "grad_norm": 0.04432213678956032, "learning_rate": 5.066310844647012e-06, "loss": 0.0001, "step": 17541 }, { "epoch": 16.86730769230769, "grad_norm": 0.003654699306935072, "learning_rate": 5.0652273695937395e-06, "loss": 0.0, "step": 17542 }, { "epoch": 16.86826923076923, "grad_norm": 0.003566676052287221, "learning_rate": 5.064143971113657e-06, "loss": 0.0, "step": 17543 }, { "epoch": 16.869230769230768, "grad_norm": 0.0143553726375103, "learning_rate": 5.06306064922357e-06, "loss": 0.0001, "step": 17544 }, { "epoch": 16.870192307692307, "grad_norm": 0.04675954952836037, "learning_rate": 5.061977403940292e-06, "loss": 0.0003, "step": 17545 }, { "epoch": 16.871153846153845, "grad_norm": 0.02240237034857273, "learning_rate": 5.060894235280637e-06, "loss": 0.0001, "step": 17546 }, { "epoch": 16.872115384615384, "grad_norm": 0.017301253974437714, "learning_rate": 5.059811143261402e-06, "loss": 0.0001, "step": 17547 }, { "epoch": 16.873076923076923, "grad_norm": 0.006688154302537441, "learning_rate": 5.058728127899404e-06, "loss": 0.0, "step": 17548 }, { "epoch": 16.87403846153846, "grad_norm": 0.028327977284789085, "learning_rate": 5.057645189211437e-06, "loss": 0.0001, "step": 17549 }, { "epoch": 16.875, "grad_norm": 0.007908684201538563, "learning_rate": 5.056562327214314e-06, "loss": 0.0001, "step": 17550 }, { "epoch": 16.87596153846154, "grad_norm": 0.265144407749176, "learning_rate": 5.055479541924837e-06, "loss": 0.0009, "step": 17551 }, { "epoch": 16.876923076923077, "grad_norm": 0.015430324710905552, "learning_rate": 5.0543968333598e-06, "loss": 0.0001, "step": 17552 }, { "epoch": 16.877884615384616, "grad_norm": 0.1735503226518631, "learning_rate": 5.0533142015360115e-06, "loss": 0.0007, "step": 17553 }, { "epoch": 16.878846153846155, "grad_norm": 0.19719098508358002, "learning_rate": 5.052231646470271e-06, "loss": 0.0007, "step": 17554 }, { "epoch": 16.879807692307693, "grad_norm": 0.17774631083011627, "learning_rate": 5.05114916817937e-06, "loss": 0.0006, "step": 17555 }, { "epoch": 16.880769230769232, "grad_norm": 3.8060686588287354, "learning_rate": 5.050066766680111e-06, "loss": 0.0295, "step": 17556 }, { "epoch": 16.88173076923077, "grad_norm": 0.5609592199325562, "learning_rate": 5.048984441989289e-06, "loss": 0.0025, "step": 17557 }, { "epoch": 16.88269230769231, "grad_norm": 0.025973621755838394, "learning_rate": 5.047902194123695e-06, "loss": 0.0001, "step": 17558 }, { "epoch": 16.883653846153845, "grad_norm": 0.021741071715950966, "learning_rate": 5.046820023100129e-06, "loss": 0.0003, "step": 17559 }, { "epoch": 16.884615384615383, "grad_norm": 0.0211494117975235, "learning_rate": 5.045737928935374e-06, "loss": 0.0002, "step": 17560 }, { "epoch": 16.885576923076922, "grad_norm": 0.01228542160242796, "learning_rate": 5.044655911646226e-06, "loss": 0.0001, "step": 17561 }, { "epoch": 16.88653846153846, "grad_norm": 0.013046768493950367, "learning_rate": 5.043573971249478e-06, "loss": 0.0001, "step": 17562 }, { "epoch": 16.8875, "grad_norm": 0.06169215217232704, "learning_rate": 5.042492107761912e-06, "loss": 0.0004, "step": 17563 }, { "epoch": 16.888461538461538, "grad_norm": 0.41416066884994507, "learning_rate": 5.0414103212003176e-06, "loss": 0.0024, "step": 17564 }, { "epoch": 16.889423076923077, "grad_norm": 0.05388490855693817, "learning_rate": 5.040328611581487e-06, "loss": 0.0003, "step": 17565 }, { "epoch": 16.890384615384615, "grad_norm": 0.01775960810482502, "learning_rate": 5.039246978922194e-06, "loss": 0.0001, "step": 17566 }, { "epoch": 16.891346153846154, "grad_norm": 0.02078179270029068, "learning_rate": 5.03816542323923e-06, "loss": 0.0001, "step": 17567 }, { "epoch": 16.892307692307693, "grad_norm": 0.008449596352875233, "learning_rate": 5.037083944549378e-06, "loss": 0.0001, "step": 17568 }, { "epoch": 16.89326923076923, "grad_norm": 0.03828088194131851, "learning_rate": 5.036002542869414e-06, "loss": 0.0002, "step": 17569 }, { "epoch": 16.89423076923077, "grad_norm": 0.009251991286873817, "learning_rate": 5.034921218216126e-06, "loss": 0.0001, "step": 17570 }, { "epoch": 16.89519230769231, "grad_norm": 0.019590256735682487, "learning_rate": 5.033839970606283e-06, "loss": 0.0001, "step": 17571 }, { "epoch": 16.896153846153847, "grad_norm": 1.1520497798919678, "learning_rate": 5.03275880005667e-06, "loss": 0.0036, "step": 17572 }, { "epoch": 16.897115384615386, "grad_norm": 0.029480798169970512, "learning_rate": 5.031677706584064e-06, "loss": 0.0002, "step": 17573 }, { "epoch": 16.898076923076925, "grad_norm": 1.8751301765441895, "learning_rate": 5.030596690205236e-06, "loss": 0.036, "step": 17574 }, { "epoch": 16.89903846153846, "grad_norm": 0.043916501104831696, "learning_rate": 5.02951575093696e-06, "loss": 0.0003, "step": 17575 }, { "epoch": 16.9, "grad_norm": 0.009219875559210777, "learning_rate": 5.0284348887960165e-06, "loss": 0.0001, "step": 17576 }, { "epoch": 16.900961538461537, "grad_norm": 0.0344487801194191, "learning_rate": 5.027354103799169e-06, "loss": 0.0002, "step": 17577 }, { "epoch": 16.901923076923076, "grad_norm": 0.005380176939070225, "learning_rate": 5.026273395963191e-06, "loss": 0.0001, "step": 17578 }, { "epoch": 16.902884615384615, "grad_norm": 0.05855715274810791, "learning_rate": 5.025192765304856e-06, "loss": 0.0003, "step": 17579 }, { "epoch": 16.903846153846153, "grad_norm": 0.016517262905836105, "learning_rate": 5.0241122118409235e-06, "loss": 0.0002, "step": 17580 }, { "epoch": 16.904807692307692, "grad_norm": 4.318572998046875, "learning_rate": 5.023031735588171e-06, "loss": 0.0359, "step": 17581 }, { "epoch": 16.90576923076923, "grad_norm": 0.04641420394182205, "learning_rate": 5.021951336563353e-06, "loss": 0.0003, "step": 17582 }, { "epoch": 16.90673076923077, "grad_norm": 0.08787354081869125, "learning_rate": 5.020871014783241e-06, "loss": 0.0004, "step": 17583 }, { "epoch": 16.907692307692308, "grad_norm": 0.007245194632560015, "learning_rate": 5.019790770264601e-06, "loss": 0.0001, "step": 17584 }, { "epoch": 16.908653846153847, "grad_norm": 0.01235619280487299, "learning_rate": 5.018710603024187e-06, "loss": 0.0001, "step": 17585 }, { "epoch": 16.909615384615385, "grad_norm": 0.012034202925860882, "learning_rate": 5.017630513078765e-06, "loss": 0.0001, "step": 17586 }, { "epoch": 16.910576923076924, "grad_norm": 0.07249582558870316, "learning_rate": 5.016550500445099e-06, "loss": 0.0003, "step": 17587 }, { "epoch": 16.911538461538463, "grad_norm": 0.05680336803197861, "learning_rate": 5.015470565139937e-06, "loss": 0.0005, "step": 17588 }, { "epoch": 16.9125, "grad_norm": 0.09658443927764893, "learning_rate": 5.0143907071800445e-06, "loss": 0.0003, "step": 17589 }, { "epoch": 16.91346153846154, "grad_norm": 0.10510781407356262, "learning_rate": 5.013310926582179e-06, "loss": 0.0004, "step": 17590 }, { "epoch": 16.914423076923075, "grad_norm": 0.030699972063302994, "learning_rate": 5.012231223363089e-06, "loss": 0.0002, "step": 17591 }, { "epoch": 16.915384615384614, "grad_norm": 0.008667177520692348, "learning_rate": 5.011151597539534e-06, "loss": 0.0001, "step": 17592 }, { "epoch": 16.916346153846153, "grad_norm": 3.038236379623413, "learning_rate": 5.0100720491282605e-06, "loss": 0.021, "step": 17593 }, { "epoch": 16.91730769230769, "grad_norm": 0.04014919698238373, "learning_rate": 5.008992578146023e-06, "loss": 0.0001, "step": 17594 }, { "epoch": 16.91826923076923, "grad_norm": 0.11283248662948608, "learning_rate": 5.007913184609578e-06, "loss": 0.0002, "step": 17595 }, { "epoch": 16.91923076923077, "grad_norm": 0.017297469079494476, "learning_rate": 5.006833868535663e-06, "loss": 0.0002, "step": 17596 }, { "epoch": 16.920192307692307, "grad_norm": 0.13250015676021576, "learning_rate": 5.005754629941033e-06, "loss": 0.0004, "step": 17597 }, { "epoch": 16.921153846153846, "grad_norm": 0.4287746846675873, "learning_rate": 5.004675468842436e-06, "loss": 0.0024, "step": 17598 }, { "epoch": 16.922115384615385, "grad_norm": 0.027128633111715317, "learning_rate": 5.003596385256611e-06, "loss": 0.0002, "step": 17599 }, { "epoch": 16.923076923076923, "grad_norm": 0.048949044197797775, "learning_rate": 5.002517379200307e-06, "loss": 0.0002, "step": 17600 }, { "epoch": 16.924038461538462, "grad_norm": 2.116992950439453, "learning_rate": 5.001438450690268e-06, "loss": 0.0157, "step": 17601 }, { "epoch": 16.925, "grad_norm": 0.06810752302408218, "learning_rate": 5.00035959974323e-06, "loss": 0.0003, "step": 17602 }, { "epoch": 16.92596153846154, "grad_norm": 0.004176202230155468, "learning_rate": 4.9992808263759414e-06, "loss": 0.0, "step": 17603 }, { "epoch": 16.926923076923078, "grad_norm": 0.03086543083190918, "learning_rate": 4.9982021306051345e-06, "loss": 0.0001, "step": 17604 }, { "epoch": 16.927884615384617, "grad_norm": 0.03785775601863861, "learning_rate": 4.997123512447549e-06, "loss": 0.0002, "step": 17605 }, { "epoch": 16.928846153846155, "grad_norm": 0.005609757732599974, "learning_rate": 4.996044971919928e-06, "loss": 0.0001, "step": 17606 }, { "epoch": 16.92980769230769, "grad_norm": 0.8137338757514954, "learning_rate": 4.994966509038999e-06, "loss": 0.0036, "step": 17607 }, { "epoch": 16.93076923076923, "grad_norm": 0.018430210649967194, "learning_rate": 4.9938881238215e-06, "loss": 0.0003, "step": 17608 }, { "epoch": 16.931730769230768, "grad_norm": 0.0046609085984528065, "learning_rate": 4.992809816284168e-06, "loss": 0.0, "step": 17609 }, { "epoch": 16.932692307692307, "grad_norm": 0.037166185677051544, "learning_rate": 4.9917315864437275e-06, "loss": 0.0002, "step": 17610 }, { "epoch": 16.933653846153845, "grad_norm": 0.004352607298642397, "learning_rate": 4.990653434316915e-06, "loss": 0.0001, "step": 17611 }, { "epoch": 16.934615384615384, "grad_norm": 0.017871277406811714, "learning_rate": 4.989575359920462e-06, "loss": 0.0001, "step": 17612 }, { "epoch": 16.935576923076923, "grad_norm": 1.822569727897644, "learning_rate": 4.98849736327109e-06, "loss": 0.0163, "step": 17613 }, { "epoch": 16.93653846153846, "grad_norm": 0.03960201516747475, "learning_rate": 4.987419444385531e-06, "loss": 0.0001, "step": 17614 }, { "epoch": 16.9375, "grad_norm": 2.8949007987976074, "learning_rate": 4.986341603280513e-06, "loss": 0.0237, "step": 17615 }, { "epoch": 16.93846153846154, "grad_norm": 0.6776453852653503, "learning_rate": 4.985263839972756e-06, "loss": 0.0023, "step": 17616 }, { "epoch": 16.939423076923077, "grad_norm": 0.006979634054005146, "learning_rate": 4.9841861544789895e-06, "loss": 0.0001, "step": 17617 }, { "epoch": 16.940384615384616, "grad_norm": 2.0684823989868164, "learning_rate": 4.983108546815929e-06, "loss": 0.0154, "step": 17618 }, { "epoch": 16.941346153846155, "grad_norm": 0.03667829558253288, "learning_rate": 4.982031017000299e-06, "loss": 0.0003, "step": 17619 }, { "epoch": 16.942307692307693, "grad_norm": 0.038397710770368576, "learning_rate": 4.980953565048824e-06, "loss": 0.0003, "step": 17620 }, { "epoch": 16.943269230769232, "grad_norm": 0.004855563398450613, "learning_rate": 4.979876190978215e-06, "loss": 0.0001, "step": 17621 }, { "epoch": 16.94423076923077, "grad_norm": 0.0038055521436035633, "learning_rate": 4.978798894805194e-06, "loss": 0.0, "step": 17622 }, { "epoch": 16.94519230769231, "grad_norm": 0.01573423109948635, "learning_rate": 4.977721676546481e-06, "loss": 0.0002, "step": 17623 }, { "epoch": 16.946153846153845, "grad_norm": 2.3348095417022705, "learning_rate": 4.976644536218783e-06, "loss": 0.0073, "step": 17624 }, { "epoch": 16.947115384615383, "grad_norm": 0.004917608108371496, "learning_rate": 4.975567473838818e-06, "loss": 0.0, "step": 17625 }, { "epoch": 16.948076923076922, "grad_norm": 0.01204865612089634, "learning_rate": 4.974490489423305e-06, "loss": 0.0001, "step": 17626 }, { "epoch": 16.94903846153846, "grad_norm": 0.09276009351015091, "learning_rate": 4.973413582988944e-06, "loss": 0.0005, "step": 17627 }, { "epoch": 16.95, "grad_norm": 0.8273094296455383, "learning_rate": 4.972336754552457e-06, "loss": 0.0035, "step": 17628 }, { "epoch": 16.950961538461538, "grad_norm": 1.1848185062408447, "learning_rate": 4.9712600041305425e-06, "loss": 0.0368, "step": 17629 }, { "epoch": 16.951923076923077, "grad_norm": 0.09572868049144745, "learning_rate": 4.970183331739914e-06, "loss": 0.0003, "step": 17630 }, { "epoch": 16.952884615384615, "grad_norm": 0.016232430934906006, "learning_rate": 4.969106737397281e-06, "loss": 0.0001, "step": 17631 }, { "epoch": 16.953846153846154, "grad_norm": 0.021396638825535774, "learning_rate": 4.9680302211193435e-06, "loss": 0.0001, "step": 17632 }, { "epoch": 16.954807692307693, "grad_norm": 0.0766414925456047, "learning_rate": 4.966953782922806e-06, "loss": 0.0005, "step": 17633 }, { "epoch": 16.95576923076923, "grad_norm": 1.332881212234497, "learning_rate": 4.96587742282438e-06, "loss": 0.0033, "step": 17634 }, { "epoch": 16.95673076923077, "grad_norm": 0.15008585155010223, "learning_rate": 4.964801140840757e-06, "loss": 0.0006, "step": 17635 }, { "epoch": 16.95769230769231, "grad_norm": 0.013942232355475426, "learning_rate": 4.963724936988642e-06, "loss": 0.0001, "step": 17636 }, { "epoch": 16.958653846153847, "grad_norm": 0.009612358175218105, "learning_rate": 4.9626488112847384e-06, "loss": 0.0001, "step": 17637 }, { "epoch": 16.959615384615386, "grad_norm": 0.0829605981707573, "learning_rate": 4.961572763745737e-06, "loss": 0.0003, "step": 17638 }, { "epoch": 16.960576923076925, "grad_norm": 0.04158305004239082, "learning_rate": 4.960496794388343e-06, "loss": 0.0001, "step": 17639 }, { "epoch": 16.96153846153846, "grad_norm": 0.007891830988228321, "learning_rate": 4.959420903229244e-06, "loss": 0.0001, "step": 17640 }, { "epoch": 16.9625, "grad_norm": 0.022390196099877357, "learning_rate": 4.958345090285138e-06, "loss": 0.0001, "step": 17641 }, { "epoch": 16.963461538461537, "grad_norm": 0.6482970714569092, "learning_rate": 4.957269355572724e-06, "loss": 0.002, "step": 17642 }, { "epoch": 16.964423076923076, "grad_norm": 1.1032353639602661, "learning_rate": 4.956193699108685e-06, "loss": 0.0066, "step": 17643 }, { "epoch": 16.965384615384615, "grad_norm": 0.7750635147094727, "learning_rate": 4.955118120909716e-06, "loss": 0.0024, "step": 17644 }, { "epoch": 16.966346153846153, "grad_norm": 0.5399326682090759, "learning_rate": 4.95404262099251e-06, "loss": 0.0012, "step": 17645 }, { "epoch": 16.967307692307692, "grad_norm": 1.2590221166610718, "learning_rate": 4.9529671993737516e-06, "loss": 0.0071, "step": 17646 }, { "epoch": 16.96826923076923, "grad_norm": 0.7509742379188538, "learning_rate": 4.951891856070128e-06, "loss": 0.0021, "step": 17647 }, { "epoch": 16.96923076923077, "grad_norm": 0.12165660411119461, "learning_rate": 4.950816591098331e-06, "loss": 0.0005, "step": 17648 }, { "epoch": 16.970192307692308, "grad_norm": 0.008087827824056149, "learning_rate": 4.949741404475037e-06, "loss": 0.0001, "step": 17649 }, { "epoch": 16.971153846153847, "grad_norm": 0.09186295419931412, "learning_rate": 4.948666296216938e-06, "loss": 0.0005, "step": 17650 }, { "epoch": 16.972115384615385, "grad_norm": 0.009046055376529694, "learning_rate": 4.947591266340709e-06, "loss": 0.0001, "step": 17651 }, { "epoch": 16.973076923076924, "grad_norm": 0.01255466416478157, "learning_rate": 4.946516314863035e-06, "loss": 0.0001, "step": 17652 }, { "epoch": 16.974038461538463, "grad_norm": 0.09626153856515884, "learning_rate": 4.9454414418006e-06, "loss": 0.0005, "step": 17653 }, { "epoch": 16.975, "grad_norm": 0.03197062015533447, "learning_rate": 4.944366647170074e-06, "loss": 0.0003, "step": 17654 }, { "epoch": 16.97596153846154, "grad_norm": 2.5097053050994873, "learning_rate": 4.943291930988141e-06, "loss": 0.0199, "step": 17655 }, { "epoch": 16.976923076923075, "grad_norm": 0.02286417968571186, "learning_rate": 4.942217293271478e-06, "loss": 0.0003, "step": 17656 }, { "epoch": 16.977884615384614, "grad_norm": 0.2010618895292282, "learning_rate": 4.9411427340367555e-06, "loss": 0.0006, "step": 17657 }, { "epoch": 16.978846153846153, "grad_norm": 0.10141824930906296, "learning_rate": 4.94006825330065e-06, "loss": 0.0003, "step": 17658 }, { "epoch": 16.97980769230769, "grad_norm": 0.0061230589635670185, "learning_rate": 4.93899385107984e-06, "loss": 0.0001, "step": 17659 }, { "epoch": 16.98076923076923, "grad_norm": 0.24544084072113037, "learning_rate": 4.937919527390985e-06, "loss": 0.0011, "step": 17660 }, { "epoch": 16.98173076923077, "grad_norm": 0.1911212056875229, "learning_rate": 4.936845282250767e-06, "loss": 0.002, "step": 17661 }, { "epoch": 16.982692307692307, "grad_norm": 0.04759436845779419, "learning_rate": 4.935771115675846e-06, "loss": 0.0002, "step": 17662 }, { "epoch": 16.983653846153846, "grad_norm": 2.206441879272461, "learning_rate": 4.934697027682894e-06, "loss": 0.0649, "step": 17663 }, { "epoch": 16.984615384615385, "grad_norm": 0.04533364251255989, "learning_rate": 4.933623018288582e-06, "loss": 0.0002, "step": 17664 }, { "epoch": 16.985576923076923, "grad_norm": 0.011802847497165203, "learning_rate": 4.932549087509567e-06, "loss": 0.0001, "step": 17665 }, { "epoch": 16.986538461538462, "grad_norm": 0.007642840966582298, "learning_rate": 4.931475235362518e-06, "loss": 0.0001, "step": 17666 }, { "epoch": 16.9875, "grad_norm": 0.005850458983331919, "learning_rate": 4.930401461864099e-06, "loss": 0.0001, "step": 17667 }, { "epoch": 16.98846153846154, "grad_norm": 0.29538723826408386, "learning_rate": 4.929327767030968e-06, "loss": 0.0016, "step": 17668 }, { "epoch": 16.989423076923078, "grad_norm": 0.010904353111982346, "learning_rate": 4.9282541508797875e-06, "loss": 0.0001, "step": 17669 }, { "epoch": 16.990384615384617, "grad_norm": 0.1688668429851532, "learning_rate": 4.927180613427221e-06, "loss": 0.0006, "step": 17670 }, { "epoch": 16.991346153846155, "grad_norm": 0.003669547149911523, "learning_rate": 4.92610715468992e-06, "loss": 0.0, "step": 17671 }, { "epoch": 16.99230769230769, "grad_norm": 0.010721569880843163, "learning_rate": 4.925033774684547e-06, "loss": 0.0001, "step": 17672 }, { "epoch": 16.99326923076923, "grad_norm": 0.006864962633699179, "learning_rate": 4.9239604734277515e-06, "loss": 0.0001, "step": 17673 }, { "epoch": 16.994230769230768, "grad_norm": 0.024299757555127144, "learning_rate": 4.922887250936192e-06, "loss": 0.0001, "step": 17674 }, { "epoch": 16.995192307692307, "grad_norm": 0.047841884195804596, "learning_rate": 4.921814107226523e-06, "loss": 0.0002, "step": 17675 }, { "epoch": 16.996153846153845, "grad_norm": 0.08805713802576065, "learning_rate": 4.9207410423153925e-06, "loss": 0.0003, "step": 17676 }, { "epoch": 16.997115384615384, "grad_norm": 0.04219622164964676, "learning_rate": 4.919668056219453e-06, "loss": 0.0002, "step": 17677 }, { "epoch": 16.998076923076923, "grad_norm": 0.7890726327896118, "learning_rate": 4.9185951489553596e-06, "loss": 0.0017, "step": 17678 }, { "epoch": 16.99903846153846, "grad_norm": 0.3456186056137085, "learning_rate": 4.91752232053975e-06, "loss": 0.0006, "step": 17679 }, { "epoch": 17.0, "grad_norm": 0.019741090014576912, "learning_rate": 4.916449570989279e-06, "loss": 0.0001, "step": 17680 }, { "epoch": 17.00096153846154, "grad_norm": 0.03082846663892269, "learning_rate": 4.915376900320594e-06, "loss": 0.0002, "step": 17681 }, { "epoch": 17.001923076923077, "grad_norm": 0.005575668998062611, "learning_rate": 4.914304308550331e-06, "loss": 0.0001, "step": 17682 }, { "epoch": 17.002884615384616, "grad_norm": 0.026365958154201508, "learning_rate": 4.913231795695145e-06, "loss": 0.0001, "step": 17683 }, { "epoch": 17.003846153846155, "grad_norm": 0.020171595737338066, "learning_rate": 4.912159361771667e-06, "loss": 0.0002, "step": 17684 }, { "epoch": 17.004807692307693, "grad_norm": 0.008569195866584778, "learning_rate": 4.911087006796543e-06, "loss": 0.0001, "step": 17685 }, { "epoch": 17.005769230769232, "grad_norm": 0.011858947575092316, "learning_rate": 4.910014730786416e-06, "loss": 0.0001, "step": 17686 }, { "epoch": 17.00673076923077, "grad_norm": 0.014506293460726738, "learning_rate": 4.90894253375792e-06, "loss": 0.0001, "step": 17687 }, { "epoch": 17.00769230769231, "grad_norm": 0.004736084491014481, "learning_rate": 4.907870415727691e-06, "loss": 0.0, "step": 17688 }, { "epoch": 17.008653846153845, "grad_norm": 0.010152311064302921, "learning_rate": 4.9067983767123736e-06, "loss": 0.0001, "step": 17689 }, { "epoch": 17.009615384615383, "grad_norm": 0.013747277669608593, "learning_rate": 4.905726416728592e-06, "loss": 0.0001, "step": 17690 }, { "epoch": 17.010576923076922, "grad_norm": 0.03493862599134445, "learning_rate": 4.904654535792986e-06, "loss": 0.0002, "step": 17691 }, { "epoch": 17.01153846153846, "grad_norm": 0.03702976182103157, "learning_rate": 4.903582733922191e-06, "loss": 0.0001, "step": 17692 }, { "epoch": 17.0125, "grad_norm": 0.007843063212931156, "learning_rate": 4.902511011132829e-06, "loss": 0.0001, "step": 17693 }, { "epoch": 17.013461538461538, "grad_norm": 0.2278934270143509, "learning_rate": 4.901439367441539e-06, "loss": 0.0011, "step": 17694 }, { "epoch": 17.014423076923077, "grad_norm": 0.00397424167022109, "learning_rate": 4.900367802864944e-06, "loss": 0.0, "step": 17695 }, { "epoch": 17.015384615384615, "grad_norm": 0.007559963967651129, "learning_rate": 4.899296317419672e-06, "loss": 0.0001, "step": 17696 }, { "epoch": 17.016346153846154, "grad_norm": 0.010520211420953274, "learning_rate": 4.898224911122354e-06, "loss": 0.0001, "step": 17697 }, { "epoch": 17.017307692307693, "grad_norm": 0.019352106377482414, "learning_rate": 4.897153583989609e-06, "loss": 0.0001, "step": 17698 }, { "epoch": 17.01826923076923, "grad_norm": 0.0066437008790671825, "learning_rate": 4.896082336038064e-06, "loss": 0.0001, "step": 17699 }, { "epoch": 17.01923076923077, "grad_norm": 0.009160078130662441, "learning_rate": 4.895011167284344e-06, "loss": 0.0001, "step": 17700 }, { "epoch": 17.02019230769231, "grad_norm": 0.008194264955818653, "learning_rate": 4.893940077745065e-06, "loss": 0.0001, "step": 17701 }, { "epoch": 17.021153846153847, "grad_norm": 0.004545915871858597, "learning_rate": 4.8928690674368495e-06, "loss": 0.0, "step": 17702 }, { "epoch": 17.022115384615386, "grad_norm": 0.03247528523206711, "learning_rate": 4.89179813637632e-06, "loss": 0.0002, "step": 17703 }, { "epoch": 17.023076923076925, "grad_norm": 0.0027190293185412884, "learning_rate": 4.890727284580088e-06, "loss": 0.0, "step": 17704 }, { "epoch": 17.02403846153846, "grad_norm": 0.00784675870090723, "learning_rate": 4.889656512064773e-06, "loss": 0.0001, "step": 17705 }, { "epoch": 17.025, "grad_norm": 0.006665087770670652, "learning_rate": 4.888585818846994e-06, "loss": 0.0, "step": 17706 }, { "epoch": 17.025961538461537, "grad_norm": 0.008471352979540825, "learning_rate": 4.887515204943357e-06, "loss": 0.0001, "step": 17707 }, { "epoch": 17.026923076923076, "grad_norm": 0.5060986280441284, "learning_rate": 4.886444670370483e-06, "loss": 0.0015, "step": 17708 }, { "epoch": 17.027884615384615, "grad_norm": 4.422797203063965, "learning_rate": 4.885374215144976e-06, "loss": 0.0514, "step": 17709 }, { "epoch": 17.028846153846153, "grad_norm": 0.027190599590539932, "learning_rate": 4.8843038392834494e-06, "loss": 0.0002, "step": 17710 }, { "epoch": 17.029807692307692, "grad_norm": 0.00634932704269886, "learning_rate": 4.883233542802518e-06, "loss": 0.0001, "step": 17711 }, { "epoch": 17.03076923076923, "grad_norm": 0.21901008486747742, "learning_rate": 4.88216332571878e-06, "loss": 0.0011, "step": 17712 }, { "epoch": 17.03173076923077, "grad_norm": 0.04273313656449318, "learning_rate": 4.881093188048846e-06, "loss": 0.0002, "step": 17713 }, { "epoch": 17.032692307692308, "grad_norm": 0.11859025061130524, "learning_rate": 4.880023129809326e-06, "loss": 0.0004, "step": 17714 }, { "epoch": 17.033653846153847, "grad_norm": 0.005839191842824221, "learning_rate": 4.878953151016816e-06, "loss": 0.0001, "step": 17715 }, { "epoch": 17.034615384615385, "grad_norm": 0.02154226042330265, "learning_rate": 4.8778832516879236e-06, "loss": 0.0002, "step": 17716 }, { "epoch": 17.035576923076924, "grad_norm": 0.017340537160634995, "learning_rate": 4.8768134318392535e-06, "loss": 0.0001, "step": 17717 }, { "epoch": 17.036538461538463, "grad_norm": 0.5478761792182922, "learning_rate": 4.875743691487398e-06, "loss": 0.002, "step": 17718 }, { "epoch": 17.0375, "grad_norm": 0.009504789486527443, "learning_rate": 4.874674030648966e-06, "loss": 0.0001, "step": 17719 }, { "epoch": 17.03846153846154, "grad_norm": 0.01701868698000908, "learning_rate": 4.873604449340545e-06, "loss": 0.0002, "step": 17720 }, { "epoch": 17.039423076923075, "grad_norm": 0.00991426408290863, "learning_rate": 4.872534947578739e-06, "loss": 0.0001, "step": 17721 }, { "epoch": 17.040384615384614, "grad_norm": 0.05255858972668648, "learning_rate": 4.871465525380144e-06, "loss": 0.0003, "step": 17722 }, { "epoch": 17.041346153846153, "grad_norm": 0.005866035353392363, "learning_rate": 4.870396182761349e-06, "loss": 0.0001, "step": 17723 }, { "epoch": 17.04230769230769, "grad_norm": 0.00956154428422451, "learning_rate": 4.869326919738951e-06, "loss": 0.0001, "step": 17724 }, { "epoch": 17.04326923076923, "grad_norm": 0.23488296568393707, "learning_rate": 4.868257736329543e-06, "loss": 0.0008, "step": 17725 }, { "epoch": 17.04423076923077, "grad_norm": 0.014273487962782383, "learning_rate": 4.86718863254971e-06, "loss": 0.0001, "step": 17726 }, { "epoch": 17.045192307692307, "grad_norm": 0.16083888709545135, "learning_rate": 4.866119608416045e-06, "loss": 0.001, "step": 17727 }, { "epoch": 17.046153846153846, "grad_norm": 0.007772553246468306, "learning_rate": 4.8650506639451385e-06, "loss": 0.0, "step": 17728 }, { "epoch": 17.047115384615385, "grad_norm": 0.015041336417198181, "learning_rate": 4.863981799153573e-06, "loss": 0.0001, "step": 17729 }, { "epoch": 17.048076923076923, "grad_norm": 0.03320169076323509, "learning_rate": 4.862913014057938e-06, "loss": 0.0001, "step": 17730 }, { "epoch": 17.049038461538462, "grad_norm": 0.13932235538959503, "learning_rate": 4.861844308674813e-06, "loss": 0.0006, "step": 17731 }, { "epoch": 17.05, "grad_norm": 0.1656569242477417, "learning_rate": 4.8607756830207854e-06, "loss": 0.0006, "step": 17732 }, { "epoch": 17.05096153846154, "grad_norm": 0.2063070684671402, "learning_rate": 4.859707137112437e-06, "loss": 0.0005, "step": 17733 }, { "epoch": 17.051923076923078, "grad_norm": 0.04861525073647499, "learning_rate": 4.858638670966341e-06, "loss": 0.0001, "step": 17734 }, { "epoch": 17.052884615384617, "grad_norm": 0.008786754682660103, "learning_rate": 4.857570284599091e-06, "loss": 0.0001, "step": 17735 }, { "epoch": 17.053846153846155, "grad_norm": 0.008936901576817036, "learning_rate": 4.856501978027253e-06, "loss": 0.0001, "step": 17736 }, { "epoch": 17.05480769230769, "grad_norm": 0.047094136476516724, "learning_rate": 4.855433751267408e-06, "loss": 0.0001, "step": 17737 }, { "epoch": 17.05576923076923, "grad_norm": 0.03869425505399704, "learning_rate": 4.854365604336136e-06, "loss": 0.0001, "step": 17738 }, { "epoch": 17.056730769230768, "grad_norm": 0.009656785055994987, "learning_rate": 4.853297537250005e-06, "loss": 0.0001, "step": 17739 }, { "epoch": 17.057692307692307, "grad_norm": 0.0033198422752320766, "learning_rate": 4.852229550025589e-06, "loss": 0.0, "step": 17740 }, { "epoch": 17.058653846153845, "grad_norm": 0.007424529641866684, "learning_rate": 4.851161642679466e-06, "loss": 0.0001, "step": 17741 }, { "epoch": 17.059615384615384, "grad_norm": 0.023680537939071655, "learning_rate": 4.8500938152282e-06, "loss": 0.0002, "step": 17742 }, { "epoch": 17.060576923076923, "grad_norm": 0.1833416372537613, "learning_rate": 4.849026067688363e-06, "loss": 0.0006, "step": 17743 }, { "epoch": 17.06153846153846, "grad_norm": 0.01397403609007597, "learning_rate": 4.847958400076527e-06, "loss": 0.0001, "step": 17744 }, { "epoch": 17.0625, "grad_norm": 0.006609789561480284, "learning_rate": 4.846890812409251e-06, "loss": 0.0001, "step": 17745 }, { "epoch": 17.06346153846154, "grad_norm": 0.004806789103895426, "learning_rate": 4.84582330470311e-06, "loss": 0.0001, "step": 17746 }, { "epoch": 17.064423076923077, "grad_norm": 0.007187330164015293, "learning_rate": 4.84475587697466e-06, "loss": 0.0001, "step": 17747 }, { "epoch": 17.065384615384616, "grad_norm": 0.03808506578207016, "learning_rate": 4.843688529240468e-06, "loss": 0.0002, "step": 17748 }, { "epoch": 17.066346153846155, "grad_norm": 0.004496417939662933, "learning_rate": 4.8426212615171e-06, "loss": 0.0001, "step": 17749 }, { "epoch": 17.067307692307693, "grad_norm": 0.007491903379559517, "learning_rate": 4.8415540738211105e-06, "loss": 0.0, "step": 17750 }, { "epoch": 17.068269230769232, "grad_norm": 0.012221076525747776, "learning_rate": 4.840486966169061e-06, "loss": 0.0001, "step": 17751 }, { "epoch": 17.06923076923077, "grad_norm": 0.021458875387907028, "learning_rate": 4.8394199385775154e-06, "loss": 0.0002, "step": 17752 }, { "epoch": 17.07019230769231, "grad_norm": 0.04156999662518501, "learning_rate": 4.838352991063022e-06, "loss": 0.0002, "step": 17753 }, { "epoch": 17.071153846153845, "grad_norm": 0.01338291447609663, "learning_rate": 4.837286123642141e-06, "loss": 0.0001, "step": 17754 }, { "epoch": 17.072115384615383, "grad_norm": 0.012855129316449165, "learning_rate": 4.836219336331431e-06, "loss": 0.0001, "step": 17755 }, { "epoch": 17.073076923076922, "grad_norm": 0.016933761537075043, "learning_rate": 4.835152629147437e-06, "loss": 0.0001, "step": 17756 }, { "epoch": 17.07403846153846, "grad_norm": 0.007945718243718147, "learning_rate": 4.834086002106719e-06, "loss": 0.0001, "step": 17757 }, { "epoch": 17.075, "grad_norm": 1.090196967124939, "learning_rate": 4.8330194552258225e-06, "loss": 0.0084, "step": 17758 }, { "epoch": 17.075961538461538, "grad_norm": 0.016251370310783386, "learning_rate": 4.831952988521298e-06, "loss": 0.0001, "step": 17759 }, { "epoch": 17.076923076923077, "grad_norm": 0.3515022397041321, "learning_rate": 4.830886602009699e-06, "loss": 0.0008, "step": 17760 }, { "epoch": 17.077884615384615, "grad_norm": 0.004749419633299112, "learning_rate": 4.829820295707564e-06, "loss": 0.0001, "step": 17761 }, { "epoch": 17.078846153846154, "grad_norm": 0.04799732565879822, "learning_rate": 4.828754069631445e-06, "loss": 0.0002, "step": 17762 }, { "epoch": 17.079807692307693, "grad_norm": 0.005252023693174124, "learning_rate": 4.827687923797889e-06, "loss": 0.0001, "step": 17763 }, { "epoch": 17.08076923076923, "grad_norm": 0.007573366165161133, "learning_rate": 4.826621858223431e-06, "loss": 0.0001, "step": 17764 }, { "epoch": 17.08173076923077, "grad_norm": 0.2550964951515198, "learning_rate": 4.8255558729246195e-06, "loss": 0.0005, "step": 17765 }, { "epoch": 17.08269230769231, "grad_norm": 0.011210096068680286, "learning_rate": 4.824489967917997e-06, "loss": 0.0001, "step": 17766 }, { "epoch": 17.083653846153847, "grad_norm": 0.014175649732351303, "learning_rate": 4.823424143220097e-06, "loss": 0.0001, "step": 17767 }, { "epoch": 17.084615384615386, "grad_norm": 0.014839047566056252, "learning_rate": 4.822358398847466e-06, "loss": 0.0001, "step": 17768 }, { "epoch": 17.085576923076925, "grad_norm": 0.00749996630474925, "learning_rate": 4.82129273481663e-06, "loss": 0.0, "step": 17769 }, { "epoch": 17.08653846153846, "grad_norm": 0.01778406836092472, "learning_rate": 4.820227151144134e-06, "loss": 0.0002, "step": 17770 }, { "epoch": 17.0875, "grad_norm": 0.01511284802109003, "learning_rate": 4.819161647846512e-06, "loss": 0.0001, "step": 17771 }, { "epoch": 17.088461538461537, "grad_norm": 1.6743232011795044, "learning_rate": 4.818096224940293e-06, "loss": 0.0086, "step": 17772 }, { "epoch": 17.089423076923076, "grad_norm": 1.921649694442749, "learning_rate": 4.817030882442011e-06, "loss": 0.006, "step": 17773 }, { "epoch": 17.090384615384615, "grad_norm": 0.011005213484168053, "learning_rate": 4.815965620368202e-06, "loss": 0.0001, "step": 17774 }, { "epoch": 17.091346153846153, "grad_norm": 0.006030161865055561, "learning_rate": 4.8149004387353884e-06, "loss": 0.0001, "step": 17775 }, { "epoch": 17.092307692307692, "grad_norm": 0.004885193891823292, "learning_rate": 4.813835337560102e-06, "loss": 0.0, "step": 17776 }, { "epoch": 17.09326923076923, "grad_norm": 1.637293815612793, "learning_rate": 4.812770316858872e-06, "loss": 0.0025, "step": 17777 }, { "epoch": 17.09423076923077, "grad_norm": 0.00737787364050746, "learning_rate": 4.811705376648222e-06, "loss": 0.0001, "step": 17778 }, { "epoch": 17.095192307692308, "grad_norm": 0.004630297888070345, "learning_rate": 4.810640516944678e-06, "loss": 0.0, "step": 17779 }, { "epoch": 17.096153846153847, "grad_norm": 0.010976647958159447, "learning_rate": 4.809575737764759e-06, "loss": 0.0001, "step": 17780 }, { "epoch": 17.097115384615385, "grad_norm": 0.003931509796530008, "learning_rate": 4.808511039124991e-06, "loss": 0.0, "step": 17781 }, { "epoch": 17.098076923076924, "grad_norm": 0.01630263403058052, "learning_rate": 4.807446421041899e-06, "loss": 0.0002, "step": 17782 }, { "epoch": 17.099038461538463, "grad_norm": 0.013102577067911625, "learning_rate": 4.806381883531995e-06, "loss": 0.0001, "step": 17783 }, { "epoch": 17.1, "grad_norm": 0.0025356553960591555, "learning_rate": 4.805317426611801e-06, "loss": 0.0, "step": 17784 }, { "epoch": 17.10096153846154, "grad_norm": 0.0073504140600562096, "learning_rate": 4.804253050297838e-06, "loss": 0.0001, "step": 17785 }, { "epoch": 17.101923076923075, "grad_norm": 2.2308223247528076, "learning_rate": 4.803188754606615e-06, "loss": 0.0133, "step": 17786 }, { "epoch": 17.102884615384614, "grad_norm": 0.06793929636478424, "learning_rate": 4.802124539554649e-06, "loss": 0.0003, "step": 17787 }, { "epoch": 17.103846153846153, "grad_norm": 0.0015251787845045328, "learning_rate": 4.801060405158459e-06, "loss": 0.0, "step": 17788 }, { "epoch": 17.10480769230769, "grad_norm": 0.008841526694595814, "learning_rate": 4.79999635143455e-06, "loss": 0.0001, "step": 17789 }, { "epoch": 17.10576923076923, "grad_norm": 0.013681650161743164, "learning_rate": 4.798932378399438e-06, "loss": 0.0001, "step": 17790 }, { "epoch": 17.10673076923077, "grad_norm": 0.05692017450928688, "learning_rate": 4.797868486069627e-06, "loss": 0.0004, "step": 17791 }, { "epoch": 17.107692307692307, "grad_norm": 0.03896418958902359, "learning_rate": 4.796804674461629e-06, "loss": 0.0002, "step": 17792 }, { "epoch": 17.108653846153846, "grad_norm": 0.01658407226204872, "learning_rate": 4.795740943591955e-06, "loss": 0.0001, "step": 17793 }, { "epoch": 17.109615384615385, "grad_norm": 0.017231784760951996, "learning_rate": 4.7946772934771046e-06, "loss": 0.0001, "step": 17794 }, { "epoch": 17.110576923076923, "grad_norm": 0.005743945948779583, "learning_rate": 4.793613724133584e-06, "loss": 0.0001, "step": 17795 }, { "epoch": 17.111538461538462, "grad_norm": 0.04179215058684349, "learning_rate": 4.792550235577902e-06, "loss": 0.0003, "step": 17796 }, { "epoch": 17.1125, "grad_norm": 0.016891252249479294, "learning_rate": 4.791486827826554e-06, "loss": 0.0002, "step": 17797 }, { "epoch": 17.11346153846154, "grad_norm": 0.011548035778105259, "learning_rate": 4.790423500896042e-06, "loss": 0.0001, "step": 17798 }, { "epoch": 17.114423076923078, "grad_norm": 0.04337526112794876, "learning_rate": 4.7893602548028715e-06, "loss": 0.0001, "step": 17799 }, { "epoch": 17.115384615384617, "grad_norm": 0.034637726843357086, "learning_rate": 4.788297089563533e-06, "loss": 0.0003, "step": 17800 }, { "epoch": 17.116346153846155, "grad_norm": 0.10892903804779053, "learning_rate": 4.787234005194531e-06, "loss": 0.0007, "step": 17801 }, { "epoch": 17.11730769230769, "grad_norm": 0.0019965176470577717, "learning_rate": 4.786171001712355e-06, "loss": 0.0, "step": 17802 }, { "epoch": 17.11826923076923, "grad_norm": 0.012482616119086742, "learning_rate": 4.785108079133503e-06, "loss": 0.0001, "step": 17803 }, { "epoch": 17.119230769230768, "grad_norm": 0.006695798598229885, "learning_rate": 4.7840452374744705e-06, "loss": 0.0001, "step": 17804 }, { "epoch": 17.120192307692307, "grad_norm": 1.2138326168060303, "learning_rate": 4.7829824767517444e-06, "loss": 0.0036, "step": 17805 }, { "epoch": 17.121153846153845, "grad_norm": 0.003553867805749178, "learning_rate": 4.781919796981818e-06, "loss": 0.0, "step": 17806 }, { "epoch": 17.122115384615384, "grad_norm": 2.268336057662964, "learning_rate": 4.780857198181186e-06, "loss": 0.0561, "step": 17807 }, { "epoch": 17.123076923076923, "grad_norm": 0.012700184248387814, "learning_rate": 4.779794680366328e-06, "loss": 0.0001, "step": 17808 }, { "epoch": 17.12403846153846, "grad_norm": 0.16083286702632904, "learning_rate": 4.778732243553734e-06, "loss": 0.0003, "step": 17809 }, { "epoch": 17.125, "grad_norm": 0.008980821818113327, "learning_rate": 4.777669887759897e-06, "loss": 0.0001, "step": 17810 }, { "epoch": 17.12596153846154, "grad_norm": 0.46953198313713074, "learning_rate": 4.7766076130012914e-06, "loss": 0.0015, "step": 17811 }, { "epoch": 17.126923076923077, "grad_norm": 0.014086696319282055, "learning_rate": 4.775545419294409e-06, "loss": 0.0001, "step": 17812 }, { "epoch": 17.127884615384616, "grad_norm": 0.016973160207271576, "learning_rate": 4.774483306655725e-06, "loss": 0.0001, "step": 17813 }, { "epoch": 17.128846153846155, "grad_norm": 0.04728369414806366, "learning_rate": 4.773421275101724e-06, "loss": 0.0003, "step": 17814 }, { "epoch": 17.129807692307693, "grad_norm": 0.654707133769989, "learning_rate": 4.772359324648888e-06, "loss": 0.0014, "step": 17815 }, { "epoch": 17.130769230769232, "grad_norm": 0.015327240340411663, "learning_rate": 4.771297455313688e-06, "loss": 0.0002, "step": 17816 }, { "epoch": 17.13173076923077, "grad_norm": 0.071592777967453, "learning_rate": 4.770235667112605e-06, "loss": 0.0002, "step": 17817 }, { "epoch": 17.13269230769231, "grad_norm": 0.015693610534071922, "learning_rate": 4.76917396006212e-06, "loss": 0.0001, "step": 17818 }, { "epoch": 17.133653846153845, "grad_norm": 0.012272410094738007, "learning_rate": 4.7681123341787e-06, "loss": 0.0002, "step": 17819 }, { "epoch": 17.134615384615383, "grad_norm": 0.0224534310400486, "learning_rate": 4.767050789478819e-06, "loss": 0.0002, "step": 17820 }, { "epoch": 17.135576923076922, "grad_norm": 0.013817696832120419, "learning_rate": 4.765989325978956e-06, "loss": 0.0001, "step": 17821 }, { "epoch": 17.13653846153846, "grad_norm": 0.05153267830610275, "learning_rate": 4.7649279436955734e-06, "loss": 0.0002, "step": 17822 }, { "epoch": 17.1375, "grad_norm": 0.00555298151448369, "learning_rate": 4.7638666426451445e-06, "loss": 0.0, "step": 17823 }, { "epoch": 17.138461538461538, "grad_norm": 0.01784924603998661, "learning_rate": 4.762805422844141e-06, "loss": 0.0001, "step": 17824 }, { "epoch": 17.139423076923077, "grad_norm": 0.01063491590321064, "learning_rate": 4.761744284309022e-06, "loss": 0.0001, "step": 17825 }, { "epoch": 17.140384615384615, "grad_norm": 0.01155312079936266, "learning_rate": 4.760683227056262e-06, "loss": 0.0001, "step": 17826 }, { "epoch": 17.141346153846154, "grad_norm": 0.015924545004963875, "learning_rate": 4.759622251102317e-06, "loss": 0.0001, "step": 17827 }, { "epoch": 17.142307692307693, "grad_norm": 0.0057242619805037975, "learning_rate": 4.758561356463655e-06, "loss": 0.0001, "step": 17828 }, { "epoch": 17.14326923076923, "grad_norm": 0.01763446070253849, "learning_rate": 4.7575005431567415e-06, "loss": 0.0002, "step": 17829 }, { "epoch": 17.14423076923077, "grad_norm": 2.7229230403900146, "learning_rate": 4.756439811198028e-06, "loss": 0.0069, "step": 17830 }, { "epoch": 17.14519230769231, "grad_norm": 1.6783051490783691, "learning_rate": 4.7553791606039786e-06, "loss": 0.0094, "step": 17831 }, { "epoch": 17.146153846153847, "grad_norm": 0.005250500049442053, "learning_rate": 4.754318591391057e-06, "loss": 0.0001, "step": 17832 }, { "epoch": 17.147115384615386, "grad_norm": 0.06033908575773239, "learning_rate": 4.7532581035757105e-06, "loss": 0.0002, "step": 17833 }, { "epoch": 17.148076923076925, "grad_norm": 0.0029525659047067165, "learning_rate": 4.7521976971744e-06, "loss": 0.0, "step": 17834 }, { "epoch": 17.14903846153846, "grad_norm": 0.00796568300575018, "learning_rate": 4.751137372203582e-06, "loss": 0.0001, "step": 17835 }, { "epoch": 17.15, "grad_norm": 0.003864889731630683, "learning_rate": 4.750077128679703e-06, "loss": 0.0, "step": 17836 }, { "epoch": 17.150961538461537, "grad_norm": 0.002470961306244135, "learning_rate": 4.749016966619223e-06, "loss": 0.0, "step": 17837 }, { "epoch": 17.151923076923076, "grad_norm": 0.003146569477394223, "learning_rate": 4.747956886038584e-06, "loss": 0.0, "step": 17838 }, { "epoch": 17.152884615384615, "grad_norm": 0.006659391336143017, "learning_rate": 4.7468968869542395e-06, "loss": 0.0, "step": 17839 }, { "epoch": 17.153846153846153, "grad_norm": 0.012297851033508778, "learning_rate": 4.745836969382641e-06, "loss": 0.0001, "step": 17840 }, { "epoch": 17.154807692307692, "grad_norm": 0.01290991809219122, "learning_rate": 4.744777133340229e-06, "loss": 0.0001, "step": 17841 }, { "epoch": 17.15576923076923, "grad_norm": 0.03025255911052227, "learning_rate": 4.743717378843452e-06, "loss": 0.0001, "step": 17842 }, { "epoch": 17.15673076923077, "grad_norm": 2.019763469696045, "learning_rate": 4.742657705908758e-06, "loss": 0.0319, "step": 17843 }, { "epoch": 17.157692307692308, "grad_norm": 0.019632931798696518, "learning_rate": 4.7415981145525825e-06, "loss": 0.0002, "step": 17844 }, { "epoch": 17.158653846153847, "grad_norm": 0.012523545883595943, "learning_rate": 4.740538604791371e-06, "loss": 0.0, "step": 17845 }, { "epoch": 17.159615384615385, "grad_norm": 0.015110260806977749, "learning_rate": 4.739479176641567e-06, "loss": 0.0001, "step": 17846 }, { "epoch": 17.160576923076924, "grad_norm": 1.1991280317306519, "learning_rate": 4.738419830119604e-06, "loss": 0.0084, "step": 17847 }, { "epoch": 17.161538461538463, "grad_norm": 0.007867326959967613, "learning_rate": 4.737360565241925e-06, "loss": 0.0001, "step": 17848 }, { "epoch": 17.1625, "grad_norm": 0.006533801555633545, "learning_rate": 4.736301382024963e-06, "loss": 0.0001, "step": 17849 }, { "epoch": 17.16346153846154, "grad_norm": 0.009800208732485771, "learning_rate": 4.7352422804851526e-06, "loss": 0.0001, "step": 17850 }, { "epoch": 17.164423076923075, "grad_norm": 0.025179732590913773, "learning_rate": 4.734183260638935e-06, "loss": 0.0002, "step": 17851 }, { "epoch": 17.165384615384614, "grad_norm": 0.0048680598847568035, "learning_rate": 4.733124322502735e-06, "loss": 0.0, "step": 17852 }, { "epoch": 17.166346153846153, "grad_norm": 0.003285496262833476, "learning_rate": 4.7320654660929854e-06, "loss": 0.0, "step": 17853 }, { "epoch": 17.16730769230769, "grad_norm": 0.029480576515197754, "learning_rate": 4.731006691426124e-06, "loss": 0.0001, "step": 17854 }, { "epoch": 17.16826923076923, "grad_norm": 0.10507030040025711, "learning_rate": 4.729947998518571e-06, "loss": 0.0004, "step": 17855 }, { "epoch": 17.16923076923077, "grad_norm": 0.01141312625259161, "learning_rate": 4.7288893873867555e-06, "loss": 0.0001, "step": 17856 }, { "epoch": 17.170192307692307, "grad_norm": 0.0034989563282579184, "learning_rate": 4.727830858047112e-06, "loss": 0.0, "step": 17857 }, { "epoch": 17.171153846153846, "grad_norm": 0.013141762465238571, "learning_rate": 4.726772410516055e-06, "loss": 0.0001, "step": 17858 }, { "epoch": 17.172115384615385, "grad_norm": 0.00830566044896841, "learning_rate": 4.725714044810018e-06, "loss": 0.0001, "step": 17859 }, { "epoch": 17.173076923076923, "grad_norm": 0.010284804739058018, "learning_rate": 4.7246557609454144e-06, "loss": 0.0001, "step": 17860 }, { "epoch": 17.174038461538462, "grad_norm": 0.05484164506196976, "learning_rate": 4.7235975589386715e-06, "loss": 0.0002, "step": 17861 }, { "epoch": 17.175, "grad_norm": 0.0032618148252367973, "learning_rate": 4.722539438806212e-06, "loss": 0.0, "step": 17862 }, { "epoch": 17.17596153846154, "grad_norm": 0.01979159563779831, "learning_rate": 4.721481400564447e-06, "loss": 0.0002, "step": 17863 }, { "epoch": 17.176923076923078, "grad_norm": 0.0015297180507332087, "learning_rate": 4.720423444229799e-06, "loss": 0.0, "step": 17864 }, { "epoch": 17.177884615384617, "grad_norm": 0.006518831942230463, "learning_rate": 4.719365569818687e-06, "loss": 0.0001, "step": 17865 }, { "epoch": 17.178846153846155, "grad_norm": 0.06084325164556503, "learning_rate": 4.71830777734752e-06, "loss": 0.0002, "step": 17866 }, { "epoch": 17.17980769230769, "grad_norm": 0.00535804545506835, "learning_rate": 4.7172500668327135e-06, "loss": 0.0, "step": 17867 }, { "epoch": 17.18076923076923, "grad_norm": 0.012233900837600231, "learning_rate": 4.716192438290686e-06, "loss": 0.0002, "step": 17868 }, { "epoch": 17.181730769230768, "grad_norm": 0.013937755487859249, "learning_rate": 4.715134891737841e-06, "loss": 0.0001, "step": 17869 }, { "epoch": 17.182692307692307, "grad_norm": 0.9494853615760803, "learning_rate": 4.714077427190593e-06, "loss": 0.0235, "step": 17870 }, { "epoch": 17.183653846153845, "grad_norm": 0.02714342065155506, "learning_rate": 4.713020044665348e-06, "loss": 0.0001, "step": 17871 }, { "epoch": 17.184615384615384, "grad_norm": 0.18963320553302765, "learning_rate": 4.711962744178513e-06, "loss": 0.0004, "step": 17872 }, { "epoch": 17.185576923076923, "grad_norm": 0.009081698022782803, "learning_rate": 4.7109055257465e-06, "loss": 0.0001, "step": 17873 }, { "epoch": 17.18653846153846, "grad_norm": 0.007734983693808317, "learning_rate": 4.709848389385707e-06, "loss": 0.0001, "step": 17874 }, { "epoch": 17.1875, "grad_norm": 0.011249527335166931, "learning_rate": 4.70879133511254e-06, "loss": 0.0001, "step": 17875 }, { "epoch": 17.18846153846154, "grad_norm": 0.14154013991355896, "learning_rate": 4.7077343629434056e-06, "loss": 0.0004, "step": 17876 }, { "epoch": 17.189423076923077, "grad_norm": 0.02196132391691208, "learning_rate": 4.706677472894699e-06, "loss": 0.0001, "step": 17877 }, { "epoch": 17.190384615384616, "grad_norm": 0.08130617439746857, "learning_rate": 4.705620664982821e-06, "loss": 0.0004, "step": 17878 }, { "epoch": 17.191346153846155, "grad_norm": 0.005240296479314566, "learning_rate": 4.704563939224175e-06, "loss": 0.0001, "step": 17879 }, { "epoch": 17.192307692307693, "grad_norm": 0.007623457815498114, "learning_rate": 4.703507295635151e-06, "loss": 0.0001, "step": 17880 }, { "epoch": 17.193269230769232, "grad_norm": 0.049750979989767075, "learning_rate": 4.702450734232154e-06, "loss": 0.0002, "step": 17881 }, { "epoch": 17.19423076923077, "grad_norm": 0.007290957495570183, "learning_rate": 4.701394255031567e-06, "loss": 0.0001, "step": 17882 }, { "epoch": 17.19519230769231, "grad_norm": 0.03497990965843201, "learning_rate": 4.700337858049792e-06, "loss": 0.0001, "step": 17883 }, { "epoch": 17.196153846153845, "grad_norm": 0.004844435956329107, "learning_rate": 4.699281543303222e-06, "loss": 0.0, "step": 17884 }, { "epoch": 17.197115384615383, "grad_norm": 0.0049649374559521675, "learning_rate": 4.698225310808242e-06, "loss": 0.0001, "step": 17885 }, { "epoch": 17.198076923076922, "grad_norm": 0.0011879387311637402, "learning_rate": 4.697169160581244e-06, "loss": 0.0, "step": 17886 }, { "epoch": 17.19903846153846, "grad_norm": 0.9133760333061218, "learning_rate": 4.696113092638621e-06, "loss": 0.0128, "step": 17887 }, { "epoch": 17.2, "grad_norm": 0.008809773251414299, "learning_rate": 4.695057106996753e-06, "loss": 0.0001, "step": 17888 }, { "epoch": 17.200961538461538, "grad_norm": 0.01863475888967514, "learning_rate": 4.694001203672028e-06, "loss": 0.0001, "step": 17889 }, { "epoch": 17.201923076923077, "grad_norm": 0.009642884135246277, "learning_rate": 4.692945382680836e-06, "loss": 0.0001, "step": 17890 }, { "epoch": 17.202884615384615, "grad_norm": 0.45447027683258057, "learning_rate": 4.691889644039551e-06, "loss": 0.0009, "step": 17891 }, { "epoch": 17.203846153846154, "grad_norm": 0.05696214362978935, "learning_rate": 4.690833987764564e-06, "loss": 0.0002, "step": 17892 }, { "epoch": 17.204807692307693, "grad_norm": 0.005724962800741196, "learning_rate": 4.6897784138722465e-06, "loss": 0.0001, "step": 17893 }, { "epoch": 17.20576923076923, "grad_norm": 0.010204282589256763, "learning_rate": 4.688722922378984e-06, "loss": 0.0001, "step": 17894 }, { "epoch": 17.20673076923077, "grad_norm": 0.018364733085036278, "learning_rate": 4.687667513301156e-06, "loss": 0.0001, "step": 17895 }, { "epoch": 17.20769230769231, "grad_norm": 0.009161882102489471, "learning_rate": 4.686612186655134e-06, "loss": 0.0001, "step": 17896 }, { "epoch": 17.208653846153847, "grad_norm": 0.016443584114313126, "learning_rate": 4.685556942457296e-06, "loss": 0.0001, "step": 17897 }, { "epoch": 17.209615384615386, "grad_norm": 0.015727544203400612, "learning_rate": 4.68450178072402e-06, "loss": 0.0002, "step": 17898 }, { "epoch": 17.210576923076925, "grad_norm": 0.016233738511800766, "learning_rate": 4.683446701471671e-06, "loss": 0.0002, "step": 17899 }, { "epoch": 17.21153846153846, "grad_norm": 0.006296033039689064, "learning_rate": 4.682391704716627e-06, "loss": 0.0, "step": 17900 }, { "epoch": 17.2125, "grad_norm": 0.016122572124004364, "learning_rate": 4.681336790475259e-06, "loss": 0.0002, "step": 17901 }, { "epoch": 17.213461538461537, "grad_norm": 0.004985989537090063, "learning_rate": 4.680281958763931e-06, "loss": 0.0001, "step": 17902 }, { "epoch": 17.214423076923076, "grad_norm": 0.015695402398705482, "learning_rate": 4.679227209599017e-06, "loss": 0.0001, "step": 17903 }, { "epoch": 17.215384615384615, "grad_norm": 0.06489034742116928, "learning_rate": 4.678172542996877e-06, "loss": 0.0002, "step": 17904 }, { "epoch": 17.216346153846153, "grad_norm": 0.9332101941108704, "learning_rate": 4.67711795897388e-06, "loss": 0.0062, "step": 17905 }, { "epoch": 17.217307692307692, "grad_norm": 0.026014702394604683, "learning_rate": 4.676063457546394e-06, "loss": 0.0001, "step": 17906 }, { "epoch": 17.21826923076923, "grad_norm": 0.006498751696199179, "learning_rate": 4.675009038730773e-06, "loss": 0.0001, "step": 17907 }, { "epoch": 17.21923076923077, "grad_norm": 0.02081499993801117, "learning_rate": 4.6739547025433825e-06, "loss": 0.0001, "step": 17908 }, { "epoch": 17.220192307692308, "grad_norm": 0.01566452346742153, "learning_rate": 4.672900449000588e-06, "loss": 0.0001, "step": 17909 }, { "epoch": 17.221153846153847, "grad_norm": 0.02749123051762581, "learning_rate": 4.67184627811874e-06, "loss": 0.0001, "step": 17910 }, { "epoch": 17.222115384615385, "grad_norm": 0.004562752787023783, "learning_rate": 4.6707921899142e-06, "loss": 0.0001, "step": 17911 }, { "epoch": 17.223076923076924, "grad_norm": 2.5760607719421387, "learning_rate": 4.669738184403327e-06, "loss": 0.0093, "step": 17912 }, { "epoch": 17.224038461538463, "grad_norm": 2.2515459060668945, "learning_rate": 4.668684261602469e-06, "loss": 0.0098, "step": 17913 }, { "epoch": 17.225, "grad_norm": 0.01997234858572483, "learning_rate": 4.667630421527985e-06, "loss": 0.0002, "step": 17914 }, { "epoch": 17.22596153846154, "grad_norm": 0.030776886269450188, "learning_rate": 4.666576664196231e-06, "loss": 0.0002, "step": 17915 }, { "epoch": 17.226923076923075, "grad_norm": 0.0052758692763745785, "learning_rate": 4.665522989623549e-06, "loss": 0.0001, "step": 17916 }, { "epoch": 17.227884615384614, "grad_norm": 0.002599275903776288, "learning_rate": 4.664469397826297e-06, "loss": 0.0, "step": 17917 }, { "epoch": 17.228846153846153, "grad_norm": 0.006960578262805939, "learning_rate": 4.663415888820818e-06, "loss": 0.0001, "step": 17918 }, { "epoch": 17.22980769230769, "grad_norm": 0.03669502213597298, "learning_rate": 4.662362462623461e-06, "loss": 0.0002, "step": 17919 }, { "epoch": 17.23076923076923, "grad_norm": 0.01897740736603737, "learning_rate": 4.661309119250576e-06, "loss": 0.0001, "step": 17920 }, { "epoch": 17.23173076923077, "grad_norm": 0.009630660526454449, "learning_rate": 4.660255858718502e-06, "loss": 0.0001, "step": 17921 }, { "epoch": 17.232692307692307, "grad_norm": 0.0026222961023449898, "learning_rate": 4.659202681043585e-06, "loss": 0.0, "step": 17922 }, { "epoch": 17.233653846153846, "grad_norm": 0.007212383672595024, "learning_rate": 4.65814958624217e-06, "loss": 0.0001, "step": 17923 }, { "epoch": 17.234615384615385, "grad_norm": 0.007714032661169767, "learning_rate": 4.657096574330593e-06, "loss": 0.0001, "step": 17924 }, { "epoch": 17.235576923076923, "grad_norm": 0.07827863097190857, "learning_rate": 4.656043645325194e-06, "loss": 0.0002, "step": 17925 }, { "epoch": 17.236538461538462, "grad_norm": 0.023843998089432716, "learning_rate": 4.65499079924232e-06, "loss": 0.0002, "step": 17926 }, { "epoch": 17.2375, "grad_norm": 0.021642794832587242, "learning_rate": 4.653938036098295e-06, "loss": 0.0001, "step": 17927 }, { "epoch": 17.23846153846154, "grad_norm": 0.012403003871440887, "learning_rate": 4.6528853559094655e-06, "loss": 0.0001, "step": 17928 }, { "epoch": 17.239423076923078, "grad_norm": 0.013508927077054977, "learning_rate": 4.651832758692158e-06, "loss": 0.0001, "step": 17929 }, { "epoch": 17.240384615384617, "grad_norm": 0.008250223472714424, "learning_rate": 4.650780244462709e-06, "loss": 0.0001, "step": 17930 }, { "epoch": 17.241346153846155, "grad_norm": 0.08138839900493622, "learning_rate": 4.649727813237456e-06, "loss": 0.0004, "step": 17931 }, { "epoch": 17.24230769230769, "grad_norm": 0.08832362294197083, "learning_rate": 4.6486754650327195e-06, "loss": 0.0006, "step": 17932 }, { "epoch": 17.24326923076923, "grad_norm": 0.006965595297515392, "learning_rate": 4.647623199864834e-06, "loss": 0.0001, "step": 17933 }, { "epoch": 17.244230769230768, "grad_norm": 0.0032713874243199825, "learning_rate": 4.6465710177501314e-06, "loss": 0.0, "step": 17934 }, { "epoch": 17.245192307692307, "grad_norm": 0.003902154043316841, "learning_rate": 4.645518918704931e-06, "loss": 0.0001, "step": 17935 }, { "epoch": 17.246153846153845, "grad_norm": 0.020886149257421494, "learning_rate": 4.6444669027455615e-06, "loss": 0.0002, "step": 17936 }, { "epoch": 17.247115384615384, "grad_norm": 0.0077748713083565235, "learning_rate": 4.64341496988835e-06, "loss": 0.0001, "step": 17937 }, { "epoch": 17.248076923076923, "grad_norm": 0.018728984519839287, "learning_rate": 4.642363120149615e-06, "loss": 0.0002, "step": 17938 }, { "epoch": 17.24903846153846, "grad_norm": 0.020897209644317627, "learning_rate": 4.641311353545682e-06, "loss": 0.0002, "step": 17939 }, { "epoch": 17.25, "grad_norm": 0.010266478173434734, "learning_rate": 4.640259670092867e-06, "loss": 0.0002, "step": 17940 }, { "epoch": 17.25096153846154, "grad_norm": 0.00798784289509058, "learning_rate": 4.63920806980749e-06, "loss": 0.0001, "step": 17941 }, { "epoch": 17.251923076923077, "grad_norm": 0.22743305563926697, "learning_rate": 4.638156552705875e-06, "loss": 0.0007, "step": 17942 }, { "epoch": 17.252884615384616, "grad_norm": 0.0065051787532866, "learning_rate": 4.637105118804328e-06, "loss": 0.0001, "step": 17943 }, { "epoch": 17.253846153846155, "grad_norm": 0.006372480653226376, "learning_rate": 4.636053768119171e-06, "loss": 0.0001, "step": 17944 }, { "epoch": 17.254807692307693, "grad_norm": 0.007432890590280294, "learning_rate": 4.63500250066672e-06, "loss": 0.0001, "step": 17945 }, { "epoch": 17.255769230769232, "grad_norm": 0.03131166845560074, "learning_rate": 4.63395131646328e-06, "loss": 0.0003, "step": 17946 }, { "epoch": 17.25673076923077, "grad_norm": 0.0032795702572911978, "learning_rate": 4.632900215525167e-06, "loss": 0.0, "step": 17947 }, { "epoch": 17.25769230769231, "grad_norm": 0.008317750878632069, "learning_rate": 4.631849197868694e-06, "loss": 0.0001, "step": 17948 }, { "epoch": 17.258653846153845, "grad_norm": 0.007659857161343098, "learning_rate": 4.630798263510162e-06, "loss": 0.0001, "step": 17949 }, { "epoch": 17.259615384615383, "grad_norm": 0.005504147615283728, "learning_rate": 4.629747412465886e-06, "loss": 0.0001, "step": 17950 }, { "epoch": 17.260576923076922, "grad_norm": 0.011168242432177067, "learning_rate": 4.628696644752166e-06, "loss": 0.0001, "step": 17951 }, { "epoch": 17.26153846153846, "grad_norm": 0.03150015324354172, "learning_rate": 4.627645960385309e-06, "loss": 0.0002, "step": 17952 }, { "epoch": 17.2625, "grad_norm": 0.008697114884853363, "learning_rate": 4.626595359381623e-06, "loss": 0.0001, "step": 17953 }, { "epoch": 17.263461538461538, "grad_norm": 0.012570805847644806, "learning_rate": 4.625544841757401e-06, "loss": 0.0001, "step": 17954 }, { "epoch": 17.264423076923077, "grad_norm": 0.011269641108810902, "learning_rate": 4.624494407528951e-06, "loss": 0.0001, "step": 17955 }, { "epoch": 17.265384615384615, "grad_norm": 0.008231781423091888, "learning_rate": 4.6234440567125735e-06, "loss": 0.0001, "step": 17956 }, { "epoch": 17.266346153846154, "grad_norm": 0.014114164747297764, "learning_rate": 4.62239378932456e-06, "loss": 0.0002, "step": 17957 }, { "epoch": 17.267307692307693, "grad_norm": 0.01374280359596014, "learning_rate": 4.621343605381215e-06, "loss": 0.0001, "step": 17958 }, { "epoch": 17.26826923076923, "grad_norm": 0.0090884268283844, "learning_rate": 4.620293504898832e-06, "loss": 0.0001, "step": 17959 }, { "epoch": 17.26923076923077, "grad_norm": 1.0204253196716309, "learning_rate": 4.619243487893701e-06, "loss": 0.0033, "step": 17960 }, { "epoch": 17.27019230769231, "grad_norm": 0.01584082655608654, "learning_rate": 4.618193554382123e-06, "loss": 0.0001, "step": 17961 }, { "epoch": 17.271153846153847, "grad_norm": 0.004929093178361654, "learning_rate": 4.617143704380382e-06, "loss": 0.0001, "step": 17962 }, { "epoch": 17.272115384615386, "grad_norm": 0.01073630154132843, "learning_rate": 4.616093937904772e-06, "loss": 0.0001, "step": 17963 }, { "epoch": 17.273076923076925, "grad_norm": 0.007104428019374609, "learning_rate": 4.615044254971585e-06, "loss": 0.0001, "step": 17964 }, { "epoch": 17.27403846153846, "grad_norm": 0.011613664217293262, "learning_rate": 4.613994655597105e-06, "loss": 0.0001, "step": 17965 }, { "epoch": 17.275, "grad_norm": 2.7289607524871826, "learning_rate": 4.612945139797619e-06, "loss": 0.0182, "step": 17966 }, { "epoch": 17.275961538461537, "grad_norm": 0.019721616059541702, "learning_rate": 4.611895707589418e-06, "loss": 0.0002, "step": 17967 }, { "epoch": 17.276923076923076, "grad_norm": 0.015843873843550682, "learning_rate": 4.610846358988778e-06, "loss": 0.0001, "step": 17968 }, { "epoch": 17.277884615384615, "grad_norm": 0.02328641712665558, "learning_rate": 4.609797094011984e-06, "loss": 0.0002, "step": 17969 }, { "epoch": 17.278846153846153, "grad_norm": 0.006775084417313337, "learning_rate": 4.608747912675324e-06, "loss": 0.0001, "step": 17970 }, { "epoch": 17.279807692307692, "grad_norm": 3.5280942916870117, "learning_rate": 4.60769881499507e-06, "loss": 0.0293, "step": 17971 }, { "epoch": 17.28076923076923, "grad_norm": 0.022525599226355553, "learning_rate": 4.606649800987507e-06, "loss": 0.0001, "step": 17972 }, { "epoch": 17.28173076923077, "grad_norm": 1.5822211503982544, "learning_rate": 4.605600870668906e-06, "loss": 0.0131, "step": 17973 }, { "epoch": 17.282692307692308, "grad_norm": 0.025299519300460815, "learning_rate": 4.604552024055547e-06, "loss": 0.0002, "step": 17974 }, { "epoch": 17.283653846153847, "grad_norm": 0.005213301163166761, "learning_rate": 4.60350326116371e-06, "loss": 0.0001, "step": 17975 }, { "epoch": 17.284615384615385, "grad_norm": 0.01708727516233921, "learning_rate": 4.60245458200966e-06, "loss": 0.0001, "step": 17976 }, { "epoch": 17.285576923076924, "grad_norm": 0.01997157745063305, "learning_rate": 4.601405986609673e-06, "loss": 0.0001, "step": 17977 }, { "epoch": 17.286538461538463, "grad_norm": 0.03953486680984497, "learning_rate": 4.600357474980024e-06, "loss": 0.0002, "step": 17978 }, { "epoch": 17.2875, "grad_norm": 0.03739021718502045, "learning_rate": 4.599309047136975e-06, "loss": 0.0002, "step": 17979 }, { "epoch": 17.28846153846154, "grad_norm": 0.01310774963349104, "learning_rate": 4.598260703096799e-06, "loss": 0.0001, "step": 17980 }, { "epoch": 17.289423076923075, "grad_norm": 0.01751311868429184, "learning_rate": 4.597212442875767e-06, "loss": 0.0002, "step": 17981 }, { "epoch": 17.290384615384614, "grad_norm": 0.7945754528045654, "learning_rate": 4.596164266490137e-06, "loss": 0.0025, "step": 17982 }, { "epoch": 17.291346153846153, "grad_norm": 4.1380414962768555, "learning_rate": 4.595116173956181e-06, "loss": 0.0781, "step": 17983 }, { "epoch": 17.29230769230769, "grad_norm": 0.00402545090764761, "learning_rate": 4.594068165290156e-06, "loss": 0.0, "step": 17984 }, { "epoch": 17.29326923076923, "grad_norm": 0.0047834161669015884, "learning_rate": 4.593020240508326e-06, "loss": 0.0, "step": 17985 }, { "epoch": 17.29423076923077, "grad_norm": 0.025127720087766647, "learning_rate": 4.591972399626956e-06, "loss": 0.0002, "step": 17986 }, { "epoch": 17.295192307692307, "grad_norm": 0.010730394162237644, "learning_rate": 4.590924642662299e-06, "loss": 0.0001, "step": 17987 }, { "epoch": 17.296153846153846, "grad_norm": 0.0977415144443512, "learning_rate": 4.589876969630616e-06, "loss": 0.0004, "step": 17988 }, { "epoch": 17.297115384615385, "grad_norm": 0.017458291724324226, "learning_rate": 4.588829380548168e-06, "loss": 0.0002, "step": 17989 }, { "epoch": 17.298076923076923, "grad_norm": 0.05906638875603676, "learning_rate": 4.587781875431202e-06, "loss": 0.0003, "step": 17990 }, { "epoch": 17.299038461538462, "grad_norm": 0.028058430179953575, "learning_rate": 4.5867344542959765e-06, "loss": 0.0003, "step": 17991 }, { "epoch": 17.3, "grad_norm": 0.07460809499025345, "learning_rate": 4.585687117158748e-06, "loss": 0.0004, "step": 17992 }, { "epoch": 17.30096153846154, "grad_norm": 0.03275308385491371, "learning_rate": 4.5846398640357625e-06, "loss": 0.0001, "step": 17993 }, { "epoch": 17.301923076923078, "grad_norm": 0.008261818438768387, "learning_rate": 4.583592694943275e-06, "loss": 0.0001, "step": 17994 }, { "epoch": 17.302884615384617, "grad_norm": 0.6317092776298523, "learning_rate": 4.582545609897528e-06, "loss": 0.0023, "step": 17995 }, { "epoch": 17.303846153846155, "grad_norm": 0.010249985381960869, "learning_rate": 4.581498608914774e-06, "loss": 0.0001, "step": 17996 }, { "epoch": 17.30480769230769, "grad_norm": 0.44368332624435425, "learning_rate": 4.5804516920112606e-06, "loss": 0.0034, "step": 17997 }, { "epoch": 17.30576923076923, "grad_norm": 0.007248151581734419, "learning_rate": 4.579404859203228e-06, "loss": 0.0001, "step": 17998 }, { "epoch": 17.306730769230768, "grad_norm": 0.017516477033495903, "learning_rate": 4.578358110506923e-06, "loss": 0.0002, "step": 17999 }, { "epoch": 17.307692307692307, "grad_norm": 0.026308104395866394, "learning_rate": 4.577311445938591e-06, "loss": 0.0002, "step": 18000 }, { "epoch": 17.308653846153845, "grad_norm": 0.017916448414325714, "learning_rate": 4.576264865514467e-06, "loss": 0.0002, "step": 18001 }, { "epoch": 17.309615384615384, "grad_norm": 0.01652303710579872, "learning_rate": 4.575218369250794e-06, "loss": 0.0001, "step": 18002 }, { "epoch": 17.310576923076923, "grad_norm": 0.009080617688596249, "learning_rate": 4.574171957163814e-06, "loss": 0.0001, "step": 18003 }, { "epoch": 17.31153846153846, "grad_norm": 0.0034498076420277357, "learning_rate": 4.5731256292697554e-06, "loss": 0.0, "step": 18004 }, { "epoch": 17.3125, "grad_norm": 0.0077643138356506824, "learning_rate": 4.57207938558486e-06, "loss": 0.0001, "step": 18005 }, { "epoch": 17.31346153846154, "grad_norm": 0.04117567092180252, "learning_rate": 4.571033226125365e-06, "loss": 0.0002, "step": 18006 }, { "epoch": 17.314423076923077, "grad_norm": 0.5613024234771729, "learning_rate": 4.569987150907499e-06, "loss": 0.0015, "step": 18007 }, { "epoch": 17.315384615384616, "grad_norm": 0.012318874709308147, "learning_rate": 4.568941159947497e-06, "loss": 0.0001, "step": 18008 }, { "epoch": 17.316346153846155, "grad_norm": 0.02005751058459282, "learning_rate": 4.567895253261585e-06, "loss": 0.0001, "step": 18009 }, { "epoch": 17.317307692307693, "grad_norm": 1.6166913509368896, "learning_rate": 4.566849430865996e-06, "loss": 0.0038, "step": 18010 }, { "epoch": 17.318269230769232, "grad_norm": 0.010416863486170769, "learning_rate": 4.565803692776962e-06, "loss": 0.0001, "step": 18011 }, { "epoch": 17.31923076923077, "grad_norm": 0.022694947198033333, "learning_rate": 4.564758039010701e-06, "loss": 0.0002, "step": 18012 }, { "epoch": 17.32019230769231, "grad_norm": 0.005143668968230486, "learning_rate": 4.563712469583443e-06, "loss": 0.0, "step": 18013 }, { "epoch": 17.321153846153845, "grad_norm": 0.03636513277888298, "learning_rate": 4.562666984511416e-06, "loss": 0.0003, "step": 18014 }, { "epoch": 17.322115384615383, "grad_norm": 0.009394745342433453, "learning_rate": 4.5616215838108355e-06, "loss": 0.0001, "step": 18015 }, { "epoch": 17.323076923076922, "grad_norm": 0.0064045460894703865, "learning_rate": 4.560576267497927e-06, "loss": 0.0001, "step": 18016 }, { "epoch": 17.32403846153846, "grad_norm": 0.013664108701050282, "learning_rate": 4.559531035588913e-06, "loss": 0.0001, "step": 18017 }, { "epoch": 17.325, "grad_norm": 0.022413093596696854, "learning_rate": 4.558485888100006e-06, "loss": 0.0002, "step": 18018 }, { "epoch": 17.325961538461538, "grad_norm": 0.07170573621988297, "learning_rate": 4.557440825047432e-06, "loss": 0.0003, "step": 18019 }, { "epoch": 17.326923076923077, "grad_norm": 0.024122394621372223, "learning_rate": 4.556395846447399e-06, "loss": 0.0002, "step": 18020 }, { "epoch": 17.327884615384615, "grad_norm": 0.01153050921857357, "learning_rate": 4.555350952316124e-06, "loss": 0.0001, "step": 18021 }, { "epoch": 17.328846153846154, "grad_norm": 0.005497848615050316, "learning_rate": 4.554306142669828e-06, "loss": 0.0001, "step": 18022 }, { "epoch": 17.329807692307693, "grad_norm": 0.00811450183391571, "learning_rate": 4.553261417524713e-06, "loss": 0.0001, "step": 18023 }, { "epoch": 17.33076923076923, "grad_norm": 0.014542373828589916, "learning_rate": 4.552216776896996e-06, "loss": 0.0001, "step": 18024 }, { "epoch": 17.33173076923077, "grad_norm": 0.022413870319724083, "learning_rate": 4.5511722208028875e-06, "loss": 0.0001, "step": 18025 }, { "epoch": 17.33269230769231, "grad_norm": 0.08955993503332138, "learning_rate": 4.550127749258592e-06, "loss": 0.0006, "step": 18026 }, { "epoch": 17.333653846153847, "grad_norm": 0.40136611461639404, "learning_rate": 4.549083362280318e-06, "loss": 0.0008, "step": 18027 }, { "epoch": 17.334615384615386, "grad_norm": 0.006864347495138645, "learning_rate": 4.548039059884275e-06, "loss": 0.0001, "step": 18028 }, { "epoch": 17.335576923076925, "grad_norm": 0.1318853199481964, "learning_rate": 4.546994842086662e-06, "loss": 0.0008, "step": 18029 }, { "epoch": 17.33653846153846, "grad_norm": 0.025885025039315224, "learning_rate": 4.545950708903687e-06, "loss": 0.0002, "step": 18030 }, { "epoch": 17.3375, "grad_norm": 0.15662555396556854, "learning_rate": 4.544906660351547e-06, "loss": 0.0007, "step": 18031 }, { "epoch": 17.338461538461537, "grad_norm": 1.7176789045333862, "learning_rate": 4.543862696446444e-06, "loss": 0.0094, "step": 18032 }, { "epoch": 17.339423076923076, "grad_norm": 0.2832756042480469, "learning_rate": 4.542818817204583e-06, "loss": 0.0007, "step": 18033 }, { "epoch": 17.340384615384615, "grad_norm": 0.009828454814851284, "learning_rate": 4.541775022642153e-06, "loss": 0.0001, "step": 18034 }, { "epoch": 17.341346153846153, "grad_norm": 0.01572071947157383, "learning_rate": 4.540731312775356e-06, "loss": 0.0001, "step": 18035 }, { "epoch": 17.342307692307692, "grad_norm": 0.004387525375932455, "learning_rate": 4.539687687620389e-06, "loss": 0.0, "step": 18036 }, { "epoch": 17.34326923076923, "grad_norm": 0.28421351313591003, "learning_rate": 4.53864414719344e-06, "loss": 0.0008, "step": 18037 }, { "epoch": 17.34423076923077, "grad_norm": 0.005071557592600584, "learning_rate": 4.537600691510704e-06, "loss": 0.0, "step": 18038 }, { "epoch": 17.345192307692308, "grad_norm": 0.022998539730906487, "learning_rate": 4.536557320588378e-06, "loss": 0.0001, "step": 18039 }, { "epoch": 17.346153846153847, "grad_norm": 2.256655693054199, "learning_rate": 4.535514034442644e-06, "loss": 0.0134, "step": 18040 }, { "epoch": 17.347115384615385, "grad_norm": 0.010632894933223724, "learning_rate": 4.534470833089697e-06, "loss": 0.0001, "step": 18041 }, { "epoch": 17.348076923076924, "grad_norm": 0.024159228429198265, "learning_rate": 4.533427716545719e-06, "loss": 0.0002, "step": 18042 }, { "epoch": 17.349038461538463, "grad_norm": 0.0630040094256401, "learning_rate": 4.532384684826897e-06, "loss": 0.0003, "step": 18043 }, { "epoch": 17.35, "grad_norm": 0.00877399742603302, "learning_rate": 4.531341737949422e-06, "loss": 0.0001, "step": 18044 }, { "epoch": 17.35096153846154, "grad_norm": 0.11342225968837738, "learning_rate": 4.530298875929469e-06, "loss": 0.0005, "step": 18045 }, { "epoch": 17.351923076923075, "grad_norm": 0.05144137516617775, "learning_rate": 4.529256098783225e-06, "loss": 0.0003, "step": 18046 }, { "epoch": 17.352884615384614, "grad_norm": 0.021891050040721893, "learning_rate": 4.528213406526872e-06, "loss": 0.0002, "step": 18047 }, { "epoch": 17.353846153846153, "grad_norm": 0.016049852594733238, "learning_rate": 4.5271707991765844e-06, "loss": 0.0001, "step": 18048 }, { "epoch": 17.35480769230769, "grad_norm": 0.011520983651280403, "learning_rate": 4.526128276748544e-06, "loss": 0.0001, "step": 18049 }, { "epoch": 17.35576923076923, "grad_norm": 0.011618970893323421, "learning_rate": 4.5250858392589304e-06, "loss": 0.0001, "step": 18050 }, { "epoch": 17.35673076923077, "grad_norm": 0.0071635800413787365, "learning_rate": 4.524043486723911e-06, "loss": 0.0, "step": 18051 }, { "epoch": 17.357692307692307, "grad_norm": 0.010963519103825092, "learning_rate": 4.523001219159671e-06, "loss": 0.0001, "step": 18052 }, { "epoch": 17.358653846153846, "grad_norm": 0.007565651088953018, "learning_rate": 4.521959036582372e-06, "loss": 0.0001, "step": 18053 }, { "epoch": 17.359615384615385, "grad_norm": 0.07607382535934448, "learning_rate": 4.520916939008191e-06, "loss": 0.0004, "step": 18054 }, { "epoch": 17.360576923076923, "grad_norm": 0.16503410041332245, "learning_rate": 4.519874926453303e-06, "loss": 0.0008, "step": 18055 }, { "epoch": 17.361538461538462, "grad_norm": 0.006398135796189308, "learning_rate": 4.518832998933868e-06, "loss": 0.0001, "step": 18056 }, { "epoch": 17.3625, "grad_norm": 0.004991207271814346, "learning_rate": 4.517791156466057e-06, "loss": 0.0, "step": 18057 }, { "epoch": 17.36346153846154, "grad_norm": 0.03531956672668457, "learning_rate": 4.51674939906604e-06, "loss": 0.0002, "step": 18058 }, { "epoch": 17.364423076923078, "grad_norm": 0.03320483863353729, "learning_rate": 4.5157077267499775e-06, "loss": 0.0002, "step": 18059 }, { "epoch": 17.365384615384617, "grad_norm": 0.495637446641922, "learning_rate": 4.514666139534034e-06, "loss": 0.0014, "step": 18060 }, { "epoch": 17.366346153846155, "grad_norm": 0.003327338257804513, "learning_rate": 4.513624637434377e-06, "loss": 0.0, "step": 18061 }, { "epoch": 17.36730769230769, "grad_norm": 0.017476415261626244, "learning_rate": 4.512583220467159e-06, "loss": 0.0001, "step": 18062 }, { "epoch": 17.36826923076923, "grad_norm": 0.016124529764056206, "learning_rate": 4.511541888648549e-06, "loss": 0.0001, "step": 18063 }, { "epoch": 17.369230769230768, "grad_norm": 0.009044723585247993, "learning_rate": 4.510500641994695e-06, "loss": 0.0001, "step": 18064 }, { "epoch": 17.370192307692307, "grad_norm": 0.01855248212814331, "learning_rate": 4.509459480521761e-06, "loss": 0.0002, "step": 18065 }, { "epoch": 17.371153846153845, "grad_norm": 0.07096855342388153, "learning_rate": 4.508418404245903e-06, "loss": 0.0004, "step": 18066 }, { "epoch": 17.372115384615384, "grad_norm": 0.013030575588345528, "learning_rate": 4.507377413183271e-06, "loss": 0.0001, "step": 18067 }, { "epoch": 17.373076923076923, "grad_norm": 0.021172339096665382, "learning_rate": 4.506336507350022e-06, "loss": 0.0001, "step": 18068 }, { "epoch": 17.37403846153846, "grad_norm": 2.0254967212677, "learning_rate": 4.50529568676231e-06, "loss": 0.0286, "step": 18069 }, { "epoch": 17.375, "grad_norm": 0.010327314026653767, "learning_rate": 4.504254951436278e-06, "loss": 0.0001, "step": 18070 }, { "epoch": 17.37596153846154, "grad_norm": 0.020897481590509415, "learning_rate": 4.503214301388079e-06, "loss": 0.0002, "step": 18071 }, { "epoch": 17.376923076923077, "grad_norm": 0.026110390201210976, "learning_rate": 4.5021737366338655e-06, "loss": 0.0001, "step": 18072 }, { "epoch": 17.377884615384616, "grad_norm": 0.02166847325861454, "learning_rate": 4.501133257189776e-06, "loss": 0.0001, "step": 18073 }, { "epoch": 17.378846153846155, "grad_norm": 0.013250201009213924, "learning_rate": 4.5000928630719635e-06, "loss": 0.0001, "step": 18074 }, { "epoch": 17.379807692307693, "grad_norm": 0.010106055065989494, "learning_rate": 4.4990525542965635e-06, "loss": 0.0, "step": 18075 }, { "epoch": 17.380769230769232, "grad_norm": 0.00305618392303586, "learning_rate": 4.498012330879722e-06, "loss": 0.0, "step": 18076 }, { "epoch": 17.38173076923077, "grad_norm": 0.006488729268312454, "learning_rate": 4.496972192837585e-06, "loss": 0.0001, "step": 18077 }, { "epoch": 17.38269230769231, "grad_norm": 0.016531765460968018, "learning_rate": 4.495932140186285e-06, "loss": 0.0001, "step": 18078 }, { "epoch": 17.383653846153845, "grad_norm": 0.0018788151210173965, "learning_rate": 4.494892172941965e-06, "loss": 0.0, "step": 18079 }, { "epoch": 17.384615384615383, "grad_norm": 0.012296621687710285, "learning_rate": 4.493852291120764e-06, "loss": 0.0001, "step": 18080 }, { "epoch": 17.385576923076922, "grad_norm": 0.0036525086034089327, "learning_rate": 4.492812494738811e-06, "loss": 0.0, "step": 18081 }, { "epoch": 17.38653846153846, "grad_norm": 0.1546028107404709, "learning_rate": 4.491772783812245e-06, "loss": 0.0007, "step": 18082 }, { "epoch": 17.3875, "grad_norm": 0.03592726215720177, "learning_rate": 4.4907331583572025e-06, "loss": 0.0002, "step": 18083 }, { "epoch": 17.388461538461538, "grad_norm": 0.0077117132022976875, "learning_rate": 4.489693618389808e-06, "loss": 0.0001, "step": 18084 }, { "epoch": 17.389423076923077, "grad_norm": 0.005674021318554878, "learning_rate": 4.488654163926196e-06, "loss": 0.0, "step": 18085 }, { "epoch": 17.390384615384615, "grad_norm": 1.2183456420898438, "learning_rate": 4.4876147949825e-06, "loss": 0.005, "step": 18086 }, { "epoch": 17.391346153846154, "grad_norm": 0.014545897021889687, "learning_rate": 4.48657551157484e-06, "loss": 0.0001, "step": 18087 }, { "epoch": 17.392307692307693, "grad_norm": 0.0026229338254779577, "learning_rate": 4.48553631371935e-06, "loss": 0.0, "step": 18088 }, { "epoch": 17.39326923076923, "grad_norm": 2.480712652206421, "learning_rate": 4.484497201432147e-06, "loss": 0.02, "step": 18089 }, { "epoch": 17.39423076923077, "grad_norm": 0.0017254818230867386, "learning_rate": 4.48345817472936e-06, "loss": 0.0, "step": 18090 }, { "epoch": 17.39519230769231, "grad_norm": 0.03737936541438103, "learning_rate": 4.482419233627116e-06, "loss": 0.0002, "step": 18091 }, { "epoch": 17.396153846153847, "grad_norm": 0.015428909100592136, "learning_rate": 4.481380378141528e-06, "loss": 0.0001, "step": 18092 }, { "epoch": 17.397115384615386, "grad_norm": 0.010505925863981247, "learning_rate": 4.480341608288718e-06, "loss": 0.0001, "step": 18093 }, { "epoch": 17.398076923076925, "grad_norm": 0.025965776294469833, "learning_rate": 4.4793029240848105e-06, "loss": 0.0002, "step": 18094 }, { "epoch": 17.39903846153846, "grad_norm": 0.04470835253596306, "learning_rate": 4.478264325545915e-06, "loss": 0.0002, "step": 18095 }, { "epoch": 17.4, "grad_norm": 0.011923156678676605, "learning_rate": 4.477225812688152e-06, "loss": 0.0001, "step": 18096 }, { "epoch": 17.400961538461537, "grad_norm": 0.01834150403738022, "learning_rate": 4.476187385527639e-06, "loss": 0.0002, "step": 18097 }, { "epoch": 17.401923076923076, "grad_norm": 0.045491304248571396, "learning_rate": 4.47514904408048e-06, "loss": 0.0002, "step": 18098 }, { "epoch": 17.402884615384615, "grad_norm": 0.011495772749185562, "learning_rate": 4.474110788362798e-06, "loss": 0.0001, "step": 18099 }, { "epoch": 17.403846153846153, "grad_norm": 0.012857683002948761, "learning_rate": 4.473072618390694e-06, "loss": 0.0, "step": 18100 }, { "epoch": 17.404807692307692, "grad_norm": 0.3035043478012085, "learning_rate": 4.472034534180281e-06, "loss": 0.0009, "step": 18101 }, { "epoch": 17.40576923076923, "grad_norm": 0.014746044762432575, "learning_rate": 4.470996535747672e-06, "loss": 0.0001, "step": 18102 }, { "epoch": 17.40673076923077, "grad_norm": 0.00664172787219286, "learning_rate": 4.469958623108966e-06, "loss": 0.0, "step": 18103 }, { "epoch": 17.407692307692308, "grad_norm": 0.023472849279642105, "learning_rate": 4.468920796280271e-06, "loss": 0.0001, "step": 18104 }, { "epoch": 17.408653846153847, "grad_norm": 0.024750152602791786, "learning_rate": 4.467883055277696e-06, "loss": 0.0002, "step": 18105 }, { "epoch": 17.409615384615385, "grad_norm": 2.3213438987731934, "learning_rate": 4.466845400117334e-06, "loss": 0.0236, "step": 18106 }, { "epoch": 17.410576923076924, "grad_norm": 0.007408039644360542, "learning_rate": 4.465807830815293e-06, "loss": 0.0001, "step": 18107 }, { "epoch": 17.411538461538463, "grad_norm": 0.012407653033733368, "learning_rate": 4.464770347387676e-06, "loss": 0.0001, "step": 18108 }, { "epoch": 17.4125, "grad_norm": 0.30146509408950806, "learning_rate": 4.463732949850574e-06, "loss": 0.0007, "step": 18109 }, { "epoch": 17.41346153846154, "grad_norm": 0.00560302659869194, "learning_rate": 4.462695638220091e-06, "loss": 0.0, "step": 18110 }, { "epoch": 17.414423076923075, "grad_norm": 0.037756770849227905, "learning_rate": 4.4616584125123155e-06, "loss": 0.0001, "step": 18111 }, { "epoch": 17.415384615384614, "grad_norm": 0.6704052686691284, "learning_rate": 4.460621272743347e-06, "loss": 0.0013, "step": 18112 }, { "epoch": 17.416346153846153, "grad_norm": 0.0030450227204710245, "learning_rate": 4.459584218929283e-06, "loss": 0.0, "step": 18113 }, { "epoch": 17.41730769230769, "grad_norm": 0.0325496606528759, "learning_rate": 4.458547251086206e-06, "loss": 0.0002, "step": 18114 }, { "epoch": 17.41826923076923, "grad_norm": 0.0051955049857497215, "learning_rate": 4.4575103692302125e-06, "loss": 0.0001, "step": 18115 }, { "epoch": 17.41923076923077, "grad_norm": 0.006066983565688133, "learning_rate": 4.456473573377395e-06, "loss": 0.0001, "step": 18116 }, { "epoch": 17.420192307692307, "grad_norm": 0.014007629826664925, "learning_rate": 4.455436863543834e-06, "loss": 0.0002, "step": 18117 }, { "epoch": 17.421153846153846, "grad_norm": 0.2783694863319397, "learning_rate": 4.454400239745619e-06, "loss": 0.0007, "step": 18118 }, { "epoch": 17.422115384615385, "grad_norm": 0.01793671026825905, "learning_rate": 4.45336370199884e-06, "loss": 0.0001, "step": 18119 }, { "epoch": 17.423076923076923, "grad_norm": 0.09528950601816177, "learning_rate": 4.452327250319574e-06, "loss": 0.0004, "step": 18120 }, { "epoch": 17.424038461538462, "grad_norm": 0.03753720596432686, "learning_rate": 4.45129088472391e-06, "loss": 0.0002, "step": 18121 }, { "epoch": 17.425, "grad_norm": 2.858311414718628, "learning_rate": 4.450254605227923e-06, "loss": 0.0155, "step": 18122 }, { "epoch": 17.42596153846154, "grad_norm": 0.10056295990943909, "learning_rate": 4.449218411847696e-06, "loss": 0.0005, "step": 18123 }, { "epoch": 17.426923076923078, "grad_norm": 0.07230237871408463, "learning_rate": 4.448182304599311e-06, "loss": 0.0003, "step": 18124 }, { "epoch": 17.427884615384617, "grad_norm": 0.004112855065613985, "learning_rate": 4.4471462834988385e-06, "loss": 0.0, "step": 18125 }, { "epoch": 17.428846153846155, "grad_norm": 0.009305250830948353, "learning_rate": 4.446110348562359e-06, "loss": 0.0001, "step": 18126 }, { "epoch": 17.42980769230769, "grad_norm": 0.027544507756829262, "learning_rate": 4.44507449980595e-06, "loss": 0.0002, "step": 18127 }, { "epoch": 17.43076923076923, "grad_norm": 0.010012729093432426, "learning_rate": 4.444038737245677e-06, "loss": 0.0001, "step": 18128 }, { "epoch": 17.431730769230768, "grad_norm": 0.012312134727835655, "learning_rate": 4.443003060897617e-06, "loss": 0.0001, "step": 18129 }, { "epoch": 17.432692307692307, "grad_norm": 0.06858751177787781, "learning_rate": 4.441967470777844e-06, "loss": 0.0002, "step": 18130 }, { "epoch": 17.433653846153845, "grad_norm": 0.10126986354589462, "learning_rate": 4.440931966902419e-06, "loss": 0.0003, "step": 18131 }, { "epoch": 17.434615384615384, "grad_norm": 0.011320328339934349, "learning_rate": 4.439896549287418e-06, "loss": 0.0001, "step": 18132 }, { "epoch": 17.435576923076923, "grad_norm": 0.034274738281965256, "learning_rate": 4.4388612179489e-06, "loss": 0.0001, "step": 18133 }, { "epoch": 17.43653846153846, "grad_norm": 0.01154549140483141, "learning_rate": 4.437825972902934e-06, "loss": 0.0001, "step": 18134 }, { "epoch": 17.4375, "grad_norm": 0.011061274446547031, "learning_rate": 4.436790814165587e-06, "loss": 0.0001, "step": 18135 }, { "epoch": 17.43846153846154, "grad_norm": 0.012449994683265686, "learning_rate": 4.435755741752916e-06, "loss": 0.0001, "step": 18136 }, { "epoch": 17.439423076923077, "grad_norm": 0.02177945151925087, "learning_rate": 4.434720755680986e-06, "loss": 0.0001, "step": 18137 }, { "epoch": 17.440384615384616, "grad_norm": 2.1273353099823, "learning_rate": 4.433685855965858e-06, "loss": 0.0051, "step": 18138 }, { "epoch": 17.441346153846155, "grad_norm": 1.1236083507537842, "learning_rate": 4.432651042623586e-06, "loss": 0.0051, "step": 18139 }, { "epoch": 17.442307692307693, "grad_norm": 0.012298566289246082, "learning_rate": 4.4316163156702284e-06, "loss": 0.0001, "step": 18140 }, { "epoch": 17.443269230769232, "grad_norm": 0.04376688972115517, "learning_rate": 4.4305816751218475e-06, "loss": 0.0002, "step": 18141 }, { "epoch": 17.44423076923077, "grad_norm": 0.00581352086737752, "learning_rate": 4.429547120994488e-06, "loss": 0.0, "step": 18142 }, { "epoch": 17.44519230769231, "grad_norm": 0.030158743262290955, "learning_rate": 4.428512653304214e-06, "loss": 0.0001, "step": 18143 }, { "epoch": 17.446153846153845, "grad_norm": 0.016383634880185127, "learning_rate": 4.427478272067066e-06, "loss": 0.0001, "step": 18144 }, { "epoch": 17.447115384615383, "grad_norm": 0.12445656955242157, "learning_rate": 4.426443977299101e-06, "loss": 0.0005, "step": 18145 }, { "epoch": 17.448076923076922, "grad_norm": 0.007114517502486706, "learning_rate": 4.425409769016371e-06, "loss": 0.0001, "step": 18146 }, { "epoch": 17.44903846153846, "grad_norm": 0.012490899302065372, "learning_rate": 4.424375647234915e-06, "loss": 0.0001, "step": 18147 }, { "epoch": 17.45, "grad_norm": 0.003955160267651081, "learning_rate": 4.423341611970786e-06, "loss": 0.0, "step": 18148 }, { "epoch": 17.450961538461538, "grad_norm": 0.01542029157280922, "learning_rate": 4.422307663240032e-06, "loss": 0.0001, "step": 18149 }, { "epoch": 17.451923076923077, "grad_norm": 0.007672539446502924, "learning_rate": 4.421273801058689e-06, "loss": 0.0001, "step": 18150 }, { "epoch": 17.452884615384615, "grad_norm": 0.014693168923258781, "learning_rate": 4.420240025442802e-06, "loss": 0.0001, "step": 18151 }, { "epoch": 17.453846153846154, "grad_norm": 0.008695723488926888, "learning_rate": 4.419206336408418e-06, "loss": 0.0001, "step": 18152 }, { "epoch": 17.454807692307693, "grad_norm": 0.0015028567286208272, "learning_rate": 4.4181727339715675e-06, "loss": 0.0, "step": 18153 }, { "epoch": 17.45576923076923, "grad_norm": 0.009644134901463985, "learning_rate": 4.417139218148299e-06, "loss": 0.0001, "step": 18154 }, { "epoch": 17.45673076923077, "grad_norm": 0.011121578514575958, "learning_rate": 4.4161057889546385e-06, "loss": 0.0001, "step": 18155 }, { "epoch": 17.45769230769231, "grad_norm": 0.00577358016744256, "learning_rate": 4.41507244640663e-06, "loss": 0.0, "step": 18156 }, { "epoch": 17.458653846153847, "grad_norm": 0.08524592220783234, "learning_rate": 4.414039190520308e-06, "loss": 0.0003, "step": 18157 }, { "epoch": 17.459615384615386, "grad_norm": 0.0905742421746254, "learning_rate": 4.4130060213117e-06, "loss": 0.0003, "step": 18158 }, { "epoch": 17.460576923076925, "grad_norm": 0.023318110033869743, "learning_rate": 4.4119729387968405e-06, "loss": 0.0001, "step": 18159 }, { "epoch": 17.46153846153846, "grad_norm": 0.020173702389001846, "learning_rate": 4.4109399429917644e-06, "loss": 0.0001, "step": 18160 }, { "epoch": 17.4625, "grad_norm": 0.008244998753070831, "learning_rate": 4.409907033912492e-06, "loss": 0.0001, "step": 18161 }, { "epoch": 17.463461538461537, "grad_norm": 0.006447482388466597, "learning_rate": 4.408874211575056e-06, "loss": 0.0, "step": 18162 }, { "epoch": 17.464423076923076, "grad_norm": 0.008473577909171581, "learning_rate": 4.407841475995487e-06, "loss": 0.0001, "step": 18163 }, { "epoch": 17.465384615384615, "grad_norm": 0.0032422710210084915, "learning_rate": 4.406808827189801e-06, "loss": 0.0, "step": 18164 }, { "epoch": 17.466346153846153, "grad_norm": 0.013438443653285503, "learning_rate": 4.405776265174031e-06, "loss": 0.0001, "step": 18165 }, { "epoch": 17.467307692307692, "grad_norm": 0.010437650606036186, "learning_rate": 4.40474378996419e-06, "loss": 0.0001, "step": 18166 }, { "epoch": 17.46826923076923, "grad_norm": 0.0030026244930922985, "learning_rate": 4.4037114015763035e-06, "loss": 0.0, "step": 18167 }, { "epoch": 17.46923076923077, "grad_norm": 0.03193191438913345, "learning_rate": 4.402679100026395e-06, "loss": 0.0001, "step": 18168 }, { "epoch": 17.470192307692308, "grad_norm": 0.014982381835579872, "learning_rate": 4.401646885330475e-06, "loss": 0.0001, "step": 18169 }, { "epoch": 17.471153846153847, "grad_norm": 0.01233157329261303, "learning_rate": 4.400614757504565e-06, "loss": 0.0002, "step": 18170 }, { "epoch": 17.472115384615385, "grad_norm": 0.028746241703629494, "learning_rate": 4.399582716564682e-06, "loss": 0.0002, "step": 18171 }, { "epoch": 17.473076923076924, "grad_norm": 1.2689536809921265, "learning_rate": 4.398550762526836e-06, "loss": 0.0099, "step": 18172 }, { "epoch": 17.474038461538463, "grad_norm": 0.07127708196640015, "learning_rate": 4.397518895407043e-06, "loss": 0.0003, "step": 18173 }, { "epoch": 17.475, "grad_norm": 0.015129311941564083, "learning_rate": 4.3964871152213154e-06, "loss": 0.0001, "step": 18174 }, { "epoch": 17.47596153846154, "grad_norm": 0.036464206874370575, "learning_rate": 4.395455421985658e-06, "loss": 0.0002, "step": 18175 }, { "epoch": 17.476923076923075, "grad_norm": 0.023069191724061966, "learning_rate": 4.3944238157160845e-06, "loss": 0.0002, "step": 18176 }, { "epoch": 17.477884615384614, "grad_norm": 1.459122657775879, "learning_rate": 4.393392296428605e-06, "loss": 0.0059, "step": 18177 }, { "epoch": 17.478846153846153, "grad_norm": 0.024093305692076683, "learning_rate": 4.392360864139217e-06, "loss": 0.0002, "step": 18178 }, { "epoch": 17.47980769230769, "grad_norm": 0.026359381154179573, "learning_rate": 4.391329518863935e-06, "loss": 0.0001, "step": 18179 }, { "epoch": 17.48076923076923, "grad_norm": 0.00867327768355608, "learning_rate": 4.390298260618753e-06, "loss": 0.0001, "step": 18180 }, { "epoch": 17.48173076923077, "grad_norm": 0.007770789321511984, "learning_rate": 4.389267089419678e-06, "loss": 0.0001, "step": 18181 }, { "epoch": 17.482692307692307, "grad_norm": 0.015521219931542873, "learning_rate": 4.388236005282714e-06, "loss": 0.0001, "step": 18182 }, { "epoch": 17.483653846153846, "grad_norm": 0.14321845769882202, "learning_rate": 4.3872050082238535e-06, "loss": 0.0003, "step": 18183 }, { "epoch": 17.484615384615385, "grad_norm": 0.0050140079110860825, "learning_rate": 4.386174098259098e-06, "loss": 0.0, "step": 18184 }, { "epoch": 17.485576923076923, "grad_norm": 0.02935330756008625, "learning_rate": 4.385143275404447e-06, "loss": 0.0002, "step": 18185 }, { "epoch": 17.486538461538462, "grad_norm": 0.10061387717723846, "learning_rate": 4.384112539675891e-06, "loss": 0.0004, "step": 18186 }, { "epoch": 17.4875, "grad_norm": 0.02597898244857788, "learning_rate": 4.383081891089425e-06, "loss": 0.0001, "step": 18187 }, { "epoch": 17.48846153846154, "grad_norm": 0.012618948705494404, "learning_rate": 4.3820513296610456e-06, "loss": 0.0001, "step": 18188 }, { "epoch": 17.489423076923078, "grad_norm": 0.007069513201713562, "learning_rate": 4.381020855406738e-06, "loss": 0.0001, "step": 18189 }, { "epoch": 17.490384615384617, "grad_norm": 0.01556241512298584, "learning_rate": 4.379990468342499e-06, "loss": 0.0001, "step": 18190 }, { "epoch": 17.491346153846155, "grad_norm": 1.4109258651733398, "learning_rate": 4.37896016848431e-06, "loss": 0.0064, "step": 18191 }, { "epoch": 17.49230769230769, "grad_norm": 0.8982200622558594, "learning_rate": 4.37792995584816e-06, "loss": 0.0057, "step": 18192 }, { "epoch": 17.49326923076923, "grad_norm": 0.008738057687878609, "learning_rate": 4.376899830450042e-06, "loss": 0.0, "step": 18193 }, { "epoch": 17.494230769230768, "grad_norm": 0.00442667119204998, "learning_rate": 4.375869792305931e-06, "loss": 0.0, "step": 18194 }, { "epoch": 17.495192307692307, "grad_norm": 0.017728203907608986, "learning_rate": 4.374839841431814e-06, "loss": 0.0001, "step": 18195 }, { "epoch": 17.496153846153845, "grad_norm": 1.0050677061080933, "learning_rate": 4.373809977843676e-06, "loss": 0.0048, "step": 18196 }, { "epoch": 17.497115384615384, "grad_norm": 0.3179722726345062, "learning_rate": 4.372780201557491e-06, "loss": 0.0011, "step": 18197 }, { "epoch": 17.498076923076923, "grad_norm": 0.012056848034262657, "learning_rate": 4.371750512589244e-06, "loss": 0.0001, "step": 18198 }, { "epoch": 17.49903846153846, "grad_norm": 0.8209428191184998, "learning_rate": 4.370720910954911e-06, "loss": 0.0046, "step": 18199 }, { "epoch": 17.5, "grad_norm": 0.004361409693956375, "learning_rate": 4.369691396670464e-06, "loss": 0.0001, "step": 18200 }, { "epoch": 17.50096153846154, "grad_norm": 0.006529496982693672, "learning_rate": 4.368661969751887e-06, "loss": 0.0, "step": 18201 }, { "epoch": 17.501923076923077, "grad_norm": 0.005648443475365639, "learning_rate": 4.367632630215145e-06, "loss": 0.0001, "step": 18202 }, { "epoch": 17.502884615384616, "grad_norm": 0.007673238404095173, "learning_rate": 4.366603378076213e-06, "loss": 0.0001, "step": 18203 }, { "epoch": 17.503846153846155, "grad_norm": 0.010329972952604294, "learning_rate": 4.365574213351066e-06, "loss": 0.0001, "step": 18204 }, { "epoch": 17.504807692307693, "grad_norm": 1.3060874938964844, "learning_rate": 4.3645451360556676e-06, "loss": 0.0045, "step": 18205 }, { "epoch": 17.505769230769232, "grad_norm": 0.0068413750268518925, "learning_rate": 4.363516146205988e-06, "loss": 0.0001, "step": 18206 }, { "epoch": 17.50673076923077, "grad_norm": 0.009070907719433308, "learning_rate": 4.362487243818e-06, "loss": 0.0001, "step": 18207 }, { "epoch": 17.50769230769231, "grad_norm": 0.009759614244103432, "learning_rate": 4.3614584289076585e-06, "loss": 0.0001, "step": 18208 }, { "epoch": 17.508653846153845, "grad_norm": 0.08795692026615143, "learning_rate": 4.360429701490935e-06, "loss": 0.0002, "step": 18209 }, { "epoch": 17.509615384615383, "grad_norm": 0.005474523175507784, "learning_rate": 4.359401061583793e-06, "loss": 0.0001, "step": 18210 }, { "epoch": 17.510576923076922, "grad_norm": 0.7326597571372986, "learning_rate": 4.358372509202189e-06, "loss": 0.0025, "step": 18211 }, { "epoch": 17.51153846153846, "grad_norm": 1.92399001121521, "learning_rate": 4.357344044362089e-06, "loss": 0.0144, "step": 18212 }, { "epoch": 17.5125, "grad_norm": 2.5689375400543213, "learning_rate": 4.3563156670794435e-06, "loss": 0.0154, "step": 18213 }, { "epoch": 17.513461538461538, "grad_norm": 0.009054578840732574, "learning_rate": 4.355287377370215e-06, "loss": 0.0, "step": 18214 }, { "epoch": 17.514423076923077, "grad_norm": 1.1828112602233887, "learning_rate": 4.354259175250365e-06, "loss": 0.0089, "step": 18215 }, { "epoch": 17.515384615384615, "grad_norm": 0.009815518744289875, "learning_rate": 4.3532310607358375e-06, "loss": 0.0001, "step": 18216 }, { "epoch": 17.516346153846154, "grad_norm": 0.06625065952539444, "learning_rate": 4.352203033842591e-06, "loss": 0.0006, "step": 18217 }, { "epoch": 17.517307692307693, "grad_norm": 2.934987783432007, "learning_rate": 4.35117509458658e-06, "loss": 0.0088, "step": 18218 }, { "epoch": 17.51826923076923, "grad_norm": 0.031303420662879944, "learning_rate": 4.350147242983751e-06, "loss": 0.0001, "step": 18219 }, { "epoch": 17.51923076923077, "grad_norm": 0.004714601673185825, "learning_rate": 4.349119479050053e-06, "loss": 0.0, "step": 18220 }, { "epoch": 17.52019230769231, "grad_norm": 0.010252865962684155, "learning_rate": 4.348091802801439e-06, "loss": 0.0001, "step": 18221 }, { "epoch": 17.521153846153847, "grad_norm": 0.01356339082121849, "learning_rate": 4.34706421425385e-06, "loss": 0.0001, "step": 18222 }, { "epoch": 17.522115384615386, "grad_norm": 0.22088229656219482, "learning_rate": 4.346036713423236e-06, "loss": 0.0004, "step": 18223 }, { "epoch": 17.523076923076925, "grad_norm": 0.05469876527786255, "learning_rate": 4.3450093003255344e-06, "loss": 0.0002, "step": 18224 }, { "epoch": 17.52403846153846, "grad_norm": 0.011479265056550503, "learning_rate": 4.343981974976691e-06, "loss": 0.0001, "step": 18225 }, { "epoch": 17.525, "grad_norm": 0.009213556535542011, "learning_rate": 4.342954737392652e-06, "loss": 0.0001, "step": 18226 }, { "epoch": 17.525961538461537, "grad_norm": 0.00716823898255825, "learning_rate": 4.341927587589347e-06, "loss": 0.0001, "step": 18227 }, { "epoch": 17.526923076923076, "grad_norm": 0.020306993275880814, "learning_rate": 4.340900525582721e-06, "loss": 0.0001, "step": 18228 }, { "epoch": 17.527884615384615, "grad_norm": 0.027492761611938477, "learning_rate": 4.339873551388711e-06, "loss": 0.0001, "step": 18229 }, { "epoch": 17.528846153846153, "grad_norm": 2.5456602573394775, "learning_rate": 4.338846665023249e-06, "loss": 0.0362, "step": 18230 }, { "epoch": 17.529807692307692, "grad_norm": 0.003965680953115225, "learning_rate": 4.337819866502272e-06, "loss": 0.0, "step": 18231 }, { "epoch": 17.53076923076923, "grad_norm": 0.006330551113933325, "learning_rate": 4.336793155841716e-06, "loss": 0.0001, "step": 18232 }, { "epoch": 17.53173076923077, "grad_norm": 1.8008699417114258, "learning_rate": 4.335766533057505e-06, "loss": 0.0154, "step": 18233 }, { "epoch": 17.532692307692308, "grad_norm": 2.7162086963653564, "learning_rate": 4.334739998165576e-06, "loss": 0.006, "step": 18234 }, { "epoch": 17.533653846153847, "grad_norm": 0.059254687279462814, "learning_rate": 4.3337135511818514e-06, "loss": 0.0003, "step": 18235 }, { "epoch": 17.534615384615385, "grad_norm": 0.02030564472079277, "learning_rate": 4.332687192122263e-06, "loss": 0.0002, "step": 18236 }, { "epoch": 17.535576923076924, "grad_norm": 0.016395412385463715, "learning_rate": 4.33166092100274e-06, "loss": 0.0002, "step": 18237 }, { "epoch": 17.536538461538463, "grad_norm": 0.013784190639853477, "learning_rate": 4.330634737839198e-06, "loss": 0.0001, "step": 18238 }, { "epoch": 17.5375, "grad_norm": 0.010117205791175365, "learning_rate": 4.329608642647566e-06, "loss": 0.0001, "step": 18239 }, { "epoch": 17.53846153846154, "grad_norm": 0.0076291183941066265, "learning_rate": 4.328582635443769e-06, "loss": 0.0, "step": 18240 }, { "epoch": 17.539423076923075, "grad_norm": 0.008785434998571873, "learning_rate": 4.32755671624372e-06, "loss": 0.0001, "step": 18241 }, { "epoch": 17.540384615384614, "grad_norm": 0.04879283159971237, "learning_rate": 4.326530885063344e-06, "loss": 0.0002, "step": 18242 }, { "epoch": 17.541346153846153, "grad_norm": 0.08858811110258102, "learning_rate": 4.325505141918559e-06, "loss": 0.0003, "step": 18243 }, { "epoch": 17.54230769230769, "grad_norm": 0.054540492594242096, "learning_rate": 4.324479486825277e-06, "loss": 0.0002, "step": 18244 }, { "epoch": 17.54326923076923, "grad_norm": 0.009129393845796585, "learning_rate": 4.32345391979942e-06, "loss": 0.0001, "step": 18245 }, { "epoch": 17.54423076923077, "grad_norm": 0.015318349935114384, "learning_rate": 4.322428440856894e-06, "loss": 0.0002, "step": 18246 }, { "epoch": 17.545192307692307, "grad_norm": 0.002451479434967041, "learning_rate": 4.321403050013614e-06, "loss": 0.0, "step": 18247 }, { "epoch": 17.546153846153846, "grad_norm": 0.018772171810269356, "learning_rate": 4.320377747285497e-06, "loss": 0.0001, "step": 18248 }, { "epoch": 17.547115384615385, "grad_norm": 0.02696222811937332, "learning_rate": 4.319352532688444e-06, "loss": 0.0001, "step": 18249 }, { "epoch": 17.548076923076923, "grad_norm": 0.003418014384806156, "learning_rate": 4.318327406238366e-06, "loss": 0.0, "step": 18250 }, { "epoch": 17.549038461538462, "grad_norm": 0.00632573151960969, "learning_rate": 4.3173023679511764e-06, "loss": 0.0, "step": 18251 }, { "epoch": 17.55, "grad_norm": 0.004727635998278856, "learning_rate": 4.316277417842772e-06, "loss": 0.0001, "step": 18252 }, { "epoch": 17.55096153846154, "grad_norm": 0.0008567883633077145, "learning_rate": 4.3152525559290596e-06, "loss": 0.0, "step": 18253 }, { "epoch": 17.551923076923078, "grad_norm": 0.004682592116296291, "learning_rate": 4.314227782225946e-06, "loss": 0.0, "step": 18254 }, { "epoch": 17.552884615384617, "grad_norm": 0.005242725368589163, "learning_rate": 4.313203096749327e-06, "loss": 0.0001, "step": 18255 }, { "epoch": 17.553846153846155, "grad_norm": 0.15842418372631073, "learning_rate": 4.3121784995151096e-06, "loss": 0.0006, "step": 18256 }, { "epoch": 17.55480769230769, "grad_norm": 0.32689476013183594, "learning_rate": 4.311153990539183e-06, "loss": 0.0014, "step": 18257 }, { "epoch": 17.55576923076923, "grad_norm": 0.007748065050691366, "learning_rate": 4.310129569837451e-06, "loss": 0.0001, "step": 18258 }, { "epoch": 17.556730769230768, "grad_norm": 0.042945656925439835, "learning_rate": 4.309105237425812e-06, "loss": 0.0002, "step": 18259 }, { "epoch": 17.557692307692307, "grad_norm": 0.011762735433876514, "learning_rate": 4.308080993320154e-06, "loss": 0.0001, "step": 18260 }, { "epoch": 17.558653846153845, "grad_norm": 0.001249707187525928, "learning_rate": 4.307056837536373e-06, "loss": 0.0, "step": 18261 }, { "epoch": 17.559615384615384, "grad_norm": 0.0023205429315567017, "learning_rate": 4.306032770090365e-06, "loss": 0.0, "step": 18262 }, { "epoch": 17.560576923076923, "grad_norm": 0.0819275975227356, "learning_rate": 4.305008790998013e-06, "loss": 0.0003, "step": 18263 }, { "epoch": 17.56153846153846, "grad_norm": 0.010908330790698528, "learning_rate": 4.30398490027521e-06, "loss": 0.0001, "step": 18264 }, { "epoch": 17.5625, "grad_norm": 0.005553813185542822, "learning_rate": 4.302961097937847e-06, "loss": 0.0001, "step": 18265 }, { "epoch": 17.56346153846154, "grad_norm": 0.03768317401409149, "learning_rate": 4.3019373840018045e-06, "loss": 0.0002, "step": 18266 }, { "epoch": 17.564423076923077, "grad_norm": 0.01045640092343092, "learning_rate": 4.30091375848297e-06, "loss": 0.0001, "step": 18267 }, { "epoch": 17.565384615384616, "grad_norm": 0.27814120054244995, "learning_rate": 4.299890221397229e-06, "loss": 0.0008, "step": 18268 }, { "epoch": 17.566346153846155, "grad_norm": 0.02969186007976532, "learning_rate": 4.298866772760461e-06, "loss": 0.0001, "step": 18269 }, { "epoch": 17.567307692307693, "grad_norm": 0.08137036114931107, "learning_rate": 4.297843412588551e-06, "loss": 0.0002, "step": 18270 }, { "epoch": 17.568269230769232, "grad_norm": 0.007257259450852871, "learning_rate": 4.296820140897371e-06, "loss": 0.0001, "step": 18271 }, { "epoch": 17.56923076923077, "grad_norm": 0.006188945844769478, "learning_rate": 4.295796957702805e-06, "loss": 0.0001, "step": 18272 }, { "epoch": 17.57019230769231, "grad_norm": 0.03506775200366974, "learning_rate": 4.294773863020732e-06, "loss": 0.0001, "step": 18273 }, { "epoch": 17.571153846153845, "grad_norm": 0.05610028654336929, "learning_rate": 4.2937508568670194e-06, "loss": 0.0003, "step": 18274 }, { "epoch": 17.572115384615383, "grad_norm": 0.054810699075460434, "learning_rate": 4.292727939257547e-06, "loss": 0.0001, "step": 18275 }, { "epoch": 17.573076923076922, "grad_norm": 0.02144516073167324, "learning_rate": 4.29170511020819e-06, "loss": 0.0001, "step": 18276 }, { "epoch": 17.57403846153846, "grad_norm": 0.010949035175144672, "learning_rate": 4.290682369734812e-06, "loss": 0.0001, "step": 18277 }, { "epoch": 17.575, "grad_norm": 0.049798931926488876, "learning_rate": 4.289659717853286e-06, "loss": 0.0001, "step": 18278 }, { "epoch": 17.575961538461538, "grad_norm": 0.00557309202849865, "learning_rate": 4.288637154579487e-06, "loss": 0.0001, "step": 18279 }, { "epoch": 17.576923076923077, "grad_norm": 0.12018664926290512, "learning_rate": 4.287614679929272e-06, "loss": 0.0003, "step": 18280 }, { "epoch": 17.577884615384615, "grad_norm": 0.03272665664553642, "learning_rate": 4.286592293918515e-06, "loss": 0.0001, "step": 18281 }, { "epoch": 17.578846153846154, "grad_norm": 0.014937144704163074, "learning_rate": 4.285569996563074e-06, "loss": 0.0001, "step": 18282 }, { "epoch": 17.579807692307693, "grad_norm": 0.005793454125523567, "learning_rate": 4.2845477878788145e-06, "loss": 0.0001, "step": 18283 }, { "epoch": 17.58076923076923, "grad_norm": 0.0026468550786376, "learning_rate": 4.283525667881603e-06, "loss": 0.0, "step": 18284 }, { "epoch": 17.58173076923077, "grad_norm": 0.021569181233644485, "learning_rate": 4.2825036365872904e-06, "loss": 0.0001, "step": 18285 }, { "epoch": 17.58269230769231, "grad_norm": 0.005793552380055189, "learning_rate": 4.281481694011742e-06, "loss": 0.0, "step": 18286 }, { "epoch": 17.583653846153847, "grad_norm": 0.018630996346473694, "learning_rate": 4.280459840170818e-06, "loss": 0.0001, "step": 18287 }, { "epoch": 17.584615384615386, "grad_norm": 0.0038764597848057747, "learning_rate": 4.279438075080366e-06, "loss": 0.0, "step": 18288 }, { "epoch": 17.585576923076925, "grad_norm": 0.007865766994655132, "learning_rate": 4.278416398756245e-06, "loss": 0.0001, "step": 18289 }, { "epoch": 17.58653846153846, "grad_norm": 0.005071599036455154, "learning_rate": 4.2773948112143136e-06, "loss": 0.0, "step": 18290 }, { "epoch": 17.5875, "grad_norm": 0.13975396752357483, "learning_rate": 4.276373312470416e-06, "loss": 0.0006, "step": 18291 }, { "epoch": 17.588461538461537, "grad_norm": 0.016064485535025597, "learning_rate": 4.275351902540408e-06, "loss": 0.0, "step": 18292 }, { "epoch": 17.589423076923076, "grad_norm": 0.007906617596745491, "learning_rate": 4.274330581440133e-06, "loss": 0.0001, "step": 18293 }, { "epoch": 17.590384615384615, "grad_norm": 0.019349707290530205, "learning_rate": 4.273309349185445e-06, "loss": 0.0001, "step": 18294 }, { "epoch": 17.591346153846153, "grad_norm": 0.005323664750903845, "learning_rate": 4.27228820579219e-06, "loss": 0.0, "step": 18295 }, { "epoch": 17.592307692307692, "grad_norm": 0.238200381398201, "learning_rate": 4.271267151276208e-06, "loss": 0.0006, "step": 18296 }, { "epoch": 17.59326923076923, "grad_norm": 0.054238151758909225, "learning_rate": 4.270246185653347e-06, "loss": 0.0002, "step": 18297 }, { "epoch": 17.59423076923077, "grad_norm": 0.011707094497978687, "learning_rate": 4.269225308939451e-06, "loss": 0.0001, "step": 18298 }, { "epoch": 17.595192307692308, "grad_norm": 0.013474909588694572, "learning_rate": 4.268204521150357e-06, "loss": 0.0001, "step": 18299 }, { "epoch": 17.596153846153847, "grad_norm": 0.10584326088428497, "learning_rate": 4.267183822301903e-06, "loss": 0.0004, "step": 18300 }, { "epoch": 17.597115384615385, "grad_norm": 0.2822413444519043, "learning_rate": 4.2661632124099376e-06, "loss": 0.0006, "step": 18301 }, { "epoch": 17.598076923076924, "grad_norm": 0.006305285729467869, "learning_rate": 4.265142691490285e-06, "loss": 0.0, "step": 18302 }, { "epoch": 17.599038461538463, "grad_norm": 0.011168181896209717, "learning_rate": 4.264122259558789e-06, "loss": 0.0001, "step": 18303 }, { "epoch": 17.6, "grad_norm": 0.029253607615828514, "learning_rate": 4.263101916631279e-06, "loss": 0.0001, "step": 18304 }, { "epoch": 17.60096153846154, "grad_norm": 1.1719332933425903, "learning_rate": 4.26208166272359e-06, "loss": 0.0033, "step": 18305 }, { "epoch": 17.601923076923075, "grad_norm": 1.7557789087295532, "learning_rate": 4.261061497851555e-06, "loss": 0.0036, "step": 18306 }, { "epoch": 17.602884615384614, "grad_norm": 0.00539802759885788, "learning_rate": 4.2600414220309984e-06, "loss": 0.0001, "step": 18307 }, { "epoch": 17.603846153846153, "grad_norm": 0.004603093955665827, "learning_rate": 4.259021435277753e-06, "loss": 0.0, "step": 18308 }, { "epoch": 17.60480769230769, "grad_norm": 0.061984650790691376, "learning_rate": 4.258001537607649e-06, "loss": 0.0002, "step": 18309 }, { "epoch": 17.60576923076923, "grad_norm": 0.014317168854176998, "learning_rate": 4.256981729036506e-06, "loss": 0.0001, "step": 18310 }, { "epoch": 17.60673076923077, "grad_norm": 2.5534675121307373, "learning_rate": 4.255962009580149e-06, "loss": 0.013, "step": 18311 }, { "epoch": 17.607692307692307, "grad_norm": 0.005087571684271097, "learning_rate": 4.254942379254407e-06, "loss": 0.0, "step": 18312 }, { "epoch": 17.608653846153846, "grad_norm": 0.05545276775956154, "learning_rate": 4.2539228380750955e-06, "loss": 0.0002, "step": 18313 }, { "epoch": 17.609615384615385, "grad_norm": 0.08146870881319046, "learning_rate": 4.252903386058039e-06, "loss": 0.0003, "step": 18314 }, { "epoch": 17.610576923076923, "grad_norm": 0.016694186255335808, "learning_rate": 4.251884023219053e-06, "loss": 0.0001, "step": 18315 }, { "epoch": 17.611538461538462, "grad_norm": 0.009778212755918503, "learning_rate": 4.250864749573954e-06, "loss": 0.0001, "step": 18316 }, { "epoch": 17.6125, "grad_norm": 0.35311922430992126, "learning_rate": 4.2498455651385665e-06, "loss": 0.0008, "step": 18317 }, { "epoch": 17.61346153846154, "grad_norm": 0.0028949191328138113, "learning_rate": 4.248826469928694e-06, "loss": 0.0, "step": 18318 }, { "epoch": 17.614423076923078, "grad_norm": 0.009602759033441544, "learning_rate": 4.247807463960155e-06, "loss": 0.0001, "step": 18319 }, { "epoch": 17.615384615384617, "grad_norm": 0.16689369082450867, "learning_rate": 4.246788547248767e-06, "loss": 0.0005, "step": 18320 }, { "epoch": 17.616346153846155, "grad_norm": 0.017369549721479416, "learning_rate": 4.24576971981033e-06, "loss": 0.0001, "step": 18321 }, { "epoch": 17.61730769230769, "grad_norm": 0.002118364442139864, "learning_rate": 4.244750981660659e-06, "loss": 0.0, "step": 18322 }, { "epoch": 17.61826923076923, "grad_norm": 0.01011042483150959, "learning_rate": 4.243732332815565e-06, "loss": 0.0001, "step": 18323 }, { "epoch": 17.619230769230768, "grad_norm": 0.00888893660157919, "learning_rate": 4.242713773290847e-06, "loss": 0.0001, "step": 18324 }, { "epoch": 17.620192307692307, "grad_norm": 0.08770190179347992, "learning_rate": 4.241695303102318e-06, "loss": 0.0002, "step": 18325 }, { "epoch": 17.621153846153845, "grad_norm": 0.006903496105223894, "learning_rate": 4.240676922265774e-06, "loss": 0.0, "step": 18326 }, { "epoch": 17.622115384615384, "grad_norm": 0.003359247697517276, "learning_rate": 4.23965863079702e-06, "loss": 0.0, "step": 18327 }, { "epoch": 17.623076923076923, "grad_norm": 0.010320722125470638, "learning_rate": 4.238640428711861e-06, "loss": 0.0001, "step": 18328 }, { "epoch": 17.62403846153846, "grad_norm": 0.06908413767814636, "learning_rate": 4.237622316026091e-06, "loss": 0.0003, "step": 18329 }, { "epoch": 17.625, "grad_norm": 0.003989419899880886, "learning_rate": 4.236604292755509e-06, "loss": 0.0, "step": 18330 }, { "epoch": 17.62596153846154, "grad_norm": 0.0024485306348651648, "learning_rate": 4.235586358915917e-06, "loss": 0.0, "step": 18331 }, { "epoch": 17.626923076923077, "grad_norm": 0.005542843136936426, "learning_rate": 4.234568514523102e-06, "loss": 0.0, "step": 18332 }, { "epoch": 17.627884615384616, "grad_norm": 0.2970423698425293, "learning_rate": 4.233550759592864e-06, "loss": 0.0006, "step": 18333 }, { "epoch": 17.628846153846155, "grad_norm": 0.0012309261364862323, "learning_rate": 4.232533094140996e-06, "loss": 0.0, "step": 18334 }, { "epoch": 17.629807692307693, "grad_norm": 0.004930038470774889, "learning_rate": 4.231515518183283e-06, "loss": 0.0001, "step": 18335 }, { "epoch": 17.630769230769232, "grad_norm": 0.023018015548586845, "learning_rate": 4.230498031735525e-06, "loss": 0.0001, "step": 18336 }, { "epoch": 17.63173076923077, "grad_norm": 0.012819909490644932, "learning_rate": 4.229480634813499e-06, "loss": 0.0001, "step": 18337 }, { "epoch": 17.63269230769231, "grad_norm": 0.003368289675563574, "learning_rate": 4.228463327432996e-06, "loss": 0.0, "step": 18338 }, { "epoch": 17.633653846153845, "grad_norm": 0.015910783782601357, "learning_rate": 4.2274461096098085e-06, "loss": 0.0001, "step": 18339 }, { "epoch": 17.634615384615383, "grad_norm": 0.006872151046991348, "learning_rate": 4.226428981359711e-06, "loss": 0.0, "step": 18340 }, { "epoch": 17.635576923076922, "grad_norm": 0.010353261604905128, "learning_rate": 4.225411942698491e-06, "loss": 0.0001, "step": 18341 }, { "epoch": 17.63653846153846, "grad_norm": 0.002376979449763894, "learning_rate": 4.224394993641931e-06, "loss": 0.0, "step": 18342 }, { "epoch": 17.6375, "grad_norm": 0.004403914324939251, "learning_rate": 4.223378134205808e-06, "loss": 0.0, "step": 18343 }, { "epoch": 17.638461538461538, "grad_norm": 0.006559750065207481, "learning_rate": 4.222361364405901e-06, "loss": 0.0, "step": 18344 }, { "epoch": 17.639423076923077, "grad_norm": 0.006117833312600851, "learning_rate": 4.221344684257993e-06, "loss": 0.0001, "step": 18345 }, { "epoch": 17.640384615384615, "grad_norm": 0.005876644980162382, "learning_rate": 4.220328093777851e-06, "loss": 0.0, "step": 18346 }, { "epoch": 17.641346153846154, "grad_norm": 0.008543195202946663, "learning_rate": 4.219311592981258e-06, "loss": 0.0001, "step": 18347 }, { "epoch": 17.642307692307693, "grad_norm": 0.011133153922855854, "learning_rate": 4.218295181883979e-06, "loss": 0.0001, "step": 18348 }, { "epoch": 17.64326923076923, "grad_norm": 0.021829308941960335, "learning_rate": 4.21727886050179e-06, "loss": 0.0001, "step": 18349 }, { "epoch": 17.64423076923077, "grad_norm": 0.005645363591611385, "learning_rate": 4.216262628850465e-06, "loss": 0.0, "step": 18350 }, { "epoch": 17.64519230769231, "grad_norm": 0.005638898350298405, "learning_rate": 4.215246486945764e-06, "loss": 0.0, "step": 18351 }, { "epoch": 17.646153846153847, "grad_norm": 0.06430815160274506, "learning_rate": 4.21423043480346e-06, "loss": 0.0001, "step": 18352 }, { "epoch": 17.647115384615386, "grad_norm": 0.7338520884513855, "learning_rate": 4.2132144724393235e-06, "loss": 0.001, "step": 18353 }, { "epoch": 17.648076923076925, "grad_norm": 0.1651390790939331, "learning_rate": 4.21219859986911e-06, "loss": 0.0003, "step": 18354 }, { "epoch": 17.64903846153846, "grad_norm": 0.01772896945476532, "learning_rate": 4.2111828171085865e-06, "loss": 0.0001, "step": 18355 }, { "epoch": 17.65, "grad_norm": 0.013802597299218178, "learning_rate": 4.2101671241735195e-06, "loss": 0.0001, "step": 18356 }, { "epoch": 17.650961538461537, "grad_norm": 0.00692724296823144, "learning_rate": 4.209151521079662e-06, "loss": 0.0, "step": 18357 }, { "epoch": 17.651923076923076, "grad_norm": 0.0022804022300988436, "learning_rate": 4.208136007842778e-06, "loss": 0.0, "step": 18358 }, { "epoch": 17.652884615384615, "grad_norm": 0.09109822660684586, "learning_rate": 4.207120584478627e-06, "loss": 0.0002, "step": 18359 }, { "epoch": 17.653846153846153, "grad_norm": 0.017102062702178955, "learning_rate": 4.206105251002959e-06, "loss": 0.0001, "step": 18360 }, { "epoch": 17.654807692307692, "grad_norm": 0.005253711715340614, "learning_rate": 4.205090007431537e-06, "loss": 0.0001, "step": 18361 }, { "epoch": 17.65576923076923, "grad_norm": 0.0026107802987098694, "learning_rate": 4.204074853780106e-06, "loss": 0.0, "step": 18362 }, { "epoch": 17.65673076923077, "grad_norm": 0.0050068809650838375, "learning_rate": 4.2030597900644224e-06, "loss": 0.0, "step": 18363 }, { "epoch": 17.657692307692308, "grad_norm": 0.007202147971838713, "learning_rate": 4.202044816300241e-06, "loss": 0.0001, "step": 18364 }, { "epoch": 17.658653846153847, "grad_norm": 0.061550483107566833, "learning_rate": 4.201029932503303e-06, "loss": 0.0003, "step": 18365 }, { "epoch": 17.659615384615385, "grad_norm": 0.0015348898014053702, "learning_rate": 4.200015138689362e-06, "loss": 0.0, "step": 18366 }, { "epoch": 17.660576923076924, "grad_norm": 1.1134413480758667, "learning_rate": 4.1990004348741656e-06, "loss": 0.0144, "step": 18367 }, { "epoch": 17.661538461538463, "grad_norm": 0.012576255947351456, "learning_rate": 4.1979858210734534e-06, "loss": 0.0, "step": 18368 }, { "epoch": 17.6625, "grad_norm": 0.018537044525146484, "learning_rate": 4.196971297302972e-06, "loss": 0.0001, "step": 18369 }, { "epoch": 17.66346153846154, "grad_norm": 0.02135458216071129, "learning_rate": 4.1959568635784685e-06, "loss": 0.0001, "step": 18370 }, { "epoch": 17.664423076923075, "grad_norm": 0.38398516178131104, "learning_rate": 4.1949425199156765e-06, "loss": 0.0007, "step": 18371 }, { "epoch": 17.665384615384614, "grad_norm": 0.0038655658718198538, "learning_rate": 4.1939282663303425e-06, "loss": 0.0, "step": 18372 }, { "epoch": 17.666346153846153, "grad_norm": 0.025817904621362686, "learning_rate": 4.192914102838198e-06, "loss": 0.0001, "step": 18373 }, { "epoch": 17.66730769230769, "grad_norm": 0.0030830372124910355, "learning_rate": 4.1919000294549825e-06, "loss": 0.0, "step": 18374 }, { "epoch": 17.66826923076923, "grad_norm": 0.004719175398349762, "learning_rate": 4.190886046196435e-06, "loss": 0.0, "step": 18375 }, { "epoch": 17.66923076923077, "grad_norm": 0.00710099283605814, "learning_rate": 4.189872153078284e-06, "loss": 0.0001, "step": 18376 }, { "epoch": 17.670192307692307, "grad_norm": 0.00395023450255394, "learning_rate": 4.1888583501162635e-06, "loss": 0.0, "step": 18377 }, { "epoch": 17.671153846153846, "grad_norm": 0.0033047671895474195, "learning_rate": 4.18784463732611e-06, "loss": 0.0, "step": 18378 }, { "epoch": 17.672115384615385, "grad_norm": 0.005763009190559387, "learning_rate": 4.186831014723547e-06, "loss": 0.0, "step": 18379 }, { "epoch": 17.673076923076923, "grad_norm": 0.051440320909023285, "learning_rate": 4.185817482324304e-06, "loss": 0.0002, "step": 18380 }, { "epoch": 17.674038461538462, "grad_norm": 2.2597546577453613, "learning_rate": 4.184804040144113e-06, "loss": 0.0216, "step": 18381 }, { "epoch": 17.675, "grad_norm": 0.004486331716179848, "learning_rate": 4.1837906881986925e-06, "loss": 0.0, "step": 18382 }, { "epoch": 17.67596153846154, "grad_norm": 0.002103540813550353, "learning_rate": 4.182777426503774e-06, "loss": 0.0, "step": 18383 }, { "epoch": 17.676923076923078, "grad_norm": 0.10066370666027069, "learning_rate": 4.181764255075072e-06, "loss": 0.0002, "step": 18384 }, { "epoch": 17.677884615384617, "grad_norm": 0.005724078975617886, "learning_rate": 4.180751173928312e-06, "loss": 0.0001, "step": 18385 }, { "epoch": 17.678846153846155, "grad_norm": 0.005483873654156923, "learning_rate": 4.17973818307922e-06, "loss": 0.0, "step": 18386 }, { "epoch": 17.67980769230769, "grad_norm": 0.3251258134841919, "learning_rate": 4.178725282543503e-06, "loss": 0.0005, "step": 18387 }, { "epoch": 17.68076923076923, "grad_norm": 0.022907769307494164, "learning_rate": 4.177712472336886e-06, "loss": 0.0001, "step": 18388 }, { "epoch": 17.681730769230768, "grad_norm": 0.008757470175623894, "learning_rate": 4.176699752475087e-06, "loss": 0.0001, "step": 18389 }, { "epoch": 17.682692307692307, "grad_norm": 0.007400408387184143, "learning_rate": 4.175687122973813e-06, "loss": 0.0001, "step": 18390 }, { "epoch": 17.683653846153845, "grad_norm": 0.00459336256608367, "learning_rate": 4.17467458384878e-06, "loss": 0.0001, "step": 18391 }, { "epoch": 17.684615384615384, "grad_norm": 0.0034887748770415783, "learning_rate": 4.173662135115705e-06, "loss": 0.0, "step": 18392 }, { "epoch": 17.685576923076923, "grad_norm": 0.0010063799563795328, "learning_rate": 4.172649776790288e-06, "loss": 0.0, "step": 18393 }, { "epoch": 17.68653846153846, "grad_norm": 0.05283578112721443, "learning_rate": 4.171637508888249e-06, "loss": 0.0003, "step": 18394 }, { "epoch": 17.6875, "grad_norm": 0.006394773256033659, "learning_rate": 4.170625331425285e-06, "loss": 0.0001, "step": 18395 }, { "epoch": 17.68846153846154, "grad_norm": 0.03676005080342293, "learning_rate": 4.169613244417108e-06, "loss": 0.0001, "step": 18396 }, { "epoch": 17.689423076923077, "grad_norm": 0.00488309795036912, "learning_rate": 4.1686012478794245e-06, "loss": 0.0, "step": 18397 }, { "epoch": 17.690384615384616, "grad_norm": 0.005773233715444803, "learning_rate": 4.167589341827931e-06, "loss": 0.0001, "step": 18398 }, { "epoch": 17.691346153846155, "grad_norm": 0.3745814263820648, "learning_rate": 4.1665775262783325e-06, "loss": 0.0011, "step": 18399 }, { "epoch": 17.692307692307693, "grad_norm": 0.010341736488044262, "learning_rate": 4.165565801246335e-06, "loss": 0.0001, "step": 18400 }, { "epoch": 17.693269230769232, "grad_norm": 0.0992266908288002, "learning_rate": 4.1645541667476276e-06, "loss": 0.0003, "step": 18401 }, { "epoch": 17.69423076923077, "grad_norm": 0.008744918741285801, "learning_rate": 4.1635426227979125e-06, "loss": 0.0, "step": 18402 }, { "epoch": 17.69519230769231, "grad_norm": 0.025399096310138702, "learning_rate": 4.16253116941289e-06, "loss": 0.0002, "step": 18403 }, { "epoch": 17.696153846153845, "grad_norm": 0.02974947728216648, "learning_rate": 4.1615198066082475e-06, "loss": 0.0002, "step": 18404 }, { "epoch": 17.697115384615383, "grad_norm": 0.005143851973116398, "learning_rate": 4.160508534399685e-06, "loss": 0.0, "step": 18405 }, { "epoch": 17.698076923076922, "grad_norm": 0.08679857105016708, "learning_rate": 4.159497352802889e-06, "loss": 0.0004, "step": 18406 }, { "epoch": 17.69903846153846, "grad_norm": 0.08070356398820877, "learning_rate": 4.158486261833552e-06, "loss": 0.0003, "step": 18407 }, { "epoch": 17.7, "grad_norm": 0.7679235339164734, "learning_rate": 4.157475261507366e-06, "loss": 0.0017, "step": 18408 }, { "epoch": 17.700961538461538, "grad_norm": 0.2160414457321167, "learning_rate": 4.1564643518400135e-06, "loss": 0.0005, "step": 18409 }, { "epoch": 17.701923076923077, "grad_norm": 2.374567985534668, "learning_rate": 4.155453532847184e-06, "loss": 0.0077, "step": 18410 }, { "epoch": 17.702884615384615, "grad_norm": 0.0031584720127284527, "learning_rate": 4.154442804544566e-06, "loss": 0.0, "step": 18411 }, { "epoch": 17.703846153846154, "grad_norm": 0.023099791258573532, "learning_rate": 4.153432166947835e-06, "loss": 0.0001, "step": 18412 }, { "epoch": 17.704807692307693, "grad_norm": 0.009074104018509388, "learning_rate": 4.1524216200726775e-06, "loss": 0.0001, "step": 18413 }, { "epoch": 17.70576923076923, "grad_norm": 0.0035171201452612877, "learning_rate": 4.151411163934778e-06, "loss": 0.0, "step": 18414 }, { "epoch": 17.70673076923077, "grad_norm": 0.022030534222722054, "learning_rate": 4.150400798549806e-06, "loss": 0.0001, "step": 18415 }, { "epoch": 17.70769230769231, "grad_norm": 0.012896769680082798, "learning_rate": 4.149390523933451e-06, "loss": 0.0001, "step": 18416 }, { "epoch": 17.708653846153847, "grad_norm": 4.04883337020874, "learning_rate": 4.14838034010138e-06, "loss": 0.0738, "step": 18417 }, { "epoch": 17.709615384615386, "grad_norm": 0.01936250552535057, "learning_rate": 4.147370247069271e-06, "loss": 0.0002, "step": 18418 }, { "epoch": 17.710576923076925, "grad_norm": 0.02483467198908329, "learning_rate": 4.146360244852803e-06, "loss": 0.0001, "step": 18419 }, { "epoch": 17.71153846153846, "grad_norm": 0.0034850353840738535, "learning_rate": 4.14535033346764e-06, "loss": 0.0, "step": 18420 }, { "epoch": 17.7125, "grad_norm": 0.01024471316486597, "learning_rate": 4.144340512929457e-06, "loss": 0.0001, "step": 18421 }, { "epoch": 17.713461538461537, "grad_norm": 0.005409283097833395, "learning_rate": 4.1433307832539246e-06, "loss": 0.0001, "step": 18422 }, { "epoch": 17.714423076923076, "grad_norm": 0.0031641689129173756, "learning_rate": 4.142321144456708e-06, "loss": 0.0, "step": 18423 }, { "epoch": 17.715384615384615, "grad_norm": 0.007763978559523821, "learning_rate": 4.141311596553475e-06, "loss": 0.0001, "step": 18424 }, { "epoch": 17.716346153846153, "grad_norm": 0.0439925342798233, "learning_rate": 4.140302139559893e-06, "loss": 0.0002, "step": 18425 }, { "epoch": 17.717307692307692, "grad_norm": 0.0013491989811882377, "learning_rate": 4.139292773491621e-06, "loss": 0.0, "step": 18426 }, { "epoch": 17.71826923076923, "grad_norm": 0.0021484887693077326, "learning_rate": 4.138283498364328e-06, "loss": 0.0, "step": 18427 }, { "epoch": 17.71923076923077, "grad_norm": 0.03753453865647316, "learning_rate": 4.137274314193668e-06, "loss": 0.0001, "step": 18428 }, { "epoch": 17.720192307692308, "grad_norm": 0.6448591351509094, "learning_rate": 4.136265220995303e-06, "loss": 0.0099, "step": 18429 }, { "epoch": 17.721153846153847, "grad_norm": 0.010906868614256382, "learning_rate": 4.135256218784896e-06, "loss": 0.0, "step": 18430 }, { "epoch": 17.722115384615385, "grad_norm": 0.027129309251904488, "learning_rate": 4.134247307578096e-06, "loss": 0.0001, "step": 18431 }, { "epoch": 17.723076923076924, "grad_norm": 0.0021046744659543037, "learning_rate": 4.133238487390562e-06, "loss": 0.0, "step": 18432 }, { "epoch": 17.724038461538463, "grad_norm": 0.02523224428296089, "learning_rate": 4.132229758237951e-06, "loss": 0.0002, "step": 18433 }, { "epoch": 17.725, "grad_norm": 0.01667129248380661, "learning_rate": 4.131221120135909e-06, "loss": 0.0001, "step": 18434 }, { "epoch": 17.72596153846154, "grad_norm": 0.033907659351825714, "learning_rate": 4.130212573100091e-06, "loss": 0.0001, "step": 18435 }, { "epoch": 17.726923076923075, "grad_norm": 0.0035653349477797747, "learning_rate": 4.129204117146149e-06, "loss": 0.0, "step": 18436 }, { "epoch": 17.727884615384614, "grad_norm": 1.8861417770385742, "learning_rate": 4.128195752289726e-06, "loss": 0.0724, "step": 18437 }, { "epoch": 17.728846153846153, "grad_norm": 0.029339762404561043, "learning_rate": 4.127187478546475e-06, "loss": 0.0003, "step": 18438 }, { "epoch": 17.72980769230769, "grad_norm": 0.013561954721808434, "learning_rate": 4.126179295932033e-06, "loss": 0.0001, "step": 18439 }, { "epoch": 17.73076923076923, "grad_norm": 0.018191080540418625, "learning_rate": 4.12517120446205e-06, "loss": 0.0001, "step": 18440 }, { "epoch": 17.73173076923077, "grad_norm": 0.0047810557298362255, "learning_rate": 4.1241632041521705e-06, "loss": 0.0, "step": 18441 }, { "epoch": 17.732692307692307, "grad_norm": 0.030124440789222717, "learning_rate": 4.12315529501803e-06, "loss": 0.0002, "step": 18442 }, { "epoch": 17.733653846153846, "grad_norm": 0.023811889812350273, "learning_rate": 4.12214747707527e-06, "loss": 0.0001, "step": 18443 }, { "epoch": 17.734615384615385, "grad_norm": 1.1002169847488403, "learning_rate": 4.121139750339534e-06, "loss": 0.0392, "step": 18444 }, { "epoch": 17.735576923076923, "grad_norm": 0.008498787879943848, "learning_rate": 4.120132114826451e-06, "loss": 0.0001, "step": 18445 }, { "epoch": 17.736538461538462, "grad_norm": 0.008937932550907135, "learning_rate": 4.11912457055166e-06, "loss": 0.0001, "step": 18446 }, { "epoch": 17.7375, "grad_norm": 0.03583892062306404, "learning_rate": 4.1181171175308e-06, "loss": 0.0002, "step": 18447 }, { "epoch": 17.73846153846154, "grad_norm": 0.1243559718132019, "learning_rate": 4.117109755779495e-06, "loss": 0.0004, "step": 18448 }, { "epoch": 17.739423076923078, "grad_norm": 0.04446059837937355, "learning_rate": 4.11610248531338e-06, "loss": 0.0001, "step": 18449 }, { "epoch": 17.740384615384617, "grad_norm": 0.006137951742857695, "learning_rate": 4.115095306148089e-06, "loss": 0.0, "step": 18450 }, { "epoch": 17.741346153846155, "grad_norm": 0.002122317673638463, "learning_rate": 4.114088218299244e-06, "loss": 0.0, "step": 18451 }, { "epoch": 17.74230769230769, "grad_norm": 0.02544153295457363, "learning_rate": 4.113081221782479e-06, "loss": 0.0001, "step": 18452 }, { "epoch": 17.74326923076923, "grad_norm": 0.006008478347212076, "learning_rate": 4.1120743166134115e-06, "loss": 0.0001, "step": 18453 }, { "epoch": 17.744230769230768, "grad_norm": 0.007070722058415413, "learning_rate": 4.11106750280767e-06, "loss": 0.0, "step": 18454 }, { "epoch": 17.745192307692307, "grad_norm": 0.07330655306577682, "learning_rate": 4.11006078038088e-06, "loss": 0.0003, "step": 18455 }, { "epoch": 17.746153846153845, "grad_norm": 3.3439507484436035, "learning_rate": 4.1090541493486555e-06, "loss": 0.0855, "step": 18456 }, { "epoch": 17.747115384615384, "grad_norm": 0.0071983919478952885, "learning_rate": 4.108047609726622e-06, "loss": 0.0001, "step": 18457 }, { "epoch": 17.748076923076923, "grad_norm": 0.00935355108231306, "learning_rate": 4.1070411615304005e-06, "loss": 0.0001, "step": 18458 }, { "epoch": 17.74903846153846, "grad_norm": 0.003334739012643695, "learning_rate": 4.106034804775602e-06, "loss": 0.0, "step": 18459 }, { "epoch": 17.75, "grad_norm": 0.038224827498197556, "learning_rate": 4.1050285394778434e-06, "loss": 0.0002, "step": 18460 }, { "epoch": 17.75096153846154, "grad_norm": 0.023202909156680107, "learning_rate": 4.104022365652744e-06, "loss": 0.0001, "step": 18461 }, { "epoch": 17.751923076923077, "grad_norm": 3.718531608581543, "learning_rate": 4.10301628331591e-06, "loss": 0.0741, "step": 18462 }, { "epoch": 17.752884615384616, "grad_norm": 0.003943440970033407, "learning_rate": 4.102010292482958e-06, "loss": 0.0, "step": 18463 }, { "epoch": 17.753846153846155, "grad_norm": 0.019344789907336235, "learning_rate": 4.101004393169493e-06, "loss": 0.0002, "step": 18464 }, { "epoch": 17.754807692307693, "grad_norm": 0.025072716176509857, "learning_rate": 4.099998585391128e-06, "loss": 0.0001, "step": 18465 }, { "epoch": 17.755769230769232, "grad_norm": 0.06261126697063446, "learning_rate": 4.098992869163471e-06, "loss": 0.0002, "step": 18466 }, { "epoch": 17.75673076923077, "grad_norm": 0.007149905432015657, "learning_rate": 4.097987244502121e-06, "loss": 0.0001, "step": 18467 }, { "epoch": 17.75769230769231, "grad_norm": 1.8778526782989502, "learning_rate": 4.096981711422687e-06, "loss": 0.1054, "step": 18468 }, { "epoch": 17.758653846153845, "grad_norm": 0.009073624387383461, "learning_rate": 4.095976269940777e-06, "loss": 0.0001, "step": 18469 }, { "epoch": 17.759615384615383, "grad_norm": 0.012907014228403568, "learning_rate": 4.094970920071982e-06, "loss": 0.0002, "step": 18470 }, { "epoch": 17.760576923076922, "grad_norm": 0.04229477792978287, "learning_rate": 4.093965661831908e-06, "loss": 0.0002, "step": 18471 }, { "epoch": 17.76153846153846, "grad_norm": 0.022644339129328728, "learning_rate": 4.092960495236155e-06, "loss": 0.0001, "step": 18472 }, { "epoch": 17.7625, "grad_norm": 0.06778798997402191, "learning_rate": 4.091955420300318e-06, "loss": 0.0002, "step": 18473 }, { "epoch": 17.763461538461538, "grad_norm": 0.25289708375930786, "learning_rate": 4.0909504370399935e-06, "loss": 0.0007, "step": 18474 }, { "epoch": 17.764423076923077, "grad_norm": 0.007114344276487827, "learning_rate": 4.089945545470772e-06, "loss": 0.0001, "step": 18475 }, { "epoch": 17.765384615384615, "grad_norm": 0.0024964548647403717, "learning_rate": 4.088940745608251e-06, "loss": 0.0, "step": 18476 }, { "epoch": 17.766346153846154, "grad_norm": 0.020776623860001564, "learning_rate": 4.087936037468024e-06, "loss": 0.0001, "step": 18477 }, { "epoch": 17.767307692307693, "grad_norm": 0.6450389623641968, "learning_rate": 4.086931421065674e-06, "loss": 0.0017, "step": 18478 }, { "epoch": 17.76826923076923, "grad_norm": 0.9541122913360596, "learning_rate": 4.085926896416794e-06, "loss": 0.0021, "step": 18479 }, { "epoch": 17.76923076923077, "grad_norm": 0.009732297621667385, "learning_rate": 4.084922463536975e-06, "loss": 0.0001, "step": 18480 }, { "epoch": 17.77019230769231, "grad_norm": 0.032140351831912994, "learning_rate": 4.083918122441795e-06, "loss": 0.0003, "step": 18481 }, { "epoch": 17.771153846153847, "grad_norm": 0.0055771139450371265, "learning_rate": 4.082913873146842e-06, "loss": 0.0001, "step": 18482 }, { "epoch": 17.772115384615386, "grad_norm": 0.03468100354075432, "learning_rate": 4.081909715667704e-06, "loss": 0.0002, "step": 18483 }, { "epoch": 17.773076923076925, "grad_norm": 0.012575430795550346, "learning_rate": 4.080905650019953e-06, "loss": 0.0001, "step": 18484 }, { "epoch": 17.77403846153846, "grad_norm": 0.02808641642332077, "learning_rate": 4.079901676219177e-06, "loss": 0.0002, "step": 18485 }, { "epoch": 17.775, "grad_norm": 0.0075686173513531685, "learning_rate": 4.078897794280949e-06, "loss": 0.0001, "step": 18486 }, { "epoch": 17.775961538461537, "grad_norm": 0.0040846290066838264, "learning_rate": 4.07789400422085e-06, "loss": 0.0, "step": 18487 }, { "epoch": 17.776923076923076, "grad_norm": 0.30985647439956665, "learning_rate": 4.076890306054456e-06, "loss": 0.0007, "step": 18488 }, { "epoch": 17.777884615384615, "grad_norm": 0.0050567020662128925, "learning_rate": 4.075886699797338e-06, "loss": 0.0001, "step": 18489 }, { "epoch": 17.778846153846153, "grad_norm": 0.1143902838230133, "learning_rate": 4.074883185465072e-06, "loss": 0.0004, "step": 18490 }, { "epoch": 17.779807692307692, "grad_norm": 0.0039059182163327932, "learning_rate": 4.0738797630732315e-06, "loss": 0.0, "step": 18491 }, { "epoch": 17.78076923076923, "grad_norm": 0.0065145972184836864, "learning_rate": 4.072876432637382e-06, "loss": 0.0001, "step": 18492 }, { "epoch": 17.78173076923077, "grad_norm": 1.3533692359924316, "learning_rate": 4.071873194173093e-06, "loss": 0.0374, "step": 18493 }, { "epoch": 17.782692307692308, "grad_norm": 0.04217763617634773, "learning_rate": 4.070870047695937e-06, "loss": 0.0001, "step": 18494 }, { "epoch": 17.783653846153847, "grad_norm": 1.0385600328445435, "learning_rate": 4.069866993221473e-06, "loss": 0.0452, "step": 18495 }, { "epoch": 17.784615384615385, "grad_norm": 0.015981093049049377, "learning_rate": 4.068864030765272e-06, "loss": 0.0002, "step": 18496 }, { "epoch": 17.785576923076924, "grad_norm": 0.018226834014058113, "learning_rate": 4.0678611603428884e-06, "loss": 0.0002, "step": 18497 }, { "epoch": 17.786538461538463, "grad_norm": 0.006343357730656862, "learning_rate": 4.066858381969891e-06, "loss": 0.0, "step": 18498 }, { "epoch": 17.7875, "grad_norm": 0.004664428997784853, "learning_rate": 4.065855695661841e-06, "loss": 0.0001, "step": 18499 }, { "epoch": 17.78846153846154, "grad_norm": 0.049858201295137405, "learning_rate": 4.064853101434291e-06, "loss": 0.0003, "step": 18500 }, { "epoch": 17.789423076923075, "grad_norm": 2.3796885013580322, "learning_rate": 4.0638505993028e-06, "loss": 0.0118, "step": 18501 }, { "epoch": 17.790384615384614, "grad_norm": 0.03070755861699581, "learning_rate": 4.06284818928293e-06, "loss": 0.0002, "step": 18502 }, { "epoch": 17.791346153846153, "grad_norm": 0.023913806304335594, "learning_rate": 4.061845871390226e-06, "loss": 0.0001, "step": 18503 }, { "epoch": 17.79230769230769, "grad_norm": 0.005583753809332848, "learning_rate": 4.060843645640247e-06, "loss": 0.0001, "step": 18504 }, { "epoch": 17.79326923076923, "grad_norm": 0.1333419680595398, "learning_rate": 4.059841512048547e-06, "loss": 0.0004, "step": 18505 }, { "epoch": 17.79423076923077, "grad_norm": 0.10722877830266953, "learning_rate": 4.058839470630669e-06, "loss": 0.0003, "step": 18506 }, { "epoch": 17.795192307692307, "grad_norm": 0.012659948319196701, "learning_rate": 4.057837521402168e-06, "loss": 0.0002, "step": 18507 }, { "epoch": 17.796153846153846, "grad_norm": 0.01913222298026085, "learning_rate": 4.056835664378585e-06, "loss": 0.0002, "step": 18508 }, { "epoch": 17.797115384615385, "grad_norm": 0.22284676134586334, "learning_rate": 4.055833899575471e-06, "loss": 0.001, "step": 18509 }, { "epoch": 17.798076923076923, "grad_norm": 0.03138593211770058, "learning_rate": 4.054832227008372e-06, "loss": 0.0003, "step": 18510 }, { "epoch": 17.799038461538462, "grad_norm": 0.007906624116003513, "learning_rate": 4.0538306466928234e-06, "loss": 0.0001, "step": 18511 }, { "epoch": 17.8, "grad_norm": 0.013047018088400364, "learning_rate": 4.052829158644372e-06, "loss": 0.0001, "step": 18512 }, { "epoch": 17.80096153846154, "grad_norm": 0.004956424236297607, "learning_rate": 4.051827762878561e-06, "loss": 0.0, "step": 18513 }, { "epoch": 17.801923076923078, "grad_norm": 0.01006262842565775, "learning_rate": 4.050826459410922e-06, "loss": 0.0001, "step": 18514 }, { "epoch": 17.802884615384617, "grad_norm": 2.2668099403381348, "learning_rate": 4.049825248256996e-06, "loss": 0.0372, "step": 18515 }, { "epoch": 17.803846153846155, "grad_norm": 0.009576805867254734, "learning_rate": 4.048824129432321e-06, "loss": 0.0001, "step": 18516 }, { "epoch": 17.80480769230769, "grad_norm": 0.012952432967722416, "learning_rate": 4.0478231029524265e-06, "loss": 0.0001, "step": 18517 }, { "epoch": 17.80576923076923, "grad_norm": 0.2998853921890259, "learning_rate": 4.046822168832851e-06, "loss": 0.0013, "step": 18518 }, { "epoch": 17.806730769230768, "grad_norm": 0.31752994656562805, "learning_rate": 4.04582132708912e-06, "loss": 0.0007, "step": 18519 }, { "epoch": 17.807692307692307, "grad_norm": 0.009338662028312683, "learning_rate": 4.044820577736766e-06, "loss": 0.0001, "step": 18520 }, { "epoch": 17.808653846153845, "grad_norm": 0.0181001964956522, "learning_rate": 4.043819920791322e-06, "loss": 0.0001, "step": 18521 }, { "epoch": 17.809615384615384, "grad_norm": 1.1841691732406616, "learning_rate": 4.042819356268305e-06, "loss": 0.0187, "step": 18522 }, { "epoch": 17.810576923076923, "grad_norm": 0.15691006183624268, "learning_rate": 4.041818884183255e-06, "loss": 0.0005, "step": 18523 }, { "epoch": 17.81153846153846, "grad_norm": 0.013946385122835636, "learning_rate": 4.040818504551685e-06, "loss": 0.0001, "step": 18524 }, { "epoch": 17.8125, "grad_norm": 0.00869714841246605, "learning_rate": 4.039818217389121e-06, "loss": 0.0001, "step": 18525 }, { "epoch": 17.81346153846154, "grad_norm": 0.012383373454213142, "learning_rate": 4.03881802271109e-06, "loss": 0.0001, "step": 18526 }, { "epoch": 17.814423076923077, "grad_norm": 0.029484275728464127, "learning_rate": 4.037817920533103e-06, "loss": 0.0002, "step": 18527 }, { "epoch": 17.815384615384616, "grad_norm": 0.06896044313907623, "learning_rate": 4.036817910870684e-06, "loss": 0.0003, "step": 18528 }, { "epoch": 17.816346153846155, "grad_norm": 0.04065089300274849, "learning_rate": 4.035817993739351e-06, "loss": 0.0003, "step": 18529 }, { "epoch": 17.817307692307693, "grad_norm": 0.02288931980729103, "learning_rate": 4.0348181691546174e-06, "loss": 0.0003, "step": 18530 }, { "epoch": 17.818269230769232, "grad_norm": 0.005237898789346218, "learning_rate": 4.033818437131997e-06, "loss": 0.0, "step": 18531 }, { "epoch": 17.81923076923077, "grad_norm": 0.02625882439315319, "learning_rate": 4.032818797687006e-06, "loss": 0.0001, "step": 18532 }, { "epoch": 17.82019230769231, "grad_norm": 0.3675229847431183, "learning_rate": 4.031819250835152e-06, "loss": 0.0012, "step": 18533 }, { "epoch": 17.821153846153845, "grad_norm": 0.004995664115995169, "learning_rate": 4.03081979659195e-06, "loss": 0.0001, "step": 18534 }, { "epoch": 17.822115384615383, "grad_norm": 0.03721816465258598, "learning_rate": 4.0298204349729005e-06, "loss": 0.0001, "step": 18535 }, { "epoch": 17.823076923076922, "grad_norm": 0.016653019934892654, "learning_rate": 4.028821165993517e-06, "loss": 0.0001, "step": 18536 }, { "epoch": 17.82403846153846, "grad_norm": 0.0063097733072936535, "learning_rate": 4.0278219896693075e-06, "loss": 0.0001, "step": 18537 }, { "epoch": 17.825, "grad_norm": 0.06614052504301071, "learning_rate": 4.026822906015768e-06, "loss": 0.0002, "step": 18538 }, { "epoch": 17.825961538461538, "grad_norm": 0.0029923897236585617, "learning_rate": 4.025823915048407e-06, "loss": 0.0, "step": 18539 }, { "epoch": 17.826923076923077, "grad_norm": 0.03584948182106018, "learning_rate": 4.024825016782727e-06, "loss": 0.0002, "step": 18540 }, { "epoch": 17.827884615384615, "grad_norm": 0.020775508135557175, "learning_rate": 4.023826211234224e-06, "loss": 0.0002, "step": 18541 }, { "epoch": 17.828846153846154, "grad_norm": 0.06312381476163864, "learning_rate": 4.022827498418398e-06, "loss": 0.0002, "step": 18542 }, { "epoch": 17.829807692307693, "grad_norm": 0.011874898336827755, "learning_rate": 4.021828878350749e-06, "loss": 0.0001, "step": 18543 }, { "epoch": 17.83076923076923, "grad_norm": 0.011095676571130753, "learning_rate": 4.0208303510467674e-06, "loss": 0.0001, "step": 18544 }, { "epoch": 17.83173076923077, "grad_norm": 0.012994090095162392, "learning_rate": 4.019831916521954e-06, "loss": 0.0002, "step": 18545 }, { "epoch": 17.83269230769231, "grad_norm": 0.006453045178204775, "learning_rate": 4.018833574791792e-06, "loss": 0.0, "step": 18546 }, { "epoch": 17.833653846153847, "grad_norm": 0.04290956258773804, "learning_rate": 4.017835325871781e-06, "loss": 0.0003, "step": 18547 }, { "epoch": 17.834615384615386, "grad_norm": 0.0073314993642270565, "learning_rate": 4.01683716977741e-06, "loss": 0.0, "step": 18548 }, { "epoch": 17.835576923076925, "grad_norm": 0.025986218824982643, "learning_rate": 4.015839106524163e-06, "loss": 0.0001, "step": 18549 }, { "epoch": 17.83653846153846, "grad_norm": 0.019221095368266106, "learning_rate": 4.0148411361275295e-06, "loss": 0.0002, "step": 18550 }, { "epoch": 17.8375, "grad_norm": 0.006723691243678331, "learning_rate": 4.013843258603e-06, "loss": 0.0, "step": 18551 }, { "epoch": 17.838461538461537, "grad_norm": 0.10791342705488205, "learning_rate": 4.012845473966048e-06, "loss": 0.0005, "step": 18552 }, { "epoch": 17.839423076923076, "grad_norm": 0.06041784957051277, "learning_rate": 4.011847782232164e-06, "loss": 0.0003, "step": 18553 }, { "epoch": 17.840384615384615, "grad_norm": 0.21694430708885193, "learning_rate": 4.010850183416831e-06, "loss": 0.0006, "step": 18554 }, { "epoch": 17.841346153846153, "grad_norm": 0.009157106280326843, "learning_rate": 4.009852677535521e-06, "loss": 0.0001, "step": 18555 }, { "epoch": 17.842307692307692, "grad_norm": 0.0007941314252093434, "learning_rate": 4.00885526460372e-06, "loss": 0.0, "step": 18556 }, { "epoch": 17.84326923076923, "grad_norm": 0.002922964049503207, "learning_rate": 4.007857944636898e-06, "loss": 0.0, "step": 18557 }, { "epoch": 17.84423076923077, "grad_norm": 0.06846824288368225, "learning_rate": 4.006860717650535e-06, "loss": 0.0003, "step": 18558 }, { "epoch": 17.845192307692308, "grad_norm": 1.6871587038040161, "learning_rate": 4.005863583660107e-06, "loss": 0.009, "step": 18559 }, { "epoch": 17.846153846153847, "grad_norm": 0.029876193031668663, "learning_rate": 4.004866542681079e-06, "loss": 0.0001, "step": 18560 }, { "epoch": 17.847115384615385, "grad_norm": 0.07531745731830597, "learning_rate": 4.003869594728929e-06, "loss": 0.0002, "step": 18561 }, { "epoch": 17.848076923076924, "grad_norm": 0.05426331236958504, "learning_rate": 4.0028727398191255e-06, "loss": 0.0002, "step": 18562 }, { "epoch": 17.849038461538463, "grad_norm": 0.027744019404053688, "learning_rate": 4.001875977967133e-06, "loss": 0.0002, "step": 18563 }, { "epoch": 17.85, "grad_norm": 0.06272545456886292, "learning_rate": 4.000879309188423e-06, "loss": 0.0003, "step": 18564 }, { "epoch": 17.85096153846154, "grad_norm": 0.014903758652508259, "learning_rate": 3.999882733498461e-06, "loss": 0.0002, "step": 18565 }, { "epoch": 17.851923076923075, "grad_norm": 0.9192079305648804, "learning_rate": 3.998886250912703e-06, "loss": 0.0052, "step": 18566 }, { "epoch": 17.852884615384614, "grad_norm": 0.018157977610826492, "learning_rate": 3.99788986144662e-06, "loss": 0.0002, "step": 18567 }, { "epoch": 17.853846153846153, "grad_norm": 0.01473076269030571, "learning_rate": 3.9968935651156734e-06, "loss": 0.0001, "step": 18568 }, { "epoch": 17.85480769230769, "grad_norm": 0.08145707845687866, "learning_rate": 3.995897361935316e-06, "loss": 0.0004, "step": 18569 }, { "epoch": 17.85576923076923, "grad_norm": 0.0253555066883564, "learning_rate": 3.994901251921014e-06, "loss": 0.0001, "step": 18570 }, { "epoch": 17.85673076923077, "grad_norm": 0.00917801819741726, "learning_rate": 3.9939052350882156e-06, "loss": 0.0001, "step": 18571 }, { "epoch": 17.857692307692307, "grad_norm": 0.009574607014656067, "learning_rate": 3.99290931145238e-06, "loss": 0.0001, "step": 18572 }, { "epoch": 17.858653846153846, "grad_norm": 0.013791593722999096, "learning_rate": 3.991913481028965e-06, "loss": 0.0002, "step": 18573 }, { "epoch": 17.859615384615385, "grad_norm": 0.14873382449150085, "learning_rate": 3.990917743833417e-06, "loss": 0.0007, "step": 18574 }, { "epoch": 17.860576923076923, "grad_norm": 0.18971458077430725, "learning_rate": 3.989922099881187e-06, "loss": 0.0009, "step": 18575 }, { "epoch": 17.861538461538462, "grad_norm": 0.01058618351817131, "learning_rate": 3.988926549187732e-06, "loss": 0.0001, "step": 18576 }, { "epoch": 17.8625, "grad_norm": 0.02385430969297886, "learning_rate": 3.98793109176849e-06, "loss": 0.0002, "step": 18577 }, { "epoch": 17.86346153846154, "grad_norm": 1.4594333171844482, "learning_rate": 3.986935727638913e-06, "loss": 0.011, "step": 18578 }, { "epoch": 17.864423076923078, "grad_norm": 0.020067820325493813, "learning_rate": 3.985940456814448e-06, "loss": 0.0001, "step": 18579 }, { "epoch": 17.865384615384617, "grad_norm": 0.008537973277270794, "learning_rate": 3.9849452793105335e-06, "loss": 0.0001, "step": 18580 }, { "epoch": 17.866346153846155, "grad_norm": 0.04988083988428116, "learning_rate": 3.983950195142618e-06, "loss": 0.0001, "step": 18581 }, { "epoch": 17.86730769230769, "grad_norm": 0.007038178853690624, "learning_rate": 3.9829552043261335e-06, "loss": 0.0, "step": 18582 }, { "epoch": 17.86826923076923, "grad_norm": 0.019763005897402763, "learning_rate": 3.981960306876526e-06, "loss": 0.0002, "step": 18583 }, { "epoch": 17.869230769230768, "grad_norm": 0.6613258719444275, "learning_rate": 3.980965502809234e-06, "loss": 0.0016, "step": 18584 }, { "epoch": 17.870192307692307, "grad_norm": 0.008773078210651875, "learning_rate": 3.979970792139689e-06, "loss": 0.0001, "step": 18585 }, { "epoch": 17.871153846153845, "grad_norm": 0.011050792410969734, "learning_rate": 3.978976174883329e-06, "loss": 0.0001, "step": 18586 }, { "epoch": 17.872115384615384, "grad_norm": 0.008828721009194851, "learning_rate": 3.97798165105559e-06, "loss": 0.0001, "step": 18587 }, { "epoch": 17.873076923076923, "grad_norm": 0.08187434822320938, "learning_rate": 3.976987220671899e-06, "loss": 0.0004, "step": 18588 }, { "epoch": 17.87403846153846, "grad_norm": 0.021894194185733795, "learning_rate": 3.975992883747688e-06, "loss": 0.0001, "step": 18589 }, { "epoch": 17.875, "grad_norm": 0.028379930183291435, "learning_rate": 3.974998640298391e-06, "loss": 0.0002, "step": 18590 }, { "epoch": 17.87596153846154, "grad_norm": 0.01945532113313675, "learning_rate": 3.9740044903394285e-06, "loss": 0.0001, "step": 18591 }, { "epoch": 17.876923076923077, "grad_norm": 0.971234917640686, "learning_rate": 3.973010433886233e-06, "loss": 0.0029, "step": 18592 }, { "epoch": 17.877884615384616, "grad_norm": 0.03093622624874115, "learning_rate": 3.972016470954224e-06, "loss": 0.0002, "step": 18593 }, { "epoch": 17.878846153846155, "grad_norm": 0.04348507896065712, "learning_rate": 3.971022601558827e-06, "loss": 0.0003, "step": 18594 }, { "epoch": 17.879807692307693, "grad_norm": 0.7894837856292725, "learning_rate": 3.970028825715468e-06, "loss": 0.0186, "step": 18595 }, { "epoch": 17.880769230769232, "grad_norm": 0.006863914430141449, "learning_rate": 3.96903514343956e-06, "loss": 0.0001, "step": 18596 }, { "epoch": 17.88173076923077, "grad_norm": 0.004489511717110872, "learning_rate": 3.968041554746527e-06, "loss": 0.0, "step": 18597 }, { "epoch": 17.88269230769231, "grad_norm": 0.011244997382164001, "learning_rate": 3.967048059651788e-06, "loss": 0.0001, "step": 18598 }, { "epoch": 17.883653846153845, "grad_norm": 0.32174649834632874, "learning_rate": 3.966054658170754e-06, "loss": 0.0006, "step": 18599 }, { "epoch": 17.884615384615383, "grad_norm": 0.0308514591306448, "learning_rate": 3.96506135031884e-06, "loss": 0.0002, "step": 18600 }, { "epoch": 17.885576923076922, "grad_norm": 0.025738181546330452, "learning_rate": 3.964068136111467e-06, "loss": 0.0002, "step": 18601 }, { "epoch": 17.88653846153846, "grad_norm": 0.5787124633789062, "learning_rate": 3.963075015564038e-06, "loss": 0.0013, "step": 18602 }, { "epoch": 17.8875, "grad_norm": 0.0359923280775547, "learning_rate": 3.96208198869197e-06, "loss": 0.0002, "step": 18603 }, { "epoch": 17.888461538461538, "grad_norm": 0.0702233761548996, "learning_rate": 3.961089055510664e-06, "loss": 0.0005, "step": 18604 }, { "epoch": 17.889423076923077, "grad_norm": 0.16323308646678925, "learning_rate": 3.9600962160355324e-06, "loss": 0.0005, "step": 18605 }, { "epoch": 17.890384615384615, "grad_norm": 0.0338980033993721, "learning_rate": 3.959103470281984e-06, "loss": 0.0003, "step": 18606 }, { "epoch": 17.891346153846154, "grad_norm": 0.011758670210838318, "learning_rate": 3.958110818265417e-06, "loss": 0.0001, "step": 18607 }, { "epoch": 17.892307692307693, "grad_norm": 0.001424679416231811, "learning_rate": 3.957118260001237e-06, "loss": 0.0, "step": 18608 }, { "epoch": 17.89326923076923, "grad_norm": 0.042227353900671005, "learning_rate": 3.956125795504849e-06, "loss": 0.0001, "step": 18609 }, { "epoch": 17.89423076923077, "grad_norm": 0.07232651859521866, "learning_rate": 3.955133424791647e-06, "loss": 0.0002, "step": 18610 }, { "epoch": 17.89519230769231, "grad_norm": 6.624589443206787, "learning_rate": 3.954141147877033e-06, "loss": 0.0285, "step": 18611 }, { "epoch": 17.896153846153847, "grad_norm": 0.48056671023368835, "learning_rate": 3.953148964776408e-06, "loss": 0.0076, "step": 18612 }, { "epoch": 17.897115384615386, "grad_norm": 0.058687999844551086, "learning_rate": 3.952156875505159e-06, "loss": 0.0004, "step": 18613 }, { "epoch": 17.898076923076925, "grad_norm": 0.009906325489282608, "learning_rate": 3.951164880078689e-06, "loss": 0.0001, "step": 18614 }, { "epoch": 17.89903846153846, "grad_norm": 0.012612621299922466, "learning_rate": 3.950172978512383e-06, "loss": 0.0001, "step": 18615 }, { "epoch": 17.9, "grad_norm": 2.6290760040283203, "learning_rate": 3.9491811708216375e-06, "loss": 0.0285, "step": 18616 }, { "epoch": 17.900961538461537, "grad_norm": 0.007284182123839855, "learning_rate": 3.948189457021844e-06, "loss": 0.0001, "step": 18617 }, { "epoch": 17.901923076923076, "grad_norm": 0.2356383353471756, "learning_rate": 3.9471978371283845e-06, "loss": 0.0007, "step": 18618 }, { "epoch": 17.902884615384615, "grad_norm": 0.010358461178839207, "learning_rate": 3.94620631115665e-06, "loss": 0.0001, "step": 18619 }, { "epoch": 17.903846153846153, "grad_norm": 0.07409980148077011, "learning_rate": 3.9452148791220306e-06, "loss": 0.0003, "step": 18620 }, { "epoch": 17.904807692307692, "grad_norm": 0.04410343989729881, "learning_rate": 3.944223541039901e-06, "loss": 0.0002, "step": 18621 }, { "epoch": 17.90576923076923, "grad_norm": 0.00983930379152298, "learning_rate": 3.943232296925649e-06, "loss": 0.0001, "step": 18622 }, { "epoch": 17.90673076923077, "grad_norm": 0.839694619178772, "learning_rate": 3.9422411467946585e-06, "loss": 0.002, "step": 18623 }, { "epoch": 17.907692307692308, "grad_norm": 0.012170707806944847, "learning_rate": 3.941250090662303e-06, "loss": 0.0001, "step": 18624 }, { "epoch": 17.908653846153847, "grad_norm": 0.00790037214756012, "learning_rate": 3.940259128543967e-06, "loss": 0.0001, "step": 18625 }, { "epoch": 17.909615384615385, "grad_norm": 0.0049240016378462315, "learning_rate": 3.93926826045502e-06, "loss": 0.0, "step": 18626 }, { "epoch": 17.910576923076924, "grad_norm": 0.009689224883913994, "learning_rate": 3.938277486410843e-06, "loss": 0.0001, "step": 18627 }, { "epoch": 17.911538461538463, "grad_norm": 2.620748281478882, "learning_rate": 3.937286806426811e-06, "loss": 0.0656, "step": 18628 }, { "epoch": 17.9125, "grad_norm": 0.11633507162332535, "learning_rate": 3.936296220518291e-06, "loss": 0.0003, "step": 18629 }, { "epoch": 17.91346153846154, "grad_norm": 0.021450718864798546, "learning_rate": 3.935305728700657e-06, "loss": 0.0001, "step": 18630 }, { "epoch": 17.914423076923075, "grad_norm": 0.017256062477827072, "learning_rate": 3.934315330989281e-06, "loss": 0.0001, "step": 18631 }, { "epoch": 17.915384615384614, "grad_norm": 0.029772059991955757, "learning_rate": 3.933325027399526e-06, "loss": 0.0002, "step": 18632 }, { "epoch": 17.916346153846153, "grad_norm": 0.015809183940291405, "learning_rate": 3.932334817946761e-06, "loss": 0.0001, "step": 18633 }, { "epoch": 17.91730769230769, "grad_norm": 0.013341160491108894, "learning_rate": 3.9313447026463544e-06, "loss": 0.0001, "step": 18634 }, { "epoch": 17.91826923076923, "grad_norm": 0.014613796025514603, "learning_rate": 3.930354681513663e-06, "loss": 0.0001, "step": 18635 }, { "epoch": 17.91923076923077, "grad_norm": 0.003774830373004079, "learning_rate": 3.929364754564057e-06, "loss": 0.0, "step": 18636 }, { "epoch": 17.920192307692307, "grad_norm": 0.01557263731956482, "learning_rate": 3.9283749218128885e-06, "loss": 0.0001, "step": 18637 }, { "epoch": 17.921153846153846, "grad_norm": 0.03706365078687668, "learning_rate": 3.927385183275522e-06, "loss": 0.0002, "step": 18638 }, { "epoch": 17.922115384615385, "grad_norm": 0.0817270576953888, "learning_rate": 3.926395538967317e-06, "loss": 0.0003, "step": 18639 }, { "epoch": 17.923076923076923, "grad_norm": 0.13905389606952667, "learning_rate": 3.925405988903625e-06, "loss": 0.0009, "step": 18640 }, { "epoch": 17.924038461538462, "grad_norm": 0.028451764956116676, "learning_rate": 3.924416533099803e-06, "loss": 0.0002, "step": 18641 }, { "epoch": 17.925, "grad_norm": 0.045842595398426056, "learning_rate": 3.923427171571208e-06, "loss": 0.0002, "step": 18642 }, { "epoch": 17.92596153846154, "grad_norm": 0.034548189491033554, "learning_rate": 3.922437904333185e-06, "loss": 0.0002, "step": 18643 }, { "epoch": 17.926923076923078, "grad_norm": 0.023057846352458, "learning_rate": 3.92144873140109e-06, "loss": 0.0001, "step": 18644 }, { "epoch": 17.927884615384617, "grad_norm": 0.007158742286264896, "learning_rate": 3.920459652790271e-06, "loss": 0.0, "step": 18645 }, { "epoch": 17.928846153846155, "grad_norm": 0.014135506004095078, "learning_rate": 3.919470668516074e-06, "loss": 0.0001, "step": 18646 }, { "epoch": 17.92980769230769, "grad_norm": 0.014403664506971836, "learning_rate": 3.918481778593849e-06, "loss": 0.0001, "step": 18647 }, { "epoch": 17.93076923076923, "grad_norm": 0.008800779469311237, "learning_rate": 3.917492983038933e-06, "loss": 0.0001, "step": 18648 }, { "epoch": 17.931730769230768, "grad_norm": 0.00494117708876729, "learning_rate": 3.916504281866675e-06, "loss": 0.0, "step": 18649 }, { "epoch": 17.932692307692307, "grad_norm": 0.6952948570251465, "learning_rate": 3.915515675092419e-06, "loss": 0.0024, "step": 18650 }, { "epoch": 17.933653846153845, "grad_norm": 0.02182411588728428, "learning_rate": 3.914527162731498e-06, "loss": 0.0002, "step": 18651 }, { "epoch": 17.934615384615384, "grad_norm": 0.03657521679997444, "learning_rate": 3.913538744799256e-06, "loss": 0.0003, "step": 18652 }, { "epoch": 17.935576923076923, "grad_norm": 0.016637148335576057, "learning_rate": 3.912550421311032e-06, "loss": 0.0001, "step": 18653 }, { "epoch": 17.93653846153846, "grad_norm": 0.01428189780563116, "learning_rate": 3.911562192282156e-06, "loss": 0.0001, "step": 18654 }, { "epoch": 17.9375, "grad_norm": 0.03467626869678497, "learning_rate": 3.9105740577279635e-06, "loss": 0.0002, "step": 18655 }, { "epoch": 17.93846153846154, "grad_norm": 0.02211562730371952, "learning_rate": 3.909586017663795e-06, "loss": 0.0001, "step": 18656 }, { "epoch": 17.939423076923077, "grad_norm": 1.27880859375, "learning_rate": 3.908598072104972e-06, "loss": 0.0044, "step": 18657 }, { "epoch": 17.940384615384616, "grad_norm": 2.41062068939209, "learning_rate": 3.9076102210668286e-06, "loss": 0.0506, "step": 18658 }, { "epoch": 17.941346153846155, "grad_norm": 0.016177399083971977, "learning_rate": 3.9066224645646976e-06, "loss": 0.0001, "step": 18659 }, { "epoch": 17.942307692307693, "grad_norm": 0.23184481263160706, "learning_rate": 3.905634802613899e-06, "loss": 0.0007, "step": 18660 }, { "epoch": 17.943269230769232, "grad_norm": 0.022933823987841606, "learning_rate": 3.904647235229765e-06, "loss": 0.0002, "step": 18661 }, { "epoch": 17.94423076923077, "grad_norm": 0.03707518428564072, "learning_rate": 3.903659762427612e-06, "loss": 0.0004, "step": 18662 }, { "epoch": 17.94519230769231, "grad_norm": 0.02100214920938015, "learning_rate": 3.902672384222767e-06, "loss": 0.0001, "step": 18663 }, { "epoch": 17.946153846153845, "grad_norm": 0.2966636121273041, "learning_rate": 3.901685100630554e-06, "loss": 0.0007, "step": 18664 }, { "epoch": 17.947115384615383, "grad_norm": 0.004046197980642319, "learning_rate": 3.900697911666288e-06, "loss": 0.0, "step": 18665 }, { "epoch": 17.948076923076922, "grad_norm": 0.0159329604357481, "learning_rate": 3.8997108173452895e-06, "loss": 0.0001, "step": 18666 }, { "epoch": 17.94903846153846, "grad_norm": 0.016746127977967262, "learning_rate": 3.898723817682877e-06, "loss": 0.0001, "step": 18667 }, { "epoch": 17.95, "grad_norm": 1.4189246892929077, "learning_rate": 3.8977369126943595e-06, "loss": 0.0465, "step": 18668 }, { "epoch": 17.950961538461538, "grad_norm": 0.20591296255588531, "learning_rate": 3.896750102395057e-06, "loss": 0.0008, "step": 18669 }, { "epoch": 17.951923076923077, "grad_norm": 0.016725147143006325, "learning_rate": 3.8957633868002835e-06, "loss": 0.0002, "step": 18670 }, { "epoch": 17.952884615384615, "grad_norm": 0.0037029841914772987, "learning_rate": 3.894776765925342e-06, "loss": 0.0, "step": 18671 }, { "epoch": 17.953846153846154, "grad_norm": 0.09476900845766068, "learning_rate": 3.893790239785551e-06, "loss": 0.0004, "step": 18672 }, { "epoch": 17.954807692307693, "grad_norm": 1.971269130706787, "learning_rate": 3.8928038083962095e-06, "loss": 0.0305, "step": 18673 }, { "epoch": 17.95576923076923, "grad_norm": 0.11407949775457382, "learning_rate": 3.89181747177263e-06, "loss": 0.0005, "step": 18674 }, { "epoch": 17.95673076923077, "grad_norm": 0.11135692894458771, "learning_rate": 3.890831229930117e-06, "loss": 0.0004, "step": 18675 }, { "epoch": 17.95769230769231, "grad_norm": 0.006414792500436306, "learning_rate": 3.8898450828839715e-06, "loss": 0.0001, "step": 18676 }, { "epoch": 17.958653846153847, "grad_norm": 0.028849028050899506, "learning_rate": 3.888859030649498e-06, "loss": 0.0001, "step": 18677 }, { "epoch": 17.959615384615386, "grad_norm": 0.004633949138224125, "learning_rate": 3.887873073241998e-06, "loss": 0.0, "step": 18678 }, { "epoch": 17.960576923076925, "grad_norm": 1.394340991973877, "learning_rate": 3.886887210676767e-06, "loss": 0.011, "step": 18679 }, { "epoch": 17.96153846153846, "grad_norm": 0.028163475915789604, "learning_rate": 3.885901442969103e-06, "loss": 0.0001, "step": 18680 }, { "epoch": 17.9625, "grad_norm": 0.13916876912117004, "learning_rate": 3.884915770134309e-06, "loss": 0.0004, "step": 18681 }, { "epoch": 17.963461538461537, "grad_norm": 0.01244163978844881, "learning_rate": 3.883930192187671e-06, "loss": 0.0001, "step": 18682 }, { "epoch": 17.964423076923076, "grad_norm": 0.06987396627664566, "learning_rate": 3.8829447091444886e-06, "loss": 0.0002, "step": 18683 }, { "epoch": 17.965384615384615, "grad_norm": 0.10245927423238754, "learning_rate": 3.881959321020048e-06, "loss": 0.0003, "step": 18684 }, { "epoch": 17.966346153846153, "grad_norm": 0.006894108839333057, "learning_rate": 3.880974027829641e-06, "loss": 0.0, "step": 18685 }, { "epoch": 17.967307692307692, "grad_norm": 0.03570454567670822, "learning_rate": 3.8799888295885635e-06, "loss": 0.0002, "step": 18686 }, { "epoch": 17.96826923076923, "grad_norm": 0.010393036529421806, "learning_rate": 3.879003726312094e-06, "loss": 0.0001, "step": 18687 }, { "epoch": 17.96923076923077, "grad_norm": 0.04518952593207359, "learning_rate": 3.8780187180155205e-06, "loss": 0.0002, "step": 18688 }, { "epoch": 17.970192307692308, "grad_norm": 1.4354861974716187, "learning_rate": 3.877033804714133e-06, "loss": 0.0298, "step": 18689 }, { "epoch": 17.971153846153847, "grad_norm": 0.004397494252771139, "learning_rate": 3.876048986423207e-06, "loss": 0.0, "step": 18690 }, { "epoch": 17.972115384615385, "grad_norm": 0.009603098034858704, "learning_rate": 3.875064263158026e-06, "loss": 0.0001, "step": 18691 }, { "epoch": 17.973076923076924, "grad_norm": 0.030169246718287468, "learning_rate": 3.874079634933875e-06, "loss": 0.0002, "step": 18692 }, { "epoch": 17.974038461538463, "grad_norm": 0.01467825286090374, "learning_rate": 3.873095101766025e-06, "loss": 0.0001, "step": 18693 }, { "epoch": 17.975, "grad_norm": 0.03648907318711281, "learning_rate": 3.872110663669762e-06, "loss": 0.0001, "step": 18694 }, { "epoch": 17.97596153846154, "grad_norm": 0.017616737633943558, "learning_rate": 3.87112632066035e-06, "loss": 0.0001, "step": 18695 }, { "epoch": 17.976923076923075, "grad_norm": 0.06306079030036926, "learning_rate": 3.870142072753071e-06, "loss": 0.0003, "step": 18696 }, { "epoch": 17.977884615384614, "grad_norm": 0.006479195784777403, "learning_rate": 3.8691579199632e-06, "loss": 0.0001, "step": 18697 }, { "epoch": 17.978846153846153, "grad_norm": 0.11387146264314651, "learning_rate": 3.868173862306001e-06, "loss": 0.0003, "step": 18698 }, { "epoch": 17.97980769230769, "grad_norm": 0.014321373775601387, "learning_rate": 3.867189899796746e-06, "loss": 0.0001, "step": 18699 }, { "epoch": 17.98076923076923, "grad_norm": 0.0033626651857048273, "learning_rate": 3.866206032450708e-06, "loss": 0.0, "step": 18700 }, { "epoch": 17.98173076923077, "grad_norm": 0.007631204556673765, "learning_rate": 3.865222260283146e-06, "loss": 0.0001, "step": 18701 }, { "epoch": 17.982692307692307, "grad_norm": 1.514487624168396, "learning_rate": 3.86423858330933e-06, "loss": 0.0086, "step": 18702 }, { "epoch": 17.983653846153846, "grad_norm": 1.3358548879623413, "learning_rate": 3.863255001544526e-06, "loss": 0.0042, "step": 18703 }, { "epoch": 17.984615384615385, "grad_norm": 0.013156244531273842, "learning_rate": 3.862271515003989e-06, "loss": 0.0001, "step": 18704 }, { "epoch": 17.985576923076923, "grad_norm": 0.04255969077348709, "learning_rate": 3.861288123702989e-06, "loss": 0.0001, "step": 18705 }, { "epoch": 17.986538461538462, "grad_norm": 0.02738121524453163, "learning_rate": 3.860304827656776e-06, "loss": 0.0002, "step": 18706 }, { "epoch": 17.9875, "grad_norm": 0.007333871442824602, "learning_rate": 3.859321626880612e-06, "loss": 0.0, "step": 18707 }, { "epoch": 17.98846153846154, "grad_norm": 0.009162469767034054, "learning_rate": 3.858338521389758e-06, "loss": 0.0001, "step": 18708 }, { "epoch": 17.989423076923078, "grad_norm": 0.017184656113386154, "learning_rate": 3.857355511199461e-06, "loss": 0.0001, "step": 18709 }, { "epoch": 17.990384615384617, "grad_norm": 0.008394387550652027, "learning_rate": 3.856372596324978e-06, "loss": 0.0, "step": 18710 }, { "epoch": 17.991346153846155, "grad_norm": 0.008249265141785145, "learning_rate": 3.855389776781564e-06, "loss": 0.0001, "step": 18711 }, { "epoch": 17.99230769230769, "grad_norm": 0.12286543846130371, "learning_rate": 3.854407052584463e-06, "loss": 0.0004, "step": 18712 }, { "epoch": 17.99326923076923, "grad_norm": 0.05751300975680351, "learning_rate": 3.853424423748928e-06, "loss": 0.0003, "step": 18713 }, { "epoch": 17.994230769230768, "grad_norm": 0.02642352506518364, "learning_rate": 3.8524418902902094e-06, "loss": 0.0002, "step": 18714 }, { "epoch": 17.995192307692307, "grad_norm": 0.010130983777344227, "learning_rate": 3.851459452223546e-06, "loss": 0.0001, "step": 18715 }, { "epoch": 17.996153846153845, "grad_norm": 0.0199002455919981, "learning_rate": 3.8504771095641905e-06, "loss": 0.0001, "step": 18716 }, { "epoch": 17.997115384615384, "grad_norm": 0.02028038538992405, "learning_rate": 3.849494862327376e-06, "loss": 0.0001, "step": 18717 }, { "epoch": 17.998076923076923, "grad_norm": 0.023211034014821053, "learning_rate": 3.8485127105283514e-06, "loss": 0.0001, "step": 18718 }, { "epoch": 17.99903846153846, "grad_norm": 0.006338911596685648, "learning_rate": 3.847530654182359e-06, "loss": 0.0001, "step": 18719 }, { "epoch": 18.0, "grad_norm": 0.04566720500588417, "learning_rate": 3.846548693304629e-06, "loss": 0.0004, "step": 18720 }, { "epoch": 18.00096153846154, "grad_norm": 0.009475898928940296, "learning_rate": 3.845566827910403e-06, "loss": 0.0001, "step": 18721 }, { "epoch": 18.001923076923077, "grad_norm": 0.006840044632554054, "learning_rate": 3.8445850580149216e-06, "loss": 0.0, "step": 18722 }, { "epoch": 18.002884615384616, "grad_norm": 0.03043474443256855, "learning_rate": 3.8436033836334096e-06, "loss": 0.0002, "step": 18723 }, { "epoch": 18.003846153846155, "grad_norm": 0.012001012451946735, "learning_rate": 3.842621804781105e-06, "loss": 0.0001, "step": 18724 }, { "epoch": 18.004807692307693, "grad_norm": 0.017100512981414795, "learning_rate": 3.84164032147324e-06, "loss": 0.0001, "step": 18725 }, { "epoch": 18.005769230769232, "grad_norm": 0.0038767517544329166, "learning_rate": 3.840658933725041e-06, "loss": 0.0, "step": 18726 }, { "epoch": 18.00673076923077, "grad_norm": 0.008830384351313114, "learning_rate": 3.8396776415517415e-06, "loss": 0.0001, "step": 18727 }, { "epoch": 18.00769230769231, "grad_norm": 0.009817956015467644, "learning_rate": 3.8386964449685606e-06, "loss": 0.0001, "step": 18728 }, { "epoch": 18.008653846153845, "grad_norm": 0.005947276018559933, "learning_rate": 3.837715343990727e-06, "loss": 0.0001, "step": 18729 }, { "epoch": 18.009615384615383, "grad_norm": 0.04849708080291748, "learning_rate": 3.836734338633469e-06, "loss": 0.0004, "step": 18730 }, { "epoch": 18.010576923076922, "grad_norm": 0.010646323673427105, "learning_rate": 3.835753428912003e-06, "loss": 0.0001, "step": 18731 }, { "epoch": 18.01153846153846, "grad_norm": 0.02045224793255329, "learning_rate": 3.83477261484155e-06, "loss": 0.0001, "step": 18732 }, { "epoch": 18.0125, "grad_norm": 0.0166489090770483, "learning_rate": 3.833791896437336e-06, "loss": 0.0001, "step": 18733 }, { "epoch": 18.013461538461538, "grad_norm": 0.016793008893728256, "learning_rate": 3.832811273714569e-06, "loss": 0.0001, "step": 18734 }, { "epoch": 18.014423076923077, "grad_norm": 0.00520735839381814, "learning_rate": 3.831830746688472e-06, "loss": 0.0001, "step": 18735 }, { "epoch": 18.015384615384615, "grad_norm": 0.008089889772236347, "learning_rate": 3.830850315374263e-06, "loss": 0.0001, "step": 18736 }, { "epoch": 18.016346153846154, "grad_norm": 0.00853236485272646, "learning_rate": 3.829869979787145e-06, "loss": 0.0001, "step": 18737 }, { "epoch": 18.017307692307693, "grad_norm": 0.007370071951299906, "learning_rate": 3.828889739942338e-06, "loss": 0.0, "step": 18738 }, { "epoch": 18.01826923076923, "grad_norm": 0.014664924703538418, "learning_rate": 3.827909595855051e-06, "loss": 0.0001, "step": 18739 }, { "epoch": 18.01923076923077, "grad_norm": 0.011005103588104248, "learning_rate": 3.826929547540491e-06, "loss": 0.0001, "step": 18740 }, { "epoch": 18.02019230769231, "grad_norm": 0.00324839586392045, "learning_rate": 3.825949595013871e-06, "loss": 0.0, "step": 18741 }, { "epoch": 18.021153846153847, "grad_norm": 0.008024615235626698, "learning_rate": 3.824969738290386e-06, "loss": 0.0001, "step": 18742 }, { "epoch": 18.022115384615386, "grad_norm": 1.7933756113052368, "learning_rate": 3.82398997738525e-06, "loss": 0.0163, "step": 18743 }, { "epoch": 18.023076923076925, "grad_norm": 0.00850837491452694, "learning_rate": 3.823010312313665e-06, "loss": 0.0001, "step": 18744 }, { "epoch": 18.02403846153846, "grad_norm": 0.7706717252731323, "learning_rate": 3.822030743090828e-06, "loss": 0.0021, "step": 18745 }, { "epoch": 18.025, "grad_norm": 0.005111436825245619, "learning_rate": 3.821051269731941e-06, "loss": 0.0001, "step": 18746 }, { "epoch": 18.025961538461537, "grad_norm": 0.0037611001171171665, "learning_rate": 3.820071892252208e-06, "loss": 0.0, "step": 18747 }, { "epoch": 18.026923076923076, "grad_norm": 0.008057883009314537, "learning_rate": 3.819092610666817e-06, "loss": 0.0001, "step": 18748 }, { "epoch": 18.027884615384615, "grad_norm": 0.0032379818148911, "learning_rate": 3.818113424990968e-06, "loss": 0.0, "step": 18749 }, { "epoch": 18.028846153846153, "grad_norm": 0.06764877587556839, "learning_rate": 3.8171343352398585e-06, "loss": 0.0002, "step": 18750 }, { "epoch": 18.029807692307692, "grad_norm": 0.0029283363837748766, "learning_rate": 3.8161553414286746e-06, "loss": 0.0, "step": 18751 }, { "epoch": 18.03076923076923, "grad_norm": 0.04170781001448631, "learning_rate": 3.8151764435726134e-06, "loss": 0.0002, "step": 18752 }, { "epoch": 18.03173076923077, "grad_norm": 0.0036976663395762444, "learning_rate": 3.8141976416868574e-06, "loss": 0.0, "step": 18753 }, { "epoch": 18.032692307692308, "grad_norm": 0.020923500880599022, "learning_rate": 3.8132189357866e-06, "loss": 0.0001, "step": 18754 }, { "epoch": 18.033653846153847, "grad_norm": 0.02471267059445381, "learning_rate": 3.81224032588703e-06, "loss": 0.0002, "step": 18755 }, { "epoch": 18.034615384615385, "grad_norm": 0.007775317877531052, "learning_rate": 3.811261812003326e-06, "loss": 0.0001, "step": 18756 }, { "epoch": 18.035576923076924, "grad_norm": 0.004497759975492954, "learning_rate": 3.8102833941506743e-06, "loss": 0.0, "step": 18757 }, { "epoch": 18.036538461538463, "grad_norm": 0.0039908383041620255, "learning_rate": 3.8093050723442616e-06, "loss": 0.0, "step": 18758 }, { "epoch": 18.0375, "grad_norm": 0.01583448052406311, "learning_rate": 3.8083268465992616e-06, "loss": 0.0001, "step": 18759 }, { "epoch": 18.03846153846154, "grad_norm": 0.0040830890648067, "learning_rate": 3.8073487169308567e-06, "loss": 0.0, "step": 18760 }, { "epoch": 18.039423076923075, "grad_norm": 0.012421045452356339, "learning_rate": 3.806370683354227e-06, "loss": 0.0001, "step": 18761 }, { "epoch": 18.040384615384614, "grad_norm": 0.010294555686414242, "learning_rate": 3.805392745884543e-06, "loss": 0.0001, "step": 18762 }, { "epoch": 18.041346153846153, "grad_norm": 0.0023944485001266003, "learning_rate": 3.804414904536986e-06, "loss": 0.0, "step": 18763 }, { "epoch": 18.04230769230769, "grad_norm": 0.01750156469643116, "learning_rate": 3.8034371593267227e-06, "loss": 0.0002, "step": 18764 }, { "epoch": 18.04326923076923, "grad_norm": 2.13145112991333, "learning_rate": 3.8024595102689286e-06, "loss": 0.0653, "step": 18765 }, { "epoch": 18.04423076923077, "grad_norm": 1.4314286708831787, "learning_rate": 3.8014819573787766e-06, "loss": 0.0169, "step": 18766 }, { "epoch": 18.045192307692307, "grad_norm": 0.06843076646327972, "learning_rate": 3.800504500671428e-06, "loss": 0.0005, "step": 18767 }, { "epoch": 18.046153846153846, "grad_norm": 0.014630873687565327, "learning_rate": 3.7995271401620548e-06, "loss": 0.0001, "step": 18768 }, { "epoch": 18.047115384615385, "grad_norm": 0.04171055555343628, "learning_rate": 3.7985498758658255e-06, "loss": 0.0004, "step": 18769 }, { "epoch": 18.048076923076923, "grad_norm": 0.0026751181576400995, "learning_rate": 3.7975727077978975e-06, "loss": 0.0, "step": 18770 }, { "epoch": 18.049038461538462, "grad_norm": 0.009231443516910076, "learning_rate": 3.7965956359734367e-06, "loss": 0.0001, "step": 18771 }, { "epoch": 18.05, "grad_norm": 0.011973002925515175, "learning_rate": 3.7956186604076097e-06, "loss": 0.0001, "step": 18772 }, { "epoch": 18.05096153846154, "grad_norm": 0.008334904909133911, "learning_rate": 3.7946417811155665e-06, "loss": 0.0001, "step": 18773 }, { "epoch": 18.051923076923078, "grad_norm": 0.014702321961522102, "learning_rate": 3.793664998112475e-06, "loss": 0.0001, "step": 18774 }, { "epoch": 18.052884615384617, "grad_norm": 0.0061522894538939, "learning_rate": 3.7926883114134827e-06, "loss": 0.0, "step": 18775 }, { "epoch": 18.053846153846155, "grad_norm": 0.010547840036451817, "learning_rate": 3.7917117210337506e-06, "loss": 0.0001, "step": 18776 }, { "epoch": 18.05480769230769, "grad_norm": 0.07050479203462601, "learning_rate": 3.7907352269884346e-06, "loss": 0.0003, "step": 18777 }, { "epoch": 18.05576923076923, "grad_norm": 0.014624604023993015, "learning_rate": 3.78975882929268e-06, "loss": 0.0001, "step": 18778 }, { "epoch": 18.056730769230768, "grad_norm": 0.011556060053408146, "learning_rate": 3.788782527961642e-06, "loss": 0.0001, "step": 18779 }, { "epoch": 18.057692307692307, "grad_norm": 0.05798333138227463, "learning_rate": 3.7878063230104733e-06, "loss": 0.0003, "step": 18780 }, { "epoch": 18.058653846153845, "grad_norm": 0.022949401289224625, "learning_rate": 3.7868302144543146e-06, "loss": 0.0001, "step": 18781 }, { "epoch": 18.059615384615384, "grad_norm": 0.7152126431465149, "learning_rate": 3.7858542023083146e-06, "loss": 0.0061, "step": 18782 }, { "epoch": 18.060576923076923, "grad_norm": 0.005933275446295738, "learning_rate": 3.784878286587622e-06, "loss": 0.0, "step": 18783 }, { "epoch": 18.06153846153846, "grad_norm": 2.4066030979156494, "learning_rate": 3.7839024673073756e-06, "loss": 0.0125, "step": 18784 }, { "epoch": 18.0625, "grad_norm": 0.007373817730695009, "learning_rate": 3.782926744482721e-06, "loss": 0.0001, "step": 18785 }, { "epoch": 18.06346153846154, "grad_norm": 0.04255123436450958, "learning_rate": 3.7819511181287938e-06, "loss": 0.0002, "step": 18786 }, { "epoch": 18.064423076923077, "grad_norm": 0.007448846474289894, "learning_rate": 3.780975588260735e-06, "loss": 0.0001, "step": 18787 }, { "epoch": 18.065384615384616, "grad_norm": 0.015674788504838943, "learning_rate": 3.780000154893686e-06, "loss": 0.0001, "step": 18788 }, { "epoch": 18.066346153846155, "grad_norm": 0.020809656009078026, "learning_rate": 3.7790248180427758e-06, "loss": 0.0001, "step": 18789 }, { "epoch": 18.067307692307693, "grad_norm": 0.007141156122088432, "learning_rate": 3.778049577723142e-06, "loss": 0.0001, "step": 18790 }, { "epoch": 18.068269230769232, "grad_norm": 0.004149906802922487, "learning_rate": 3.77707443394992e-06, "loss": 0.0001, "step": 18791 }, { "epoch": 18.06923076923077, "grad_norm": 0.0920504629611969, "learning_rate": 3.776099386738237e-06, "loss": 0.0003, "step": 18792 }, { "epoch": 18.07019230769231, "grad_norm": 0.0058662123046815395, "learning_rate": 3.7751244361032234e-06, "loss": 0.0, "step": 18793 }, { "epoch": 18.071153846153845, "grad_norm": 0.010269989259541035, "learning_rate": 3.7741495820600128e-06, "loss": 0.0002, "step": 18794 }, { "epoch": 18.072115384615383, "grad_norm": 0.005268517881631851, "learning_rate": 3.7731748246237244e-06, "loss": 0.0, "step": 18795 }, { "epoch": 18.073076923076922, "grad_norm": 0.00415083160623908, "learning_rate": 3.7722001638094907e-06, "loss": 0.0001, "step": 18796 }, { "epoch": 18.07403846153846, "grad_norm": 0.005809375084936619, "learning_rate": 3.771225599632429e-06, "loss": 0.0001, "step": 18797 }, { "epoch": 18.075, "grad_norm": 0.004776379559189081, "learning_rate": 3.770251132107664e-06, "loss": 0.0001, "step": 18798 }, { "epoch": 18.075961538461538, "grad_norm": 0.002913747215643525, "learning_rate": 3.769276761250321e-06, "loss": 0.0, "step": 18799 }, { "epoch": 18.076923076923077, "grad_norm": 0.09423168748617172, "learning_rate": 3.7683024870755124e-06, "loss": 0.0003, "step": 18800 }, { "epoch": 18.077884615384615, "grad_norm": 0.004513954743742943, "learning_rate": 3.7673283095983593e-06, "loss": 0.0001, "step": 18801 }, { "epoch": 18.078846153846154, "grad_norm": 1.197824239730835, "learning_rate": 3.7663542288339815e-06, "loss": 0.0029, "step": 18802 }, { "epoch": 18.079807692307693, "grad_norm": 0.008327236399054527, "learning_rate": 3.7653802447974873e-06, "loss": 0.0001, "step": 18803 }, { "epoch": 18.08076923076923, "grad_norm": 0.012075462378561497, "learning_rate": 3.7644063575039935e-06, "loss": 0.0001, "step": 18804 }, { "epoch": 18.08173076923077, "grad_norm": 0.021920060738921165, "learning_rate": 3.7634325669686144e-06, "loss": 0.0002, "step": 18805 }, { "epoch": 18.08269230769231, "grad_norm": 0.00958329252898693, "learning_rate": 3.7624588732064547e-06, "loss": 0.0001, "step": 18806 }, { "epoch": 18.083653846153847, "grad_norm": 0.0035034185275435448, "learning_rate": 3.7614852762326303e-06, "loss": 0.0, "step": 18807 }, { "epoch": 18.084615384615386, "grad_norm": 0.018486402928829193, "learning_rate": 3.760511776062241e-06, "loss": 0.0001, "step": 18808 }, { "epoch": 18.085576923076925, "grad_norm": 0.01132669486105442, "learning_rate": 3.7595383727103964e-06, "loss": 0.0001, "step": 18809 }, { "epoch": 18.08653846153846, "grad_norm": 0.014620275236666203, "learning_rate": 3.7585650661922036e-06, "loss": 0.0001, "step": 18810 }, { "epoch": 18.0875, "grad_norm": 0.11290397495031357, "learning_rate": 3.75759185652276e-06, "loss": 0.0003, "step": 18811 }, { "epoch": 18.088461538461537, "grad_norm": 0.1427408754825592, "learning_rate": 3.7566187437171685e-06, "loss": 0.0006, "step": 18812 }, { "epoch": 18.089423076923076, "grad_norm": 0.005257720593363047, "learning_rate": 3.7556457277905334e-06, "loss": 0.0001, "step": 18813 }, { "epoch": 18.090384615384615, "grad_norm": 0.029332980513572693, "learning_rate": 3.7546728087579465e-06, "loss": 0.0002, "step": 18814 }, { "epoch": 18.091346153846153, "grad_norm": 0.007604475598782301, "learning_rate": 3.753699986634508e-06, "loss": 0.0001, "step": 18815 }, { "epoch": 18.092307692307692, "grad_norm": 0.03930695354938507, "learning_rate": 3.752727261435317e-06, "loss": 0.0002, "step": 18816 }, { "epoch": 18.09326923076923, "grad_norm": 0.005572556052356958, "learning_rate": 3.7517546331754594e-06, "loss": 0.0001, "step": 18817 }, { "epoch": 18.09423076923077, "grad_norm": 0.010604280047118664, "learning_rate": 3.750782101870034e-06, "loss": 0.0001, "step": 18818 }, { "epoch": 18.095192307692308, "grad_norm": 0.012090303935110569, "learning_rate": 3.7498096675341276e-06, "loss": 0.0001, "step": 18819 }, { "epoch": 18.096153846153847, "grad_norm": 0.014671946875751019, "learning_rate": 3.7488373301828296e-06, "loss": 0.0001, "step": 18820 }, { "epoch": 18.097115384615385, "grad_norm": 0.0214968491345644, "learning_rate": 3.747865089831233e-06, "loss": 0.0002, "step": 18821 }, { "epoch": 18.098076923076924, "grad_norm": 0.050199251621961594, "learning_rate": 3.7468929464944183e-06, "loss": 0.0002, "step": 18822 }, { "epoch": 18.099038461538463, "grad_norm": 0.003016709815710783, "learning_rate": 3.745920900187471e-06, "loss": 0.0, "step": 18823 }, { "epoch": 18.1, "grad_norm": 0.011211289092898369, "learning_rate": 3.744948950925479e-06, "loss": 0.0001, "step": 18824 }, { "epoch": 18.10096153846154, "grad_norm": 0.05990040674805641, "learning_rate": 3.743977098723518e-06, "loss": 0.0003, "step": 18825 }, { "epoch": 18.101923076923075, "grad_norm": 0.0031332275830209255, "learning_rate": 3.7430053435966716e-06, "loss": 0.0, "step": 18826 }, { "epoch": 18.102884615384614, "grad_norm": 0.009481396526098251, "learning_rate": 3.7420336855600214e-06, "loss": 0.0001, "step": 18827 }, { "epoch": 18.103846153846153, "grad_norm": 0.008653921075165272, "learning_rate": 3.741062124628638e-06, "loss": 0.0001, "step": 18828 }, { "epoch": 18.10480769230769, "grad_norm": 0.003562211524695158, "learning_rate": 3.7400906608176002e-06, "loss": 0.0, "step": 18829 }, { "epoch": 18.10576923076923, "grad_norm": 1.4567052125930786, "learning_rate": 3.7391192941419875e-06, "loss": 0.0502, "step": 18830 }, { "epoch": 18.10673076923077, "grad_norm": 0.003932825289666653, "learning_rate": 3.738148024616863e-06, "loss": 0.0, "step": 18831 }, { "epoch": 18.107692307692307, "grad_norm": 0.005565775092691183, "learning_rate": 3.737176852257307e-06, "loss": 0.0001, "step": 18832 }, { "epoch": 18.108653846153846, "grad_norm": 0.005212633404880762, "learning_rate": 3.736205777078381e-06, "loss": 0.0, "step": 18833 }, { "epoch": 18.109615384615385, "grad_norm": 0.009074836038053036, "learning_rate": 3.7352347990951586e-06, "loss": 0.0001, "step": 18834 }, { "epoch": 18.110576923076923, "grad_norm": 0.00789475068449974, "learning_rate": 3.734263918322708e-06, "loss": 0.0001, "step": 18835 }, { "epoch": 18.111538461538462, "grad_norm": 0.027958711609244347, "learning_rate": 3.733293134776088e-06, "loss": 0.0001, "step": 18836 }, { "epoch": 18.1125, "grad_norm": 0.0033201363403350115, "learning_rate": 3.732322448470366e-06, "loss": 0.0, "step": 18837 }, { "epoch": 18.11346153846154, "grad_norm": 0.009150632657110691, "learning_rate": 3.7313518594206077e-06, "loss": 0.0001, "step": 18838 }, { "epoch": 18.114423076923078, "grad_norm": 0.015593894757330418, "learning_rate": 3.7303813676418665e-06, "loss": 0.0001, "step": 18839 }, { "epoch": 18.115384615384617, "grad_norm": 0.003551217494532466, "learning_rate": 3.729410973149206e-06, "loss": 0.0, "step": 18840 }, { "epoch": 18.116346153846155, "grad_norm": 0.0742042064666748, "learning_rate": 3.7284406759576864e-06, "loss": 0.0003, "step": 18841 }, { "epoch": 18.11730769230769, "grad_norm": 0.009332142770290375, "learning_rate": 3.727470476082358e-06, "loss": 0.0001, "step": 18842 }, { "epoch": 18.11826923076923, "grad_norm": 0.05973312631249428, "learning_rate": 3.726500373538281e-06, "loss": 0.0003, "step": 18843 }, { "epoch": 18.119230769230768, "grad_norm": 2.1445441246032715, "learning_rate": 3.725530368340502e-06, "loss": 0.0024, "step": 18844 }, { "epoch": 18.120192307692307, "grad_norm": 0.032141298055648804, "learning_rate": 3.7245604605040765e-06, "loss": 0.0003, "step": 18845 }, { "epoch": 18.121153846153845, "grad_norm": 0.01943812519311905, "learning_rate": 3.7235906500440576e-06, "loss": 0.0001, "step": 18846 }, { "epoch": 18.122115384615384, "grad_norm": 0.00786317978054285, "learning_rate": 3.7226209369754875e-06, "loss": 0.0001, "step": 18847 }, { "epoch": 18.123076923076923, "grad_norm": 0.08970452845096588, "learning_rate": 3.7216513213134164e-06, "loss": 0.0003, "step": 18848 }, { "epoch": 18.12403846153846, "grad_norm": 0.10016889870166779, "learning_rate": 3.7206818030728943e-06, "loss": 0.0004, "step": 18849 }, { "epoch": 18.125, "grad_norm": 0.016399510204792023, "learning_rate": 3.7197123822689575e-06, "loss": 0.0001, "step": 18850 }, { "epoch": 18.12596153846154, "grad_norm": 0.0014909597812220454, "learning_rate": 3.7187430589166516e-06, "loss": 0.0, "step": 18851 }, { "epoch": 18.126923076923077, "grad_norm": 0.009601839818060398, "learning_rate": 3.717773833031021e-06, "loss": 0.0001, "step": 18852 }, { "epoch": 18.127884615384616, "grad_norm": 0.005138488486409187, "learning_rate": 3.7168047046271007e-06, "loss": 0.0, "step": 18853 }, { "epoch": 18.128846153846155, "grad_norm": 0.08780951052904129, "learning_rate": 3.715835673719932e-06, "loss": 0.0003, "step": 18854 }, { "epoch": 18.129807692307693, "grad_norm": 0.005838683340698481, "learning_rate": 3.7148667403245477e-06, "loss": 0.0, "step": 18855 }, { "epoch": 18.130769230769232, "grad_norm": 0.008407879620790482, "learning_rate": 3.7138979044559842e-06, "loss": 0.0001, "step": 18856 }, { "epoch": 18.13173076923077, "grad_norm": 0.0066728671081364155, "learning_rate": 3.71292916612928e-06, "loss": 0.0001, "step": 18857 }, { "epoch": 18.13269230769231, "grad_norm": 0.010785984806716442, "learning_rate": 3.711960525359458e-06, "loss": 0.0001, "step": 18858 }, { "epoch": 18.133653846153845, "grad_norm": 0.007600424811244011, "learning_rate": 3.7109919821615546e-06, "loss": 0.0001, "step": 18859 }, { "epoch": 18.134615384615383, "grad_norm": 0.006718013901263475, "learning_rate": 3.710023536550601e-06, "loss": 0.0001, "step": 18860 }, { "epoch": 18.135576923076922, "grad_norm": 0.012205059640109539, "learning_rate": 3.709055188541617e-06, "loss": 0.0001, "step": 18861 }, { "epoch": 18.13653846153846, "grad_norm": 0.02486640401184559, "learning_rate": 3.708086938149633e-06, "loss": 0.0001, "step": 18862 }, { "epoch": 18.1375, "grad_norm": 0.010813501663506031, "learning_rate": 3.7071187853896774e-06, "loss": 0.0001, "step": 18863 }, { "epoch": 18.138461538461538, "grad_norm": 0.01904076337814331, "learning_rate": 3.706150730276765e-06, "loss": 0.0001, "step": 18864 }, { "epoch": 18.139423076923077, "grad_norm": 0.0041390713304281235, "learning_rate": 3.7051827728259248e-06, "loss": 0.0, "step": 18865 }, { "epoch": 18.140384615384615, "grad_norm": 0.13480469584465027, "learning_rate": 3.7042149130521686e-06, "loss": 0.0006, "step": 18866 }, { "epoch": 18.141346153846154, "grad_norm": 0.008620194159448147, "learning_rate": 3.7032471509705203e-06, "loss": 0.0001, "step": 18867 }, { "epoch": 18.142307692307693, "grad_norm": 0.003887694561854005, "learning_rate": 3.702279486595998e-06, "loss": 0.0, "step": 18868 }, { "epoch": 18.14326923076923, "grad_norm": 0.02218557707965374, "learning_rate": 3.701311919943611e-06, "loss": 0.0001, "step": 18869 }, { "epoch": 18.14423076923077, "grad_norm": 0.0309100653976202, "learning_rate": 3.700344451028377e-06, "loss": 0.0001, "step": 18870 }, { "epoch": 18.14519230769231, "grad_norm": 0.010564110241830349, "learning_rate": 3.6993770798653107e-06, "loss": 0.0001, "step": 18871 }, { "epoch": 18.146153846153847, "grad_norm": 0.005855820141732693, "learning_rate": 3.6984098064694174e-06, "loss": 0.0001, "step": 18872 }, { "epoch": 18.147115384615386, "grad_norm": 0.002814503386616707, "learning_rate": 3.697442630855709e-06, "loss": 0.0, "step": 18873 }, { "epoch": 18.148076923076925, "grad_norm": 0.004886440001428127, "learning_rate": 3.696475553039196e-06, "loss": 0.0, "step": 18874 }, { "epoch": 18.14903846153846, "grad_norm": 0.005718254949897528, "learning_rate": 3.6955085730348784e-06, "loss": 0.0001, "step": 18875 }, { "epoch": 18.15, "grad_norm": 0.04647823050618172, "learning_rate": 3.6945416908577667e-06, "loss": 0.0003, "step": 18876 }, { "epoch": 18.150961538461537, "grad_norm": 0.007927751168608665, "learning_rate": 3.69357490652286e-06, "loss": 0.0001, "step": 18877 }, { "epoch": 18.151923076923076, "grad_norm": 0.09798038005828857, "learning_rate": 3.6926082200451606e-06, "loss": 0.0004, "step": 18878 }, { "epoch": 18.152884615384615, "grad_norm": 0.004515462089329958, "learning_rate": 3.6916416314396743e-06, "loss": 0.0001, "step": 18879 }, { "epoch": 18.153846153846153, "grad_norm": 0.008301096968352795, "learning_rate": 3.69067514072139e-06, "loss": 0.0001, "step": 18880 }, { "epoch": 18.154807692307692, "grad_norm": 0.005856428295373917, "learning_rate": 3.6897087479053106e-06, "loss": 0.0001, "step": 18881 }, { "epoch": 18.15576923076923, "grad_norm": 0.0036413271445780993, "learning_rate": 3.6887424530064354e-06, "loss": 0.0, "step": 18882 }, { "epoch": 18.15673076923077, "grad_norm": 0.025449447333812714, "learning_rate": 3.687776256039749e-06, "loss": 0.0001, "step": 18883 }, { "epoch": 18.157692307692308, "grad_norm": 0.002600308507680893, "learning_rate": 3.6868101570202495e-06, "loss": 0.0, "step": 18884 }, { "epoch": 18.158653846153847, "grad_norm": 0.0056760478764772415, "learning_rate": 3.685844155962931e-06, "loss": 0.0, "step": 18885 }, { "epoch": 18.159615384615385, "grad_norm": 0.005381265189498663, "learning_rate": 3.684878252882775e-06, "loss": 0.0001, "step": 18886 }, { "epoch": 18.160576923076924, "grad_norm": 0.007310930639505386, "learning_rate": 3.6839124477947786e-06, "loss": 0.0001, "step": 18887 }, { "epoch": 18.161538461538463, "grad_norm": 2.925562620162964, "learning_rate": 3.6829467407139197e-06, "loss": 0.0522, "step": 18888 }, { "epoch": 18.1625, "grad_norm": 0.0016772877424955368, "learning_rate": 3.6819811316551867e-06, "loss": 0.0, "step": 18889 }, { "epoch": 18.16346153846154, "grad_norm": 0.008138217031955719, "learning_rate": 3.681015620633568e-06, "loss": 0.0001, "step": 18890 }, { "epoch": 18.164423076923075, "grad_norm": 0.01096486859023571, "learning_rate": 3.6800502076640353e-06, "loss": 0.0, "step": 18891 }, { "epoch": 18.165384615384614, "grad_norm": 0.00678321672603488, "learning_rate": 3.6790848927615765e-06, "loss": 0.0001, "step": 18892 }, { "epoch": 18.166346153846153, "grad_norm": 0.3173062801361084, "learning_rate": 3.678119675941171e-06, "loss": 0.001, "step": 18893 }, { "epoch": 18.16730769230769, "grad_norm": 0.03629394620656967, "learning_rate": 3.6771545572177903e-06, "loss": 0.0002, "step": 18894 }, { "epoch": 18.16826923076923, "grad_norm": 0.11146177351474762, "learning_rate": 3.6761895366064135e-06, "loss": 0.0003, "step": 18895 }, { "epoch": 18.16923076923077, "grad_norm": 0.0035800295881927013, "learning_rate": 3.675224614122018e-06, "loss": 0.0, "step": 18896 }, { "epoch": 18.170192307692307, "grad_norm": 0.008257062174379826, "learning_rate": 3.6742597897795717e-06, "loss": 0.0001, "step": 18897 }, { "epoch": 18.171153846153846, "grad_norm": 0.0053080786019563675, "learning_rate": 3.673295063594049e-06, "loss": 0.0, "step": 18898 }, { "epoch": 18.172115384615385, "grad_norm": 0.00225840974599123, "learning_rate": 3.6723304355804157e-06, "loss": 0.0, "step": 18899 }, { "epoch": 18.173076923076923, "grad_norm": 0.10196121782064438, "learning_rate": 3.6713659057536433e-06, "loss": 0.0004, "step": 18900 }, { "epoch": 18.174038461538462, "grad_norm": 0.007512490265071392, "learning_rate": 3.6704014741286996e-06, "loss": 0.0001, "step": 18901 }, { "epoch": 18.175, "grad_norm": 0.004340807441622019, "learning_rate": 3.6694371407205455e-06, "loss": 0.0, "step": 18902 }, { "epoch": 18.17596153846154, "grad_norm": 0.056151196360588074, "learning_rate": 3.668472905544147e-06, "loss": 0.0003, "step": 18903 }, { "epoch": 18.176923076923078, "grad_norm": 0.07918228954076767, "learning_rate": 3.6675087686144695e-06, "loss": 0.0005, "step": 18904 }, { "epoch": 18.177884615384617, "grad_norm": 0.014336108230054379, "learning_rate": 3.6665447299464665e-06, "loss": 0.0001, "step": 18905 }, { "epoch": 18.178846153846155, "grad_norm": 0.55939781665802, "learning_rate": 3.665580789555101e-06, "loss": 0.0029, "step": 18906 }, { "epoch": 18.17980769230769, "grad_norm": 0.006588831543922424, "learning_rate": 3.6646169474553338e-06, "loss": 0.0001, "step": 18907 }, { "epoch": 18.18076923076923, "grad_norm": 0.01146145910024643, "learning_rate": 3.663653203662114e-06, "loss": 0.0001, "step": 18908 }, { "epoch": 18.181730769230768, "grad_norm": 0.010103113017976284, "learning_rate": 3.662689558190403e-06, "loss": 0.0001, "step": 18909 }, { "epoch": 18.182692307692307, "grad_norm": 0.010408214293420315, "learning_rate": 3.661726011055148e-06, "loss": 0.0001, "step": 18910 }, { "epoch": 18.183653846153845, "grad_norm": 1.027164340019226, "learning_rate": 3.6607625622713005e-06, "loss": 0.0039, "step": 18911 }, { "epoch": 18.184615384615384, "grad_norm": 0.010709645226597786, "learning_rate": 3.659799211853817e-06, "loss": 0.0001, "step": 18912 }, { "epoch": 18.185576923076923, "grad_norm": 0.008696413598954678, "learning_rate": 3.658835959817638e-06, "loss": 0.0001, "step": 18913 }, { "epoch": 18.18653846153846, "grad_norm": 0.008629937656223774, "learning_rate": 3.6578728061777147e-06, "loss": 0.0001, "step": 18914 }, { "epoch": 18.1875, "grad_norm": 0.017710328102111816, "learning_rate": 3.6569097509489938e-06, "loss": 0.0001, "step": 18915 }, { "epoch": 18.18846153846154, "grad_norm": 0.009659563191235065, "learning_rate": 3.6559467941464144e-06, "loss": 0.0001, "step": 18916 }, { "epoch": 18.189423076923077, "grad_norm": 1.174208164215088, "learning_rate": 3.6549839357849205e-06, "loss": 0.0158, "step": 18917 }, { "epoch": 18.190384615384616, "grad_norm": 0.10846161842346191, "learning_rate": 3.6540211758794564e-06, "loss": 0.0004, "step": 18918 }, { "epoch": 18.191346153846155, "grad_norm": 1.7063542604446411, "learning_rate": 3.653058514444956e-06, "loss": 0.0062, "step": 18919 }, { "epoch": 18.192307692307693, "grad_norm": 0.9363627433776855, "learning_rate": 3.6520959514963583e-06, "loss": 0.0027, "step": 18920 }, { "epoch": 18.193269230769232, "grad_norm": 0.0030967385973781347, "learning_rate": 3.651133487048605e-06, "loss": 0.0, "step": 18921 }, { "epoch": 18.19423076923077, "grad_norm": 0.0037458178121596575, "learning_rate": 3.6501711211166224e-06, "loss": 0.0, "step": 18922 }, { "epoch": 18.19519230769231, "grad_norm": 0.05720968171954155, "learning_rate": 3.6492088537153515e-06, "loss": 0.0001, "step": 18923 }, { "epoch": 18.196153846153845, "grad_norm": 0.14536114037036896, "learning_rate": 3.6482466848597164e-06, "loss": 0.0003, "step": 18924 }, { "epoch": 18.197115384615383, "grad_norm": 0.003432546742260456, "learning_rate": 3.64728461456465e-06, "loss": 0.0, "step": 18925 }, { "epoch": 18.198076923076922, "grad_norm": 0.003802796360105276, "learning_rate": 3.6463226428450847e-06, "loss": 0.0, "step": 18926 }, { "epoch": 18.19903846153846, "grad_norm": 0.0026618402916938066, "learning_rate": 3.645360769715941e-06, "loss": 0.0, "step": 18927 }, { "epoch": 18.2, "grad_norm": 0.004297138191759586, "learning_rate": 3.6443989951921478e-06, "loss": 0.0, "step": 18928 }, { "epoch": 18.200961538461538, "grad_norm": 0.04084496945142746, "learning_rate": 3.643437319288632e-06, "loss": 0.0002, "step": 18929 }, { "epoch": 18.201923076923077, "grad_norm": 0.004161597695201635, "learning_rate": 3.642475742020308e-06, "loss": 0.0, "step": 18930 }, { "epoch": 18.202884615384615, "grad_norm": 0.1058855950832367, "learning_rate": 3.6415142634021026e-06, "loss": 0.0003, "step": 18931 }, { "epoch": 18.203846153846154, "grad_norm": 0.010005959309637547, "learning_rate": 3.640552883448938e-06, "loss": 0.0001, "step": 18932 }, { "epoch": 18.204807692307693, "grad_norm": 0.019277993589639664, "learning_rate": 3.639591602175724e-06, "loss": 0.0001, "step": 18933 }, { "epoch": 18.20576923076923, "grad_norm": 0.2155199944972992, "learning_rate": 3.6386304195973832e-06, "loss": 0.0008, "step": 18934 }, { "epoch": 18.20673076923077, "grad_norm": 0.008727852255105972, "learning_rate": 3.637669335728825e-06, "loss": 0.0001, "step": 18935 }, { "epoch": 18.20769230769231, "grad_norm": 0.00860372930765152, "learning_rate": 3.6367083505849663e-06, "loss": 0.0001, "step": 18936 }, { "epoch": 18.208653846153847, "grad_norm": 0.044236015528440475, "learning_rate": 3.63574746418072e-06, "loss": 0.0001, "step": 18937 }, { "epoch": 18.209615384615386, "grad_norm": 0.04714192822575569, "learning_rate": 3.6347866765309925e-06, "loss": 0.0002, "step": 18938 }, { "epoch": 18.210576923076925, "grad_norm": 0.005464137531816959, "learning_rate": 3.633825987650693e-06, "loss": 0.0, "step": 18939 }, { "epoch": 18.21153846153846, "grad_norm": 0.004052298609167337, "learning_rate": 3.6328653975547334e-06, "loss": 0.0, "step": 18940 }, { "epoch": 18.2125, "grad_norm": 0.004531952552497387, "learning_rate": 3.6319049062580114e-06, "loss": 0.0, "step": 18941 }, { "epoch": 18.213461538461537, "grad_norm": 0.01207814458757639, "learning_rate": 3.630944513775435e-06, "loss": 0.0001, "step": 18942 }, { "epoch": 18.214423076923076, "grad_norm": 0.0024461145512759686, "learning_rate": 3.6299842201219117e-06, "loss": 0.0, "step": 18943 }, { "epoch": 18.215384615384615, "grad_norm": 0.003660523099824786, "learning_rate": 3.6290240253123334e-06, "loss": 0.0, "step": 18944 }, { "epoch": 18.216346153846153, "grad_norm": 0.016482962295413017, "learning_rate": 3.6280639293616072e-06, "loss": 0.0001, "step": 18945 }, { "epoch": 18.217307692307692, "grad_norm": 0.007339586038142443, "learning_rate": 3.6271039322846237e-06, "loss": 0.0001, "step": 18946 }, { "epoch": 18.21826923076923, "grad_norm": 0.007989413104951382, "learning_rate": 3.626144034096283e-06, "loss": 0.0001, "step": 18947 }, { "epoch": 18.21923076923077, "grad_norm": 0.05698484927415848, "learning_rate": 3.6251842348114828e-06, "loss": 0.0003, "step": 18948 }, { "epoch": 18.220192307692308, "grad_norm": 0.005642269738018513, "learning_rate": 3.62422453444511e-06, "loss": 0.0001, "step": 18949 }, { "epoch": 18.221153846153847, "grad_norm": 3.9772841930389404, "learning_rate": 3.6232649330120608e-06, "loss": 0.0349, "step": 18950 }, { "epoch": 18.222115384615385, "grad_norm": 0.004071188159286976, "learning_rate": 3.6223054305272264e-06, "loss": 0.0, "step": 18951 }, { "epoch": 18.223076923076924, "grad_norm": 0.012812117114663124, "learning_rate": 3.621346027005491e-06, "loss": 0.0002, "step": 18952 }, { "epoch": 18.224038461538463, "grad_norm": 0.004506370984017849, "learning_rate": 3.620386722461744e-06, "loss": 0.0, "step": 18953 }, { "epoch": 18.225, "grad_norm": 1.1154330968856812, "learning_rate": 3.6194275169108737e-06, "loss": 0.007, "step": 18954 }, { "epoch": 18.22596153846154, "grad_norm": 0.0030333856120705605, "learning_rate": 3.6184684103677594e-06, "loss": 0.0, "step": 18955 }, { "epoch": 18.226923076923075, "grad_norm": 0.02659055031836033, "learning_rate": 3.617509402847289e-06, "loss": 0.0001, "step": 18956 }, { "epoch": 18.227884615384614, "grad_norm": 0.004471649415791035, "learning_rate": 3.616550494364336e-06, "loss": 0.0, "step": 18957 }, { "epoch": 18.228846153846153, "grad_norm": 0.009992475621402264, "learning_rate": 3.615591684933785e-06, "loss": 0.0001, "step": 18958 }, { "epoch": 18.22980769230769, "grad_norm": 0.008223187178373337, "learning_rate": 3.6146329745705157e-06, "loss": 0.0001, "step": 18959 }, { "epoch": 18.23076923076923, "grad_norm": 0.02341107465326786, "learning_rate": 3.613674363289399e-06, "loss": 0.0002, "step": 18960 }, { "epoch": 18.23173076923077, "grad_norm": 0.004203857388347387, "learning_rate": 3.612715851105313e-06, "loss": 0.0, "step": 18961 }, { "epoch": 18.232692307692307, "grad_norm": 0.007433989550918341, "learning_rate": 3.6117574380331332e-06, "loss": 0.0001, "step": 18962 }, { "epoch": 18.233653846153846, "grad_norm": 0.006783462129533291, "learning_rate": 3.610799124087725e-06, "loss": 0.0, "step": 18963 }, { "epoch": 18.234615384615385, "grad_norm": 0.006428249180316925, "learning_rate": 3.6098409092839625e-06, "loss": 0.0001, "step": 18964 }, { "epoch": 18.235576923076923, "grad_norm": 0.006456460803747177, "learning_rate": 3.6088827936367177e-06, "loss": 0.0001, "step": 18965 }, { "epoch": 18.236538461538462, "grad_norm": 0.019481590017676353, "learning_rate": 3.6079247771608518e-06, "loss": 0.0001, "step": 18966 }, { "epoch": 18.2375, "grad_norm": 0.009020428173244, "learning_rate": 3.606966859871235e-06, "loss": 0.0001, "step": 18967 }, { "epoch": 18.23846153846154, "grad_norm": 0.007672640960663557, "learning_rate": 3.6060090417827266e-06, "loss": 0.0001, "step": 18968 }, { "epoch": 18.239423076923078, "grad_norm": 0.004091701470315456, "learning_rate": 3.6050513229101914e-06, "loss": 0.0, "step": 18969 }, { "epoch": 18.240384615384617, "grad_norm": 0.12859578430652618, "learning_rate": 3.604093703268493e-06, "loss": 0.0003, "step": 18970 }, { "epoch": 18.241346153846155, "grad_norm": 0.008507496677339077, "learning_rate": 3.6031361828724863e-06, "loss": 0.0001, "step": 18971 }, { "epoch": 18.24230769230769, "grad_norm": 2.6140284538269043, "learning_rate": 3.6021787617370317e-06, "loss": 0.0229, "step": 18972 }, { "epoch": 18.24326923076923, "grad_norm": 0.005975272040814161, "learning_rate": 3.6012214398769886e-06, "loss": 0.0, "step": 18973 }, { "epoch": 18.244230769230768, "grad_norm": 0.012400230392813683, "learning_rate": 3.6002642173072046e-06, "loss": 0.0001, "step": 18974 }, { "epoch": 18.245192307692307, "grad_norm": 0.013813447207212448, "learning_rate": 3.5993070940425377e-06, "loss": 0.0001, "step": 18975 }, { "epoch": 18.246153846153845, "grad_norm": 0.01058898027986288, "learning_rate": 3.5983500700978425e-06, "loss": 0.0001, "step": 18976 }, { "epoch": 18.247115384615384, "grad_norm": 0.003005461534485221, "learning_rate": 3.5973931454879605e-06, "loss": 0.0, "step": 18977 }, { "epoch": 18.248076923076923, "grad_norm": 0.002676523756235838, "learning_rate": 3.5964363202277507e-06, "loss": 0.0, "step": 18978 }, { "epoch": 18.24903846153846, "grad_norm": 0.0031887448858469725, "learning_rate": 3.595479594332051e-06, "loss": 0.0, "step": 18979 }, { "epoch": 18.25, "grad_norm": 0.003849525237455964, "learning_rate": 3.59452296781571e-06, "loss": 0.0001, "step": 18980 }, { "epoch": 18.25096153846154, "grad_norm": 0.03605835139751434, "learning_rate": 3.5935664406935777e-06, "loss": 0.0001, "step": 18981 }, { "epoch": 18.251923076923077, "grad_norm": 0.0020148088224232197, "learning_rate": 3.5926100129804877e-06, "loss": 0.0, "step": 18982 }, { "epoch": 18.252884615384616, "grad_norm": 0.12139585614204407, "learning_rate": 3.5916536846912843e-06, "loss": 0.0004, "step": 18983 }, { "epoch": 18.253846153846155, "grad_norm": 0.003137756371870637, "learning_rate": 3.59069745584081e-06, "loss": 0.0, "step": 18984 }, { "epoch": 18.254807692307693, "grad_norm": 0.00916922278702259, "learning_rate": 3.5897413264438985e-06, "loss": 0.0001, "step": 18985 }, { "epoch": 18.255769230769232, "grad_norm": 0.2780029773712158, "learning_rate": 3.588785296515387e-06, "loss": 0.0011, "step": 18986 }, { "epoch": 18.25673076923077, "grad_norm": 0.02575298771262169, "learning_rate": 3.5878293660701146e-06, "loss": 0.0002, "step": 18987 }, { "epoch": 18.25769230769231, "grad_norm": 0.0070289806462824345, "learning_rate": 3.5868735351229068e-06, "loss": 0.0001, "step": 18988 }, { "epoch": 18.258653846153845, "grad_norm": 0.024095026776194572, "learning_rate": 3.585917803688603e-06, "loss": 0.0002, "step": 18989 }, { "epoch": 18.259615384615383, "grad_norm": 0.0033798411022871733, "learning_rate": 3.584962171782027e-06, "loss": 0.0, "step": 18990 }, { "epoch": 18.260576923076922, "grad_norm": 0.01186206005513668, "learning_rate": 3.5840066394180107e-06, "loss": 0.0001, "step": 18991 }, { "epoch": 18.26153846153846, "grad_norm": 0.008999879471957684, "learning_rate": 3.583051206611383e-06, "loss": 0.0001, "step": 18992 }, { "epoch": 18.2625, "grad_norm": 0.01775466278195381, "learning_rate": 3.582095873376964e-06, "loss": 0.0001, "step": 18993 }, { "epoch": 18.263461538461538, "grad_norm": 0.0067141116596758366, "learning_rate": 3.58114063972958e-06, "loss": 0.0001, "step": 18994 }, { "epoch": 18.264423076923077, "grad_norm": 0.006766876671463251, "learning_rate": 3.5801855056840586e-06, "loss": 0.0001, "step": 18995 }, { "epoch": 18.265384615384615, "grad_norm": 0.001698812935501337, "learning_rate": 3.579230471255213e-06, "loss": 0.0, "step": 18996 }, { "epoch": 18.266346153846154, "grad_norm": 0.038489434868097305, "learning_rate": 3.578275536457865e-06, "loss": 0.0002, "step": 18997 }, { "epoch": 18.267307692307693, "grad_norm": 0.020744508132338524, "learning_rate": 3.577320701306837e-06, "loss": 0.0002, "step": 18998 }, { "epoch": 18.26826923076923, "grad_norm": 0.0023249839432537556, "learning_rate": 3.576365965816937e-06, "loss": 0.0, "step": 18999 }, { "epoch": 18.26923076923077, "grad_norm": 0.010320899076759815, "learning_rate": 3.575411330002988e-06, "loss": 0.0001, "step": 19000 }, { "epoch": 18.27019230769231, "grad_norm": 0.0044472054578363895, "learning_rate": 3.574456793879796e-06, "loss": 0.0, "step": 19001 }, { "epoch": 18.271153846153847, "grad_norm": 0.023388637229800224, "learning_rate": 3.5735023574621765e-06, "loss": 0.0001, "step": 19002 }, { "epoch": 18.272115384615386, "grad_norm": 0.005689110141247511, "learning_rate": 3.572548020764941e-06, "loss": 0.0001, "step": 19003 }, { "epoch": 18.273076923076925, "grad_norm": 0.013081928715109825, "learning_rate": 3.5715937838028935e-06, "loss": 0.0001, "step": 19004 }, { "epoch": 18.27403846153846, "grad_norm": 0.0030359404627233744, "learning_rate": 3.570639646590842e-06, "loss": 0.0, "step": 19005 }, { "epoch": 18.275, "grad_norm": 0.008676663972437382, "learning_rate": 3.5696856091435984e-06, "loss": 0.0001, "step": 19006 }, { "epoch": 18.275961538461537, "grad_norm": 0.0021646502427756786, "learning_rate": 3.5687316714759567e-06, "loss": 0.0, "step": 19007 }, { "epoch": 18.276923076923076, "grad_norm": 0.09255732595920563, "learning_rate": 3.567777833602725e-06, "loss": 0.0006, "step": 19008 }, { "epoch": 18.277884615384615, "grad_norm": 0.007781430147588253, "learning_rate": 3.5668240955387055e-06, "loss": 0.0001, "step": 19009 }, { "epoch": 18.278846153846153, "grad_norm": 0.00835320632904768, "learning_rate": 3.5658704572986913e-06, "loss": 0.0001, "step": 19010 }, { "epoch": 18.279807692307692, "grad_norm": 0.0034926559310406446, "learning_rate": 3.564916918897484e-06, "loss": 0.0, "step": 19011 }, { "epoch": 18.28076923076923, "grad_norm": 0.001993434503674507, "learning_rate": 3.5639634803498837e-06, "loss": 0.0, "step": 19012 }, { "epoch": 18.28173076923077, "grad_norm": 0.002991167362779379, "learning_rate": 3.5630101416706755e-06, "loss": 0.0, "step": 19013 }, { "epoch": 18.282692307692308, "grad_norm": 0.0057526929304003716, "learning_rate": 3.562056902874662e-06, "loss": 0.0001, "step": 19014 }, { "epoch": 18.283653846153847, "grad_norm": 0.002866547554731369, "learning_rate": 3.5611037639766267e-06, "loss": 0.0, "step": 19015 }, { "epoch": 18.284615384615385, "grad_norm": 0.007450582459568977, "learning_rate": 3.5601507249913625e-06, "loss": 0.0001, "step": 19016 }, { "epoch": 18.285576923076924, "grad_norm": 0.004173893481492996, "learning_rate": 3.5591977859336625e-06, "loss": 0.0001, "step": 19017 }, { "epoch": 18.286538461538463, "grad_norm": 0.03203574940562248, "learning_rate": 3.5582449468183057e-06, "loss": 0.0002, "step": 19018 }, { "epoch": 18.2875, "grad_norm": 0.059780072420835495, "learning_rate": 3.5572922076600814e-06, "loss": 0.0002, "step": 19019 }, { "epoch": 18.28846153846154, "grad_norm": 0.005245693027973175, "learning_rate": 3.5563395684737757e-06, "loss": 0.0001, "step": 19020 }, { "epoch": 18.289423076923075, "grad_norm": 0.0030634363647550344, "learning_rate": 3.5553870292741644e-06, "loss": 0.0, "step": 19021 }, { "epoch": 18.290384615384614, "grad_norm": 0.008571712300181389, "learning_rate": 3.5544345900760323e-06, "loss": 0.0001, "step": 19022 }, { "epoch": 18.291346153846153, "grad_norm": 0.0064891306683421135, "learning_rate": 3.553482250894161e-06, "loss": 0.0001, "step": 19023 }, { "epoch": 18.29230769230769, "grad_norm": 0.042006731033325195, "learning_rate": 3.5525300117433214e-06, "loss": 0.0002, "step": 19024 }, { "epoch": 18.29326923076923, "grad_norm": 0.004892620258033276, "learning_rate": 3.5515778726382967e-06, "loss": 0.0, "step": 19025 }, { "epoch": 18.29423076923077, "grad_norm": 2.132032632827759, "learning_rate": 3.550625833593854e-06, "loss": 0.0064, "step": 19026 }, { "epoch": 18.295192307692307, "grad_norm": 0.0038463568780571222, "learning_rate": 3.549673894624769e-06, "loss": 0.0, "step": 19027 }, { "epoch": 18.296153846153846, "grad_norm": 0.012366113252937794, "learning_rate": 3.548722055745818e-06, "loss": 0.0001, "step": 19028 }, { "epoch": 18.297115384615385, "grad_norm": 0.018686318770051003, "learning_rate": 3.5477703169717624e-06, "loss": 0.0001, "step": 19029 }, { "epoch": 18.298076923076923, "grad_norm": 0.015476030297577381, "learning_rate": 3.546818678317374e-06, "loss": 0.0001, "step": 19030 }, { "epoch": 18.299038461538462, "grad_norm": 0.018932905048131943, "learning_rate": 3.545867139797424e-06, "loss": 0.0001, "step": 19031 }, { "epoch": 18.3, "grad_norm": 0.42442527413368225, "learning_rate": 3.54491570142667e-06, "loss": 0.0017, "step": 19032 }, { "epoch": 18.30096153846154, "grad_norm": 0.009673715569078922, "learning_rate": 3.5439643632198795e-06, "loss": 0.0001, "step": 19033 }, { "epoch": 18.301923076923078, "grad_norm": 0.005472611170262098, "learning_rate": 3.5430131251918156e-06, "loss": 0.0, "step": 19034 }, { "epoch": 18.302884615384617, "grad_norm": 0.021419130265712738, "learning_rate": 3.5420619873572347e-06, "loss": 0.0001, "step": 19035 }, { "epoch": 18.303846153846155, "grad_norm": 0.8081759810447693, "learning_rate": 3.5411109497309006e-06, "loss": 0.0028, "step": 19036 }, { "epoch": 18.30480769230769, "grad_norm": 0.5112113356590271, "learning_rate": 3.540160012327565e-06, "loss": 0.0012, "step": 19037 }, { "epoch": 18.30576923076923, "grad_norm": 0.008292799815535545, "learning_rate": 3.5392091751619873e-06, "loss": 0.0001, "step": 19038 }, { "epoch": 18.306730769230768, "grad_norm": 0.004040335305035114, "learning_rate": 3.538258438248924e-06, "loss": 0.0, "step": 19039 }, { "epoch": 18.307692307692307, "grad_norm": 0.01422013808041811, "learning_rate": 3.5373078016031203e-06, "loss": 0.0001, "step": 19040 }, { "epoch": 18.308653846153845, "grad_norm": 0.006839903071522713, "learning_rate": 3.536357265239333e-06, "loss": 0.0001, "step": 19041 }, { "epoch": 18.309615384615384, "grad_norm": 0.0026355173904448748, "learning_rate": 3.5354068291723134e-06, "loss": 0.0, "step": 19042 }, { "epoch": 18.310576923076923, "grad_norm": 0.00392169039696455, "learning_rate": 3.5344564934168023e-06, "loss": 0.0, "step": 19043 }, { "epoch": 18.31153846153846, "grad_norm": 0.006524338386952877, "learning_rate": 3.5335062579875513e-06, "loss": 0.0001, "step": 19044 }, { "epoch": 18.3125, "grad_norm": 0.009318008087575436, "learning_rate": 3.5325561228993065e-06, "loss": 0.0001, "step": 19045 }, { "epoch": 18.31346153846154, "grad_norm": 0.010855902917683125, "learning_rate": 3.531606088166807e-06, "loss": 0.0001, "step": 19046 }, { "epoch": 18.314423076923077, "grad_norm": 0.07119834423065186, "learning_rate": 3.530656153804799e-06, "loss": 0.0001, "step": 19047 }, { "epoch": 18.315384615384616, "grad_norm": 0.0035279851872473955, "learning_rate": 3.5297063198280166e-06, "loss": 0.0, "step": 19048 }, { "epoch": 18.316346153846155, "grad_norm": 0.0034216083586215973, "learning_rate": 3.528756586251203e-06, "loss": 0.0, "step": 19049 }, { "epoch": 18.317307692307693, "grad_norm": 0.01575210876762867, "learning_rate": 3.527806953089097e-06, "loss": 0.0001, "step": 19050 }, { "epoch": 18.318269230769232, "grad_norm": 0.014344529248774052, "learning_rate": 3.526857420356428e-06, "loss": 0.0001, "step": 19051 }, { "epoch": 18.31923076923077, "grad_norm": 0.00856246892362833, "learning_rate": 3.5259079880679335e-06, "loss": 0.0, "step": 19052 }, { "epoch": 18.32019230769231, "grad_norm": 0.010050629265606403, "learning_rate": 3.524958656238349e-06, "loss": 0.0, "step": 19053 }, { "epoch": 18.321153846153845, "grad_norm": 0.006499101873487234, "learning_rate": 3.5240094248824e-06, "loss": 0.0001, "step": 19054 }, { "epoch": 18.322115384615383, "grad_norm": 0.006178240291774273, "learning_rate": 3.5230602940148163e-06, "loss": 0.0001, "step": 19055 }, { "epoch": 18.323076923076922, "grad_norm": 0.05950922146439552, "learning_rate": 3.5221112636503307e-06, "loss": 0.0003, "step": 19056 }, { "epoch": 18.32403846153846, "grad_norm": 0.0032516804058104753, "learning_rate": 3.521162333803664e-06, "loss": 0.0, "step": 19057 }, { "epoch": 18.325, "grad_norm": 0.011709794402122498, "learning_rate": 3.520213504489545e-06, "loss": 0.0001, "step": 19058 }, { "epoch": 18.325961538461538, "grad_norm": 0.0067247264087200165, "learning_rate": 3.5192647757226903e-06, "loss": 0.0001, "step": 19059 }, { "epoch": 18.326923076923077, "grad_norm": 0.0021960753947496414, "learning_rate": 3.5183161475178273e-06, "loss": 0.0, "step": 19060 }, { "epoch": 18.327884615384615, "grad_norm": 0.0012143644271418452, "learning_rate": 3.5173676198896768e-06, "loss": 0.0, "step": 19061 }, { "epoch": 18.328846153846154, "grad_norm": 0.0045632957480847836, "learning_rate": 3.5164191928529512e-06, "loss": 0.0, "step": 19062 }, { "epoch": 18.329807692307693, "grad_norm": 0.004087380133569241, "learning_rate": 3.51547086642237e-06, "loss": 0.0, "step": 19063 }, { "epoch": 18.33076923076923, "grad_norm": 0.019262030720710754, "learning_rate": 3.514522640612653e-06, "loss": 0.0002, "step": 19064 }, { "epoch": 18.33173076923077, "grad_norm": 0.005388061981648207, "learning_rate": 3.513574515438507e-06, "loss": 0.0001, "step": 19065 }, { "epoch": 18.33269230769231, "grad_norm": 0.016918150708079338, "learning_rate": 3.5126264909146467e-06, "loss": 0.0001, "step": 19066 }, { "epoch": 18.333653846153847, "grad_norm": 0.0021066772751510143, "learning_rate": 3.511678567055786e-06, "loss": 0.0, "step": 19067 }, { "epoch": 18.334615384615386, "grad_norm": 0.005693772342056036, "learning_rate": 3.5107307438766282e-06, "loss": 0.0, "step": 19068 }, { "epoch": 18.335576923076925, "grad_norm": 0.002296146936714649, "learning_rate": 3.509783021391887e-06, "loss": 0.0, "step": 19069 }, { "epoch": 18.33653846153846, "grad_norm": 0.005756719037890434, "learning_rate": 3.5088353996162605e-06, "loss": 0.0001, "step": 19070 }, { "epoch": 18.3375, "grad_norm": 0.1354738473892212, "learning_rate": 3.507887878564458e-06, "loss": 0.0004, "step": 19071 }, { "epoch": 18.338461538461537, "grad_norm": 0.0019888950046151876, "learning_rate": 3.5069404582511845e-06, "loss": 0.0, "step": 19072 }, { "epoch": 18.339423076923076, "grad_norm": 0.003487302688881755, "learning_rate": 3.505993138691135e-06, "loss": 0.0, "step": 19073 }, { "epoch": 18.340384615384615, "grad_norm": 0.054418351501226425, "learning_rate": 3.505045919899013e-06, "loss": 0.0002, "step": 19074 }, { "epoch": 18.341346153846153, "grad_norm": 0.04242326319217682, "learning_rate": 3.5040988018895182e-06, "loss": 0.0002, "step": 19075 }, { "epoch": 18.342307692307692, "grad_norm": 0.005100949201732874, "learning_rate": 3.503151784677341e-06, "loss": 0.0, "step": 19076 }, { "epoch": 18.34326923076923, "grad_norm": 0.011424620635807514, "learning_rate": 3.5022048682771814e-06, "loss": 0.0001, "step": 19077 }, { "epoch": 18.34423076923077, "grad_norm": 0.015008538030087948, "learning_rate": 3.501258052703734e-06, "loss": 0.0001, "step": 19078 }, { "epoch": 18.345192307692308, "grad_norm": 0.004353275988250971, "learning_rate": 3.5003113379716846e-06, "loss": 0.0, "step": 19079 }, { "epoch": 18.346153846153847, "grad_norm": 0.004859311506152153, "learning_rate": 3.4993647240957307e-06, "loss": 0.0, "step": 19080 }, { "epoch": 18.347115384615385, "grad_norm": 0.00921693630516529, "learning_rate": 3.498418211090554e-06, "loss": 0.0001, "step": 19081 }, { "epoch": 18.348076923076924, "grad_norm": 0.010897120460867882, "learning_rate": 3.497471798970844e-06, "loss": 0.0001, "step": 19082 }, { "epoch": 18.349038461538463, "grad_norm": 0.0020150926429778337, "learning_rate": 3.4965254877512912e-06, "loss": 0.0, "step": 19083 }, { "epoch": 18.35, "grad_norm": 0.007275245152413845, "learning_rate": 3.495579277446571e-06, "loss": 0.0, "step": 19084 }, { "epoch": 18.35096153846154, "grad_norm": 0.011617984622716904, "learning_rate": 3.4946331680713706e-06, "loss": 0.0, "step": 19085 }, { "epoch": 18.351923076923075, "grad_norm": 0.009326936677098274, "learning_rate": 3.4936871596403745e-06, "loss": 0.0001, "step": 19086 }, { "epoch": 18.352884615384614, "grad_norm": 0.008012006059288979, "learning_rate": 3.4927412521682535e-06, "loss": 0.0001, "step": 19087 }, { "epoch": 18.353846153846153, "grad_norm": 0.004361479543149471, "learning_rate": 3.49179544566969e-06, "loss": 0.0, "step": 19088 }, { "epoch": 18.35480769230769, "grad_norm": 0.02912004478275776, "learning_rate": 3.4908497401593634e-06, "loss": 0.0001, "step": 19089 }, { "epoch": 18.35576923076923, "grad_norm": 0.06533852964639664, "learning_rate": 3.4899041356519415e-06, "loss": 0.0002, "step": 19090 }, { "epoch": 18.35673076923077, "grad_norm": 0.009123556315898895, "learning_rate": 3.488958632162103e-06, "loss": 0.0001, "step": 19091 }, { "epoch": 18.357692307692307, "grad_norm": 0.01290862075984478, "learning_rate": 3.4880132297045145e-06, "loss": 0.0001, "step": 19092 }, { "epoch": 18.358653846153846, "grad_norm": 0.004255172796547413, "learning_rate": 3.487067928293848e-06, "loss": 0.0, "step": 19093 }, { "epoch": 18.359615384615385, "grad_norm": 0.016998160630464554, "learning_rate": 3.486122727944775e-06, "loss": 0.0001, "step": 19094 }, { "epoch": 18.360576923076923, "grad_norm": 0.013667864724993706, "learning_rate": 3.485177628671956e-06, "loss": 0.0, "step": 19095 }, { "epoch": 18.361538461538462, "grad_norm": 0.003640700364485383, "learning_rate": 3.48423263049006e-06, "loss": 0.0, "step": 19096 }, { "epoch": 18.3625, "grad_norm": 0.0038184935692697763, "learning_rate": 3.4832877334137528e-06, "loss": 0.0, "step": 19097 }, { "epoch": 18.36346153846154, "grad_norm": 0.09860909730195999, "learning_rate": 3.48234293745769e-06, "loss": 0.0003, "step": 19098 }, { "epoch": 18.364423076923078, "grad_norm": 0.010441171005368233, "learning_rate": 3.4813982426365355e-06, "loss": 0.0001, "step": 19099 }, { "epoch": 18.365384615384617, "grad_norm": 0.005458795931190252, "learning_rate": 3.480453648964953e-06, "loss": 0.0001, "step": 19100 }, { "epoch": 18.366346153846155, "grad_norm": 0.013278515078127384, "learning_rate": 3.479509156457591e-06, "loss": 0.0001, "step": 19101 }, { "epoch": 18.36730769230769, "grad_norm": 0.00680331327021122, "learning_rate": 3.47856476512911e-06, "loss": 0.0001, "step": 19102 }, { "epoch": 18.36826923076923, "grad_norm": 0.00986768864095211, "learning_rate": 3.4776204749941667e-06, "loss": 0.0001, "step": 19103 }, { "epoch": 18.369230769230768, "grad_norm": 0.0037853200919926167, "learning_rate": 3.4766762860674076e-06, "loss": 0.0, "step": 19104 }, { "epoch": 18.370192307692307, "grad_norm": 0.0061361826956272125, "learning_rate": 3.4757321983634895e-06, "loss": 0.0, "step": 19105 }, { "epoch": 18.371153846153845, "grad_norm": 0.005416503641754389, "learning_rate": 3.4747882118970565e-06, "loss": 0.0001, "step": 19106 }, { "epoch": 18.372115384615384, "grad_norm": 0.0056916349567472935, "learning_rate": 3.4738443266827583e-06, "loss": 0.0001, "step": 19107 }, { "epoch": 18.373076923076923, "grad_norm": 0.005736384075134993, "learning_rate": 3.4729005427352457e-06, "loss": 0.0001, "step": 19108 }, { "epoch": 18.37403846153846, "grad_norm": 0.00390702486038208, "learning_rate": 3.471956860069158e-06, "loss": 0.0, "step": 19109 }, { "epoch": 18.375, "grad_norm": 0.037430230528116226, "learning_rate": 3.471013278699139e-06, "loss": 0.0002, "step": 19110 }, { "epoch": 18.37596153846154, "grad_norm": 0.0061211190186440945, "learning_rate": 3.4700697986398347e-06, "loss": 0.0001, "step": 19111 }, { "epoch": 18.376923076923077, "grad_norm": 0.011304495856165886, "learning_rate": 3.4691264199058795e-06, "loss": 0.0001, "step": 19112 }, { "epoch": 18.377884615384616, "grad_norm": 0.003790404414758086, "learning_rate": 3.468183142511914e-06, "loss": 0.0, "step": 19113 }, { "epoch": 18.378846153846155, "grad_norm": 0.0041421991772949696, "learning_rate": 3.4672399664725787e-06, "loss": 0.0, "step": 19114 }, { "epoch": 18.379807692307693, "grad_norm": 0.0044548846781253815, "learning_rate": 3.466296891802502e-06, "loss": 0.0, "step": 19115 }, { "epoch": 18.380769230769232, "grad_norm": 0.011032437905669212, "learning_rate": 3.4653539185163253e-06, "loss": 0.0001, "step": 19116 }, { "epoch": 18.38173076923077, "grad_norm": 0.0042780437506735325, "learning_rate": 3.464411046628673e-06, "loss": 0.0, "step": 19117 }, { "epoch": 18.38269230769231, "grad_norm": 0.02169344387948513, "learning_rate": 3.4634682761541804e-06, "loss": 0.0001, "step": 19118 }, { "epoch": 18.383653846153845, "grad_norm": 0.01073381956666708, "learning_rate": 3.4625256071074776e-06, "loss": 0.0001, "step": 19119 }, { "epoch": 18.384615384615383, "grad_norm": 0.007256121840327978, "learning_rate": 3.461583039503188e-06, "loss": 0.0001, "step": 19120 }, { "epoch": 18.385576923076922, "grad_norm": 0.004106511827558279, "learning_rate": 3.460640573355939e-06, "loss": 0.0, "step": 19121 }, { "epoch": 18.38653846153846, "grad_norm": 0.004656861070543528, "learning_rate": 3.459698208680359e-06, "loss": 0.0, "step": 19122 }, { "epoch": 18.3875, "grad_norm": 0.005020581651479006, "learning_rate": 3.458755945491065e-06, "loss": 0.0, "step": 19123 }, { "epoch": 18.388461538461538, "grad_norm": 1.1290441751480103, "learning_rate": 3.4578137838026784e-06, "loss": 0.0123, "step": 19124 }, { "epoch": 18.389423076923077, "grad_norm": 0.04301862046122551, "learning_rate": 3.4568717236298257e-06, "loss": 0.0002, "step": 19125 }, { "epoch": 18.390384615384615, "grad_norm": 0.00479627400636673, "learning_rate": 3.455929764987116e-06, "loss": 0.0, "step": 19126 }, { "epoch": 18.391346153846154, "grad_norm": 0.0037428350187838078, "learning_rate": 3.4549879078891722e-06, "loss": 0.0001, "step": 19127 }, { "epoch": 18.392307692307693, "grad_norm": 0.011180195026099682, "learning_rate": 3.454046152350605e-06, "loss": 0.0001, "step": 19128 }, { "epoch": 18.39326923076923, "grad_norm": 0.004278943873941898, "learning_rate": 3.4531044983860285e-06, "loss": 0.0001, "step": 19129 }, { "epoch": 18.39423076923077, "grad_norm": 0.007062720134854317, "learning_rate": 3.452162946010059e-06, "loss": 0.0, "step": 19130 }, { "epoch": 18.39519230769231, "grad_norm": 0.052890896797180176, "learning_rate": 3.451221495237299e-06, "loss": 0.0002, "step": 19131 }, { "epoch": 18.396153846153847, "grad_norm": 0.011772464960813522, "learning_rate": 3.450280146082361e-06, "loss": 0.0001, "step": 19132 }, { "epoch": 18.397115384615386, "grad_norm": 1.0541255474090576, "learning_rate": 3.449338898559856e-06, "loss": 0.0067, "step": 19133 }, { "epoch": 18.398076923076925, "grad_norm": 0.10509517788887024, "learning_rate": 3.4483977526843816e-06, "loss": 0.0003, "step": 19134 }, { "epoch": 18.39903846153846, "grad_norm": 1.3609508275985718, "learning_rate": 3.4474567084705455e-06, "loss": 0.0046, "step": 19135 }, { "epoch": 18.4, "grad_norm": 0.0033973173703998327, "learning_rate": 3.446515765932953e-06, "loss": 0.0, "step": 19136 }, { "epoch": 18.400961538461537, "grad_norm": 0.007281671278178692, "learning_rate": 3.445574925086198e-06, "loss": 0.0001, "step": 19137 }, { "epoch": 18.401923076923076, "grad_norm": 0.015006228350102901, "learning_rate": 3.4446341859448863e-06, "loss": 0.0001, "step": 19138 }, { "epoch": 18.402884615384615, "grad_norm": 0.003947537858039141, "learning_rate": 3.44369354852361e-06, "loss": 0.0, "step": 19139 }, { "epoch": 18.403846153846153, "grad_norm": 0.003383863717317581, "learning_rate": 3.4427530128369657e-06, "loss": 0.0, "step": 19140 }, { "epoch": 18.404807692307692, "grad_norm": 0.013589254580438137, "learning_rate": 3.441812578899554e-06, "loss": 0.0001, "step": 19141 }, { "epoch": 18.40576923076923, "grad_norm": 0.02547815442085266, "learning_rate": 3.440872246725959e-06, "loss": 0.0002, "step": 19142 }, { "epoch": 18.40673076923077, "grad_norm": 0.0022286796011030674, "learning_rate": 3.439932016330777e-06, "loss": 0.0, "step": 19143 }, { "epoch": 18.407692307692308, "grad_norm": 0.010050946846604347, "learning_rate": 3.438991887728599e-06, "loss": 0.0, "step": 19144 }, { "epoch": 18.408653846153847, "grad_norm": 0.008858375251293182, "learning_rate": 3.4380518609340076e-06, "loss": 0.0001, "step": 19145 }, { "epoch": 18.409615384615385, "grad_norm": 0.006072836462408304, "learning_rate": 3.437111935961591e-06, "loss": 0.0001, "step": 19146 }, { "epoch": 18.410576923076924, "grad_norm": 0.010889616794884205, "learning_rate": 3.4361721128259396e-06, "loss": 0.0001, "step": 19147 }, { "epoch": 18.411538461538463, "grad_norm": 0.011632407084107399, "learning_rate": 3.4352323915416297e-06, "loss": 0.0001, "step": 19148 }, { "epoch": 18.4125, "grad_norm": 0.00527683412656188, "learning_rate": 3.434292772123248e-06, "loss": 0.0, "step": 19149 }, { "epoch": 18.41346153846154, "grad_norm": 0.0011390707222744823, "learning_rate": 3.4333532545853697e-06, "loss": 0.0, "step": 19150 }, { "epoch": 18.414423076923075, "grad_norm": 0.0028439133893698454, "learning_rate": 3.4324138389425753e-06, "loss": 0.0, "step": 19151 }, { "epoch": 18.415384615384614, "grad_norm": 0.011633312329649925, "learning_rate": 3.431474525209446e-06, "loss": 0.0001, "step": 19152 }, { "epoch": 18.416346153846153, "grad_norm": 0.013880543410778046, "learning_rate": 3.4305353134005514e-06, "loss": 0.0001, "step": 19153 }, { "epoch": 18.41730769230769, "grad_norm": 0.03226662054657936, "learning_rate": 3.4295962035304663e-06, "loss": 0.0001, "step": 19154 }, { "epoch": 18.41826923076923, "grad_norm": 0.0037898856680840254, "learning_rate": 3.4286571956137683e-06, "loss": 0.0, "step": 19155 }, { "epoch": 18.41923076923077, "grad_norm": 0.04528987407684326, "learning_rate": 3.427718289665021e-06, "loss": 0.0004, "step": 19156 }, { "epoch": 18.420192307692307, "grad_norm": 0.00892900861799717, "learning_rate": 3.426779485698796e-06, "loss": 0.0001, "step": 19157 }, { "epoch": 18.421153846153846, "grad_norm": 0.014754301868379116, "learning_rate": 3.4258407837296635e-06, "loss": 0.0001, "step": 19158 }, { "epoch": 18.422115384615385, "grad_norm": 0.04982955753803253, "learning_rate": 3.4249021837721853e-06, "loss": 0.0002, "step": 19159 }, { "epoch": 18.423076923076923, "grad_norm": 0.009875200688838959, "learning_rate": 3.4239636858409297e-06, "loss": 0.0001, "step": 19160 }, { "epoch": 18.424038461538462, "grad_norm": 0.004326935391873121, "learning_rate": 3.4230252899504546e-06, "loss": 0.0, "step": 19161 }, { "epoch": 18.425, "grad_norm": 0.010147696360945702, "learning_rate": 3.4220869961153236e-06, "loss": 0.0001, "step": 19162 }, { "epoch": 18.42596153846154, "grad_norm": 0.002186289755627513, "learning_rate": 3.4211488043500996e-06, "loss": 0.0, "step": 19163 }, { "epoch": 18.426923076923078, "grad_norm": 0.003154414240270853, "learning_rate": 3.4202107146693343e-06, "loss": 0.0, "step": 19164 }, { "epoch": 18.427884615384617, "grad_norm": 0.0019475938752293587, "learning_rate": 3.419272727087587e-06, "loss": 0.0, "step": 19165 }, { "epoch": 18.428846153846155, "grad_norm": 0.004718503914773464, "learning_rate": 3.4183348416194174e-06, "loss": 0.0, "step": 19166 }, { "epoch": 18.42980769230769, "grad_norm": 0.00280547677539289, "learning_rate": 3.417397058279369e-06, "loss": 0.0, "step": 19167 }, { "epoch": 18.43076923076923, "grad_norm": 0.004362011794000864, "learning_rate": 3.4164593770819997e-06, "loss": 0.0, "step": 19168 }, { "epoch": 18.431730769230768, "grad_norm": 0.010122296400368214, "learning_rate": 3.415521798041861e-06, "loss": 0.0001, "step": 19169 }, { "epoch": 18.432692307692307, "grad_norm": 0.31426355242729187, "learning_rate": 3.4145843211734966e-06, "loss": 0.0009, "step": 19170 }, { "epoch": 18.433653846153845, "grad_norm": 0.006256815046072006, "learning_rate": 3.413646946491458e-06, "loss": 0.0001, "step": 19171 }, { "epoch": 18.434615384615384, "grad_norm": 0.007729833479970694, "learning_rate": 3.4127096740102848e-06, "loss": 0.0, "step": 19172 }, { "epoch": 18.435576923076923, "grad_norm": 0.0065248520113527775, "learning_rate": 3.4117725037445237e-06, "loss": 0.0001, "step": 19173 }, { "epoch": 18.43653846153846, "grad_norm": 0.004238903988152742, "learning_rate": 3.4108354357087213e-06, "loss": 0.0, "step": 19174 }, { "epoch": 18.4375, "grad_norm": 0.0038619409315288067, "learning_rate": 3.40989846991741e-06, "loss": 0.0, "step": 19175 }, { "epoch": 18.43846153846154, "grad_norm": 0.005768647883087397, "learning_rate": 3.408961606385133e-06, "loss": 0.0001, "step": 19176 }, { "epoch": 18.439423076923077, "grad_norm": 0.008251654915511608, "learning_rate": 3.4080248451264307e-06, "loss": 0.0001, "step": 19177 }, { "epoch": 18.440384615384616, "grad_norm": 0.0016137370839715004, "learning_rate": 3.4070881861558326e-06, "loss": 0.0, "step": 19178 }, { "epoch": 18.441346153846155, "grad_norm": 0.00865291990339756, "learning_rate": 3.4061516294878747e-06, "loss": 0.0001, "step": 19179 }, { "epoch": 18.442307692307693, "grad_norm": 0.001936224289238453, "learning_rate": 3.4052151751370954e-06, "loss": 0.0, "step": 19180 }, { "epoch": 18.443269230769232, "grad_norm": 0.0016963024390861392, "learning_rate": 3.404278823118017e-06, "loss": 0.0, "step": 19181 }, { "epoch": 18.44423076923077, "grad_norm": 0.013843486085534096, "learning_rate": 3.4033425734451762e-06, "loss": 0.0001, "step": 19182 }, { "epoch": 18.44519230769231, "grad_norm": 0.006977410987019539, "learning_rate": 3.4024064261330956e-06, "loss": 0.0, "step": 19183 }, { "epoch": 18.446153846153845, "grad_norm": 0.012729091569781303, "learning_rate": 3.4014703811963024e-06, "loss": 0.0001, "step": 19184 }, { "epoch": 18.447115384615383, "grad_norm": 0.0029907075222581625, "learning_rate": 3.4005344386493266e-06, "loss": 0.0, "step": 19185 }, { "epoch": 18.448076923076922, "grad_norm": 0.0074907392263412476, "learning_rate": 3.399598598506684e-06, "loss": 0.0001, "step": 19186 }, { "epoch": 18.44903846153846, "grad_norm": 0.4256020188331604, "learning_rate": 3.3986628607828974e-06, "loss": 0.0011, "step": 19187 }, { "epoch": 18.45, "grad_norm": 0.0039735157042741776, "learning_rate": 3.397727225492493e-06, "loss": 0.0, "step": 19188 }, { "epoch": 18.450961538461538, "grad_norm": 0.003890431020408869, "learning_rate": 3.3967916926499823e-06, "loss": 0.0, "step": 19189 }, { "epoch": 18.451923076923077, "grad_norm": 0.00707230856642127, "learning_rate": 3.3958562622698832e-06, "loss": 0.0001, "step": 19190 }, { "epoch": 18.452884615384615, "grad_norm": 0.0023665486369282007, "learning_rate": 3.3949209343667155e-06, "loss": 0.0, "step": 19191 }, { "epoch": 18.453846153846154, "grad_norm": 0.008592018857598305, "learning_rate": 3.3939857089549855e-06, "loss": 0.0001, "step": 19192 }, { "epoch": 18.454807692307693, "grad_norm": 0.15791773796081543, "learning_rate": 3.393050586049209e-06, "loss": 0.0003, "step": 19193 }, { "epoch": 18.45576923076923, "grad_norm": 0.007390156388282776, "learning_rate": 3.3921155656639003e-06, "loss": 0.0, "step": 19194 }, { "epoch": 18.45673076923077, "grad_norm": 0.027012160047888756, "learning_rate": 3.39118064781356e-06, "loss": 0.0001, "step": 19195 }, { "epoch": 18.45769230769231, "grad_norm": 2.979508876800537, "learning_rate": 3.390245832512703e-06, "loss": 0.0808, "step": 19196 }, { "epoch": 18.458653846153847, "grad_norm": 0.031045764684677124, "learning_rate": 3.3893111197758276e-06, "loss": 0.0001, "step": 19197 }, { "epoch": 18.459615384615386, "grad_norm": 0.004095798823982477, "learning_rate": 3.3883765096174424e-06, "loss": 0.0001, "step": 19198 }, { "epoch": 18.460576923076925, "grad_norm": 0.008882210589945316, "learning_rate": 3.3874420020520515e-06, "loss": 0.0001, "step": 19199 }, { "epoch": 18.46153846153846, "grad_norm": 0.18791604042053223, "learning_rate": 3.38650759709415e-06, "loss": 0.0007, "step": 19200 }, { "epoch": 18.4625, "grad_norm": 0.027211779728531837, "learning_rate": 3.38557329475824e-06, "loss": 0.0001, "step": 19201 }, { "epoch": 18.463461538461537, "grad_norm": 0.6311652064323425, "learning_rate": 3.3846390950588224e-06, "loss": 0.0018, "step": 19202 }, { "epoch": 18.464423076923076, "grad_norm": 0.004646033979952335, "learning_rate": 3.3837049980103876e-06, "loss": 0.0, "step": 19203 }, { "epoch": 18.465384615384615, "grad_norm": 0.02088581956923008, "learning_rate": 3.382771003627433e-06, "loss": 0.0001, "step": 19204 }, { "epoch": 18.466346153846153, "grad_norm": 0.005855717230588198, "learning_rate": 3.381837111924453e-06, "loss": 0.0001, "step": 19205 }, { "epoch": 18.467307692307692, "grad_norm": 0.004698530770838261, "learning_rate": 3.380903322915935e-06, "loss": 0.0001, "step": 19206 }, { "epoch": 18.46826923076923, "grad_norm": 1.5155912637710571, "learning_rate": 3.379969636616374e-06, "loss": 0.007, "step": 19207 }, { "epoch": 18.46923076923077, "grad_norm": 0.00537902070209384, "learning_rate": 3.37903605304025e-06, "loss": 0.0, "step": 19208 }, { "epoch": 18.470192307692308, "grad_norm": 0.6314784288406372, "learning_rate": 3.3781025722020545e-06, "loss": 0.0011, "step": 19209 }, { "epoch": 18.471153846153847, "grad_norm": 0.007288683205842972, "learning_rate": 3.3771691941162755e-06, "loss": 0.0001, "step": 19210 }, { "epoch": 18.472115384615385, "grad_norm": 0.19027219712734222, "learning_rate": 3.3762359187973905e-06, "loss": 0.0004, "step": 19211 }, { "epoch": 18.473076923076924, "grad_norm": 0.0057531255297362804, "learning_rate": 3.375302746259882e-06, "loss": 0.0001, "step": 19212 }, { "epoch": 18.474038461538463, "grad_norm": 0.57675701379776, "learning_rate": 3.3743696765182367e-06, "loss": 0.0025, "step": 19213 }, { "epoch": 18.475, "grad_norm": 0.004061057697981596, "learning_rate": 3.3734367095869238e-06, "loss": 0.0, "step": 19214 }, { "epoch": 18.47596153846154, "grad_norm": 0.014169996604323387, "learning_rate": 3.3725038454804237e-06, "loss": 0.0001, "step": 19215 }, { "epoch": 18.476923076923075, "grad_norm": 0.0044618588872253895, "learning_rate": 3.3715710842132166e-06, "loss": 0.0, "step": 19216 }, { "epoch": 18.477884615384614, "grad_norm": 0.9644813537597656, "learning_rate": 3.370638425799768e-06, "loss": 0.0015, "step": 19217 }, { "epoch": 18.478846153846153, "grad_norm": 0.0241176038980484, "learning_rate": 3.3697058702545584e-06, "loss": 0.0001, "step": 19218 }, { "epoch": 18.47980769230769, "grad_norm": 0.11100735515356064, "learning_rate": 3.3687734175920505e-06, "loss": 0.0003, "step": 19219 }, { "epoch": 18.48076923076923, "grad_norm": 0.025362960994243622, "learning_rate": 3.3678410678267158e-06, "loss": 0.0002, "step": 19220 }, { "epoch": 18.48173076923077, "grad_norm": 0.005838335957378149, "learning_rate": 3.366908820973027e-06, "loss": 0.0001, "step": 19221 }, { "epoch": 18.482692307692307, "grad_norm": 0.12276343256235123, "learning_rate": 3.3659766770454417e-06, "loss": 0.0003, "step": 19222 }, { "epoch": 18.483653846153846, "grad_norm": 0.00889181811362505, "learning_rate": 3.3650446360584276e-06, "loss": 0.0001, "step": 19223 }, { "epoch": 18.484615384615385, "grad_norm": 0.0014462198596447706, "learning_rate": 3.3641126980264505e-06, "loss": 0.0, "step": 19224 }, { "epoch": 18.485576923076923, "grad_norm": 0.02642114832997322, "learning_rate": 3.3631808629639663e-06, "loss": 0.0001, "step": 19225 }, { "epoch": 18.486538461538462, "grad_norm": 0.03286409005522728, "learning_rate": 3.3622491308854343e-06, "loss": 0.0001, "step": 19226 }, { "epoch": 18.4875, "grad_norm": 0.0024230668786913157, "learning_rate": 3.361317501805319e-06, "loss": 0.0, "step": 19227 }, { "epoch": 18.48846153846154, "grad_norm": 0.004920211620628834, "learning_rate": 3.3603859757380673e-06, "loss": 0.0, "step": 19228 }, { "epoch": 18.489423076923078, "grad_norm": 0.004961241502314806, "learning_rate": 3.3594545526981424e-06, "loss": 0.0001, "step": 19229 }, { "epoch": 18.490384615384617, "grad_norm": 0.00561382295563817, "learning_rate": 3.3585232326999894e-06, "loss": 0.0, "step": 19230 }, { "epoch": 18.491346153846155, "grad_norm": 0.006957563105970621, "learning_rate": 3.357592015758064e-06, "loss": 0.0001, "step": 19231 }, { "epoch": 18.49230769230769, "grad_norm": 0.002428224077448249, "learning_rate": 3.356660901886818e-06, "loss": 0.0, "step": 19232 }, { "epoch": 18.49326923076923, "grad_norm": 0.004523518029600382, "learning_rate": 3.355729891100693e-06, "loss": 0.0, "step": 19233 }, { "epoch": 18.494230769230768, "grad_norm": 0.022556448355317116, "learning_rate": 3.3547989834141405e-06, "loss": 0.0001, "step": 19234 }, { "epoch": 18.495192307692307, "grad_norm": 0.04178175330162048, "learning_rate": 3.353868178841607e-06, "loss": 0.0002, "step": 19235 }, { "epoch": 18.496153846153845, "grad_norm": 0.0021765846759080887, "learning_rate": 3.35293747739753e-06, "loss": 0.0, "step": 19236 }, { "epoch": 18.497115384615384, "grad_norm": 0.004452144727110863, "learning_rate": 3.3520068790963546e-06, "loss": 0.0, "step": 19237 }, { "epoch": 18.498076923076923, "grad_norm": 0.003139528911560774, "learning_rate": 3.351076383952524e-06, "loss": 0.0, "step": 19238 }, { "epoch": 18.49903846153846, "grad_norm": 0.004002004396170378, "learning_rate": 3.3501459919804703e-06, "loss": 0.0, "step": 19239 }, { "epoch": 18.5, "grad_norm": 0.00801677443087101, "learning_rate": 3.349215703194637e-06, "loss": 0.0001, "step": 19240 }, { "epoch": 18.50096153846154, "grad_norm": 0.09690989553928375, "learning_rate": 3.348285517609453e-06, "loss": 0.0002, "step": 19241 }, { "epoch": 18.501923076923077, "grad_norm": 0.004144091159105301, "learning_rate": 3.3473554352393543e-06, "loss": 0.0, "step": 19242 }, { "epoch": 18.502884615384616, "grad_norm": 0.012416298501193523, "learning_rate": 3.3464254560987787e-06, "loss": 0.0001, "step": 19243 }, { "epoch": 18.503846153846155, "grad_norm": 0.03226780518889427, "learning_rate": 3.3454955802021483e-06, "loss": 0.0002, "step": 19244 }, { "epoch": 18.504807692307693, "grad_norm": 0.006029238924384117, "learning_rate": 3.3445658075638955e-06, "loss": 0.0001, "step": 19245 }, { "epoch": 18.505769230769232, "grad_norm": 0.003163987537845969, "learning_rate": 3.3436361381984506e-06, "loss": 0.0, "step": 19246 }, { "epoch": 18.50673076923077, "grad_norm": 0.004032443277537823, "learning_rate": 3.3427065721202345e-06, "loss": 0.0, "step": 19247 }, { "epoch": 18.50769230769231, "grad_norm": 0.004862519912421703, "learning_rate": 3.3417771093436724e-06, "loss": 0.0, "step": 19248 }, { "epoch": 18.508653846153845, "grad_norm": 0.004686207510530949, "learning_rate": 3.3408477498831917e-06, "loss": 0.0001, "step": 19249 }, { "epoch": 18.509615384615383, "grad_norm": 0.22164970636367798, "learning_rate": 3.339918493753206e-06, "loss": 0.001, "step": 19250 }, { "epoch": 18.510576923076922, "grad_norm": 0.005035704467445612, "learning_rate": 3.3389893409681417e-06, "loss": 0.0, "step": 19251 }, { "epoch": 18.51153846153846, "grad_norm": 0.10025427490472794, "learning_rate": 3.3380602915424097e-06, "loss": 0.0003, "step": 19252 }, { "epoch": 18.5125, "grad_norm": 0.00975970271974802, "learning_rate": 3.33713134549043e-06, "loss": 0.0001, "step": 19253 }, { "epoch": 18.513461538461538, "grad_norm": 0.007167477160692215, "learning_rate": 3.336202502826619e-06, "loss": 0.0, "step": 19254 }, { "epoch": 18.514423076923077, "grad_norm": 0.005341215059161186, "learning_rate": 3.335273763565384e-06, "loss": 0.0001, "step": 19255 }, { "epoch": 18.515384615384615, "grad_norm": 0.009284122847020626, "learning_rate": 3.3343451277211405e-06, "loss": 0.0001, "step": 19256 }, { "epoch": 18.516346153846154, "grad_norm": 0.02991511858999729, "learning_rate": 3.3334165953082987e-06, "loss": 0.0001, "step": 19257 }, { "epoch": 18.517307692307693, "grad_norm": 0.005216042511165142, "learning_rate": 3.3324881663412636e-06, "loss": 0.0, "step": 19258 }, { "epoch": 18.51826923076923, "grad_norm": 0.010657103732228279, "learning_rate": 3.331559840834442e-06, "loss": 0.0001, "step": 19259 }, { "epoch": 18.51923076923077, "grad_norm": 0.004922415129840374, "learning_rate": 3.3306316188022436e-06, "loss": 0.0, "step": 19260 }, { "epoch": 18.52019230769231, "grad_norm": 0.008027461357414722, "learning_rate": 3.329703500259065e-06, "loss": 0.0001, "step": 19261 }, { "epoch": 18.521153846153847, "grad_norm": 0.0027698827907443047, "learning_rate": 3.3287754852193143e-06, "loss": 0.0, "step": 19262 }, { "epoch": 18.522115384615386, "grad_norm": 0.0028072907589375973, "learning_rate": 3.327847573697385e-06, "loss": 0.0, "step": 19263 }, { "epoch": 18.523076923076925, "grad_norm": 2.251065492630005, "learning_rate": 3.326919765707678e-06, "loss": 0.0072, "step": 19264 }, { "epoch": 18.52403846153846, "grad_norm": 0.004583652131259441, "learning_rate": 3.3259920612645948e-06, "loss": 0.0, "step": 19265 }, { "epoch": 18.525, "grad_norm": 0.004883281420916319, "learning_rate": 3.3250644603825243e-06, "loss": 0.0001, "step": 19266 }, { "epoch": 18.525961538461537, "grad_norm": 0.008017580956220627, "learning_rate": 3.324136963075861e-06, "loss": 0.0001, "step": 19267 }, { "epoch": 18.526923076923076, "grad_norm": 0.004099899437278509, "learning_rate": 3.323209569359004e-06, "loss": 0.0, "step": 19268 }, { "epoch": 18.527884615384615, "grad_norm": 0.03266466036438942, "learning_rate": 3.322282279246334e-06, "loss": 0.0002, "step": 19269 }, { "epoch": 18.528846153846153, "grad_norm": 0.02249407395720482, "learning_rate": 3.3213550927522443e-06, "loss": 0.0001, "step": 19270 }, { "epoch": 18.529807692307692, "grad_norm": 0.016166236251592636, "learning_rate": 3.3204280098911247e-06, "loss": 0.0001, "step": 19271 }, { "epoch": 18.53076923076923, "grad_norm": 0.013624690473079681, "learning_rate": 3.3195010306773556e-06, "loss": 0.0001, "step": 19272 }, { "epoch": 18.53173076923077, "grad_norm": 0.006160484626889229, "learning_rate": 3.3185741551253226e-06, "loss": 0.0, "step": 19273 }, { "epoch": 18.532692307692308, "grad_norm": 0.009330556727945805, "learning_rate": 3.317647383249413e-06, "loss": 0.0, "step": 19274 }, { "epoch": 18.533653846153847, "grad_norm": 0.00475068436935544, "learning_rate": 3.3167207150640003e-06, "loss": 0.0, "step": 19275 }, { "epoch": 18.534615384615385, "grad_norm": 0.008848868310451508, "learning_rate": 3.315794150583469e-06, "loss": 0.0001, "step": 19276 }, { "epoch": 18.535576923076924, "grad_norm": 0.003864237107336521, "learning_rate": 3.3148676898221922e-06, "loss": 0.0, "step": 19277 }, { "epoch": 18.536538461538463, "grad_norm": 0.0033654943108558655, "learning_rate": 3.313941332794548e-06, "loss": 0.0, "step": 19278 }, { "epoch": 18.5375, "grad_norm": 0.10857480019330978, "learning_rate": 3.313015079514915e-06, "loss": 0.0003, "step": 19279 }, { "epoch": 18.53846153846154, "grad_norm": 0.0021425746381282806, "learning_rate": 3.312088929997658e-06, "loss": 0.0, "step": 19280 }, { "epoch": 18.539423076923075, "grad_norm": 0.016329441219568253, "learning_rate": 3.3111628842571518e-06, "loss": 0.0001, "step": 19281 }, { "epoch": 18.540384615384614, "grad_norm": 0.008324786089360714, "learning_rate": 3.3102369423077695e-06, "loss": 0.0, "step": 19282 }, { "epoch": 18.541346153846153, "grad_norm": 0.001143630244769156, "learning_rate": 3.3093111041638728e-06, "loss": 0.0, "step": 19283 }, { "epoch": 18.54230769230769, "grad_norm": 0.026115410029888153, "learning_rate": 3.3083853698398303e-06, "loss": 0.0001, "step": 19284 }, { "epoch": 18.54326923076923, "grad_norm": 0.005957100074738264, "learning_rate": 3.3074597393500116e-06, "loss": 0.0001, "step": 19285 }, { "epoch": 18.54423076923077, "grad_norm": 0.004781884141266346, "learning_rate": 3.3065342127087705e-06, "loss": 0.0001, "step": 19286 }, { "epoch": 18.545192307692307, "grad_norm": 0.006358637474477291, "learning_rate": 3.3056087899304777e-06, "loss": 0.0001, "step": 19287 }, { "epoch": 18.546153846153846, "grad_norm": 0.009762672707438469, "learning_rate": 3.304683471029485e-06, "loss": 0.0001, "step": 19288 }, { "epoch": 18.547115384615385, "grad_norm": 0.004723724909126759, "learning_rate": 3.303758256020154e-06, "loss": 0.0001, "step": 19289 }, { "epoch": 18.548076923076923, "grad_norm": 0.007392875850200653, "learning_rate": 3.302833144916846e-06, "loss": 0.0001, "step": 19290 }, { "epoch": 18.549038461538462, "grad_norm": 0.00579047529026866, "learning_rate": 3.3019081377339077e-06, "loss": 0.0001, "step": 19291 }, { "epoch": 18.55, "grad_norm": 0.0033053099177777767, "learning_rate": 3.300983234485696e-06, "loss": 0.0, "step": 19292 }, { "epoch": 18.55096153846154, "grad_norm": 0.014752845279872417, "learning_rate": 3.300058435186567e-06, "loss": 0.0001, "step": 19293 }, { "epoch": 18.551923076923078, "grad_norm": 0.0006859844434075058, "learning_rate": 3.2991337398508636e-06, "loss": 0.0, "step": 19294 }, { "epoch": 18.552884615384617, "grad_norm": 0.00564850028604269, "learning_rate": 3.2982091484929378e-06, "loss": 0.0, "step": 19295 }, { "epoch": 18.553846153846155, "grad_norm": 0.007287955842912197, "learning_rate": 3.297284661127139e-06, "loss": 0.0001, "step": 19296 }, { "epoch": 18.55480769230769, "grad_norm": 0.0030225333757698536, "learning_rate": 3.2963602777678084e-06, "loss": 0.0, "step": 19297 }, { "epoch": 18.55576923076923, "grad_norm": 0.004138506017625332, "learning_rate": 3.295435998429294e-06, "loss": 0.0, "step": 19298 }, { "epoch": 18.556730769230768, "grad_norm": 0.005577226169407368, "learning_rate": 3.294511823125931e-06, "loss": 0.0001, "step": 19299 }, { "epoch": 18.557692307692307, "grad_norm": 0.11977527290582657, "learning_rate": 3.2935877518720647e-06, "loss": 0.0002, "step": 19300 }, { "epoch": 18.558653846153845, "grad_norm": 0.01267162710428238, "learning_rate": 3.2926637846820366e-06, "loss": 0.0001, "step": 19301 }, { "epoch": 18.559615384615384, "grad_norm": 0.020253242924809456, "learning_rate": 3.2917399215701773e-06, "loss": 0.0001, "step": 19302 }, { "epoch": 18.560576923076923, "grad_norm": 0.002969274064525962, "learning_rate": 3.2908161625508262e-06, "loss": 0.0, "step": 19303 }, { "epoch": 18.56153846153846, "grad_norm": 0.01890375465154648, "learning_rate": 3.2898925076383203e-06, "loss": 0.0001, "step": 19304 }, { "epoch": 18.5625, "grad_norm": 0.009355949237942696, "learning_rate": 3.288968956846986e-06, "loss": 0.0001, "step": 19305 }, { "epoch": 18.56346153846154, "grad_norm": 0.010747256688773632, "learning_rate": 3.2880455101911557e-06, "loss": 0.0001, "step": 19306 }, { "epoch": 18.564423076923077, "grad_norm": 2.1051361560821533, "learning_rate": 3.287122167685164e-06, "loss": 0.0059, "step": 19307 }, { "epoch": 18.565384615384616, "grad_norm": 0.006151570938527584, "learning_rate": 3.2861989293433306e-06, "loss": 0.0, "step": 19308 }, { "epoch": 18.566346153846155, "grad_norm": 0.003535742638632655, "learning_rate": 3.2852757951799887e-06, "loss": 0.0, "step": 19309 }, { "epoch": 18.567307692307693, "grad_norm": 0.0017465202836319804, "learning_rate": 3.284352765209452e-06, "loss": 0.0, "step": 19310 }, { "epoch": 18.568269230769232, "grad_norm": 0.005695065949112177, "learning_rate": 3.2834298394460528e-06, "loss": 0.0, "step": 19311 }, { "epoch": 18.56923076923077, "grad_norm": 0.00504051661118865, "learning_rate": 3.282507017904114e-06, "loss": 0.0001, "step": 19312 }, { "epoch": 18.57019230769231, "grad_norm": 0.001972018741071224, "learning_rate": 3.2815843005979475e-06, "loss": 0.0, "step": 19313 }, { "epoch": 18.571153846153845, "grad_norm": 0.00645799096673727, "learning_rate": 3.280661687541876e-06, "loss": 0.0, "step": 19314 }, { "epoch": 18.572115384615383, "grad_norm": 0.012827972881495953, "learning_rate": 3.279739178750212e-06, "loss": 0.0001, "step": 19315 }, { "epoch": 18.573076923076922, "grad_norm": 0.0023989372421056032, "learning_rate": 3.2788167742372725e-06, "loss": 0.0, "step": 19316 }, { "epoch": 18.57403846153846, "grad_norm": 0.008142895065248013, "learning_rate": 3.2778944740173736e-06, "loss": 0.0001, "step": 19317 }, { "epoch": 18.575, "grad_norm": 0.003824524814262986, "learning_rate": 3.2769722781048196e-06, "loss": 0.0, "step": 19318 }, { "epoch": 18.575961538461538, "grad_norm": 0.0038407130632549524, "learning_rate": 3.276050186513924e-06, "loss": 0.0, "step": 19319 }, { "epoch": 18.576923076923077, "grad_norm": 0.02248893678188324, "learning_rate": 3.275128199258998e-06, "loss": 0.0001, "step": 19320 }, { "epoch": 18.577884615384615, "grad_norm": 0.018491724506020546, "learning_rate": 3.2742063163543414e-06, "loss": 0.0001, "step": 19321 }, { "epoch": 18.578846153846154, "grad_norm": 0.0037090806290507317, "learning_rate": 3.273284537814264e-06, "loss": 0.0, "step": 19322 }, { "epoch": 18.579807692307693, "grad_norm": 0.006282416637986898, "learning_rate": 3.2723628636530703e-06, "loss": 0.0001, "step": 19323 }, { "epoch": 18.58076923076923, "grad_norm": 0.0067070177756249905, "learning_rate": 3.271441293885057e-06, "loss": 0.0, "step": 19324 }, { "epoch": 18.58173076923077, "grad_norm": 0.03846802935004234, "learning_rate": 3.2705198285245288e-06, "loss": 0.0001, "step": 19325 }, { "epoch": 18.58269230769231, "grad_norm": 1.7920154333114624, "learning_rate": 3.2695984675857797e-06, "loss": 0.0043, "step": 19326 }, { "epoch": 18.583653846153847, "grad_norm": 0.00825237762182951, "learning_rate": 3.268677211083109e-06, "loss": 0.0001, "step": 19327 }, { "epoch": 18.584615384615386, "grad_norm": 0.005515908822417259, "learning_rate": 3.2677560590308145e-06, "loss": 0.0001, "step": 19328 }, { "epoch": 18.585576923076925, "grad_norm": 0.011865093372762203, "learning_rate": 3.2668350114431844e-06, "loss": 0.0001, "step": 19329 }, { "epoch": 18.58653846153846, "grad_norm": 0.0052830227650702, "learning_rate": 3.2659140683345125e-06, "loss": 0.0001, "step": 19330 }, { "epoch": 18.5875, "grad_norm": 0.005364842247217894, "learning_rate": 3.2649932297190945e-06, "loss": 0.0001, "step": 19331 }, { "epoch": 18.588461538461537, "grad_norm": 0.0029752205591648817, "learning_rate": 3.26407249561121e-06, "loss": 0.0, "step": 19332 }, { "epoch": 18.589423076923076, "grad_norm": 0.006003377493470907, "learning_rate": 3.2631518660251514e-06, "loss": 0.0001, "step": 19333 }, { "epoch": 18.590384615384615, "grad_norm": 0.008642733097076416, "learning_rate": 3.2622313409752073e-06, "loss": 0.0001, "step": 19334 }, { "epoch": 18.591346153846153, "grad_norm": 0.018677329644560814, "learning_rate": 3.2613109204756533e-06, "loss": 0.0001, "step": 19335 }, { "epoch": 18.592307692307692, "grad_norm": 0.0049032727256417274, "learning_rate": 3.2603906045407797e-06, "loss": 0.0001, "step": 19336 }, { "epoch": 18.59326923076923, "grad_norm": 0.003967689350247383, "learning_rate": 3.2594703931848594e-06, "loss": 0.0, "step": 19337 }, { "epoch": 18.59423076923077, "grad_norm": 0.0025885202921926975, "learning_rate": 3.2585502864221764e-06, "loss": 0.0, "step": 19338 }, { "epoch": 18.595192307692308, "grad_norm": 0.04947485029697418, "learning_rate": 3.2576302842670095e-06, "loss": 0.0001, "step": 19339 }, { "epoch": 18.596153846153847, "grad_norm": 0.008221806026995182, "learning_rate": 3.256710386733629e-06, "loss": 0.0001, "step": 19340 }, { "epoch": 18.597115384615385, "grad_norm": 0.002329588634893298, "learning_rate": 3.255790593836312e-06, "loss": 0.0, "step": 19341 }, { "epoch": 18.598076923076924, "grad_norm": 0.011105495505034924, "learning_rate": 3.254870905589335e-06, "loss": 0.0001, "step": 19342 }, { "epoch": 18.599038461538463, "grad_norm": 0.0040321433916687965, "learning_rate": 3.253951322006961e-06, "loss": 0.0, "step": 19343 }, { "epoch": 18.6, "grad_norm": 0.0024571658577769995, "learning_rate": 3.2530318431034626e-06, "loss": 0.0, "step": 19344 }, { "epoch": 18.60096153846154, "grad_norm": 0.01569707877933979, "learning_rate": 3.2521124688931116e-06, "loss": 0.0001, "step": 19345 }, { "epoch": 18.601923076923075, "grad_norm": 0.06052614748477936, "learning_rate": 3.251193199390167e-06, "loss": 0.0001, "step": 19346 }, { "epoch": 18.602884615384614, "grad_norm": 0.004651680588722229, "learning_rate": 3.250274034608899e-06, "loss": 0.0, "step": 19347 }, { "epoch": 18.603846153846153, "grad_norm": 0.014729221351444721, "learning_rate": 3.2493549745635654e-06, "loss": 0.0001, "step": 19348 }, { "epoch": 18.60480769230769, "grad_norm": 0.3888341784477234, "learning_rate": 3.2484360192684294e-06, "loss": 0.0016, "step": 19349 }, { "epoch": 18.60576923076923, "grad_norm": 0.002307580318301916, "learning_rate": 3.2475171687377537e-06, "loss": 0.0, "step": 19350 }, { "epoch": 18.60673076923077, "grad_norm": 0.0018108751391991973, "learning_rate": 3.2465984229857905e-06, "loss": 0.0, "step": 19351 }, { "epoch": 18.607692307692307, "grad_norm": 0.005726963747292757, "learning_rate": 3.2456797820267995e-06, "loss": 0.0, "step": 19352 }, { "epoch": 18.608653846153846, "grad_norm": 0.15687446296215057, "learning_rate": 3.2447612458750365e-06, "loss": 0.0006, "step": 19353 }, { "epoch": 18.609615384615385, "grad_norm": 0.0016473580617457628, "learning_rate": 3.2438428145447496e-06, "loss": 0.0, "step": 19354 }, { "epoch": 18.610576923076923, "grad_norm": 0.0018911829683929682, "learning_rate": 3.2429244880501933e-06, "loss": 0.0, "step": 19355 }, { "epoch": 18.611538461538462, "grad_norm": 3.121950387954712, "learning_rate": 3.2420062664056205e-06, "loss": 0.0591, "step": 19356 }, { "epoch": 18.6125, "grad_norm": 0.02927841618657112, "learning_rate": 3.2410881496252732e-06, "loss": 0.0001, "step": 19357 }, { "epoch": 18.61346153846154, "grad_norm": 0.00426417076960206, "learning_rate": 3.240170137723403e-06, "loss": 0.0, "step": 19358 }, { "epoch": 18.614423076923078, "grad_norm": 0.01580992341041565, "learning_rate": 3.2392522307142493e-06, "loss": 0.0001, "step": 19359 }, { "epoch": 18.615384615384617, "grad_norm": 0.017184479162096977, "learning_rate": 3.2383344286120587e-06, "loss": 0.0001, "step": 19360 }, { "epoch": 18.616346153846155, "grad_norm": 0.007223091553896666, "learning_rate": 3.2374167314310758e-06, "loss": 0.0, "step": 19361 }, { "epoch": 18.61730769230769, "grad_norm": 0.00722684059292078, "learning_rate": 3.236499139185534e-06, "loss": 0.0001, "step": 19362 }, { "epoch": 18.61826923076923, "grad_norm": 0.46191349625587463, "learning_rate": 3.2355816518896743e-06, "loss": 0.0008, "step": 19363 }, { "epoch": 18.619230769230768, "grad_norm": 0.07085345685482025, "learning_rate": 3.2346642695577377e-06, "loss": 0.0002, "step": 19364 }, { "epoch": 18.620192307692307, "grad_norm": 0.005190706811845303, "learning_rate": 3.233746992203953e-06, "loss": 0.0, "step": 19365 }, { "epoch": 18.621153846153845, "grad_norm": 0.0015483383322134614, "learning_rate": 3.2328298198425556e-06, "loss": 0.0, "step": 19366 }, { "epoch": 18.622115384615384, "grad_norm": 0.028883861377835274, "learning_rate": 3.2319127524877813e-06, "loss": 0.0003, "step": 19367 }, { "epoch": 18.623076923076923, "grad_norm": 0.01187559962272644, "learning_rate": 3.2309957901538545e-06, "loss": 0.0001, "step": 19368 }, { "epoch": 18.62403846153846, "grad_norm": 0.0028166265692561865, "learning_rate": 3.2300789328550086e-06, "loss": 0.0, "step": 19369 }, { "epoch": 18.625, "grad_norm": 0.00713985413312912, "learning_rate": 3.229162180605465e-06, "loss": 0.0001, "step": 19370 }, { "epoch": 18.62596153846154, "grad_norm": 0.007210094947367907, "learning_rate": 3.228245533419453e-06, "loss": 0.0, "step": 19371 }, { "epoch": 18.626923076923077, "grad_norm": 0.003403694136068225, "learning_rate": 3.227328991311197e-06, "loss": 0.0, "step": 19372 }, { "epoch": 18.627884615384616, "grad_norm": 0.023712489753961563, "learning_rate": 3.2264125542949154e-06, "loss": 0.0001, "step": 19373 }, { "epoch": 18.628846153846155, "grad_norm": 0.4753982722759247, "learning_rate": 3.2254962223848307e-06, "loss": 0.001, "step": 19374 }, { "epoch": 18.629807692307693, "grad_norm": 0.002866762690246105, "learning_rate": 3.224579995595165e-06, "loss": 0.0, "step": 19375 }, { "epoch": 18.630769230769232, "grad_norm": 0.0300709567964077, "learning_rate": 3.2236638739401294e-06, "loss": 0.0001, "step": 19376 }, { "epoch": 18.63173076923077, "grad_norm": 0.0027099777944386005, "learning_rate": 3.222747857433941e-06, "loss": 0.0, "step": 19377 }, { "epoch": 18.63269230769231, "grad_norm": 0.009122469462454319, "learning_rate": 3.2218319460908187e-06, "loss": 0.0001, "step": 19378 }, { "epoch": 18.633653846153845, "grad_norm": 0.0009844008600339293, "learning_rate": 3.2209161399249677e-06, "loss": 0.0, "step": 19379 }, { "epoch": 18.634615384615383, "grad_norm": 0.0027762954123318195, "learning_rate": 3.2200004389506045e-06, "loss": 0.0, "step": 19380 }, { "epoch": 18.635576923076922, "grad_norm": 0.009850974194705486, "learning_rate": 3.2190848431819333e-06, "loss": 0.0001, "step": 19381 }, { "epoch": 18.63653846153846, "grad_norm": 0.005942470859736204, "learning_rate": 3.2181693526331624e-06, "loss": 0.0, "step": 19382 }, { "epoch": 18.6375, "grad_norm": 0.019001513719558716, "learning_rate": 3.217253967318503e-06, "loss": 0.0001, "step": 19383 }, { "epoch": 18.638461538461538, "grad_norm": 0.008213413879275322, "learning_rate": 3.2163386872521506e-06, "loss": 0.0001, "step": 19384 }, { "epoch": 18.639423076923077, "grad_norm": 0.0061467476189136505, "learning_rate": 3.2154235124483137e-06, "loss": 0.0001, "step": 19385 }, { "epoch": 18.640384615384615, "grad_norm": 0.01700359396636486, "learning_rate": 3.2145084429211926e-06, "loss": 0.0001, "step": 19386 }, { "epoch": 18.641346153846154, "grad_norm": 0.014754895120859146, "learning_rate": 3.213593478684983e-06, "loss": 0.0001, "step": 19387 }, { "epoch": 18.642307692307693, "grad_norm": 0.003840323770418763, "learning_rate": 3.212678619753885e-06, "loss": 0.0, "step": 19388 }, { "epoch": 18.64326923076923, "grad_norm": 0.003818816738203168, "learning_rate": 3.211763866142097e-06, "loss": 0.0, "step": 19389 }, { "epoch": 18.64423076923077, "grad_norm": 0.044398192316293716, "learning_rate": 3.210849217863807e-06, "loss": 0.0002, "step": 19390 }, { "epoch": 18.64519230769231, "grad_norm": 0.05127773433923721, "learning_rate": 3.209934674933216e-06, "loss": 0.0002, "step": 19391 }, { "epoch": 18.646153846153847, "grad_norm": 0.005637716501951218, "learning_rate": 3.209020237364505e-06, "loss": 0.0001, "step": 19392 }, { "epoch": 18.647115384615386, "grad_norm": 0.13457216322422028, "learning_rate": 3.2081059051718697e-06, "loss": 0.0004, "step": 19393 }, { "epoch": 18.648076923076925, "grad_norm": 0.014494255185127258, "learning_rate": 3.2071916783695e-06, "loss": 0.0001, "step": 19394 }, { "epoch": 18.64903846153846, "grad_norm": 0.5416608452796936, "learning_rate": 3.206277556971575e-06, "loss": 0.0064, "step": 19395 }, { "epoch": 18.65, "grad_norm": 0.005508513189852238, "learning_rate": 3.2053635409922844e-06, "loss": 0.0, "step": 19396 }, { "epoch": 18.650961538461537, "grad_norm": 0.0038186232559382915, "learning_rate": 3.2044496304458117e-06, "loss": 0.0, "step": 19397 }, { "epoch": 18.651923076923076, "grad_norm": 0.0047501916997134686, "learning_rate": 3.2035358253463333e-06, "loss": 0.0001, "step": 19398 }, { "epoch": 18.652884615384615, "grad_norm": 0.0019113464513793588, "learning_rate": 3.2026221257080316e-06, "loss": 0.0, "step": 19399 }, { "epoch": 18.653846153846153, "grad_norm": 0.010554752312600613, "learning_rate": 3.2017085315450868e-06, "loss": 0.0, "step": 19400 }, { "epoch": 18.654807692307692, "grad_norm": 0.06537250429391861, "learning_rate": 3.2007950428716705e-06, "loss": 0.0002, "step": 19401 }, { "epoch": 18.65576923076923, "grad_norm": 1.2538498640060425, "learning_rate": 3.1998816597019588e-06, "loss": 0.0113, "step": 19402 }, { "epoch": 18.65673076923077, "grad_norm": 0.004946773871779442, "learning_rate": 3.198968382050129e-06, "loss": 0.0, "step": 19403 }, { "epoch": 18.657692307692308, "grad_norm": 0.007674787193536758, "learning_rate": 3.1980552099303462e-06, "loss": 0.0, "step": 19404 }, { "epoch": 18.658653846153847, "grad_norm": 0.007618348114192486, "learning_rate": 3.197142143356787e-06, "loss": 0.0001, "step": 19405 }, { "epoch": 18.659615384615385, "grad_norm": 0.010627077892422676, "learning_rate": 3.196229182343612e-06, "loss": 0.0001, "step": 19406 }, { "epoch": 18.660576923076924, "grad_norm": 0.02317461557686329, "learning_rate": 3.1953163269049915e-06, "loss": 0.0001, "step": 19407 }, { "epoch": 18.661538461538463, "grad_norm": 0.11269208043813705, "learning_rate": 3.194403577055092e-06, "loss": 0.0002, "step": 19408 }, { "epoch": 18.6625, "grad_norm": 0.005408172495663166, "learning_rate": 3.193490932808072e-06, "loss": 0.0001, "step": 19409 }, { "epoch": 18.66346153846154, "grad_norm": 0.0022988731507211924, "learning_rate": 3.192578394178096e-06, "loss": 0.0, "step": 19410 }, { "epoch": 18.664423076923075, "grad_norm": 0.0041490159928798676, "learning_rate": 3.191665961179328e-06, "loss": 0.0, "step": 19411 }, { "epoch": 18.665384615384614, "grad_norm": 0.005385319236665964, "learning_rate": 3.1907536338259184e-06, "loss": 0.0, "step": 19412 }, { "epoch": 18.666346153846153, "grad_norm": 0.04065649211406708, "learning_rate": 3.1898414121320277e-06, "loss": 0.0001, "step": 19413 }, { "epoch": 18.66730769230769, "grad_norm": 0.007665787357836962, "learning_rate": 3.188929296111812e-06, "loss": 0.0001, "step": 19414 }, { "epoch": 18.66826923076923, "grad_norm": 0.0017512423219159245, "learning_rate": 3.188017285779422e-06, "loss": 0.0, "step": 19415 }, { "epoch": 18.66923076923077, "grad_norm": 0.0016566970152780414, "learning_rate": 3.1871053811490137e-06, "loss": 0.0, "step": 19416 }, { "epoch": 18.670192307692307, "grad_norm": 0.021719954907894135, "learning_rate": 3.1861935822347314e-06, "loss": 0.0001, "step": 19417 }, { "epoch": 18.671153846153846, "grad_norm": 0.008698063902556896, "learning_rate": 3.1852818890507255e-06, "loss": 0.0001, "step": 19418 }, { "epoch": 18.672115384615385, "grad_norm": 0.004556956700980663, "learning_rate": 3.1843703016111484e-06, "loss": 0.0, "step": 19419 }, { "epoch": 18.673076923076923, "grad_norm": 0.010036401450634003, "learning_rate": 3.1834588199301352e-06, "loss": 0.0001, "step": 19420 }, { "epoch": 18.674038461538462, "grad_norm": 0.005296607501804829, "learning_rate": 3.1825474440218363e-06, "loss": 0.0, "step": 19421 }, { "epoch": 18.675, "grad_norm": 0.0053077382035553455, "learning_rate": 3.181636173900395e-06, "loss": 0.0, "step": 19422 }, { "epoch": 18.67596153846154, "grad_norm": 0.012010404840111732, "learning_rate": 3.180725009579946e-06, "loss": 0.0001, "step": 19423 }, { "epoch": 18.676923076923078, "grad_norm": 0.006564782466739416, "learning_rate": 3.1798139510746295e-06, "loss": 0.0001, "step": 19424 }, { "epoch": 18.677884615384617, "grad_norm": 0.005434840451925993, "learning_rate": 3.1789029983985877e-06, "loss": 0.0, "step": 19425 }, { "epoch": 18.678846153846155, "grad_norm": 0.007855167612433434, "learning_rate": 3.1779921515659475e-06, "loss": 0.0001, "step": 19426 }, { "epoch": 18.67980769230769, "grad_norm": 0.002226956421509385, "learning_rate": 3.1770814105908497e-06, "loss": 0.0, "step": 19427 }, { "epoch": 18.68076923076923, "grad_norm": 0.008093243464827538, "learning_rate": 3.1761707754874215e-06, "loss": 0.0, "step": 19428 }, { "epoch": 18.681730769230768, "grad_norm": 0.0029659755527973175, "learning_rate": 3.1752602462697947e-06, "loss": 0.0, "step": 19429 }, { "epoch": 18.682692307692307, "grad_norm": 0.005425633862614632, "learning_rate": 3.174349822952101e-06, "loss": 0.0001, "step": 19430 }, { "epoch": 18.683653846153845, "grad_norm": 0.00977200735360384, "learning_rate": 3.1734395055484623e-06, "loss": 0.0001, "step": 19431 }, { "epoch": 18.684615384615384, "grad_norm": 0.001203636173158884, "learning_rate": 3.172529294073007e-06, "loss": 0.0, "step": 19432 }, { "epoch": 18.685576923076923, "grad_norm": 0.01847180910408497, "learning_rate": 3.1716191885398615e-06, "loss": 0.0001, "step": 19433 }, { "epoch": 18.68653846153846, "grad_norm": 0.005596057046204805, "learning_rate": 3.1707091889631424e-06, "loss": 0.0001, "step": 19434 }, { "epoch": 18.6875, "grad_norm": 0.009941293857991695, "learning_rate": 3.169799295356972e-06, "loss": 0.0001, "step": 19435 }, { "epoch": 18.68846153846154, "grad_norm": 0.002412478206679225, "learning_rate": 3.168889507735474e-06, "loss": 0.0, "step": 19436 }, { "epoch": 18.689423076923077, "grad_norm": 0.0024948976933956146, "learning_rate": 3.1679798261127594e-06, "loss": 0.0, "step": 19437 }, { "epoch": 18.690384615384616, "grad_norm": 0.018210135400295258, "learning_rate": 3.1670702505029483e-06, "loss": 0.0001, "step": 19438 }, { "epoch": 18.691346153846155, "grad_norm": 0.0034200800582766533, "learning_rate": 3.1661607809201487e-06, "loss": 0.0, "step": 19439 }, { "epoch": 18.692307692307693, "grad_norm": 0.01184884738177061, "learning_rate": 3.1652514173784787e-06, "loss": 0.0001, "step": 19440 }, { "epoch": 18.693269230769232, "grad_norm": 0.008503834716975689, "learning_rate": 3.1643421598920486e-06, "loss": 0.0, "step": 19441 }, { "epoch": 18.69423076923077, "grad_norm": 3.6946401596069336, "learning_rate": 3.163433008474963e-06, "loss": 0.0323, "step": 19442 }, { "epoch": 18.69519230769231, "grad_norm": 0.0026665683835744858, "learning_rate": 3.1625239631413317e-06, "loss": 0.0, "step": 19443 }, { "epoch": 18.696153846153845, "grad_norm": 0.0028869700618088245, "learning_rate": 3.1616150239052647e-06, "loss": 0.0, "step": 19444 }, { "epoch": 18.697115384615383, "grad_norm": 0.002975042909383774, "learning_rate": 3.160706190780858e-06, "loss": 0.0, "step": 19445 }, { "epoch": 18.698076923076922, "grad_norm": 0.0030453046783804893, "learning_rate": 3.159797463782218e-06, "loss": 0.0, "step": 19446 }, { "epoch": 18.69903846153846, "grad_norm": 0.00333991227671504, "learning_rate": 3.15888884292345e-06, "loss": 0.0, "step": 19447 }, { "epoch": 18.7, "grad_norm": 0.0021321456879377365, "learning_rate": 3.157980328218645e-06, "loss": 0.0, "step": 19448 }, { "epoch": 18.700961538461538, "grad_norm": 0.006457213312387466, "learning_rate": 3.1570719196819065e-06, "loss": 0.0, "step": 19449 }, { "epoch": 18.701923076923077, "grad_norm": 0.02168687805533409, "learning_rate": 3.1561636173273257e-06, "loss": 0.0001, "step": 19450 }, { "epoch": 18.702884615384615, "grad_norm": 0.004773171618580818, "learning_rate": 3.155255421169e-06, "loss": 0.0001, "step": 19451 }, { "epoch": 18.703846153846154, "grad_norm": 0.00801225658506155, "learning_rate": 3.1543473312210225e-06, "loss": 0.0001, "step": 19452 }, { "epoch": 18.704807692307693, "grad_norm": 0.02783786691725254, "learning_rate": 3.1534393474974814e-06, "loss": 0.0001, "step": 19453 }, { "epoch": 18.70576923076923, "grad_norm": 0.05899070203304291, "learning_rate": 3.1525314700124653e-06, "loss": 0.0002, "step": 19454 }, { "epoch": 18.70673076923077, "grad_norm": 0.0034645157866179943, "learning_rate": 3.1516236987800684e-06, "loss": 0.0, "step": 19455 }, { "epoch": 18.70769230769231, "grad_norm": 0.5897703766822815, "learning_rate": 3.150716033814368e-06, "loss": 0.0075, "step": 19456 }, { "epoch": 18.708653846153847, "grad_norm": 0.018054112792015076, "learning_rate": 3.1498084751294523e-06, "loss": 0.0001, "step": 19457 }, { "epoch": 18.709615384615386, "grad_norm": 0.44376078248023987, "learning_rate": 3.148901022739407e-06, "loss": 0.0013, "step": 19458 }, { "epoch": 18.710576923076925, "grad_norm": 0.009952102787792683, "learning_rate": 3.1479936766583073e-06, "loss": 0.0001, "step": 19459 }, { "epoch": 18.71153846153846, "grad_norm": 0.02476084977388382, "learning_rate": 3.1470864369002387e-06, "loss": 0.0001, "step": 19460 }, { "epoch": 18.7125, "grad_norm": 0.04302901402115822, "learning_rate": 3.146179303479272e-06, "loss": 0.0001, "step": 19461 }, { "epoch": 18.713461538461537, "grad_norm": 0.011517909355461597, "learning_rate": 3.1452722764094866e-06, "loss": 0.0001, "step": 19462 }, { "epoch": 18.714423076923076, "grad_norm": 0.008606944233179092, "learning_rate": 3.1443653557049615e-06, "loss": 0.0001, "step": 19463 }, { "epoch": 18.715384615384615, "grad_norm": 0.008943689055740833, "learning_rate": 3.1434585413797604e-06, "loss": 0.0001, "step": 19464 }, { "epoch": 18.716346153846153, "grad_norm": 0.007918245159089565, "learning_rate": 3.14255183344796e-06, "loss": 0.0001, "step": 19465 }, { "epoch": 18.717307692307692, "grad_norm": 0.006892351433634758, "learning_rate": 3.1416452319236314e-06, "loss": 0.0001, "step": 19466 }, { "epoch": 18.71826923076923, "grad_norm": 0.005166740622371435, "learning_rate": 3.1407387368208365e-06, "loss": 0.0, "step": 19467 }, { "epoch": 18.71923076923077, "grad_norm": 0.006592908874154091, "learning_rate": 3.1398323481536454e-06, "loss": 0.0, "step": 19468 }, { "epoch": 18.720192307692308, "grad_norm": 0.008426479995250702, "learning_rate": 3.1389260659361244e-06, "loss": 0.0, "step": 19469 }, { "epoch": 18.721153846153847, "grad_norm": 0.05138527229428291, "learning_rate": 3.1380198901823313e-06, "loss": 0.0002, "step": 19470 }, { "epoch": 18.722115384615385, "grad_norm": 0.020906368270516396, "learning_rate": 3.137113820906332e-06, "loss": 0.0001, "step": 19471 }, { "epoch": 18.723076923076924, "grad_norm": 0.015491343103349209, "learning_rate": 3.136207858122181e-06, "loss": 0.0001, "step": 19472 }, { "epoch": 18.724038461538463, "grad_norm": 0.08364734798669815, "learning_rate": 3.1353020018439394e-06, "loss": 0.0002, "step": 19473 }, { "epoch": 18.725, "grad_norm": 0.00967747438699007, "learning_rate": 3.1343962520856663e-06, "loss": 0.0001, "step": 19474 }, { "epoch": 18.72596153846154, "grad_norm": 0.006607829127460718, "learning_rate": 3.1334906088614104e-06, "loss": 0.0001, "step": 19475 }, { "epoch": 18.726923076923075, "grad_norm": 0.005830651149153709, "learning_rate": 3.1325850721852257e-06, "loss": 0.0, "step": 19476 }, { "epoch": 18.727884615384614, "grad_norm": 0.0010574840707704425, "learning_rate": 3.131679642071169e-06, "loss": 0.0, "step": 19477 }, { "epoch": 18.728846153846153, "grad_norm": 0.005905051715672016, "learning_rate": 3.130774318533284e-06, "loss": 0.0, "step": 19478 }, { "epoch": 18.72980769230769, "grad_norm": 0.0050711496733129025, "learning_rate": 3.12986910158562e-06, "loss": 0.0, "step": 19479 }, { "epoch": 18.73076923076923, "grad_norm": 0.007039938122034073, "learning_rate": 3.1289639912422264e-06, "loss": 0.0001, "step": 19480 }, { "epoch": 18.73173076923077, "grad_norm": 0.003270898712798953, "learning_rate": 3.1280589875171417e-06, "loss": 0.0, "step": 19481 }, { "epoch": 18.732692307692307, "grad_norm": 0.04280799627304077, "learning_rate": 3.1271540904244148e-06, "loss": 0.0001, "step": 19482 }, { "epoch": 18.733653846153846, "grad_norm": 0.005127521697431803, "learning_rate": 3.126249299978086e-06, "loss": 0.0, "step": 19483 }, { "epoch": 18.734615384615385, "grad_norm": 0.008695738390088081, "learning_rate": 3.125344616192192e-06, "loss": 0.0, "step": 19484 }, { "epoch": 18.735576923076923, "grad_norm": 0.01718411035835743, "learning_rate": 3.124440039080775e-06, "loss": 0.0001, "step": 19485 }, { "epoch": 18.736538461538462, "grad_norm": 0.007378848269581795, "learning_rate": 3.123535568657866e-06, "loss": 0.0001, "step": 19486 }, { "epoch": 18.7375, "grad_norm": 0.007349132094532251, "learning_rate": 3.122631204937503e-06, "loss": 0.0001, "step": 19487 }, { "epoch": 18.73846153846154, "grad_norm": 0.0015179971233010292, "learning_rate": 3.121726947933722e-06, "loss": 0.0, "step": 19488 }, { "epoch": 18.739423076923078, "grad_norm": 0.013247048482298851, "learning_rate": 3.1208227976605475e-06, "loss": 0.0001, "step": 19489 }, { "epoch": 18.740384615384617, "grad_norm": 0.005534248426556587, "learning_rate": 3.1199187541320144e-06, "loss": 0.0, "step": 19490 }, { "epoch": 18.741346153846155, "grad_norm": 0.022088930010795593, "learning_rate": 3.1190148173621515e-06, "loss": 0.0001, "step": 19491 }, { "epoch": 18.74230769230769, "grad_norm": 0.1255832463502884, "learning_rate": 3.1181109873649806e-06, "loss": 0.0005, "step": 19492 }, { "epoch": 18.74326923076923, "grad_norm": 0.014335007406771183, "learning_rate": 3.1172072641545294e-06, "loss": 0.0001, "step": 19493 }, { "epoch": 18.744230769230768, "grad_norm": 0.03048940934240818, "learning_rate": 3.1163036477448227e-06, "loss": 0.0001, "step": 19494 }, { "epoch": 18.745192307692307, "grad_norm": 0.0036190396640449762, "learning_rate": 3.1154001381498775e-06, "loss": 0.0001, "step": 19495 }, { "epoch": 18.746153846153845, "grad_norm": 0.005551035515964031, "learning_rate": 3.1144967353837196e-06, "loss": 0.0, "step": 19496 }, { "epoch": 18.747115384615384, "grad_norm": 0.009110629558563232, "learning_rate": 3.1135934394603596e-06, "loss": 0.0001, "step": 19497 }, { "epoch": 18.748076923076923, "grad_norm": 0.005866329651325941, "learning_rate": 3.1126902503938185e-06, "loss": 0.0, "step": 19498 }, { "epoch": 18.74903846153846, "grad_norm": 0.00508395629003644, "learning_rate": 3.111787168198114e-06, "loss": 0.0, "step": 19499 }, { "epoch": 18.75, "grad_norm": 0.003112197620794177, "learning_rate": 3.1108841928872526e-06, "loss": 0.0, "step": 19500 }, { "epoch": 18.75096153846154, "grad_norm": 0.07235032320022583, "learning_rate": 3.1099813244752497e-06, "loss": 0.0007, "step": 19501 }, { "epoch": 18.751923076923077, "grad_norm": 0.0022785994224250317, "learning_rate": 3.1090785629761178e-06, "loss": 0.0, "step": 19502 }, { "epoch": 18.752884615384616, "grad_norm": 0.00384896551258862, "learning_rate": 3.1081759084038577e-06, "loss": 0.0, "step": 19503 }, { "epoch": 18.753846153846155, "grad_norm": 0.0070885163731873035, "learning_rate": 3.1072733607724816e-06, "loss": 0.0001, "step": 19504 }, { "epoch": 18.754807692307693, "grad_norm": 0.011931653134524822, "learning_rate": 3.106370920095996e-06, "loss": 0.0001, "step": 19505 }, { "epoch": 18.755769230769232, "grad_norm": 0.01115288957953453, "learning_rate": 3.105468586388397e-06, "loss": 0.0001, "step": 19506 }, { "epoch": 18.75673076923077, "grad_norm": 0.010566502809524536, "learning_rate": 3.1045663596636943e-06, "loss": 0.0001, "step": 19507 }, { "epoch": 18.75769230769231, "grad_norm": 0.013274815864861012, "learning_rate": 3.1036642399358808e-06, "loss": 0.0001, "step": 19508 }, { "epoch": 18.758653846153845, "grad_norm": 0.00313083129003644, "learning_rate": 3.1027622272189572e-06, "loss": 0.0, "step": 19509 }, { "epoch": 18.759615384615383, "grad_norm": 0.003125506453216076, "learning_rate": 3.101860321526924e-06, "loss": 0.0, "step": 19510 }, { "epoch": 18.760576923076922, "grad_norm": 0.004072288982570171, "learning_rate": 3.1009585228737705e-06, "loss": 0.0, "step": 19511 }, { "epoch": 18.76153846153846, "grad_norm": 0.0036568145733326674, "learning_rate": 3.1000568312734912e-06, "loss": 0.0, "step": 19512 }, { "epoch": 18.7625, "grad_norm": 0.011926090344786644, "learning_rate": 3.099155246740082e-06, "loss": 0.0001, "step": 19513 }, { "epoch": 18.763461538461538, "grad_norm": 0.00490225525572896, "learning_rate": 3.098253769287527e-06, "loss": 0.0001, "step": 19514 }, { "epoch": 18.764423076923077, "grad_norm": 0.007811117451637983, "learning_rate": 3.097352398929817e-06, "loss": 0.0, "step": 19515 }, { "epoch": 18.765384615384615, "grad_norm": 0.016499673947691917, "learning_rate": 3.0964511356809423e-06, "loss": 0.0001, "step": 19516 }, { "epoch": 18.766346153846154, "grad_norm": 0.001475783996284008, "learning_rate": 3.0955499795548803e-06, "loss": 0.0, "step": 19517 }, { "epoch": 18.767307692307693, "grad_norm": 0.005623789969831705, "learning_rate": 3.0946489305656226e-06, "loss": 0.0, "step": 19518 }, { "epoch": 18.76826923076923, "grad_norm": 0.004774341359734535, "learning_rate": 3.0937479887271415e-06, "loss": 0.0, "step": 19519 }, { "epoch": 18.76923076923077, "grad_norm": 0.03397296369075775, "learning_rate": 3.0928471540534232e-06, "loss": 0.0002, "step": 19520 }, { "epoch": 18.77019230769231, "grad_norm": 0.014283474534749985, "learning_rate": 3.091946426558449e-06, "loss": 0.0001, "step": 19521 }, { "epoch": 18.771153846153847, "grad_norm": 0.008502090349793434, "learning_rate": 3.0910458062561865e-06, "loss": 0.0001, "step": 19522 }, { "epoch": 18.772115384615386, "grad_norm": 0.09895636886358261, "learning_rate": 3.090145293160616e-06, "loss": 0.0003, "step": 19523 }, { "epoch": 18.773076923076925, "grad_norm": 0.022732313722372055, "learning_rate": 3.089244887285714e-06, "loss": 0.0001, "step": 19524 }, { "epoch": 18.77403846153846, "grad_norm": 0.2772330939769745, "learning_rate": 3.0883445886454443e-06, "loss": 0.0008, "step": 19525 }, { "epoch": 18.775, "grad_norm": 0.06857644766569138, "learning_rate": 3.087444397253783e-06, "loss": 0.0003, "step": 19526 }, { "epoch": 18.775961538461537, "grad_norm": 0.0033755903132259846, "learning_rate": 3.0865443131246986e-06, "loss": 0.0, "step": 19527 }, { "epoch": 18.776923076923076, "grad_norm": 0.015626981854438782, "learning_rate": 3.085644336272152e-06, "loss": 0.0001, "step": 19528 }, { "epoch": 18.777884615384615, "grad_norm": 0.004091158974915743, "learning_rate": 3.0847444667101156e-06, "loss": 0.0, "step": 19529 }, { "epoch": 18.778846153846153, "grad_norm": 0.033775582909584045, "learning_rate": 3.0838447044525455e-06, "loss": 0.0002, "step": 19530 }, { "epoch": 18.779807692307692, "grad_norm": 0.006327994167804718, "learning_rate": 3.0829450495134074e-06, "loss": 0.0001, "step": 19531 }, { "epoch": 18.78076923076923, "grad_norm": 0.02850065939128399, "learning_rate": 3.082045501906664e-06, "loss": 0.0001, "step": 19532 }, { "epoch": 18.78173076923077, "grad_norm": 0.00265865377150476, "learning_rate": 3.0811460616462672e-06, "loss": 0.0, "step": 19533 }, { "epoch": 18.782692307692308, "grad_norm": 0.009969176724553108, "learning_rate": 3.080246728746177e-06, "loss": 0.0001, "step": 19534 }, { "epoch": 18.783653846153847, "grad_norm": 0.020109033212065697, "learning_rate": 3.0793475032203513e-06, "loss": 0.0002, "step": 19535 }, { "epoch": 18.784615384615385, "grad_norm": 0.013685915619134903, "learning_rate": 3.0784483850827374e-06, "loss": 0.0001, "step": 19536 }, { "epoch": 18.785576923076924, "grad_norm": 0.009301229380071163, "learning_rate": 3.0775493743472906e-06, "loss": 0.0001, "step": 19537 }, { "epoch": 18.786538461538463, "grad_norm": 0.00790502317249775, "learning_rate": 3.0766504710279632e-06, "loss": 0.0001, "step": 19538 }, { "epoch": 18.7875, "grad_norm": 0.014505075290799141, "learning_rate": 3.0757516751386974e-06, "loss": 0.0001, "step": 19539 }, { "epoch": 18.78846153846154, "grad_norm": 0.002676749136298895, "learning_rate": 3.0748529866934463e-06, "loss": 0.0, "step": 19540 }, { "epoch": 18.789423076923075, "grad_norm": 0.03128349408507347, "learning_rate": 3.073954405706148e-06, "loss": 0.0001, "step": 19541 }, { "epoch": 18.790384615384614, "grad_norm": 0.003791751340031624, "learning_rate": 3.0730559321907506e-06, "loss": 0.0, "step": 19542 }, { "epoch": 18.791346153846153, "grad_norm": 0.0035498570650815964, "learning_rate": 3.0721575661611967e-06, "loss": 0.0, "step": 19543 }, { "epoch": 18.79230769230769, "grad_norm": 0.0048200590535998344, "learning_rate": 3.0712593076314224e-06, "loss": 0.0, "step": 19544 }, { "epoch": 18.79326923076923, "grad_norm": 0.016797306016087532, "learning_rate": 3.0703611566153677e-06, "loss": 0.0001, "step": 19545 }, { "epoch": 18.79423076923077, "grad_norm": 0.003589806379750371, "learning_rate": 3.0694631131269735e-06, "loss": 0.0, "step": 19546 }, { "epoch": 18.795192307692307, "grad_norm": 0.004189744126051664, "learning_rate": 3.0685651771801672e-06, "loss": 0.0, "step": 19547 }, { "epoch": 18.796153846153846, "grad_norm": 0.16238223016262054, "learning_rate": 3.0676673487888854e-06, "loss": 0.0003, "step": 19548 }, { "epoch": 18.797115384615385, "grad_norm": 0.007037299685180187, "learning_rate": 3.0667696279670635e-06, "loss": 0.0, "step": 19549 }, { "epoch": 18.798076923076923, "grad_norm": 0.004161403514444828, "learning_rate": 3.065872014728626e-06, "loss": 0.0001, "step": 19550 }, { "epoch": 18.799038461538462, "grad_norm": 0.007607366424053907, "learning_rate": 3.064974509087505e-06, "loss": 0.0001, "step": 19551 }, { "epoch": 18.8, "grad_norm": 0.0050872997380793095, "learning_rate": 3.064077111057623e-06, "loss": 0.0, "step": 19552 }, { "epoch": 18.80096153846154, "grad_norm": 0.053974900394678116, "learning_rate": 3.0631798206529085e-06, "loss": 0.0003, "step": 19553 }, { "epoch": 18.801923076923078, "grad_norm": 0.0020620892755687237, "learning_rate": 3.062282637887286e-06, "loss": 0.0, "step": 19554 }, { "epoch": 18.802884615384617, "grad_norm": 0.009613187983632088, "learning_rate": 3.0613855627746713e-06, "loss": 0.0001, "step": 19555 }, { "epoch": 18.803846153846155, "grad_norm": 0.004812038037925959, "learning_rate": 3.0604885953289897e-06, "loss": 0.0, "step": 19556 }, { "epoch": 18.80480769230769, "grad_norm": 0.008081604726612568, "learning_rate": 3.05959173556416e-06, "loss": 0.0, "step": 19557 }, { "epoch": 18.80576923076923, "grad_norm": 0.04516001045703888, "learning_rate": 3.058694983494094e-06, "loss": 0.0002, "step": 19558 }, { "epoch": 18.806730769230768, "grad_norm": 0.023060046136379242, "learning_rate": 3.0577983391327082e-06, "loss": 0.0001, "step": 19559 }, { "epoch": 18.807692307692307, "grad_norm": 0.007115822285413742, "learning_rate": 3.0569018024939213e-06, "loss": 0.0, "step": 19560 }, { "epoch": 18.808653846153845, "grad_norm": 0.0012603176292032003, "learning_rate": 3.0560053735916372e-06, "loss": 0.0, "step": 19561 }, { "epoch": 18.809615384615384, "grad_norm": 0.007363313343375921, "learning_rate": 3.055109052439773e-06, "loss": 0.0, "step": 19562 }, { "epoch": 18.810576923076923, "grad_norm": 0.004686763510107994, "learning_rate": 3.0542128390522298e-06, "loss": 0.0, "step": 19563 }, { "epoch": 18.81153846153846, "grad_norm": 0.00836334191262722, "learning_rate": 3.053316733442917e-06, "loss": 0.0001, "step": 19564 }, { "epoch": 18.8125, "grad_norm": 0.00926429033279419, "learning_rate": 3.0524207356257453e-06, "loss": 0.0, "step": 19565 }, { "epoch": 18.81346153846154, "grad_norm": 0.0030968347564339638, "learning_rate": 3.0515248456146073e-06, "loss": 0.0, "step": 19566 }, { "epoch": 18.814423076923077, "grad_norm": 0.00148787721991539, "learning_rate": 3.0506290634234116e-06, "loss": 0.0, "step": 19567 }, { "epoch": 18.815384615384616, "grad_norm": 0.009734829887747765, "learning_rate": 3.0497333890660598e-06, "loss": 0.0, "step": 19568 }, { "epoch": 18.816346153846155, "grad_norm": 0.004231343511492014, "learning_rate": 3.048837822556443e-06, "loss": 0.0001, "step": 19569 }, { "epoch": 18.817307692307693, "grad_norm": 0.010094858705997467, "learning_rate": 3.047942363908463e-06, "loss": 0.0, "step": 19570 }, { "epoch": 18.818269230769232, "grad_norm": 0.010814821347594261, "learning_rate": 3.0470470131360165e-06, "loss": 0.0001, "step": 19571 }, { "epoch": 18.81923076923077, "grad_norm": 0.0029654596000909805, "learning_rate": 3.0461517702529896e-06, "loss": 0.0, "step": 19572 }, { "epoch": 18.82019230769231, "grad_norm": 0.003032645210623741, "learning_rate": 3.045256635273278e-06, "loss": 0.0, "step": 19573 }, { "epoch": 18.821153846153845, "grad_norm": 0.0046165622770786285, "learning_rate": 3.0443616082107753e-06, "loss": 0.0001, "step": 19574 }, { "epoch": 18.822115384615383, "grad_norm": 0.014406634494662285, "learning_rate": 3.043466689079363e-06, "loss": 0.0001, "step": 19575 }, { "epoch": 18.823076923076922, "grad_norm": 0.002215743064880371, "learning_rate": 3.0425718778929326e-06, "loss": 0.0, "step": 19576 }, { "epoch": 18.82403846153846, "grad_norm": 0.008411492221057415, "learning_rate": 3.041677174665364e-06, "loss": 0.0, "step": 19577 }, { "epoch": 18.825, "grad_norm": 0.004614382050931454, "learning_rate": 3.040782579410544e-06, "loss": 0.0001, "step": 19578 }, { "epoch": 18.825961538461538, "grad_norm": 0.011219743639230728, "learning_rate": 3.0398880921423556e-06, "loss": 0.0001, "step": 19579 }, { "epoch": 18.826923076923077, "grad_norm": 0.0030171433463692665, "learning_rate": 3.038993712874674e-06, "loss": 0.0, "step": 19580 }, { "epoch": 18.827884615384615, "grad_norm": 0.0049533843994140625, "learning_rate": 3.0380994416213785e-06, "loss": 0.0, "step": 19581 }, { "epoch": 18.828846153846154, "grad_norm": 0.004201618023216724, "learning_rate": 3.0372052783963502e-06, "loss": 0.0, "step": 19582 }, { "epoch": 18.829807692307693, "grad_norm": 0.004112176597118378, "learning_rate": 3.0363112232134584e-06, "loss": 0.0, "step": 19583 }, { "epoch": 18.83076923076923, "grad_norm": 0.001182154519483447, "learning_rate": 3.0354172760865775e-06, "loss": 0.0, "step": 19584 }, { "epoch": 18.83173076923077, "grad_norm": 0.0070143467746675014, "learning_rate": 3.034523437029582e-06, "loss": 0.0001, "step": 19585 }, { "epoch": 18.83269230769231, "grad_norm": 0.005601631477475166, "learning_rate": 3.033629706056337e-06, "loss": 0.0, "step": 19586 }, { "epoch": 18.833653846153847, "grad_norm": 0.04324699565768242, "learning_rate": 3.032736083180716e-06, "loss": 0.0001, "step": 19587 }, { "epoch": 18.834615384615386, "grad_norm": 0.0016049534315243363, "learning_rate": 3.0318425684165786e-06, "loss": 0.0, "step": 19588 }, { "epoch": 18.835576923076925, "grad_norm": 0.004533327650278807, "learning_rate": 3.0309491617777943e-06, "loss": 0.0, "step": 19589 }, { "epoch": 18.83653846153846, "grad_norm": 0.016516510397195816, "learning_rate": 3.030055863278227e-06, "loss": 0.0001, "step": 19590 }, { "epoch": 18.8375, "grad_norm": 0.002464552642777562, "learning_rate": 3.029162672931735e-06, "loss": 0.0, "step": 19591 }, { "epoch": 18.838461538461537, "grad_norm": 0.008268081583082676, "learning_rate": 3.0282695907521774e-06, "loss": 0.0001, "step": 19592 }, { "epoch": 18.839423076923076, "grad_norm": 0.020122947171330452, "learning_rate": 3.027376616753418e-06, "loss": 0.0001, "step": 19593 }, { "epoch": 18.840384615384615, "grad_norm": 0.0027729221619665623, "learning_rate": 3.026483750949305e-06, "loss": 0.0, "step": 19594 }, { "epoch": 18.841346153846153, "grad_norm": 0.011457822285592556, "learning_rate": 3.0255909933536974e-06, "loss": 0.0001, "step": 19595 }, { "epoch": 18.842307692307692, "grad_norm": 0.011517480947077274, "learning_rate": 3.024698343980452e-06, "loss": 0.0, "step": 19596 }, { "epoch": 18.84326923076923, "grad_norm": 0.0031081733759492636, "learning_rate": 3.0238058028434126e-06, "loss": 0.0, "step": 19597 }, { "epoch": 18.84423076923077, "grad_norm": 0.0038609844632446766, "learning_rate": 3.022913369956435e-06, "loss": 0.0, "step": 19598 }, { "epoch": 18.845192307692308, "grad_norm": 0.009570424444973469, "learning_rate": 3.022021045333361e-06, "loss": 0.0, "step": 19599 }, { "epoch": 18.846153846153847, "grad_norm": 0.00470324931666255, "learning_rate": 3.0211288289880404e-06, "loss": 0.0, "step": 19600 }, { "epoch": 18.847115384615385, "grad_norm": 0.009040352888405323, "learning_rate": 3.0202367209343207e-06, "loss": 0.0001, "step": 19601 }, { "epoch": 18.848076923076924, "grad_norm": 0.008374987170100212, "learning_rate": 3.0193447211860393e-06, "loss": 0.0001, "step": 19602 }, { "epoch": 18.849038461538463, "grad_norm": 0.004326952155679464, "learning_rate": 3.0184528297570394e-06, "loss": 0.0, "step": 19603 }, { "epoch": 18.85, "grad_norm": 0.008970016613602638, "learning_rate": 3.017561046661164e-06, "loss": 0.0001, "step": 19604 }, { "epoch": 18.85096153846154, "grad_norm": 0.0025348798371851444, "learning_rate": 3.0166693719122454e-06, "loss": 0.0, "step": 19605 }, { "epoch": 18.851923076923075, "grad_norm": 0.002667511347681284, "learning_rate": 3.0157778055241214e-06, "loss": 0.0, "step": 19606 }, { "epoch": 18.852884615384614, "grad_norm": 0.004485303536057472, "learning_rate": 3.0148863475106315e-06, "loss": 0.0, "step": 19607 }, { "epoch": 18.853846153846153, "grad_norm": 0.004289917182177305, "learning_rate": 3.0139949978856007e-06, "loss": 0.0, "step": 19608 }, { "epoch": 18.85480769230769, "grad_norm": 0.02262876182794571, "learning_rate": 3.0131037566628673e-06, "loss": 0.0001, "step": 19609 }, { "epoch": 18.85576923076923, "grad_norm": 0.052479200065135956, "learning_rate": 3.012212623856253e-06, "loss": 0.0001, "step": 19610 }, { "epoch": 18.85673076923077, "grad_norm": 0.007186118513345718, "learning_rate": 3.0113215994795917e-06, "loss": 0.0001, "step": 19611 }, { "epoch": 18.857692307692307, "grad_norm": 0.0035553507041186094, "learning_rate": 3.0104306835467102e-06, "loss": 0.0, "step": 19612 }, { "epoch": 18.858653846153846, "grad_norm": 0.004211681429296732, "learning_rate": 3.009539876071427e-06, "loss": 0.0, "step": 19613 }, { "epoch": 18.859615384615385, "grad_norm": 0.014415789395570755, "learning_rate": 3.0086491770675673e-06, "loss": 0.0, "step": 19614 }, { "epoch": 18.860576923076923, "grad_norm": 0.0026443072129040956, "learning_rate": 3.0077585865489567e-06, "loss": 0.0, "step": 19615 }, { "epoch": 18.861538461538462, "grad_norm": 0.00818406231701374, "learning_rate": 3.0068681045294078e-06, "loss": 0.0001, "step": 19616 }, { "epoch": 18.8625, "grad_norm": 0.004163701552897692, "learning_rate": 3.0059777310227423e-06, "loss": 0.0, "step": 19617 }, { "epoch": 18.86346153846154, "grad_norm": 0.029916126281023026, "learning_rate": 3.0050874660427776e-06, "loss": 0.0001, "step": 19618 }, { "epoch": 18.864423076923078, "grad_norm": 0.002259651431813836, "learning_rate": 3.0041973096033215e-06, "loss": 0.0, "step": 19619 }, { "epoch": 18.865384615384617, "grad_norm": 0.006279531866312027, "learning_rate": 3.003307261718196e-06, "loss": 0.0001, "step": 19620 }, { "epoch": 18.866346153846155, "grad_norm": 0.015120298601686954, "learning_rate": 3.002417322401202e-06, "loss": 0.0001, "step": 19621 }, { "epoch": 18.86730769230769, "grad_norm": 0.004582426976412535, "learning_rate": 3.001527491666153e-06, "loss": 0.0, "step": 19622 }, { "epoch": 18.86826923076923, "grad_norm": 0.0046387603506445885, "learning_rate": 3.000637769526862e-06, "loss": 0.0, "step": 19623 }, { "epoch": 18.869230769230768, "grad_norm": 0.0101253567263484, "learning_rate": 2.999748155997125e-06, "loss": 0.0001, "step": 19624 }, { "epoch": 18.870192307692307, "grad_norm": 0.0028427578508853912, "learning_rate": 2.9988586510907526e-06, "loss": 0.0, "step": 19625 }, { "epoch": 18.871153846153845, "grad_norm": 0.029090257361531258, "learning_rate": 2.997969254821548e-06, "loss": 0.0001, "step": 19626 }, { "epoch": 18.872115384615384, "grad_norm": 0.006570396013557911, "learning_rate": 2.997079967203307e-06, "loss": 0.0001, "step": 19627 }, { "epoch": 18.873076923076923, "grad_norm": 0.00470182579010725, "learning_rate": 2.996190788249832e-06, "loss": 0.0, "step": 19628 }, { "epoch": 18.87403846153846, "grad_norm": 0.008019911125302315, "learning_rate": 2.9953017179749223e-06, "loss": 0.0001, "step": 19629 }, { "epoch": 18.875, "grad_norm": 0.008275311440229416, "learning_rate": 2.9944127563923687e-06, "loss": 0.0, "step": 19630 }, { "epoch": 18.87596153846154, "grad_norm": 0.001672877580858767, "learning_rate": 2.993523903515971e-06, "loss": 0.0, "step": 19631 }, { "epoch": 18.876923076923077, "grad_norm": 0.0500168576836586, "learning_rate": 2.992635159359516e-06, "loss": 0.0002, "step": 19632 }, { "epoch": 18.877884615384616, "grad_norm": 0.004057822749018669, "learning_rate": 2.9917465239367972e-06, "loss": 0.0, "step": 19633 }, { "epoch": 18.878846153846155, "grad_norm": 0.027296755462884903, "learning_rate": 2.9908579972616057e-06, "loss": 0.0001, "step": 19634 }, { "epoch": 18.879807692307693, "grad_norm": 0.003209914779290557, "learning_rate": 2.9899695793477236e-06, "loss": 0.0, "step": 19635 }, { "epoch": 18.880769230769232, "grad_norm": 0.00309377396479249, "learning_rate": 2.989081270208939e-06, "loss": 0.0, "step": 19636 }, { "epoch": 18.88173076923077, "grad_norm": 0.12277978658676147, "learning_rate": 2.98819306985904e-06, "loss": 0.0003, "step": 19637 }, { "epoch": 18.88269230769231, "grad_norm": 0.002087016822770238, "learning_rate": 2.9873049783118015e-06, "loss": 0.0, "step": 19638 }, { "epoch": 18.883653846153845, "grad_norm": 0.003977984189987183, "learning_rate": 2.9864169955810085e-06, "loss": 0.0, "step": 19639 }, { "epoch": 18.884615384615383, "grad_norm": 1.1809037923812866, "learning_rate": 2.9855291216804416e-06, "loss": 0.0071, "step": 19640 }, { "epoch": 18.885576923076922, "grad_norm": 0.006687134969979525, "learning_rate": 2.984641356623872e-06, "loss": 0.0001, "step": 19641 }, { "epoch": 18.88653846153846, "grad_norm": 0.0016554535832256079, "learning_rate": 2.9837537004250825e-06, "loss": 0.0, "step": 19642 }, { "epoch": 18.8875, "grad_norm": 0.009924485348165035, "learning_rate": 2.9828661530978397e-06, "loss": 0.0, "step": 19643 }, { "epoch": 18.888461538461538, "grad_norm": 0.0026975045911967754, "learning_rate": 2.98197871465592e-06, "loss": 0.0, "step": 19644 }, { "epoch": 18.889423076923077, "grad_norm": 0.03107164241373539, "learning_rate": 2.981091385113095e-06, "loss": 0.0001, "step": 19645 }, { "epoch": 18.890384615384615, "grad_norm": 0.025230761617422104, "learning_rate": 2.980204164483129e-06, "loss": 0.0001, "step": 19646 }, { "epoch": 18.891346153846154, "grad_norm": 0.1024169996380806, "learning_rate": 2.979317052779791e-06, "loss": 0.0003, "step": 19647 }, { "epoch": 18.892307692307693, "grad_norm": 0.005420512054115534, "learning_rate": 2.9784300500168505e-06, "loss": 0.0, "step": 19648 }, { "epoch": 18.89326923076923, "grad_norm": 0.029370944947004318, "learning_rate": 2.977543156208065e-06, "loss": 0.0001, "step": 19649 }, { "epoch": 18.89423076923077, "grad_norm": 0.027023524045944214, "learning_rate": 2.976656371367198e-06, "loss": 0.0001, "step": 19650 }, { "epoch": 18.89519230769231, "grad_norm": 0.0054485490545630455, "learning_rate": 2.975769695508014e-06, "loss": 0.0, "step": 19651 }, { "epoch": 18.896153846153847, "grad_norm": 0.0014491775073111057, "learning_rate": 2.974883128644266e-06, "loss": 0.0, "step": 19652 }, { "epoch": 18.897115384615386, "grad_norm": 0.007639635354280472, "learning_rate": 2.973996670789716e-06, "loss": 0.0001, "step": 19653 }, { "epoch": 18.898076923076925, "grad_norm": 0.0061166067607700825, "learning_rate": 2.9731103219581137e-06, "loss": 0.0, "step": 19654 }, { "epoch": 18.89903846153846, "grad_norm": 0.004166333470493555, "learning_rate": 2.972224082163215e-06, "loss": 0.0, "step": 19655 }, { "epoch": 18.9, "grad_norm": 0.024944128468632698, "learning_rate": 2.9713379514187756e-06, "loss": 0.0001, "step": 19656 }, { "epoch": 18.900961538461537, "grad_norm": 0.0025487097445875406, "learning_rate": 2.9704519297385392e-06, "loss": 0.0, "step": 19657 }, { "epoch": 18.901923076923076, "grad_norm": 0.14730091392993927, "learning_rate": 2.969566017136258e-06, "loss": 0.0003, "step": 19658 }, { "epoch": 18.902884615384615, "grad_norm": 0.010264051146805286, "learning_rate": 2.968680213625681e-06, "loss": 0.0001, "step": 19659 }, { "epoch": 18.903846153846153, "grad_norm": 0.007762775290757418, "learning_rate": 2.967794519220547e-06, "loss": 0.0001, "step": 19660 }, { "epoch": 18.904807692307692, "grad_norm": 0.0010867143282666802, "learning_rate": 2.9669089339346026e-06, "loss": 0.0, "step": 19661 }, { "epoch": 18.90576923076923, "grad_norm": 0.2510031759738922, "learning_rate": 2.9660234577815936e-06, "loss": 0.0003, "step": 19662 }, { "epoch": 18.90673076923077, "grad_norm": 0.009616375900804996, "learning_rate": 2.9651380907752525e-06, "loss": 0.0001, "step": 19663 }, { "epoch": 18.907692307692308, "grad_norm": 1.459555745124817, "learning_rate": 2.9642528329293206e-06, "loss": 0.0096, "step": 19664 }, { "epoch": 18.908653846153847, "grad_norm": 0.011307773180305958, "learning_rate": 2.9633676842575386e-06, "loss": 0.0001, "step": 19665 }, { "epoch": 18.909615384615385, "grad_norm": 0.0021148486994206905, "learning_rate": 2.9624826447736356e-06, "loss": 0.0, "step": 19666 }, { "epoch": 18.910576923076924, "grad_norm": 0.005138018634170294, "learning_rate": 2.9615977144913487e-06, "loss": 0.0001, "step": 19667 }, { "epoch": 18.911538461538463, "grad_norm": 0.006123741157352924, "learning_rate": 2.9607128934244054e-06, "loss": 0.0001, "step": 19668 }, { "epoch": 18.9125, "grad_norm": 0.0017183605814352632, "learning_rate": 2.959828181586538e-06, "loss": 0.0, "step": 19669 }, { "epoch": 18.91346153846154, "grad_norm": 0.007269688881933689, "learning_rate": 2.958943578991478e-06, "loss": 0.0001, "step": 19670 }, { "epoch": 18.914423076923075, "grad_norm": 1.3509018421173096, "learning_rate": 2.9580590856529445e-06, "loss": 0.0092, "step": 19671 }, { "epoch": 18.915384615384614, "grad_norm": 0.004177280236035585, "learning_rate": 2.9571747015846673e-06, "loss": 0.0, "step": 19672 }, { "epoch": 18.916346153846153, "grad_norm": 0.0023569785989820957, "learning_rate": 2.95629042680037e-06, "loss": 0.0, "step": 19673 }, { "epoch": 18.91730769230769, "grad_norm": 0.24226360023021698, "learning_rate": 2.95540626131377e-06, "loss": 0.0019, "step": 19674 }, { "epoch": 18.91826923076923, "grad_norm": 0.0040490212850272655, "learning_rate": 2.9545222051385892e-06, "loss": 0.0, "step": 19675 }, { "epoch": 18.91923076923077, "grad_norm": 0.003651525592431426, "learning_rate": 2.953638258288548e-06, "loss": 0.0, "step": 19676 }, { "epoch": 18.920192307692307, "grad_norm": 0.0040419865399599075, "learning_rate": 2.9527544207773583e-06, "loss": 0.0, "step": 19677 }, { "epoch": 18.921153846153846, "grad_norm": 0.002217930741608143, "learning_rate": 2.951870692618739e-06, "loss": 0.0, "step": 19678 }, { "epoch": 18.922115384615385, "grad_norm": 0.01610528863966465, "learning_rate": 2.950987073826397e-06, "loss": 0.0, "step": 19679 }, { "epoch": 18.923076923076923, "grad_norm": 0.03216591849923134, "learning_rate": 2.9501035644140476e-06, "loss": 0.0001, "step": 19680 }, { "epoch": 18.924038461538462, "grad_norm": 0.01182393729686737, "learning_rate": 2.949220164395402e-06, "loss": 0.0001, "step": 19681 }, { "epoch": 18.925, "grad_norm": 0.001583157223649323, "learning_rate": 2.9483368737841632e-06, "loss": 0.0, "step": 19682 }, { "epoch": 18.92596153846154, "grad_norm": 0.03690702095627785, "learning_rate": 2.94745369259404e-06, "loss": 0.0001, "step": 19683 }, { "epoch": 18.926923076923078, "grad_norm": 0.014362324960529804, "learning_rate": 2.946570620838739e-06, "loss": 0.0001, "step": 19684 }, { "epoch": 18.927884615384617, "grad_norm": 0.005942098330706358, "learning_rate": 2.945687658531957e-06, "loss": 0.0, "step": 19685 }, { "epoch": 18.928846153846155, "grad_norm": 2.1625726222991943, "learning_rate": 2.9448048056874e-06, "loss": 0.0063, "step": 19686 }, { "epoch": 18.92980769230769, "grad_norm": 0.005426662042737007, "learning_rate": 2.943922062318767e-06, "loss": 0.0001, "step": 19687 }, { "epoch": 18.93076923076923, "grad_norm": 0.002253464423120022, "learning_rate": 2.9430394284397523e-06, "loss": 0.0, "step": 19688 }, { "epoch": 18.931730769230768, "grad_norm": 0.003610621439293027, "learning_rate": 2.9421569040640564e-06, "loss": 0.0, "step": 19689 }, { "epoch": 18.932692307692307, "grad_norm": 0.0251313429325819, "learning_rate": 2.941274489205368e-06, "loss": 0.0001, "step": 19690 }, { "epoch": 18.933653846153845, "grad_norm": 0.0073814960196614265, "learning_rate": 2.940392183877382e-06, "loss": 0.0, "step": 19691 }, { "epoch": 18.934615384615384, "grad_norm": 0.00387083413079381, "learning_rate": 2.9395099880937926e-06, "loss": 0.0, "step": 19692 }, { "epoch": 18.935576923076923, "grad_norm": 0.005479811690747738, "learning_rate": 2.938627901868283e-06, "loss": 0.0, "step": 19693 }, { "epoch": 18.93653846153846, "grad_norm": 0.009794199839234352, "learning_rate": 2.9377459252145436e-06, "loss": 0.0001, "step": 19694 }, { "epoch": 18.9375, "grad_norm": 0.0035090921446681023, "learning_rate": 2.9368640581462627e-06, "loss": 0.0, "step": 19695 }, { "epoch": 18.93846153846154, "grad_norm": 0.001926576136611402, "learning_rate": 2.9359823006771195e-06, "loss": 0.0, "step": 19696 }, { "epoch": 18.939423076923077, "grad_norm": 0.012259015813469887, "learning_rate": 2.935100652820797e-06, "loss": 0.0001, "step": 19697 }, { "epoch": 18.940384615384616, "grad_norm": 0.019304385408759117, "learning_rate": 2.9342191145909815e-06, "loss": 0.0001, "step": 19698 }, { "epoch": 18.941346153846155, "grad_norm": 0.0029370116535574198, "learning_rate": 2.933337686001343e-06, "loss": 0.0, "step": 19699 }, { "epoch": 18.942307692307693, "grad_norm": 0.009181560948491096, "learning_rate": 2.9324563670655672e-06, "loss": 0.0001, "step": 19700 }, { "epoch": 18.943269230769232, "grad_norm": 0.006069214083254337, "learning_rate": 2.9315751577973227e-06, "loss": 0.0, "step": 19701 }, { "epoch": 18.94423076923077, "grad_norm": 0.010029268451035023, "learning_rate": 2.930694058210286e-06, "loss": 0.0001, "step": 19702 }, { "epoch": 18.94519230769231, "grad_norm": 0.010558092035353184, "learning_rate": 2.9298130683181323e-06, "loss": 0.0001, "step": 19703 }, { "epoch": 18.946153846153845, "grad_norm": 0.0034408378414809704, "learning_rate": 2.9289321881345257e-06, "loss": 0.0, "step": 19704 }, { "epoch": 18.947115384615383, "grad_norm": 0.0016863789642229676, "learning_rate": 2.9280514176731377e-06, "loss": 0.0, "step": 19705 }, { "epoch": 18.948076923076922, "grad_norm": 0.1011962741613388, "learning_rate": 2.9271707569476405e-06, "loss": 0.0003, "step": 19706 }, { "epoch": 18.94903846153846, "grad_norm": 0.0026166390161961317, "learning_rate": 2.92629020597169e-06, "loss": 0.0, "step": 19707 }, { "epoch": 18.95, "grad_norm": 0.004396848380565643, "learning_rate": 2.9254097647589553e-06, "loss": 0.0, "step": 19708 }, { "epoch": 18.950961538461538, "grad_norm": 0.25143879652023315, "learning_rate": 2.9245294333231e-06, "loss": 0.0005, "step": 19709 }, { "epoch": 18.951923076923077, "grad_norm": 0.005722947884351015, "learning_rate": 2.9236492116777794e-06, "loss": 0.0, "step": 19710 }, { "epoch": 18.952884615384615, "grad_norm": 0.018011286854743958, "learning_rate": 2.9227690998366564e-06, "loss": 0.0001, "step": 19711 }, { "epoch": 18.953846153846154, "grad_norm": 0.007435190957039595, "learning_rate": 2.921889097813383e-06, "loss": 0.0001, "step": 19712 }, { "epoch": 18.954807692307693, "grad_norm": 0.0035519099328666925, "learning_rate": 2.9210092056216166e-06, "loss": 0.0, "step": 19713 }, { "epoch": 18.95576923076923, "grad_norm": 0.010683620348572731, "learning_rate": 2.9201294232750133e-06, "loss": 0.0001, "step": 19714 }, { "epoch": 18.95673076923077, "grad_norm": 0.010701905004680157, "learning_rate": 2.91924975078722e-06, "loss": 0.0001, "step": 19715 }, { "epoch": 18.95769230769231, "grad_norm": 0.019707009196281433, "learning_rate": 2.9183701881718885e-06, "loss": 0.0001, "step": 19716 }, { "epoch": 18.958653846153847, "grad_norm": 0.048391375690698624, "learning_rate": 2.9174907354426696e-06, "loss": 0.0001, "step": 19717 }, { "epoch": 18.959615384615386, "grad_norm": 0.0036955319810658693, "learning_rate": 2.9166113926132057e-06, "loss": 0.0, "step": 19718 }, { "epoch": 18.960576923076925, "grad_norm": 0.439304381608963, "learning_rate": 2.9157321596971434e-06, "loss": 0.0009, "step": 19719 }, { "epoch": 18.96153846153846, "grad_norm": 0.8640358448028564, "learning_rate": 2.9148530367081286e-06, "loss": 0.0083, "step": 19720 }, { "epoch": 18.9625, "grad_norm": 0.003715652273967862, "learning_rate": 2.9139740236597958e-06, "loss": 0.0, "step": 19721 }, { "epoch": 18.963461538461537, "grad_norm": 0.00598687119781971, "learning_rate": 2.9130951205657932e-06, "loss": 0.0001, "step": 19722 }, { "epoch": 18.964423076923076, "grad_norm": 0.0046411980874836445, "learning_rate": 2.912216327439752e-06, "loss": 0.0, "step": 19723 }, { "epoch": 18.965384615384615, "grad_norm": 0.0015971723478287458, "learning_rate": 2.91133764429531e-06, "loss": 0.0, "step": 19724 }, { "epoch": 18.966346153846153, "grad_norm": 0.007443995214998722, "learning_rate": 2.9104590711461066e-06, "loss": 0.0, "step": 19725 }, { "epoch": 18.967307692307692, "grad_norm": 0.09586936980485916, "learning_rate": 2.9095806080057666e-06, "loss": 0.0003, "step": 19726 }, { "epoch": 18.96826923076923, "grad_norm": 0.0011773452861234546, "learning_rate": 2.908702254887926e-06, "loss": 0.0, "step": 19727 }, { "epoch": 18.96923076923077, "grad_norm": 0.003747635753825307, "learning_rate": 2.9078240118062173e-06, "loss": 0.0, "step": 19728 }, { "epoch": 18.970192307692308, "grad_norm": 0.9631731510162354, "learning_rate": 2.9069458787742612e-06, "loss": 0.0017, "step": 19729 }, { "epoch": 18.971153846153847, "grad_norm": 0.07830747216939926, "learning_rate": 2.9060678558056876e-06, "loss": 0.0002, "step": 19730 }, { "epoch": 18.972115384615385, "grad_norm": 0.5295861959457397, "learning_rate": 2.905189942914123e-06, "loss": 0.0033, "step": 19731 }, { "epoch": 18.973076923076924, "grad_norm": 0.0017493115738034248, "learning_rate": 2.9043121401131846e-06, "loss": 0.0, "step": 19732 }, { "epoch": 18.974038461538463, "grad_norm": 0.007456814404577017, "learning_rate": 2.903434447416499e-06, "loss": 0.0, "step": 19733 }, { "epoch": 18.975, "grad_norm": 0.008093255572021008, "learning_rate": 2.90255686483768e-06, "loss": 0.0001, "step": 19734 }, { "epoch": 18.97596153846154, "grad_norm": 2.2901899814605713, "learning_rate": 2.9016793923903474e-06, "loss": 0.0307, "step": 19735 }, { "epoch": 18.976923076923075, "grad_norm": 0.008989200927317142, "learning_rate": 2.90080203008812e-06, "loss": 0.0001, "step": 19736 }, { "epoch": 18.977884615384614, "grad_norm": 0.025087665766477585, "learning_rate": 2.8999247779446073e-06, "loss": 0.0001, "step": 19737 }, { "epoch": 18.978846153846153, "grad_norm": 0.05449514836072922, "learning_rate": 2.899047635973423e-06, "loss": 0.0002, "step": 19738 }, { "epoch": 18.97980769230769, "grad_norm": 0.000943630700930953, "learning_rate": 2.898170604188181e-06, "loss": 0.0, "step": 19739 }, { "epoch": 18.98076923076923, "grad_norm": 0.0219865832477808, "learning_rate": 2.8972936826024843e-06, "loss": 0.0001, "step": 19740 }, { "epoch": 18.98173076923077, "grad_norm": 0.009859230369329453, "learning_rate": 2.8964168712299447e-06, "loss": 0.0001, "step": 19741 }, { "epoch": 18.982692307692307, "grad_norm": 0.039605166763067245, "learning_rate": 2.895540170084169e-06, "loss": 0.0001, "step": 19742 }, { "epoch": 18.983653846153846, "grad_norm": 0.0026133726350963116, "learning_rate": 2.8946635791787546e-06, "loss": 0.0, "step": 19743 }, { "epoch": 18.984615384615385, "grad_norm": 0.025406794622540474, "learning_rate": 2.893787098527312e-06, "loss": 0.0001, "step": 19744 }, { "epoch": 18.985576923076923, "grad_norm": 0.02159729041159153, "learning_rate": 2.8929107281434334e-06, "loss": 0.0001, "step": 19745 }, { "epoch": 18.986538461538462, "grad_norm": 0.004254850558936596, "learning_rate": 2.892034468040721e-06, "loss": 0.0001, "step": 19746 }, { "epoch": 18.9875, "grad_norm": 0.00477804196998477, "learning_rate": 2.891158318232775e-06, "loss": 0.0, "step": 19747 }, { "epoch": 18.98846153846154, "grad_norm": 0.005600109696388245, "learning_rate": 2.8902822787331853e-06, "loss": 0.0, "step": 19748 }, { "epoch": 18.989423076923078, "grad_norm": 0.0046057975850999355, "learning_rate": 2.8894063495555468e-06, "loss": 0.0001, "step": 19749 }, { "epoch": 18.990384615384617, "grad_norm": 0.0036732391454279423, "learning_rate": 2.888530530713456e-06, "loss": 0.0, "step": 19750 }, { "epoch": 18.991346153846155, "grad_norm": 0.004336795769631863, "learning_rate": 2.8876548222204968e-06, "loss": 0.0, "step": 19751 }, { "epoch": 18.99230769230769, "grad_norm": 1.2312091588974, "learning_rate": 2.8867792240902603e-06, "loss": 0.0061, "step": 19752 }, { "epoch": 18.99326923076923, "grad_norm": 0.011518219485878944, "learning_rate": 2.8859037363363362e-06, "loss": 0.0, "step": 19753 }, { "epoch": 18.994230769230768, "grad_norm": 0.004600440617650747, "learning_rate": 2.885028358972303e-06, "loss": 0.0, "step": 19754 }, { "epoch": 18.995192307692307, "grad_norm": 0.008974486961960793, "learning_rate": 2.884153092011748e-06, "loss": 0.0, "step": 19755 }, { "epoch": 18.996153846153845, "grad_norm": 0.35741907358169556, "learning_rate": 2.883277935468254e-06, "loss": 0.0012, "step": 19756 }, { "epoch": 18.997115384615384, "grad_norm": 0.011538022197782993, "learning_rate": 2.882402889355398e-06, "loss": 0.0001, "step": 19757 }, { "epoch": 18.998076923076923, "grad_norm": 0.0018846964230760932, "learning_rate": 2.881527953686761e-06, "loss": 0.0, "step": 19758 }, { "epoch": 18.99903846153846, "grad_norm": 0.007509991992264986, "learning_rate": 2.8806531284759144e-06, "loss": 0.0001, "step": 19759 }, { "epoch": 19.0, "grad_norm": 0.007418016437441111, "learning_rate": 2.879778413736437e-06, "loss": 0.0, "step": 19760 }, { "epoch": 19.00096153846154, "grad_norm": 0.0019903213251382113, "learning_rate": 2.8789038094819044e-06, "loss": 0.0, "step": 19761 }, { "epoch": 19.001923076923077, "grad_norm": 0.009773681871592999, "learning_rate": 2.878029315725881e-06, "loss": 0.0001, "step": 19762 }, { "epoch": 19.002884615384616, "grad_norm": 0.00749755697324872, "learning_rate": 2.877154932481939e-06, "loss": 0.0, "step": 19763 }, { "epoch": 19.003846153846155, "grad_norm": 0.01327274926006794, "learning_rate": 2.8762806597636516e-06, "loss": 0.0001, "step": 19764 }, { "epoch": 19.004807692307693, "grad_norm": 0.0008773610461503267, "learning_rate": 2.875406497584576e-06, "loss": 0.0, "step": 19765 }, { "epoch": 19.005769230769232, "grad_norm": 0.0020678741857409477, "learning_rate": 2.8745324459582823e-06, "loss": 0.0, "step": 19766 }, { "epoch": 19.00673076923077, "grad_norm": 0.0047835297882556915, "learning_rate": 2.8736585048983356e-06, "loss": 0.0, "step": 19767 }, { "epoch": 19.00769230769231, "grad_norm": 0.004899152554571629, "learning_rate": 2.8727846744182885e-06, "loss": 0.0001, "step": 19768 }, { "epoch": 19.008653846153845, "grad_norm": 0.002371292095631361, "learning_rate": 2.8719109545317102e-06, "loss": 0.0, "step": 19769 }, { "epoch": 19.009615384615383, "grad_norm": 0.0019348182249814272, "learning_rate": 2.871037345252149e-06, "loss": 0.0, "step": 19770 }, { "epoch": 19.010576923076922, "grad_norm": 0.0032545786816626787, "learning_rate": 2.870163846593165e-06, "loss": 0.0, "step": 19771 }, { "epoch": 19.01153846153846, "grad_norm": 0.0023450299631804228, "learning_rate": 2.8692904585683156e-06, "loss": 0.0, "step": 19772 }, { "epoch": 19.0125, "grad_norm": 0.004306704271584749, "learning_rate": 2.8684171811911464e-06, "loss": 0.0, "step": 19773 }, { "epoch": 19.013461538461538, "grad_norm": 0.005304371938109398, "learning_rate": 2.867544014475212e-06, "loss": 0.0001, "step": 19774 }, { "epoch": 19.014423076923077, "grad_norm": 4.891952037811279, "learning_rate": 2.8666709584340636e-06, "loss": 0.0128, "step": 19775 }, { "epoch": 19.015384615384615, "grad_norm": 0.0040958477184176445, "learning_rate": 2.8657980130812424e-06, "loss": 0.0, "step": 19776 }, { "epoch": 19.016346153846154, "grad_norm": 0.0017493836348876357, "learning_rate": 2.864925178430299e-06, "loss": 0.0, "step": 19777 }, { "epoch": 19.017307692307693, "grad_norm": 0.4660157263278961, "learning_rate": 2.8640524544947777e-06, "loss": 0.0013, "step": 19778 }, { "epoch": 19.01826923076923, "grad_norm": 0.0026005469262599945, "learning_rate": 2.863179841288215e-06, "loss": 0.0, "step": 19779 }, { "epoch": 19.01923076923077, "grad_norm": 0.0026365758385509253, "learning_rate": 2.8623073388241587e-06, "loss": 0.0, "step": 19780 }, { "epoch": 19.02019230769231, "grad_norm": 0.003250607056543231, "learning_rate": 2.8614349471161406e-06, "loss": 0.0, "step": 19781 }, { "epoch": 19.021153846153847, "grad_norm": 0.004982835613191128, "learning_rate": 2.8605626661776995e-06, "loss": 0.0, "step": 19782 }, { "epoch": 19.022115384615386, "grad_norm": 0.004064397886395454, "learning_rate": 2.859690496022377e-06, "loss": 0.0, "step": 19783 }, { "epoch": 19.023076923076925, "grad_norm": 0.0074160401709377766, "learning_rate": 2.858818436663696e-06, "loss": 0.0001, "step": 19784 }, { "epoch": 19.02403846153846, "grad_norm": 0.002800102811306715, "learning_rate": 2.857946488115195e-06, "loss": 0.0, "step": 19785 }, { "epoch": 19.025, "grad_norm": 0.002954671625047922, "learning_rate": 2.8570746503904067e-06, "loss": 0.0, "step": 19786 }, { "epoch": 19.025961538461537, "grad_norm": 0.011738954111933708, "learning_rate": 2.856202923502851e-06, "loss": 0.0001, "step": 19787 }, { "epoch": 19.026923076923076, "grad_norm": 0.005488502327352762, "learning_rate": 2.8553313074660606e-06, "loss": 0.0001, "step": 19788 }, { "epoch": 19.027884615384615, "grad_norm": 0.0016695326194167137, "learning_rate": 2.854459802293561e-06, "loss": 0.0, "step": 19789 }, { "epoch": 19.028846153846153, "grad_norm": 0.0041224220767617226, "learning_rate": 2.853588407998871e-06, "loss": 0.0, "step": 19790 }, { "epoch": 19.029807692307692, "grad_norm": 0.012139366939663887, "learning_rate": 2.8527171245955163e-06, "loss": 0.0001, "step": 19791 }, { "epoch": 19.03076923076923, "grad_norm": 0.0033278700429946184, "learning_rate": 2.851845952097012e-06, "loss": 0.0, "step": 19792 }, { "epoch": 19.03173076923077, "grad_norm": 0.006095346063375473, "learning_rate": 2.85097489051688e-06, "loss": 0.0, "step": 19793 }, { "epoch": 19.032692307692308, "grad_norm": 0.002755044959485531, "learning_rate": 2.8501039398686382e-06, "loss": 0.0, "step": 19794 }, { "epoch": 19.033653846153847, "grad_norm": 0.0035405883099883795, "learning_rate": 2.849233100165795e-06, "loss": 0.0, "step": 19795 }, { "epoch": 19.034615384615385, "grad_norm": 0.004104555584490299, "learning_rate": 2.848362371421868e-06, "loss": 0.0001, "step": 19796 }, { "epoch": 19.035576923076924, "grad_norm": 0.002040149411186576, "learning_rate": 2.8474917536503687e-06, "loss": 0.0, "step": 19797 }, { "epoch": 19.036538461538463, "grad_norm": 0.002890395000576973, "learning_rate": 2.8466212468648026e-06, "loss": 0.0, "step": 19798 }, { "epoch": 19.0375, "grad_norm": 0.015901310369372368, "learning_rate": 2.84575085107868e-06, "loss": 0.0001, "step": 19799 }, { "epoch": 19.03846153846154, "grad_norm": 0.005856502801179886, "learning_rate": 2.84488056630551e-06, "loss": 0.0001, "step": 19800 }, { "epoch": 19.039423076923075, "grad_norm": 0.061278242617845535, "learning_rate": 2.8440103925587904e-06, "loss": 0.0002, "step": 19801 }, { "epoch": 19.040384615384614, "grad_norm": 0.002778170630335808, "learning_rate": 2.8431403298520288e-06, "loss": 0.0, "step": 19802 }, { "epoch": 19.041346153846153, "grad_norm": 0.011964703910052776, "learning_rate": 2.8422703781987226e-06, "loss": 0.0001, "step": 19803 }, { "epoch": 19.04230769230769, "grad_norm": 0.00881983432918787, "learning_rate": 2.841400537612372e-06, "loss": 0.0001, "step": 19804 }, { "epoch": 19.04326923076923, "grad_norm": 0.022254236042499542, "learning_rate": 2.8405308081064777e-06, "loss": 0.0001, "step": 19805 }, { "epoch": 19.04423076923077, "grad_norm": 0.002623980166390538, "learning_rate": 2.839661189694529e-06, "loss": 0.0, "step": 19806 }, { "epoch": 19.045192307692307, "grad_norm": 0.0038618086837232113, "learning_rate": 2.8387916823900243e-06, "loss": 0.0, "step": 19807 }, { "epoch": 19.046153846153846, "grad_norm": 0.0015061659505590796, "learning_rate": 2.837922286206457e-06, "loss": 0.0, "step": 19808 }, { "epoch": 19.047115384615385, "grad_norm": 0.0008653567638248205, "learning_rate": 2.8370530011573126e-06, "loss": 0.0, "step": 19809 }, { "epoch": 19.048076923076923, "grad_norm": 0.0026436757761985064, "learning_rate": 2.8361838272560826e-06, "loss": 0.0, "step": 19810 }, { "epoch": 19.049038461538462, "grad_norm": 0.004784040153026581, "learning_rate": 2.835314764516258e-06, "loss": 0.0001, "step": 19811 }, { "epoch": 19.05, "grad_norm": 0.006311521865427494, "learning_rate": 2.8344458129513163e-06, "loss": 0.0, "step": 19812 }, { "epoch": 19.05096153846154, "grad_norm": 0.006848958786576986, "learning_rate": 2.8335769725747476e-06, "loss": 0.0001, "step": 19813 }, { "epoch": 19.051923076923078, "grad_norm": 0.006381426006555557, "learning_rate": 2.8327082434000286e-06, "loss": 0.0001, "step": 19814 }, { "epoch": 19.052884615384617, "grad_norm": 0.012753209099173546, "learning_rate": 2.831839625440642e-06, "loss": 0.0001, "step": 19815 }, { "epoch": 19.053846153846155, "grad_norm": 0.0030650270637124777, "learning_rate": 2.8309711187100687e-06, "loss": 0.0, "step": 19816 }, { "epoch": 19.05480769230769, "grad_norm": 0.0018291722517460585, "learning_rate": 2.8301027232217804e-06, "loss": 0.0, "step": 19817 }, { "epoch": 19.05576923076923, "grad_norm": 0.0017271593678742647, "learning_rate": 2.8292344389892533e-06, "loss": 0.0, "step": 19818 }, { "epoch": 19.056730769230768, "grad_norm": 0.0026885182596743107, "learning_rate": 2.828366266025966e-06, "loss": 0.0, "step": 19819 }, { "epoch": 19.057692307692307, "grad_norm": 0.0021168517414480448, "learning_rate": 2.8274982043453826e-06, "loss": 0.0, "step": 19820 }, { "epoch": 19.058653846153845, "grad_norm": 0.0072546713054180145, "learning_rate": 2.8266302539609747e-06, "loss": 0.0001, "step": 19821 }, { "epoch": 19.059615384615384, "grad_norm": 0.009060429409146309, "learning_rate": 2.825762414886216e-06, "loss": 0.0001, "step": 19822 }, { "epoch": 19.060576923076923, "grad_norm": 0.05566740036010742, "learning_rate": 2.824894687134565e-06, "loss": 0.0003, "step": 19823 }, { "epoch": 19.06153846153846, "grad_norm": 0.002813340863212943, "learning_rate": 2.824027070719493e-06, "loss": 0.0, "step": 19824 }, { "epoch": 19.0625, "grad_norm": 0.003955906722694635, "learning_rate": 2.8231595656544563e-06, "loss": 0.0, "step": 19825 }, { "epoch": 19.06346153846154, "grad_norm": 0.03017723374068737, "learning_rate": 2.8222921719529194e-06, "loss": 0.0001, "step": 19826 }, { "epoch": 19.064423076923077, "grad_norm": 0.002142153913155198, "learning_rate": 2.8214248896283457e-06, "loss": 0.0, "step": 19827 }, { "epoch": 19.065384615384616, "grad_norm": 0.002597354119643569, "learning_rate": 2.8205577186941845e-06, "loss": 0.0, "step": 19828 }, { "epoch": 19.066346153846155, "grad_norm": 0.00631664739921689, "learning_rate": 2.8196906591638975e-06, "loss": 0.0, "step": 19829 }, { "epoch": 19.067307692307693, "grad_norm": 0.002359657548367977, "learning_rate": 2.8188237110509386e-06, "loss": 0.0, "step": 19830 }, { "epoch": 19.068269230769232, "grad_norm": 0.002587817842140794, "learning_rate": 2.817956874368758e-06, "loss": 0.0, "step": 19831 }, { "epoch": 19.06923076923077, "grad_norm": 0.00327454530633986, "learning_rate": 2.8170901491308067e-06, "loss": 0.0, "step": 19832 }, { "epoch": 19.07019230769231, "grad_norm": 0.0025984225794672966, "learning_rate": 2.8162235353505376e-06, "loss": 0.0, "step": 19833 }, { "epoch": 19.071153846153845, "grad_norm": 0.0037405178882181644, "learning_rate": 2.8153570330413925e-06, "loss": 0.0, "step": 19834 }, { "epoch": 19.072115384615383, "grad_norm": 0.005264625418931246, "learning_rate": 2.8144906422168227e-06, "loss": 0.0, "step": 19835 }, { "epoch": 19.073076923076922, "grad_norm": 0.002057392615824938, "learning_rate": 2.813624362890266e-06, "loss": 0.0, "step": 19836 }, { "epoch": 19.07403846153846, "grad_norm": 0.0025859081652015448, "learning_rate": 2.812758195075167e-06, "loss": 0.0, "step": 19837 }, { "epoch": 19.075, "grad_norm": 0.014880773611366749, "learning_rate": 2.8118921387849698e-06, "loss": 0.0, "step": 19838 }, { "epoch": 19.075961538461538, "grad_norm": 0.011395197361707687, "learning_rate": 2.811026194033106e-06, "loss": 0.0001, "step": 19839 }, { "epoch": 19.076923076923077, "grad_norm": 0.0031566028483211994, "learning_rate": 2.8101603608330162e-06, "loss": 0.0, "step": 19840 }, { "epoch": 19.077884615384615, "grad_norm": 0.005931397434324026, "learning_rate": 2.80929463919814e-06, "loss": 0.0, "step": 19841 }, { "epoch": 19.078846153846154, "grad_norm": 0.0026283699553459883, "learning_rate": 2.8084290291419024e-06, "loss": 0.0, "step": 19842 }, { "epoch": 19.079807692307693, "grad_norm": 0.006570948287844658, "learning_rate": 2.8075635306777404e-06, "loss": 0.0001, "step": 19843 }, { "epoch": 19.08076923076923, "grad_norm": 0.04827825725078583, "learning_rate": 2.8066981438190854e-06, "loss": 0.0001, "step": 19844 }, { "epoch": 19.08173076923077, "grad_norm": 0.01826678030192852, "learning_rate": 2.8058328685793588e-06, "loss": 0.0001, "step": 19845 }, { "epoch": 19.08269230769231, "grad_norm": 0.27263036370277405, "learning_rate": 2.8049677049719936e-06, "loss": 0.0008, "step": 19846 }, { "epoch": 19.083653846153847, "grad_norm": 0.005073715932667255, "learning_rate": 2.8041026530104144e-06, "loss": 0.0001, "step": 19847 }, { "epoch": 19.084615384615386, "grad_norm": 0.003025044221431017, "learning_rate": 2.803237712708039e-06, "loss": 0.0, "step": 19848 }, { "epoch": 19.085576923076925, "grad_norm": 0.002705791499465704, "learning_rate": 2.802372884078295e-06, "loss": 0.0, "step": 19849 }, { "epoch": 19.08653846153846, "grad_norm": 0.004519248381257057, "learning_rate": 2.8015081671345968e-06, "loss": 0.0, "step": 19850 }, { "epoch": 19.0875, "grad_norm": 0.19045290350914001, "learning_rate": 2.8006435618903636e-06, "loss": 0.0003, "step": 19851 }, { "epoch": 19.088461538461537, "grad_norm": 0.003370797960087657, "learning_rate": 2.7997790683590163e-06, "loss": 0.0, "step": 19852 }, { "epoch": 19.089423076923076, "grad_norm": 0.00291711138561368, "learning_rate": 2.798914686553963e-06, "loss": 0.0, "step": 19853 }, { "epoch": 19.090384615384615, "grad_norm": 0.12706494331359863, "learning_rate": 2.798050416488618e-06, "loss": 0.0002, "step": 19854 }, { "epoch": 19.091346153846153, "grad_norm": 0.0026955900248140097, "learning_rate": 2.7971862581763955e-06, "loss": 0.0, "step": 19855 }, { "epoch": 19.092307692307692, "grad_norm": 0.005240594036877155, "learning_rate": 2.7963222116307e-06, "loss": 0.0001, "step": 19856 }, { "epoch": 19.09326923076923, "grad_norm": 0.005888657178729773, "learning_rate": 2.795458276864941e-06, "loss": 0.0, "step": 19857 }, { "epoch": 19.09423076923077, "grad_norm": 0.003179264720529318, "learning_rate": 2.7945944538925275e-06, "loss": 0.0, "step": 19858 }, { "epoch": 19.095192307692308, "grad_norm": 0.002612191718071699, "learning_rate": 2.793730742726858e-06, "loss": 0.0, "step": 19859 }, { "epoch": 19.096153846153847, "grad_norm": 0.0018935047555714846, "learning_rate": 2.7928671433813392e-06, "loss": 0.0, "step": 19860 }, { "epoch": 19.097115384615385, "grad_norm": 0.002562009496614337, "learning_rate": 2.7920036558693674e-06, "loss": 0.0, "step": 19861 }, { "epoch": 19.098076923076924, "grad_norm": 0.0020289281383156776, "learning_rate": 2.791140280204343e-06, "loss": 0.0, "step": 19862 }, { "epoch": 19.099038461538463, "grad_norm": 0.002353811636567116, "learning_rate": 2.7902770163996673e-06, "loss": 0.0, "step": 19863 }, { "epoch": 19.1, "grad_norm": 0.0029456322081387043, "learning_rate": 2.7894138644687296e-06, "loss": 0.0, "step": 19864 }, { "epoch": 19.10096153846154, "grad_norm": 0.004719793796539307, "learning_rate": 2.788550824424925e-06, "loss": 0.0, "step": 19865 }, { "epoch": 19.101923076923075, "grad_norm": 0.009788512252271175, "learning_rate": 2.7876878962816488e-06, "loss": 0.0001, "step": 19866 }, { "epoch": 19.102884615384614, "grad_norm": 0.004311965312808752, "learning_rate": 2.7868250800522866e-06, "loss": 0.0, "step": 19867 }, { "epoch": 19.103846153846153, "grad_norm": 0.006126533728092909, "learning_rate": 2.7859623757502286e-06, "loss": 0.0001, "step": 19868 }, { "epoch": 19.10480769230769, "grad_norm": 0.002641684841364622, "learning_rate": 2.785099783388864e-06, "loss": 0.0, "step": 19869 }, { "epoch": 19.10576923076923, "grad_norm": 0.003531420137733221, "learning_rate": 2.7842373029815727e-06, "loss": 0.0, "step": 19870 }, { "epoch": 19.10673076923077, "grad_norm": 0.003266172716394067, "learning_rate": 2.783374934541744e-06, "loss": 0.0, "step": 19871 }, { "epoch": 19.107692307692307, "grad_norm": 0.0049164677038788795, "learning_rate": 2.782512678082752e-06, "loss": 0.0, "step": 19872 }, { "epoch": 19.108653846153846, "grad_norm": 0.0013792458921670914, "learning_rate": 2.78165053361798e-06, "loss": 0.0, "step": 19873 }, { "epoch": 19.109615384615385, "grad_norm": 0.0012664998648688197, "learning_rate": 2.7807885011608094e-06, "loss": 0.0, "step": 19874 }, { "epoch": 19.110576923076923, "grad_norm": 0.002421511569991708, "learning_rate": 2.77992658072461e-06, "loss": 0.0, "step": 19875 }, { "epoch": 19.111538461538462, "grad_norm": 0.004064695909619331, "learning_rate": 2.7790647723227593e-06, "loss": 0.0, "step": 19876 }, { "epoch": 19.1125, "grad_norm": 0.009436708875000477, "learning_rate": 2.778203075968634e-06, "loss": 0.0001, "step": 19877 }, { "epoch": 19.11346153846154, "grad_norm": 0.011366978287696838, "learning_rate": 2.7773414916755968e-06, "loss": 0.0, "step": 19878 }, { "epoch": 19.114423076923078, "grad_norm": 0.0013605448184534907, "learning_rate": 2.776480019457023e-06, "loss": 0.0, "step": 19879 }, { "epoch": 19.115384615384617, "grad_norm": 0.001841254997998476, "learning_rate": 2.7756186593262802e-06, "loss": 0.0, "step": 19880 }, { "epoch": 19.116346153846155, "grad_norm": 0.024899931624531746, "learning_rate": 2.7747574112967303e-06, "loss": 0.0001, "step": 19881 }, { "epoch": 19.11730769230769, "grad_norm": 0.002977276686578989, "learning_rate": 2.773896275381742e-06, "loss": 0.0, "step": 19882 }, { "epoch": 19.11826923076923, "grad_norm": 0.0004356657445896417, "learning_rate": 2.7730352515946735e-06, "loss": 0.0, "step": 19883 }, { "epoch": 19.119230769230768, "grad_norm": 0.006374106742441654, "learning_rate": 2.7721743399488854e-06, "loss": 0.0, "step": 19884 }, { "epoch": 19.120192307692307, "grad_norm": 0.005300417542457581, "learning_rate": 2.7713135404577417e-06, "loss": 0.0, "step": 19885 }, { "epoch": 19.121153846153845, "grad_norm": 0.002193283522501588, "learning_rate": 2.770452853134593e-06, "loss": 0.0, "step": 19886 }, { "epoch": 19.122115384615384, "grad_norm": 0.018908487632870674, "learning_rate": 2.7695922779927975e-06, "loss": 0.0001, "step": 19887 }, { "epoch": 19.123076923076923, "grad_norm": 0.0026749875396490097, "learning_rate": 2.768731815045712e-06, "loss": 0.0, "step": 19888 }, { "epoch": 19.12403846153846, "grad_norm": 0.002620240906253457, "learning_rate": 2.767871464306682e-06, "loss": 0.0, "step": 19889 }, { "epoch": 19.125, "grad_norm": 0.008615085855126381, "learning_rate": 2.76701122578906e-06, "loss": 0.0, "step": 19890 }, { "epoch": 19.12596153846154, "grad_norm": 0.0028766216710209846, "learning_rate": 2.7661510995062e-06, "loss": 0.0, "step": 19891 }, { "epoch": 19.126923076923077, "grad_norm": 0.0028477050364017487, "learning_rate": 2.7652910854714397e-06, "loss": 0.0, "step": 19892 }, { "epoch": 19.127884615384616, "grad_norm": 0.010049699805676937, "learning_rate": 2.764431183698131e-06, "loss": 0.0, "step": 19893 }, { "epoch": 19.128846153846155, "grad_norm": 0.0022530080750584602, "learning_rate": 2.763571394199612e-06, "loss": 0.0, "step": 19894 }, { "epoch": 19.129807692307693, "grad_norm": 0.007136894389986992, "learning_rate": 2.762711716989226e-06, "loss": 0.0, "step": 19895 }, { "epoch": 19.130769230769232, "grad_norm": 0.004778367932885885, "learning_rate": 2.7618521520803155e-06, "loss": 0.0, "step": 19896 }, { "epoch": 19.13173076923077, "grad_norm": 0.002824625000357628, "learning_rate": 2.7609926994862136e-06, "loss": 0.0, "step": 19897 }, { "epoch": 19.13269230769231, "grad_norm": 0.007721103727817535, "learning_rate": 2.7601333592202583e-06, "loss": 0.0001, "step": 19898 }, { "epoch": 19.133653846153845, "grad_norm": 0.012295298278331757, "learning_rate": 2.759274131295787e-06, "loss": 0.0001, "step": 19899 }, { "epoch": 19.134615384615383, "grad_norm": 0.002814382314682007, "learning_rate": 2.758415015726128e-06, "loss": 0.0, "step": 19900 }, { "epoch": 19.135576923076922, "grad_norm": 0.0030854246579110622, "learning_rate": 2.757556012524614e-06, "loss": 0.0, "step": 19901 }, { "epoch": 19.13653846153846, "grad_norm": 0.006079170387238264, "learning_rate": 2.7566971217045767e-06, "loss": 0.0, "step": 19902 }, { "epoch": 19.1375, "grad_norm": 0.011497863568365574, "learning_rate": 2.7558383432793377e-06, "loss": 0.0001, "step": 19903 }, { "epoch": 19.138461538461538, "grad_norm": 0.0016480085905641317, "learning_rate": 2.7549796772622306e-06, "loss": 0.0, "step": 19904 }, { "epoch": 19.139423076923077, "grad_norm": 0.004005616530776024, "learning_rate": 2.754121123666571e-06, "loss": 0.0, "step": 19905 }, { "epoch": 19.140384615384615, "grad_norm": 0.001951380050741136, "learning_rate": 2.753262682505686e-06, "loss": 0.0, "step": 19906 }, { "epoch": 19.141346153846154, "grad_norm": 0.008784310892224312, "learning_rate": 2.752404353792898e-06, "loss": 0.0, "step": 19907 }, { "epoch": 19.142307692307693, "grad_norm": 0.0016144381370395422, "learning_rate": 2.75154613754152e-06, "loss": 0.0, "step": 19908 }, { "epoch": 19.14326923076923, "grad_norm": 0.03455531224608421, "learning_rate": 2.7506880337648723e-06, "loss": 0.0001, "step": 19909 }, { "epoch": 19.14423076923077, "grad_norm": 0.005046285223215818, "learning_rate": 2.7498300424762725e-06, "loss": 0.0, "step": 19910 }, { "epoch": 19.14519230769231, "grad_norm": 0.0030030079651623964, "learning_rate": 2.7489721636890276e-06, "loss": 0.0, "step": 19911 }, { "epoch": 19.146153846153847, "grad_norm": 0.008348534815013409, "learning_rate": 2.7481143974164548e-06, "loss": 0.0001, "step": 19912 }, { "epoch": 19.147115384615386, "grad_norm": 0.0020230577792972326, "learning_rate": 2.7472567436718642e-06, "loss": 0.0, "step": 19913 }, { "epoch": 19.148076923076925, "grad_norm": 0.007100765127688646, "learning_rate": 2.7463992024685604e-06, "loss": 0.0, "step": 19914 }, { "epoch": 19.14903846153846, "grad_norm": 0.007791698910295963, "learning_rate": 2.7455417738198542e-06, "loss": 0.0, "step": 19915 }, { "epoch": 19.15, "grad_norm": 0.011045590043067932, "learning_rate": 2.744684457739045e-06, "loss": 0.0, "step": 19916 }, { "epoch": 19.150961538461537, "grad_norm": 0.003811987116932869, "learning_rate": 2.7438272542394396e-06, "loss": 0.0, "step": 19917 }, { "epoch": 19.151923076923076, "grad_norm": 0.0018873425433412194, "learning_rate": 2.742970163334341e-06, "loss": 0.0, "step": 19918 }, { "epoch": 19.152884615384615, "grad_norm": 0.008637804538011551, "learning_rate": 2.742113185037044e-06, "loss": 0.0001, "step": 19919 }, { "epoch": 19.153846153846153, "grad_norm": 0.0032605030573904514, "learning_rate": 2.741256319360848e-06, "loss": 0.0, "step": 19920 }, { "epoch": 19.154807692307692, "grad_norm": 0.002024404238909483, "learning_rate": 2.740399566319053e-06, "loss": 0.0, "step": 19921 }, { "epoch": 19.15576923076923, "grad_norm": 0.00041567182051949203, "learning_rate": 2.7395429259249472e-06, "loss": 0.0, "step": 19922 }, { "epoch": 19.15673076923077, "grad_norm": 0.007788221351802349, "learning_rate": 2.7386863981918256e-06, "loss": 0.0001, "step": 19923 }, { "epoch": 19.157692307692308, "grad_norm": 0.003241352504119277, "learning_rate": 2.7378299831329836e-06, "loss": 0.0, "step": 19924 }, { "epoch": 19.158653846153847, "grad_norm": 0.0019336406840011477, "learning_rate": 2.736973680761702e-06, "loss": 0.0, "step": 19925 }, { "epoch": 19.159615384615385, "grad_norm": 0.031886786222457886, "learning_rate": 2.7361174910912745e-06, "loss": 0.0001, "step": 19926 }, { "epoch": 19.160576923076924, "grad_norm": 0.003366251476109028, "learning_rate": 2.735261414134983e-06, "loss": 0.0, "step": 19927 }, { "epoch": 19.161538461538463, "grad_norm": 0.00663877883926034, "learning_rate": 2.734405449906111e-06, "loss": 0.0001, "step": 19928 }, { "epoch": 19.1625, "grad_norm": 0.003634076099842787, "learning_rate": 2.7335495984179463e-06, "loss": 0.0, "step": 19929 }, { "epoch": 19.16346153846154, "grad_norm": 0.003228135406970978, "learning_rate": 2.7326938596837614e-06, "loss": 0.0, "step": 19930 }, { "epoch": 19.164423076923075, "grad_norm": 0.004404055420309305, "learning_rate": 2.7318382337168393e-06, "loss": 0.0001, "step": 19931 }, { "epoch": 19.165384615384614, "grad_norm": 0.004863533657044172, "learning_rate": 2.7309827205304584e-06, "loss": 0.0001, "step": 19932 }, { "epoch": 19.166346153846153, "grad_norm": 0.0029582062270492315, "learning_rate": 2.7301273201378884e-06, "loss": 0.0, "step": 19933 }, { "epoch": 19.16730769230769, "grad_norm": 0.002781681716442108, "learning_rate": 2.7292720325524057e-06, "loss": 0.0, "step": 19934 }, { "epoch": 19.16826923076923, "grad_norm": 0.003658255096524954, "learning_rate": 2.7284168577872837e-06, "loss": 0.0, "step": 19935 }, { "epoch": 19.16923076923077, "grad_norm": 0.0015467460034415126, "learning_rate": 2.7275617958557887e-06, "loss": 0.0, "step": 19936 }, { "epoch": 19.170192307692307, "grad_norm": 0.00365710468031466, "learning_rate": 2.7267068467711898e-06, "loss": 0.0, "step": 19937 }, { "epoch": 19.171153846153846, "grad_norm": 0.0019538775086402893, "learning_rate": 2.7258520105467566e-06, "loss": 0.0, "step": 19938 }, { "epoch": 19.172115384615385, "grad_norm": 0.03196011483669281, "learning_rate": 2.7249972871957474e-06, "loss": 0.0001, "step": 19939 }, { "epoch": 19.173076923076923, "grad_norm": 0.001522683072835207, "learning_rate": 2.7241426767314325e-06, "loss": 0.0, "step": 19940 }, { "epoch": 19.174038461538462, "grad_norm": 0.012788629159331322, "learning_rate": 2.7232881791670652e-06, "loss": 0.0001, "step": 19941 }, { "epoch": 19.175, "grad_norm": 0.00329311634413898, "learning_rate": 2.7224337945159087e-06, "loss": 0.0, "step": 19942 }, { "epoch": 19.17596153846154, "grad_norm": 0.006314065307378769, "learning_rate": 2.7215795227912236e-06, "loss": 0.0, "step": 19943 }, { "epoch": 19.176923076923078, "grad_norm": 0.01615356281399727, "learning_rate": 2.7207253640062593e-06, "loss": 0.0001, "step": 19944 }, { "epoch": 19.177884615384617, "grad_norm": 0.0043106721714138985, "learning_rate": 2.719871318174272e-06, "loss": 0.0, "step": 19945 }, { "epoch": 19.178846153846155, "grad_norm": 0.0045550488866865635, "learning_rate": 2.719017385308519e-06, "loss": 0.0, "step": 19946 }, { "epoch": 19.17980769230769, "grad_norm": 0.010589633136987686, "learning_rate": 2.7181635654222425e-06, "loss": 0.0, "step": 19947 }, { "epoch": 19.18076923076923, "grad_norm": 0.0024158628657460213, "learning_rate": 2.7173098585286974e-06, "loss": 0.0, "step": 19948 }, { "epoch": 19.181730769230768, "grad_norm": 0.002743515884503722, "learning_rate": 2.7164562646411306e-06, "loss": 0.0, "step": 19949 }, { "epoch": 19.182692307692307, "grad_norm": 0.002434046706184745, "learning_rate": 2.715602783772783e-06, "loss": 0.0, "step": 19950 }, { "epoch": 19.183653846153845, "grad_norm": 2.421846628189087, "learning_rate": 2.714749415936904e-06, "loss": 0.0346, "step": 19951 }, { "epoch": 19.184615384615384, "grad_norm": 0.0087191266939044, "learning_rate": 2.713896161146727e-06, "loss": 0.0, "step": 19952 }, { "epoch": 19.185576923076923, "grad_norm": 0.004014232661575079, "learning_rate": 2.7130430194155e-06, "loss": 0.0, "step": 19953 }, { "epoch": 19.18653846153846, "grad_norm": 0.003376738866791129, "learning_rate": 2.7121899907564597e-06, "loss": 0.0, "step": 19954 }, { "epoch": 19.1875, "grad_norm": 0.006423544604331255, "learning_rate": 2.7113370751828394e-06, "loss": 0.0001, "step": 19955 }, { "epoch": 19.18846153846154, "grad_norm": 0.056200433522462845, "learning_rate": 2.710484272707875e-06, "loss": 0.0003, "step": 19956 }, { "epoch": 19.189423076923077, "grad_norm": 0.003872405970469117, "learning_rate": 2.7096315833448027e-06, "loss": 0.0, "step": 19957 }, { "epoch": 19.190384615384616, "grad_norm": 0.0025957170873880386, "learning_rate": 2.7087790071068485e-06, "loss": 0.0, "step": 19958 }, { "epoch": 19.191346153846155, "grad_norm": 0.0031670820899307728, "learning_rate": 2.7079265440072456e-06, "loss": 0.0, "step": 19959 }, { "epoch": 19.192307692307693, "grad_norm": 0.0006502874894067645, "learning_rate": 2.7070741940592225e-06, "loss": 0.0, "step": 19960 }, { "epoch": 19.193269230769232, "grad_norm": 0.004247342236340046, "learning_rate": 2.706221957276002e-06, "loss": 0.0, "step": 19961 }, { "epoch": 19.19423076923077, "grad_norm": 0.00265843840315938, "learning_rate": 2.7053698336708113e-06, "loss": 0.0, "step": 19962 }, { "epoch": 19.19519230769231, "grad_norm": 0.005356731824576855, "learning_rate": 2.704517823256869e-06, "loss": 0.0001, "step": 19963 }, { "epoch": 19.196153846153845, "grad_norm": 0.0029977259691804647, "learning_rate": 2.7036659260473973e-06, "loss": 0.0, "step": 19964 }, { "epoch": 19.197115384615383, "grad_norm": 0.0026817212346941233, "learning_rate": 2.7028141420556196e-06, "loss": 0.0, "step": 19965 }, { "epoch": 19.198076923076922, "grad_norm": 0.005380645394325256, "learning_rate": 2.7019624712947468e-06, "loss": 0.0, "step": 19966 }, { "epoch": 19.19903846153846, "grad_norm": 0.001378944143652916, "learning_rate": 2.7011109137779967e-06, "loss": 0.0, "step": 19967 }, { "epoch": 19.2, "grad_norm": 0.0036074670497328043, "learning_rate": 2.700259469518586e-06, "loss": 0.0, "step": 19968 }, { "epoch": 19.200961538461538, "grad_norm": 0.016451392322778702, "learning_rate": 2.699408138529721e-06, "loss": 0.0001, "step": 19969 }, { "epoch": 19.201923076923077, "grad_norm": 0.004035330377519131, "learning_rate": 2.6985569208246144e-06, "loss": 0.0, "step": 19970 }, { "epoch": 19.202884615384615, "grad_norm": 0.003011122113093734, "learning_rate": 2.6977058164164784e-06, "loss": 0.0, "step": 19971 }, { "epoch": 19.203846153846154, "grad_norm": 0.006259158719331026, "learning_rate": 2.6968548253185135e-06, "loss": 0.0, "step": 19972 }, { "epoch": 19.204807692307693, "grad_norm": 0.004685625433921814, "learning_rate": 2.696003947543929e-06, "loss": 0.0001, "step": 19973 }, { "epoch": 19.20576923076923, "grad_norm": 0.0025949578266590834, "learning_rate": 2.695153183105924e-06, "loss": 0.0, "step": 19974 }, { "epoch": 19.20673076923077, "grad_norm": 0.0021144135389477015, "learning_rate": 2.6943025320177018e-06, "loss": 0.0, "step": 19975 }, { "epoch": 19.20769230769231, "grad_norm": 0.0009158277534879744, "learning_rate": 2.6934519942924663e-06, "loss": 0.0, "step": 19976 }, { "epoch": 19.208653846153847, "grad_norm": 0.020071331411600113, "learning_rate": 2.692601569943407e-06, "loss": 0.0001, "step": 19977 }, { "epoch": 19.209615384615386, "grad_norm": 0.002441832795739174, "learning_rate": 2.6917512589837257e-06, "loss": 0.0, "step": 19978 }, { "epoch": 19.210576923076925, "grad_norm": 0.0025532422587275505, "learning_rate": 2.6909010614266173e-06, "loss": 0.0, "step": 19979 }, { "epoch": 19.21153846153846, "grad_norm": 0.015843894332647324, "learning_rate": 2.69005097728527e-06, "loss": 0.0, "step": 19980 }, { "epoch": 19.2125, "grad_norm": 0.002494971500709653, "learning_rate": 2.689201006572877e-06, "loss": 0.0, "step": 19981 }, { "epoch": 19.213461538461537, "grad_norm": 0.003252606838941574, "learning_rate": 2.6883511493026306e-06, "loss": 0.0, "step": 19982 }, { "epoch": 19.214423076923076, "grad_norm": 0.18277396261692047, "learning_rate": 2.6875014054877114e-06, "loss": 0.0005, "step": 19983 }, { "epoch": 19.215384615384615, "grad_norm": 0.000782180461101234, "learning_rate": 2.6866517751413123e-06, "loss": 0.0, "step": 19984 }, { "epoch": 19.216346153846153, "grad_norm": 0.006124719977378845, "learning_rate": 2.6858022582766097e-06, "loss": 0.0, "step": 19985 }, { "epoch": 19.217307692307692, "grad_norm": 3.444134473800659, "learning_rate": 2.6849528549067894e-06, "loss": 0.0155, "step": 19986 }, { "epoch": 19.21826923076923, "grad_norm": 0.0040741306729614735, "learning_rate": 2.684103565045034e-06, "loss": 0.0, "step": 19987 }, { "epoch": 19.21923076923077, "grad_norm": 0.007488573435693979, "learning_rate": 2.683254388704517e-06, "loss": 0.0, "step": 19988 }, { "epoch": 19.220192307692308, "grad_norm": 0.0021773113403469324, "learning_rate": 2.682405325898416e-06, "loss": 0.0, "step": 19989 }, { "epoch": 19.221153846153847, "grad_norm": 0.0034997626207768917, "learning_rate": 2.6815563766399122e-06, "loss": 0.0, "step": 19990 }, { "epoch": 19.222115384615385, "grad_norm": 0.005149311851710081, "learning_rate": 2.6807075409421703e-06, "loss": 0.0, "step": 19991 }, { "epoch": 19.223076923076924, "grad_norm": 0.004859680309891701, "learning_rate": 2.6798588188183662e-06, "loss": 0.0001, "step": 19992 }, { "epoch": 19.224038461538463, "grad_norm": 0.0066904411651194096, "learning_rate": 2.6790102102816707e-06, "loss": 0.0, "step": 19993 }, { "epoch": 19.225, "grad_norm": 0.0072172111831605434, "learning_rate": 2.678161715345248e-06, "loss": 0.0001, "step": 19994 }, { "epoch": 19.22596153846154, "grad_norm": 0.004395523574203253, "learning_rate": 2.6773133340222677e-06, "loss": 0.0, "step": 19995 }, { "epoch": 19.226923076923075, "grad_norm": 0.5997393727302551, "learning_rate": 2.6764650663258907e-06, "loss": 0.0023, "step": 19996 }, { "epoch": 19.227884615384614, "grad_norm": 0.006545008160173893, "learning_rate": 2.675616912269281e-06, "loss": 0.0, "step": 19997 }, { "epoch": 19.228846153846153, "grad_norm": 0.0021859738044440746, "learning_rate": 2.6747688718656038e-06, "loss": 0.0, "step": 19998 }, { "epoch": 19.22980769230769, "grad_norm": 0.0019008256494998932, "learning_rate": 2.6739209451280112e-06, "loss": 0.0, "step": 19999 }, { "epoch": 19.23076923076923, "grad_norm": 0.001664792187511921, "learning_rate": 2.6730731320696636e-06, "loss": 0.0, "step": 20000 }, { "epoch": 19.23173076923077, "grad_norm": 0.0029337715823203325, "learning_rate": 2.67222543270372e-06, "loss": 0.0, "step": 20001 }, { "epoch": 19.232692307692307, "grad_norm": 0.00429883087053895, "learning_rate": 2.671377847043327e-06, "loss": 0.0, "step": 20002 }, { "epoch": 19.233653846153846, "grad_norm": 0.01097430381923914, "learning_rate": 2.670530375101641e-06, "loss": 0.0001, "step": 20003 }, { "epoch": 19.234615384615385, "grad_norm": 0.007553720846772194, "learning_rate": 2.6696830168918154e-06, "loss": 0.0001, "step": 20004 }, { "epoch": 19.235576923076923, "grad_norm": 0.007705559488385916, "learning_rate": 2.668835772426992e-06, "loss": 0.0001, "step": 20005 }, { "epoch": 19.236538461538462, "grad_norm": 0.0031127759721130133, "learning_rate": 2.6679886417203226e-06, "loss": 0.0, "step": 20006 }, { "epoch": 19.2375, "grad_norm": 0.014280480332672596, "learning_rate": 2.667141624784949e-06, "loss": 0.0001, "step": 20007 }, { "epoch": 19.23846153846154, "grad_norm": 0.003506181761622429, "learning_rate": 2.666294721634014e-06, "loss": 0.0, "step": 20008 }, { "epoch": 19.239423076923078, "grad_norm": 0.003654500935226679, "learning_rate": 2.665447932280665e-06, "loss": 0.0, "step": 20009 }, { "epoch": 19.240384615384617, "grad_norm": 0.0014565656892955303, "learning_rate": 2.664601256738034e-06, "loss": 0.0, "step": 20010 }, { "epoch": 19.241346153846155, "grad_norm": 0.004301460459828377, "learning_rate": 2.6637546950192627e-06, "loss": 0.0, "step": 20011 }, { "epoch": 19.24230769230769, "grad_norm": 0.003142280038446188, "learning_rate": 2.66290824713749e-06, "loss": 0.0, "step": 20012 }, { "epoch": 19.24326923076923, "grad_norm": 0.003121115267276764, "learning_rate": 2.6620619131058432e-06, "loss": 0.0, "step": 20013 }, { "epoch": 19.244230769230768, "grad_norm": 0.004618379287421703, "learning_rate": 2.66121569293746e-06, "loss": 0.0, "step": 20014 }, { "epoch": 19.245192307692307, "grad_norm": 0.004400776699185371, "learning_rate": 2.6603695866454725e-06, "loss": 0.0, "step": 20015 }, { "epoch": 19.246153846153845, "grad_norm": 0.004986509680747986, "learning_rate": 2.6595235942430044e-06, "loss": 0.0, "step": 20016 }, { "epoch": 19.247115384615384, "grad_norm": 0.005848277825862169, "learning_rate": 2.6586777157431865e-06, "loss": 0.0, "step": 20017 }, { "epoch": 19.248076923076923, "grad_norm": 0.0037419763393700123, "learning_rate": 2.6578319511591466e-06, "loss": 0.0, "step": 20018 }, { "epoch": 19.24903846153846, "grad_norm": 0.004895674996078014, "learning_rate": 2.6569863005040032e-06, "loss": 0.0, "step": 20019 }, { "epoch": 19.25, "grad_norm": 0.0073980106972157955, "learning_rate": 2.656140763790883e-06, "loss": 0.0001, "step": 20020 }, { "epoch": 19.25096153846154, "grad_norm": 0.0027680883649736643, "learning_rate": 2.655295341032903e-06, "loss": 0.0, "step": 20021 }, { "epoch": 19.251923076923077, "grad_norm": 0.0037087183445692062, "learning_rate": 2.654450032243181e-06, "loss": 0.0, "step": 20022 }, { "epoch": 19.252884615384616, "grad_norm": 0.007073741406202316, "learning_rate": 2.65360483743484e-06, "loss": 0.0, "step": 20023 }, { "epoch": 19.253846153846155, "grad_norm": 0.0031865043565630913, "learning_rate": 2.652759756620986e-06, "loss": 0.0, "step": 20024 }, { "epoch": 19.254807692307693, "grad_norm": 0.001980168977752328, "learning_rate": 2.651914789814738e-06, "loss": 0.0, "step": 20025 }, { "epoch": 19.255769230769232, "grad_norm": 0.0030514474492520094, "learning_rate": 2.6510699370292083e-06, "loss": 0.0, "step": 20026 }, { "epoch": 19.25673076923077, "grad_norm": 0.005187137983739376, "learning_rate": 2.6502251982775017e-06, "loss": 0.0, "step": 20027 }, { "epoch": 19.25769230769231, "grad_norm": 0.007031432818621397, "learning_rate": 2.6493805735727287e-06, "loss": 0.0, "step": 20028 }, { "epoch": 19.258653846153845, "grad_norm": 0.0052482145838439465, "learning_rate": 2.648536062927999e-06, "loss": 0.0, "step": 20029 }, { "epoch": 19.259615384615383, "grad_norm": 0.003324732417240739, "learning_rate": 2.647691666356409e-06, "loss": 0.0001, "step": 20030 }, { "epoch": 19.260576923076922, "grad_norm": 0.003915151581168175, "learning_rate": 2.6468473838710695e-06, "loss": 0.0, "step": 20031 }, { "epoch": 19.26153846153846, "grad_norm": 0.004295116290450096, "learning_rate": 2.6460032154850734e-06, "loss": 0.0, "step": 20032 }, { "epoch": 19.2625, "grad_norm": 0.02539079077541828, "learning_rate": 2.6451591612115245e-06, "loss": 0.0001, "step": 20033 }, { "epoch": 19.263461538461538, "grad_norm": 0.001907000201754272, "learning_rate": 2.6443152210635227e-06, "loss": 0.0, "step": 20034 }, { "epoch": 19.264423076923077, "grad_norm": 0.08733632415533066, "learning_rate": 2.6434713950541558e-06, "loss": 0.0003, "step": 20035 }, { "epoch": 19.265384615384615, "grad_norm": 0.003467248287051916, "learning_rate": 2.6426276831965227e-06, "loss": 0.0, "step": 20036 }, { "epoch": 19.266346153846154, "grad_norm": 0.011064781807363033, "learning_rate": 2.641784085503717e-06, "loss": 0.0001, "step": 20037 }, { "epoch": 19.267307692307693, "grad_norm": 0.004159769508987665, "learning_rate": 2.6409406019888227e-06, "loss": 0.0, "step": 20038 }, { "epoch": 19.26826923076923, "grad_norm": 0.02337775193154812, "learning_rate": 2.640097232664932e-06, "loss": 0.0001, "step": 20039 }, { "epoch": 19.26923076923077, "grad_norm": 0.0039001302793622017, "learning_rate": 2.6392539775451344e-06, "loss": 0.0, "step": 20040 }, { "epoch": 19.27019230769231, "grad_norm": 0.0057300603948533535, "learning_rate": 2.6384108366425075e-06, "loss": 0.0, "step": 20041 }, { "epoch": 19.271153846153847, "grad_norm": 0.004746666178107262, "learning_rate": 2.637567809970143e-06, "loss": 0.0001, "step": 20042 }, { "epoch": 19.272115384615386, "grad_norm": 0.0019695470109581947, "learning_rate": 2.636724897541114e-06, "loss": 0.0, "step": 20043 }, { "epoch": 19.273076923076925, "grad_norm": 0.001713775796815753, "learning_rate": 2.635882099368503e-06, "loss": 0.0, "step": 20044 }, { "epoch": 19.27403846153846, "grad_norm": 0.005396862048655748, "learning_rate": 2.6350394154653926e-06, "loss": 0.0001, "step": 20045 }, { "epoch": 19.275, "grad_norm": 0.010988308116793633, "learning_rate": 2.634196845844851e-06, "loss": 0.0001, "step": 20046 }, { "epoch": 19.275961538461537, "grad_norm": 0.9368448257446289, "learning_rate": 2.6333543905199554e-06, "loss": 0.0034, "step": 20047 }, { "epoch": 19.276923076923076, "grad_norm": 0.00814804621040821, "learning_rate": 2.6325120495037828e-06, "loss": 0.0, "step": 20048 }, { "epoch": 19.277884615384615, "grad_norm": 2.592186212539673, "learning_rate": 2.631669822809396e-06, "loss": 0.0076, "step": 20049 }, { "epoch": 19.278846153846153, "grad_norm": 0.008224544115364552, "learning_rate": 2.630827710449868e-06, "loss": 0.0, "step": 20050 }, { "epoch": 19.279807692307692, "grad_norm": 0.0049825869500637054, "learning_rate": 2.629985712438269e-06, "loss": 0.0, "step": 20051 }, { "epoch": 19.28076923076923, "grad_norm": 0.008968677371740341, "learning_rate": 2.629143828787657e-06, "loss": 0.0, "step": 20052 }, { "epoch": 19.28173076923077, "grad_norm": 0.014477003365755081, "learning_rate": 2.628302059511104e-06, "loss": 0.0001, "step": 20053 }, { "epoch": 19.282692307692308, "grad_norm": 0.010001585818827152, "learning_rate": 2.6274604046216635e-06, "loss": 0.0001, "step": 20054 }, { "epoch": 19.283653846153847, "grad_norm": 0.0013778064167127013, "learning_rate": 2.6266188641324e-06, "loss": 0.0, "step": 20055 }, { "epoch": 19.284615384615385, "grad_norm": 0.003206397406756878, "learning_rate": 2.6257774380563726e-06, "loss": 0.0, "step": 20056 }, { "epoch": 19.285576923076924, "grad_norm": 0.004262412432581186, "learning_rate": 2.624936126406635e-06, "loss": 0.0, "step": 20057 }, { "epoch": 19.286538461538463, "grad_norm": 0.006294610444456339, "learning_rate": 2.6240949291962424e-06, "loss": 0.0, "step": 20058 }, { "epoch": 19.2875, "grad_norm": 0.005006480496376753, "learning_rate": 2.623253846438252e-06, "loss": 0.0, "step": 20059 }, { "epoch": 19.28846153846154, "grad_norm": 0.08157418668270111, "learning_rate": 2.6224128781457083e-06, "loss": 0.0002, "step": 20060 }, { "epoch": 19.289423076923075, "grad_norm": 0.0133639145642519, "learning_rate": 2.6215720243316636e-06, "loss": 0.0002, "step": 20061 }, { "epoch": 19.290384615384614, "grad_norm": 0.005164950154721737, "learning_rate": 2.6207312850091683e-06, "loss": 0.0001, "step": 20062 }, { "epoch": 19.291346153846153, "grad_norm": 0.007171222008764744, "learning_rate": 2.6198906601912623e-06, "loss": 0.0001, "step": 20063 }, { "epoch": 19.29230769230769, "grad_norm": 0.003473642049357295, "learning_rate": 2.619050149890997e-06, "loss": 0.0, "step": 20064 }, { "epoch": 19.29326923076923, "grad_norm": 0.005805852822959423, "learning_rate": 2.6182097541214067e-06, "loss": 0.0001, "step": 20065 }, { "epoch": 19.29423076923077, "grad_norm": 0.2296004593372345, "learning_rate": 2.6173694728955357e-06, "loss": 0.0007, "step": 20066 }, { "epoch": 19.295192307692307, "grad_norm": 0.0024361673276871443, "learning_rate": 2.6165293062264264e-06, "loss": 0.0, "step": 20067 }, { "epoch": 19.296153846153846, "grad_norm": 0.0027584845665842295, "learning_rate": 2.6156892541271083e-06, "loss": 0.0, "step": 20068 }, { "epoch": 19.297115384615385, "grad_norm": 0.0036662211641669273, "learning_rate": 2.6148493166106215e-06, "loss": 0.0, "step": 20069 }, { "epoch": 19.298076923076923, "grad_norm": 0.009077344089746475, "learning_rate": 2.6140094936899997e-06, "loss": 0.0, "step": 20070 }, { "epoch": 19.299038461538462, "grad_norm": 0.014091948047280312, "learning_rate": 2.613169785378271e-06, "loss": 0.0, "step": 20071 }, { "epoch": 19.3, "grad_norm": 0.011926615610718727, "learning_rate": 2.6123301916884657e-06, "loss": 0.0, "step": 20072 }, { "epoch": 19.30096153846154, "grad_norm": 0.003059589536860585, "learning_rate": 2.6114907126336176e-06, "loss": 0.0, "step": 20073 }, { "epoch": 19.301923076923078, "grad_norm": 0.007952542044222355, "learning_rate": 2.610651348226746e-06, "loss": 0.0, "step": 20074 }, { "epoch": 19.302884615384617, "grad_norm": 0.0011291500413790345, "learning_rate": 2.6098120984808793e-06, "loss": 0.0, "step": 20075 }, { "epoch": 19.303846153846155, "grad_norm": 0.0027562626637518406, "learning_rate": 2.608972963409038e-06, "loss": 0.0, "step": 20076 }, { "epoch": 19.30480769230769, "grad_norm": 0.00196277373470366, "learning_rate": 2.6081339430242426e-06, "loss": 0.0, "step": 20077 }, { "epoch": 19.30576923076923, "grad_norm": 0.0034721544943749905, "learning_rate": 2.6072950373395167e-06, "loss": 0.0, "step": 20078 }, { "epoch": 19.306730769230768, "grad_norm": 0.0018427800387144089, "learning_rate": 2.606456246367871e-06, "loss": 0.0, "step": 20079 }, { "epoch": 19.307692307692307, "grad_norm": 0.00642014853656292, "learning_rate": 2.605617570122325e-06, "loss": 0.0001, "step": 20080 }, { "epoch": 19.308653846153845, "grad_norm": 0.003531768685206771, "learning_rate": 2.604779008615895e-06, "loss": 0.0, "step": 20081 }, { "epoch": 19.309615384615384, "grad_norm": 0.0009949234081432223, "learning_rate": 2.6039405618615878e-06, "loss": 0.0, "step": 20082 }, { "epoch": 19.310576923076923, "grad_norm": 0.004433866124600172, "learning_rate": 2.603102229872415e-06, "loss": 0.0, "step": 20083 }, { "epoch": 19.31153846153846, "grad_norm": 0.0896802693605423, "learning_rate": 2.6022640126613895e-06, "loss": 0.0006, "step": 20084 }, { "epoch": 19.3125, "grad_norm": 0.002421668963506818, "learning_rate": 2.6014259102415106e-06, "loss": 0.0, "step": 20085 }, { "epoch": 19.31346153846154, "grad_norm": 0.005081364419311285, "learning_rate": 2.6005879226257915e-06, "loss": 0.0, "step": 20086 }, { "epoch": 19.314423076923077, "grad_norm": 0.00600131880491972, "learning_rate": 2.5997500498272264e-06, "loss": 0.0, "step": 20087 }, { "epoch": 19.315384615384616, "grad_norm": 0.0016332771629095078, "learning_rate": 2.5989122918588216e-06, "loss": 0.0, "step": 20088 }, { "epoch": 19.316346153846155, "grad_norm": 0.0037539247423410416, "learning_rate": 2.5980746487335785e-06, "loss": 0.0, "step": 20089 }, { "epoch": 19.317307692307693, "grad_norm": 0.015248795039951801, "learning_rate": 2.5972371204644907e-06, "loss": 0.0001, "step": 20090 }, { "epoch": 19.318269230769232, "grad_norm": 0.00901268795132637, "learning_rate": 2.596399707064555e-06, "loss": 0.0, "step": 20091 }, { "epoch": 19.31923076923077, "grad_norm": 0.005644030869007111, "learning_rate": 2.59556240854677e-06, "loss": 0.0, "step": 20092 }, { "epoch": 19.32019230769231, "grad_norm": 0.13886858522891998, "learning_rate": 2.5947252249241218e-06, "loss": 0.0003, "step": 20093 }, { "epoch": 19.321153846153845, "grad_norm": 0.002863008761778474, "learning_rate": 2.593888156209603e-06, "loss": 0.0, "step": 20094 }, { "epoch": 19.322115384615383, "grad_norm": 0.1030595600605011, "learning_rate": 2.593051202416207e-06, "loss": 0.0004, "step": 20095 }, { "epoch": 19.323076923076922, "grad_norm": 0.004783864598721266, "learning_rate": 2.592214363556914e-06, "loss": 0.0001, "step": 20096 }, { "epoch": 19.32403846153846, "grad_norm": 0.002628444926813245, "learning_rate": 2.5913776396447155e-06, "loss": 0.0, "step": 20097 }, { "epoch": 19.325, "grad_norm": 0.0028866631910204887, "learning_rate": 2.590541030692589e-06, "loss": 0.0, "step": 20098 }, { "epoch": 19.325961538461538, "grad_norm": 0.0076879397965967655, "learning_rate": 2.5897045367135163e-06, "loss": 0.0, "step": 20099 }, { "epoch": 19.326923076923077, "grad_norm": 0.002308892784640193, "learning_rate": 2.5888681577204865e-06, "loss": 0.0, "step": 20100 }, { "epoch": 19.327884615384615, "grad_norm": 0.006722066085785627, "learning_rate": 2.588031893726468e-06, "loss": 0.0, "step": 20101 }, { "epoch": 19.328846153846154, "grad_norm": 1.2585060596466064, "learning_rate": 2.587195744744444e-06, "loss": 0.0041, "step": 20102 }, { "epoch": 19.329807692307693, "grad_norm": 0.004412509500980377, "learning_rate": 2.586359710787383e-06, "loss": 0.0, "step": 20103 }, { "epoch": 19.33076923076923, "grad_norm": 0.00408868957310915, "learning_rate": 2.5855237918682597e-06, "loss": 0.0001, "step": 20104 }, { "epoch": 19.33173076923077, "grad_norm": 0.003299527568742633, "learning_rate": 2.5846879880000496e-06, "loss": 0.0, "step": 20105 }, { "epoch": 19.33269230769231, "grad_norm": 0.004471152555197477, "learning_rate": 2.583852299195715e-06, "loss": 0.0, "step": 20106 }, { "epoch": 19.333653846153847, "grad_norm": 0.001843729056417942, "learning_rate": 2.583016725468226e-06, "loss": 0.0, "step": 20107 }, { "epoch": 19.334615384615386, "grad_norm": 0.0034875236451625824, "learning_rate": 2.5821812668305523e-06, "loss": 0.0, "step": 20108 }, { "epoch": 19.335576923076925, "grad_norm": 0.003608420491218567, "learning_rate": 2.5813459232956518e-06, "loss": 0.0, "step": 20109 }, { "epoch": 19.33653846153846, "grad_norm": 0.010802729986608028, "learning_rate": 2.580510694876488e-06, "loss": 0.0, "step": 20110 }, { "epoch": 19.3375, "grad_norm": 0.0031601719092577696, "learning_rate": 2.5796755815860264e-06, "loss": 0.0, "step": 20111 }, { "epoch": 19.338461538461537, "grad_norm": 0.001922396826557815, "learning_rate": 2.5788405834372178e-06, "loss": 0.0, "step": 20112 }, { "epoch": 19.339423076923076, "grad_norm": 0.010618682019412518, "learning_rate": 2.578005700443026e-06, "loss": 0.0001, "step": 20113 }, { "epoch": 19.340384615384615, "grad_norm": 0.05885925889015198, "learning_rate": 2.5771709326163987e-06, "loss": 0.0003, "step": 20114 }, { "epoch": 19.341346153846153, "grad_norm": 0.016806215047836304, "learning_rate": 2.576336279970293e-06, "loss": 0.0001, "step": 20115 }, { "epoch": 19.342307692307692, "grad_norm": 0.0023964534047991037, "learning_rate": 2.5755017425176633e-06, "loss": 0.0, "step": 20116 }, { "epoch": 19.34326923076923, "grad_norm": 0.005885419435799122, "learning_rate": 2.5746673202714534e-06, "loss": 0.0, "step": 20117 }, { "epoch": 19.34423076923077, "grad_norm": 0.009321127086877823, "learning_rate": 2.573833013244612e-06, "loss": 0.0001, "step": 20118 }, { "epoch": 19.345192307692308, "grad_norm": 0.02789277769625187, "learning_rate": 2.5729988214500913e-06, "loss": 0.0001, "step": 20119 }, { "epoch": 19.346153846153847, "grad_norm": 0.0076508475467562675, "learning_rate": 2.572164744900827e-06, "loss": 0.0, "step": 20120 }, { "epoch": 19.347115384615385, "grad_norm": 0.02101846970617771, "learning_rate": 2.5713307836097657e-06, "loss": 0.0001, "step": 20121 }, { "epoch": 19.348076923076924, "grad_norm": 0.0012341784313321114, "learning_rate": 2.5704969375898513e-06, "loss": 0.0, "step": 20122 }, { "epoch": 19.349038461538463, "grad_norm": 0.0026501049287617207, "learning_rate": 2.569663206854015e-06, "loss": 0.0, "step": 20123 }, { "epoch": 19.35, "grad_norm": 0.0031217506621032953, "learning_rate": 2.5688295914152017e-06, "loss": 0.0, "step": 20124 }, { "epoch": 19.35096153846154, "grad_norm": 0.0034185058902949095, "learning_rate": 2.56799609128634e-06, "loss": 0.0, "step": 20125 }, { "epoch": 19.351923076923075, "grad_norm": 0.002459360985085368, "learning_rate": 2.567162706480366e-06, "loss": 0.0, "step": 20126 }, { "epoch": 19.352884615384614, "grad_norm": 0.0030861427076160908, "learning_rate": 2.5663294370102142e-06, "loss": 0.0, "step": 20127 }, { "epoch": 19.353846153846153, "grad_norm": 0.00655597448348999, "learning_rate": 2.5654962828888096e-06, "loss": 0.0001, "step": 20128 }, { "epoch": 19.35480769230769, "grad_norm": 0.003852399531751871, "learning_rate": 2.564663244129083e-06, "loss": 0.0, "step": 20129 }, { "epoch": 19.35576923076923, "grad_norm": 0.002080195117741823, "learning_rate": 2.563830320743963e-06, "loss": 0.0, "step": 20130 }, { "epoch": 19.35673076923077, "grad_norm": 0.003054049564525485, "learning_rate": 2.562997512746368e-06, "loss": 0.0, "step": 20131 }, { "epoch": 19.357692307692307, "grad_norm": 0.0021059433929622173, "learning_rate": 2.5621648201492257e-06, "loss": 0.0, "step": 20132 }, { "epoch": 19.358653846153846, "grad_norm": 0.0027954482939094305, "learning_rate": 2.5613322429654573e-06, "loss": 0.0, "step": 20133 }, { "epoch": 19.359615384615385, "grad_norm": 0.0009039474534802139, "learning_rate": 2.560499781207977e-06, "loss": 0.0, "step": 20134 }, { "epoch": 19.360576923076923, "grad_norm": 0.0033447383902966976, "learning_rate": 2.5596674348897086e-06, "loss": 0.0, "step": 20135 }, { "epoch": 19.361538461538462, "grad_norm": 0.0029233170207589865, "learning_rate": 2.558835204023563e-06, "loss": 0.0, "step": 20136 }, { "epoch": 19.3625, "grad_norm": 0.00350966933183372, "learning_rate": 2.5580030886224537e-06, "loss": 0.0, "step": 20137 }, { "epoch": 19.36346153846154, "grad_norm": 0.0012291226303204894, "learning_rate": 2.5571710886992985e-06, "loss": 0.0, "step": 20138 }, { "epoch": 19.364423076923078, "grad_norm": 0.0013794248225167394, "learning_rate": 2.556339204267e-06, "loss": 0.0, "step": 20139 }, { "epoch": 19.365384615384617, "grad_norm": 0.007967863231897354, "learning_rate": 2.5555074353384713e-06, "loss": 0.0001, "step": 20140 }, { "epoch": 19.366346153846155, "grad_norm": 0.05332441255450249, "learning_rate": 2.5546757819266197e-06, "loss": 0.0001, "step": 20141 }, { "epoch": 19.36730769230769, "grad_norm": 0.002027084119617939, "learning_rate": 2.5538442440443456e-06, "loss": 0.0, "step": 20142 }, { "epoch": 19.36826923076923, "grad_norm": 0.011028502136468887, "learning_rate": 2.5530128217045546e-06, "loss": 0.0, "step": 20143 }, { "epoch": 19.369230769230768, "grad_norm": 0.006522058509290218, "learning_rate": 2.55218151492015e-06, "loss": 0.0001, "step": 20144 }, { "epoch": 19.370192307692307, "grad_norm": 0.0034419645089656115, "learning_rate": 2.5513503237040273e-06, "loss": 0.0, "step": 20145 }, { "epoch": 19.371153846153845, "grad_norm": 0.007433951832354069, "learning_rate": 2.5505192480690865e-06, "loss": 0.0, "step": 20146 }, { "epoch": 19.372115384615384, "grad_norm": 0.00396473053842783, "learning_rate": 2.5496882880282246e-06, "loss": 0.0, "step": 20147 }, { "epoch": 19.373076923076923, "grad_norm": 0.00243662903085351, "learning_rate": 2.5488574435943327e-06, "loss": 0.0, "step": 20148 }, { "epoch": 19.37403846153846, "grad_norm": 0.0059279790148139, "learning_rate": 2.5480267147803064e-06, "loss": 0.0001, "step": 20149 }, { "epoch": 19.375, "grad_norm": 0.00935233011841774, "learning_rate": 2.547196101599031e-06, "loss": 0.0, "step": 20150 }, { "epoch": 19.37596153846154, "grad_norm": 0.13759002089500427, "learning_rate": 2.5463656040633988e-06, "loss": 0.0004, "step": 20151 }, { "epoch": 19.376923076923077, "grad_norm": 0.0072096590884029865, "learning_rate": 2.5455352221862995e-06, "loss": 0.0001, "step": 20152 }, { "epoch": 19.377884615384616, "grad_norm": 0.002624954329803586, "learning_rate": 2.5447049559806116e-06, "loss": 0.0, "step": 20153 }, { "epoch": 19.378846153846155, "grad_norm": 0.0012685653055086732, "learning_rate": 2.543874805459221e-06, "loss": 0.0, "step": 20154 }, { "epoch": 19.379807692307693, "grad_norm": 0.0014851944288238883, "learning_rate": 2.5430447706350147e-06, "loss": 0.0, "step": 20155 }, { "epoch": 19.380769230769232, "grad_norm": 0.003018233459442854, "learning_rate": 2.542214851520863e-06, "loss": 0.0, "step": 20156 }, { "epoch": 19.38173076923077, "grad_norm": 0.22705331444740295, "learning_rate": 2.5413850481296497e-06, "loss": 0.0007, "step": 20157 }, { "epoch": 19.38269230769231, "grad_norm": 0.008496382273733616, "learning_rate": 2.5405553604742518e-06, "loss": 0.0001, "step": 20158 }, { "epoch": 19.383653846153845, "grad_norm": 0.014533184468746185, "learning_rate": 2.5397257885675396e-06, "loss": 0.0001, "step": 20159 }, { "epoch": 19.384615384615383, "grad_norm": 0.004264448303729296, "learning_rate": 2.5388963324223893e-06, "loss": 0.0, "step": 20160 }, { "epoch": 19.385576923076922, "grad_norm": 0.0009606648818589747, "learning_rate": 2.5380669920516677e-06, "loss": 0.0, "step": 20161 }, { "epoch": 19.38653846153846, "grad_norm": 0.0036324169486761093, "learning_rate": 2.5372377674682457e-06, "loss": 0.0, "step": 20162 }, { "epoch": 19.3875, "grad_norm": 0.007251542992889881, "learning_rate": 2.5364086586849933e-06, "loss": 0.0, "step": 20163 }, { "epoch": 19.388461538461538, "grad_norm": 0.01642880029976368, "learning_rate": 2.53557966571477e-06, "loss": 0.0001, "step": 20164 }, { "epoch": 19.389423076923077, "grad_norm": 0.009816765785217285, "learning_rate": 2.5347507885704424e-06, "loss": 0.0, "step": 20165 }, { "epoch": 19.390384615384615, "grad_norm": 0.0034713742788881063, "learning_rate": 2.5339220272648756e-06, "loss": 0.0, "step": 20166 }, { "epoch": 19.391346153846154, "grad_norm": 0.005647755227982998, "learning_rate": 2.533093381810924e-06, "loss": 0.0, "step": 20167 }, { "epoch": 19.392307692307693, "grad_norm": 0.003208416746929288, "learning_rate": 2.5322648522214465e-06, "loss": 0.0, "step": 20168 }, { "epoch": 19.39326923076923, "grad_norm": 0.01589217782020569, "learning_rate": 2.5314364385093047e-06, "loss": 0.0001, "step": 20169 }, { "epoch": 19.39423076923077, "grad_norm": 0.003678161883726716, "learning_rate": 2.530608140687347e-06, "loss": 0.0, "step": 20170 }, { "epoch": 19.39519230769231, "grad_norm": 0.004500537645071745, "learning_rate": 2.529779958768431e-06, "loss": 0.0, "step": 20171 }, { "epoch": 19.396153846153847, "grad_norm": 0.004326876252889633, "learning_rate": 2.528951892765402e-06, "loss": 0.0, "step": 20172 }, { "epoch": 19.397115384615386, "grad_norm": 0.004123843275010586, "learning_rate": 2.528123942691113e-06, "loss": 0.0, "step": 20173 }, { "epoch": 19.398076923076925, "grad_norm": 0.005857758689671755, "learning_rate": 2.5272961085584135e-06, "loss": 0.0, "step": 20174 }, { "epoch": 19.39903846153846, "grad_norm": 0.0024044043384492397, "learning_rate": 2.5264683903801422e-06, "loss": 0.0, "step": 20175 }, { "epoch": 19.4, "grad_norm": 0.003728039562702179, "learning_rate": 2.525640788169148e-06, "loss": 0.0, "step": 20176 }, { "epoch": 19.400961538461537, "grad_norm": 0.0016931216232478619, "learning_rate": 2.5248133019382747e-06, "loss": 0.0, "step": 20177 }, { "epoch": 19.401923076923076, "grad_norm": 0.005431676749140024, "learning_rate": 2.5239859317003567e-06, "loss": 0.0, "step": 20178 }, { "epoch": 19.402884615384615, "grad_norm": 0.0022728205658495426, "learning_rate": 2.523158677468235e-06, "loss": 0.0, "step": 20179 }, { "epoch": 19.403846153846153, "grad_norm": 0.002705879043787718, "learning_rate": 2.52233153925475e-06, "loss": 0.0, "step": 20180 }, { "epoch": 19.404807692307692, "grad_norm": 0.0020622857846319675, "learning_rate": 2.521504517072728e-06, "loss": 0.0, "step": 20181 }, { "epoch": 19.40576923076923, "grad_norm": 0.001680197543464601, "learning_rate": 2.520677610935012e-06, "loss": 0.0, "step": 20182 }, { "epoch": 19.40673076923077, "grad_norm": 0.0053792549297213554, "learning_rate": 2.519850820854424e-06, "loss": 0.0, "step": 20183 }, { "epoch": 19.407692307692308, "grad_norm": 0.0035512337926775217, "learning_rate": 2.519024146843796e-06, "loss": 0.0, "step": 20184 }, { "epoch": 19.408653846153847, "grad_norm": 0.0009052801178768277, "learning_rate": 2.5181975889159615e-06, "loss": 0.0, "step": 20185 }, { "epoch": 19.409615384615385, "grad_norm": 0.005158009938895702, "learning_rate": 2.517371147083738e-06, "loss": 0.0, "step": 20186 }, { "epoch": 19.410576923076924, "grad_norm": 0.792900562286377, "learning_rate": 2.5165448213599532e-06, "loss": 0.0016, "step": 20187 }, { "epoch": 19.411538461538463, "grad_norm": 0.015855537727475166, "learning_rate": 2.515718611757432e-06, "loss": 0.0001, "step": 20188 }, { "epoch": 19.4125, "grad_norm": 0.0018319884547963738, "learning_rate": 2.514892518288988e-06, "loss": 0.0, "step": 20189 }, { "epoch": 19.41346153846154, "grad_norm": 0.013322041369974613, "learning_rate": 2.5140665409674446e-06, "loss": 0.0001, "step": 20190 }, { "epoch": 19.414423076923075, "grad_norm": 0.003666456788778305, "learning_rate": 2.513240679805621e-06, "loss": 0.0, "step": 20191 }, { "epoch": 19.415384615384614, "grad_norm": 0.005330884829163551, "learning_rate": 2.512414934816325e-06, "loss": 0.0001, "step": 20192 }, { "epoch": 19.416346153846153, "grad_norm": 0.004064160864800215, "learning_rate": 2.511589306012376e-06, "loss": 0.0, "step": 20193 }, { "epoch": 19.41730769230769, "grad_norm": 0.0023392634466290474, "learning_rate": 2.510763793406581e-06, "loss": 0.0, "step": 20194 }, { "epoch": 19.41826923076923, "grad_norm": 0.02249416895210743, "learning_rate": 2.5099383970117498e-06, "loss": 0.0001, "step": 20195 }, { "epoch": 19.41923076923077, "grad_norm": 0.02593020722270012, "learning_rate": 2.509113116840696e-06, "loss": 0.0001, "step": 20196 }, { "epoch": 19.420192307692307, "grad_norm": 0.004790907260030508, "learning_rate": 2.5082879529062165e-06, "loss": 0.0, "step": 20197 }, { "epoch": 19.421153846153846, "grad_norm": 0.012738510966300964, "learning_rate": 2.507462905221122e-06, "loss": 0.0001, "step": 20198 }, { "epoch": 19.422115384615385, "grad_norm": 0.005061016883701086, "learning_rate": 2.5066379737982136e-06, "loss": 0.0, "step": 20199 }, { "epoch": 19.423076923076923, "grad_norm": 0.012055995874106884, "learning_rate": 2.50581315865029e-06, "loss": 0.0001, "step": 20200 }, { "epoch": 19.424038461538462, "grad_norm": 0.001898166723549366, "learning_rate": 2.5049884597901508e-06, "loss": 0.0, "step": 20201 }, { "epoch": 19.425, "grad_norm": 0.07930269092321396, "learning_rate": 2.5041638772305952e-06, "loss": 0.0002, "step": 20202 }, { "epoch": 19.42596153846154, "grad_norm": 0.009817888028919697, "learning_rate": 2.5033394109844135e-06, "loss": 0.0001, "step": 20203 }, { "epoch": 19.426923076923078, "grad_norm": 0.005541097838431597, "learning_rate": 2.5025150610644055e-06, "loss": 0.0, "step": 20204 }, { "epoch": 19.427884615384617, "grad_norm": 0.0152282090857625, "learning_rate": 2.501690827483355e-06, "loss": 0.0001, "step": 20205 }, { "epoch": 19.428846153846155, "grad_norm": 0.002026174683123827, "learning_rate": 2.5008667102540563e-06, "loss": 0.0, "step": 20206 }, { "epoch": 19.42980769230769, "grad_norm": 0.0024977170396596193, "learning_rate": 2.500042709389299e-06, "loss": 0.0, "step": 20207 }, { "epoch": 19.43076923076923, "grad_norm": 0.009485883638262749, "learning_rate": 2.4992188249018633e-06, "loss": 0.0, "step": 20208 }, { "epoch": 19.431730769230768, "grad_norm": 0.2595553696155548, "learning_rate": 2.4983950568045377e-06, "loss": 0.0005, "step": 20209 }, { "epoch": 19.432692307692307, "grad_norm": 0.1529642641544342, "learning_rate": 2.497571405110106e-06, "loss": 0.0006, "step": 20210 }, { "epoch": 19.433653846153845, "grad_norm": 0.0030314545147120953, "learning_rate": 2.496747869831345e-06, "loss": 0.0, "step": 20211 }, { "epoch": 19.434615384615384, "grad_norm": 0.005140334367752075, "learning_rate": 2.4959244509810345e-06, "loss": 0.0, "step": 20212 }, { "epoch": 19.435576923076923, "grad_norm": 0.002298553939908743, "learning_rate": 2.4951011485719556e-06, "loss": 0.0, "step": 20213 }, { "epoch": 19.43653846153846, "grad_norm": 0.0043406118638813496, "learning_rate": 2.4942779626168766e-06, "loss": 0.0, "step": 20214 }, { "epoch": 19.4375, "grad_norm": 0.0016989191062748432, "learning_rate": 2.4934548931285784e-06, "loss": 0.0, "step": 20215 }, { "epoch": 19.43846153846154, "grad_norm": 0.0023753324057906866, "learning_rate": 2.4926319401198263e-06, "loss": 0.0, "step": 20216 }, { "epoch": 19.439423076923077, "grad_norm": 0.005497433245182037, "learning_rate": 2.491809103603392e-06, "loss": 0.0, "step": 20217 }, { "epoch": 19.440384615384616, "grad_norm": 0.0035586797166615725, "learning_rate": 2.4909863835920466e-06, "loss": 0.0, "step": 20218 }, { "epoch": 19.441346153846155, "grad_norm": 0.009433588944375515, "learning_rate": 2.4901637800985512e-06, "loss": 0.0001, "step": 20219 }, { "epoch": 19.442307692307693, "grad_norm": 0.0030162634793668985, "learning_rate": 2.489341293135673e-06, "loss": 0.0, "step": 20220 }, { "epoch": 19.443269230769232, "grad_norm": 0.004117683973163366, "learning_rate": 2.4885189227161767e-06, "loss": 0.0, "step": 20221 }, { "epoch": 19.44423076923077, "grad_norm": 0.004519030451774597, "learning_rate": 2.487696668852819e-06, "loss": 0.0, "step": 20222 }, { "epoch": 19.44519230769231, "grad_norm": 0.000784042349550873, "learning_rate": 2.4868745315583598e-06, "loss": 0.0, "step": 20223 }, { "epoch": 19.446153846153845, "grad_norm": 0.0038971782196313143, "learning_rate": 2.48605251084556e-06, "loss": 0.0, "step": 20224 }, { "epoch": 19.447115384615383, "grad_norm": 0.0026964866556227207, "learning_rate": 2.485230606727169e-06, "loss": 0.0, "step": 20225 }, { "epoch": 19.448076923076922, "grad_norm": 0.0018146728398278356, "learning_rate": 2.4844088192159445e-06, "loss": 0.0, "step": 20226 }, { "epoch": 19.44903846153846, "grad_norm": 0.001881826901808381, "learning_rate": 2.48358714832464e-06, "loss": 0.0, "step": 20227 }, { "epoch": 19.45, "grad_norm": 0.006149987690150738, "learning_rate": 2.482765594065999e-06, "loss": 0.0, "step": 20228 }, { "epoch": 19.450961538461538, "grad_norm": 0.0039036739617586136, "learning_rate": 2.481944156452776e-06, "loss": 0.0, "step": 20229 }, { "epoch": 19.451923076923077, "grad_norm": 0.0033116983249783516, "learning_rate": 2.4811228354977113e-06, "loss": 0.0, "step": 20230 }, { "epoch": 19.452884615384615, "grad_norm": 0.0011740565532818437, "learning_rate": 2.480301631213553e-06, "loss": 0.0, "step": 20231 }, { "epoch": 19.453846153846154, "grad_norm": 0.0014470114838331938, "learning_rate": 2.4794805436130464e-06, "loss": 0.0, "step": 20232 }, { "epoch": 19.454807692307693, "grad_norm": 0.001882687909528613, "learning_rate": 2.478659572708926e-06, "loss": 0.0, "step": 20233 }, { "epoch": 19.45576923076923, "grad_norm": 0.002644539112225175, "learning_rate": 2.477838718513934e-06, "loss": 0.0, "step": 20234 }, { "epoch": 19.45673076923077, "grad_norm": 0.0024707571137696505, "learning_rate": 2.4770179810408114e-06, "loss": 0.0, "step": 20235 }, { "epoch": 19.45769230769231, "grad_norm": 0.004556981381028891, "learning_rate": 2.476197360302286e-06, "loss": 0.0, "step": 20236 }, { "epoch": 19.458653846153847, "grad_norm": 0.006682931445538998, "learning_rate": 2.475376856311097e-06, "loss": 0.0001, "step": 20237 }, { "epoch": 19.459615384615386, "grad_norm": 0.003175852820277214, "learning_rate": 2.474556469079976e-06, "loss": 0.0, "step": 20238 }, { "epoch": 19.460576923076925, "grad_norm": 0.002800468821078539, "learning_rate": 2.47373619862165e-06, "loss": 0.0, "step": 20239 }, { "epoch": 19.46153846153846, "grad_norm": 0.0010948918061330914, "learning_rate": 2.472916044948851e-06, "loss": 0.0, "step": 20240 }, { "epoch": 19.4625, "grad_norm": 0.012071022763848305, "learning_rate": 2.472096008074301e-06, "loss": 0.0001, "step": 20241 }, { "epoch": 19.463461538461537, "grad_norm": 0.004530551377683878, "learning_rate": 2.471276088010727e-06, "loss": 0.0, "step": 20242 }, { "epoch": 19.464423076923076, "grad_norm": 0.0006360611296258867, "learning_rate": 2.4704562847708535e-06, "loss": 0.0, "step": 20243 }, { "epoch": 19.465384615384615, "grad_norm": 0.008690758608281612, "learning_rate": 2.4696365983673976e-06, "loss": 0.0, "step": 20244 }, { "epoch": 19.466346153846153, "grad_norm": 0.002915770746767521, "learning_rate": 2.4688170288130795e-06, "loss": 0.0, "step": 20245 }, { "epoch": 19.467307692307692, "grad_norm": 0.002565397648140788, "learning_rate": 2.467997576120621e-06, "loss": 0.0, "step": 20246 }, { "epoch": 19.46826923076923, "grad_norm": 0.0030716338660568, "learning_rate": 2.46717824030273e-06, "loss": 0.0, "step": 20247 }, { "epoch": 19.46923076923077, "grad_norm": 0.026472292840480804, "learning_rate": 2.4663590213721254e-06, "loss": 0.0002, "step": 20248 }, { "epoch": 19.470192307692308, "grad_norm": 0.00832621194422245, "learning_rate": 2.46553991934152e-06, "loss": 0.0001, "step": 20249 }, { "epoch": 19.471153846153847, "grad_norm": 0.003528272733092308, "learning_rate": 2.464720934223619e-06, "loss": 0.0, "step": 20250 }, { "epoch": 19.472115384615385, "grad_norm": 0.22424179315567017, "learning_rate": 2.4639020660311364e-06, "loss": 0.001, "step": 20251 }, { "epoch": 19.473076923076924, "grad_norm": 0.013464005663990974, "learning_rate": 2.463083314776772e-06, "loss": 0.0, "step": 20252 }, { "epoch": 19.474038461538463, "grad_norm": 0.0038647956680506468, "learning_rate": 2.4622646804732342e-06, "loss": 0.0, "step": 20253 }, { "epoch": 19.475, "grad_norm": 0.008290976285934448, "learning_rate": 2.4614461631332287e-06, "loss": 0.0001, "step": 20254 }, { "epoch": 19.47596153846154, "grad_norm": 0.0014630757505074143, "learning_rate": 2.460627762769451e-06, "loss": 0.0, "step": 20255 }, { "epoch": 19.476923076923075, "grad_norm": 0.001285152044147253, "learning_rate": 2.4598094793946016e-06, "loss": 0.0, "step": 20256 }, { "epoch": 19.477884615384614, "grad_norm": 0.00197591888718307, "learning_rate": 2.4589913130213815e-06, "loss": 0.0, "step": 20257 }, { "epoch": 19.478846153846153, "grad_norm": 0.011811782605946064, "learning_rate": 2.4581732636624824e-06, "loss": 0.0001, "step": 20258 }, { "epoch": 19.47980769230769, "grad_norm": 0.001808313187211752, "learning_rate": 2.4573553313305976e-06, "loss": 0.0, "step": 20259 }, { "epoch": 19.48076923076923, "grad_norm": 2.8530476093292236, "learning_rate": 2.456537516038424e-06, "loss": 0.0971, "step": 20260 }, { "epoch": 19.48173076923077, "grad_norm": 0.001904792501591146, "learning_rate": 2.4557198177986464e-06, "loss": 0.0, "step": 20261 }, { "epoch": 19.482692307692307, "grad_norm": 0.0037883243057876825, "learning_rate": 2.454902236623957e-06, "loss": 0.0, "step": 20262 }, { "epoch": 19.483653846153846, "grad_norm": 0.004925890825688839, "learning_rate": 2.4540847725270376e-06, "loss": 0.0, "step": 20263 }, { "epoch": 19.484615384615385, "grad_norm": 0.0016536560142412782, "learning_rate": 2.4532674255205756e-06, "loss": 0.0, "step": 20264 }, { "epoch": 19.485576923076923, "grad_norm": 0.06839518249034882, "learning_rate": 2.4524501956172563e-06, "loss": 0.0002, "step": 20265 }, { "epoch": 19.486538461538462, "grad_norm": 0.0023288221564143896, "learning_rate": 2.4516330828297563e-06, "loss": 0.0, "step": 20266 }, { "epoch": 19.4875, "grad_norm": 0.005274124443531036, "learning_rate": 2.4508160871707554e-06, "loss": 0.0, "step": 20267 }, { "epoch": 19.48846153846154, "grad_norm": 0.0014528933679684997, "learning_rate": 2.4499992086529357e-06, "loss": 0.0, "step": 20268 }, { "epoch": 19.489423076923078, "grad_norm": 0.0028691620100289583, "learning_rate": 2.4491824472889667e-06, "loss": 0.0, "step": 20269 }, { "epoch": 19.490384615384617, "grad_norm": 0.0028548610862344503, "learning_rate": 2.4483658030915247e-06, "loss": 0.0, "step": 20270 }, { "epoch": 19.491346153846155, "grad_norm": 0.002851621713489294, "learning_rate": 2.4475492760732843e-06, "loss": 0.0, "step": 20271 }, { "epoch": 19.49230769230769, "grad_norm": 0.0021165870130062103, "learning_rate": 2.4467328662469103e-06, "loss": 0.0, "step": 20272 }, { "epoch": 19.49326923076923, "grad_norm": 0.007499256636947393, "learning_rate": 2.4459165736250756e-06, "loss": 0.0001, "step": 20273 }, { "epoch": 19.494230769230768, "grad_norm": 0.010687722824513912, "learning_rate": 2.4451003982204425e-06, "loss": 0.0001, "step": 20274 }, { "epoch": 19.495192307692307, "grad_norm": 0.006147122010588646, "learning_rate": 2.4442843400456784e-06, "loss": 0.0, "step": 20275 }, { "epoch": 19.496153846153845, "grad_norm": 0.005963568110018969, "learning_rate": 2.4434683991134476e-06, "loss": 0.0, "step": 20276 }, { "epoch": 19.497115384615384, "grad_norm": 0.003127150470390916, "learning_rate": 2.442652575436406e-06, "loss": 0.0, "step": 20277 }, { "epoch": 19.498076923076923, "grad_norm": 0.015407475642859936, "learning_rate": 2.4418368690272163e-06, "loss": 0.0001, "step": 20278 }, { "epoch": 19.49903846153846, "grad_norm": 0.005929594859480858, "learning_rate": 2.4410212798985387e-06, "loss": 0.0001, "step": 20279 }, { "epoch": 19.5, "grad_norm": 0.006893342360854149, "learning_rate": 2.440205808063023e-06, "loss": 0.0001, "step": 20280 }, { "epoch": 19.50096153846154, "grad_norm": 0.0033219889737665653, "learning_rate": 2.4393904535333244e-06, "loss": 0.0, "step": 20281 }, { "epoch": 19.501923076923077, "grad_norm": 0.005075380671769381, "learning_rate": 2.438575216322099e-06, "loss": 0.0, "step": 20282 }, { "epoch": 19.502884615384616, "grad_norm": 0.002696732059121132, "learning_rate": 2.4377600964419914e-06, "loss": 0.0, "step": 20283 }, { "epoch": 19.503846153846155, "grad_norm": 0.003093509469181299, "learning_rate": 2.4369450939056547e-06, "loss": 0.0, "step": 20284 }, { "epoch": 19.504807692307693, "grad_norm": 0.004008118994534016, "learning_rate": 2.43613020872573e-06, "loss": 0.0, "step": 20285 }, { "epoch": 19.505769230769232, "grad_norm": 0.01010807603597641, "learning_rate": 2.4353154409148637e-06, "loss": 0.0001, "step": 20286 }, { "epoch": 19.50673076923077, "grad_norm": 0.007411276455968618, "learning_rate": 2.434500790485704e-06, "loss": 0.0, "step": 20287 }, { "epoch": 19.50769230769231, "grad_norm": 0.013334391638636589, "learning_rate": 2.4336862574508846e-06, "loss": 0.0001, "step": 20288 }, { "epoch": 19.508653846153845, "grad_norm": 0.02683991566300392, "learning_rate": 2.432871841823047e-06, "loss": 0.0002, "step": 20289 }, { "epoch": 19.509615384615383, "grad_norm": 0.002555928658694029, "learning_rate": 2.432057543614832e-06, "loss": 0.0, "step": 20290 }, { "epoch": 19.510576923076922, "grad_norm": 0.005014962982386351, "learning_rate": 2.4312433628388697e-06, "loss": 0.0, "step": 20291 }, { "epoch": 19.51153846153846, "grad_norm": 0.008139320649206638, "learning_rate": 2.430429299507796e-06, "loss": 0.0001, "step": 20292 }, { "epoch": 19.5125, "grad_norm": 0.005036038812249899, "learning_rate": 2.429615353634247e-06, "loss": 0.0, "step": 20293 }, { "epoch": 19.513461538461538, "grad_norm": 0.0026507023721933365, "learning_rate": 2.428801525230845e-06, "loss": 0.0, "step": 20294 }, { "epoch": 19.514423076923077, "grad_norm": 0.0026475994382053614, "learning_rate": 2.4279878143102253e-06, "loss": 0.0, "step": 20295 }, { "epoch": 19.515384615384615, "grad_norm": 0.004942868836224079, "learning_rate": 2.427174220885009e-06, "loss": 0.0, "step": 20296 }, { "epoch": 19.516346153846154, "grad_norm": 0.0023935334756970406, "learning_rate": 2.4263607449678216e-06, "loss": 0.0, "step": 20297 }, { "epoch": 19.517307692307693, "grad_norm": 0.0014805975370109081, "learning_rate": 2.4255473865712907e-06, "loss": 0.0, "step": 20298 }, { "epoch": 19.51826923076923, "grad_norm": 4.639775276184082, "learning_rate": 2.424734145708031e-06, "loss": 0.0465, "step": 20299 }, { "epoch": 19.51923076923077, "grad_norm": 0.0168650820851326, "learning_rate": 2.423921022390663e-06, "loss": 0.0001, "step": 20300 }, { "epoch": 19.52019230769231, "grad_norm": 0.004131063353270292, "learning_rate": 2.4231080166318087e-06, "loss": 0.0, "step": 20301 }, { "epoch": 19.521153846153847, "grad_norm": 0.004145441576838493, "learning_rate": 2.4222951284440776e-06, "loss": 0.0, "step": 20302 }, { "epoch": 19.522115384615386, "grad_norm": 0.0014785499079152942, "learning_rate": 2.421482357840086e-06, "loss": 0.0, "step": 20303 }, { "epoch": 19.523076923076925, "grad_norm": 0.004064739216119051, "learning_rate": 2.420669704832448e-06, "loss": 0.0, "step": 20304 }, { "epoch": 19.52403846153846, "grad_norm": 0.003849572269245982, "learning_rate": 2.419857169433768e-06, "loss": 0.0, "step": 20305 }, { "epoch": 19.525, "grad_norm": 0.24173545837402344, "learning_rate": 2.4190447516566605e-06, "loss": 0.0006, "step": 20306 }, { "epoch": 19.525961538461537, "grad_norm": 0.008455364033579826, "learning_rate": 2.4182324515137256e-06, "loss": 0.0, "step": 20307 }, { "epoch": 19.526923076923076, "grad_norm": 0.004076363053172827, "learning_rate": 2.417420269017571e-06, "loss": 0.0, "step": 20308 }, { "epoch": 19.527884615384615, "grad_norm": 0.00426195003092289, "learning_rate": 2.4166082041808013e-06, "loss": 0.0, "step": 20309 }, { "epoch": 19.528846153846153, "grad_norm": 0.004796887747943401, "learning_rate": 2.4157962570160133e-06, "loss": 0.0, "step": 20310 }, { "epoch": 19.529807692307692, "grad_norm": 0.0055663990788161755, "learning_rate": 2.414984427535807e-06, "loss": 0.0, "step": 20311 }, { "epoch": 19.53076923076923, "grad_norm": 0.039099305868148804, "learning_rate": 2.414172715752785e-06, "loss": 0.0001, "step": 20312 }, { "epoch": 19.53173076923077, "grad_norm": 0.0027656331658363342, "learning_rate": 2.413361121679534e-06, "loss": 0.0, "step": 20313 }, { "epoch": 19.532692307692308, "grad_norm": 0.010943801142275333, "learning_rate": 2.4125496453286524e-06, "loss": 0.0001, "step": 20314 }, { "epoch": 19.533653846153847, "grad_norm": 0.004576405044645071, "learning_rate": 2.411738286712735e-06, "loss": 0.0, "step": 20315 }, { "epoch": 19.534615384615385, "grad_norm": 0.00378791824914515, "learning_rate": 2.4109270458443646e-06, "loss": 0.0, "step": 20316 }, { "epoch": 19.535576923076924, "grad_norm": 0.001958410954102874, "learning_rate": 2.4101159227361326e-06, "loss": 0.0, "step": 20317 }, { "epoch": 19.536538461538463, "grad_norm": 0.008168873377144337, "learning_rate": 2.4093049174006287e-06, "loss": 0.0001, "step": 20318 }, { "epoch": 19.5375, "grad_norm": 0.007311031688004732, "learning_rate": 2.408494029850431e-06, "loss": 0.0001, "step": 20319 }, { "epoch": 19.53846153846154, "grad_norm": 0.028990790247917175, "learning_rate": 2.407683260098128e-06, "loss": 0.0001, "step": 20320 }, { "epoch": 19.539423076923075, "grad_norm": 0.003989622928202152, "learning_rate": 2.4068726081562953e-06, "loss": 0.0, "step": 20321 }, { "epoch": 19.540384615384614, "grad_norm": 0.007285315077751875, "learning_rate": 2.406062074037513e-06, "loss": 0.0, "step": 20322 }, { "epoch": 19.541346153846153, "grad_norm": 0.005138856824487448, "learning_rate": 2.405251657754363e-06, "loss": 0.0, "step": 20323 }, { "epoch": 19.54230769230769, "grad_norm": 0.0018972003599628806, "learning_rate": 2.4044413593194136e-06, "loss": 0.0, "step": 20324 }, { "epoch": 19.54326923076923, "grad_norm": 0.031031206250190735, "learning_rate": 2.403631178745243e-06, "loss": 0.0001, "step": 20325 }, { "epoch": 19.54423076923077, "grad_norm": 0.0038383181672543287, "learning_rate": 2.4028211160444227e-06, "loss": 0.0, "step": 20326 }, { "epoch": 19.545192307692307, "grad_norm": 0.002214343287050724, "learning_rate": 2.4020111712295193e-06, "loss": 0.0, "step": 20327 }, { "epoch": 19.546153846153846, "grad_norm": 0.006970182992517948, "learning_rate": 2.401201344313102e-06, "loss": 0.0, "step": 20328 }, { "epoch": 19.547115384615385, "grad_norm": 0.00894844252616167, "learning_rate": 2.4003916353077416e-06, "loss": 0.0001, "step": 20329 }, { "epoch": 19.548076923076923, "grad_norm": 0.0035372006241232157, "learning_rate": 2.3995820442259943e-06, "loss": 0.0, "step": 20330 }, { "epoch": 19.549038461538462, "grad_norm": 0.9031139016151428, "learning_rate": 2.39877257108043e-06, "loss": 0.01, "step": 20331 }, { "epoch": 19.55, "grad_norm": 0.07752566784620285, "learning_rate": 2.397963215883603e-06, "loss": 0.0002, "step": 20332 }, { "epoch": 19.55096153846154, "grad_norm": 0.011910402216017246, "learning_rate": 2.3971539786480757e-06, "loss": 0.0001, "step": 20333 }, { "epoch": 19.551923076923078, "grad_norm": 0.00316443108022213, "learning_rate": 2.396344859386408e-06, "loss": 0.0, "step": 20334 }, { "epoch": 19.552884615384617, "grad_norm": 0.019231487065553665, "learning_rate": 2.3955358581111477e-06, "loss": 0.0001, "step": 20335 }, { "epoch": 19.553846153846155, "grad_norm": 0.004921347834169865, "learning_rate": 2.394726974834852e-06, "loss": 0.0001, "step": 20336 }, { "epoch": 19.55480769230769, "grad_norm": 0.0016016382724046707, "learning_rate": 2.393918209570075e-06, "loss": 0.0, "step": 20337 }, { "epoch": 19.55576923076923, "grad_norm": 0.009696977213025093, "learning_rate": 2.393109562329362e-06, "loss": 0.0, "step": 20338 }, { "epoch": 19.556730769230768, "grad_norm": 0.007288401015102863, "learning_rate": 2.392301033125263e-06, "loss": 0.0001, "step": 20339 }, { "epoch": 19.557692307692307, "grad_norm": 0.06235614791512489, "learning_rate": 2.391492621970325e-06, "loss": 0.0002, "step": 20340 }, { "epoch": 19.558653846153845, "grad_norm": 0.006488875485956669, "learning_rate": 2.390684328877089e-06, "loss": 0.0001, "step": 20341 }, { "epoch": 19.559615384615384, "grad_norm": 0.43469616770744324, "learning_rate": 2.3898761538581015e-06, "loss": 0.0008, "step": 20342 }, { "epoch": 19.560576923076923, "grad_norm": 0.004893847741186619, "learning_rate": 2.3890680969258985e-06, "loss": 0.0001, "step": 20343 }, { "epoch": 19.56153846153846, "grad_norm": 0.003954526036977768, "learning_rate": 2.3882601580930196e-06, "loss": 0.0, "step": 20344 }, { "epoch": 19.5625, "grad_norm": 0.0035462870728224516, "learning_rate": 2.387452337372006e-06, "loss": 0.0, "step": 20345 }, { "epoch": 19.56346153846154, "grad_norm": 0.0025882497429847717, "learning_rate": 2.386644634775388e-06, "loss": 0.0, "step": 20346 }, { "epoch": 19.564423076923077, "grad_norm": 0.006324579007923603, "learning_rate": 2.385837050315699e-06, "loss": 0.0001, "step": 20347 }, { "epoch": 19.565384615384616, "grad_norm": 0.007735998835414648, "learning_rate": 2.385029584005475e-06, "loss": 0.0001, "step": 20348 }, { "epoch": 19.566346153846155, "grad_norm": 0.0034571290016174316, "learning_rate": 2.3842222358572387e-06, "loss": 0.0, "step": 20349 }, { "epoch": 19.567307692307693, "grad_norm": 0.003211542032659054, "learning_rate": 2.3834150058835227e-06, "loss": 0.0, "step": 20350 }, { "epoch": 19.568269230769232, "grad_norm": 0.0030224930960685015, "learning_rate": 2.3826078940968535e-06, "loss": 0.0, "step": 20351 }, { "epoch": 19.56923076923077, "grad_norm": 0.015462221577763557, "learning_rate": 2.3818009005097496e-06, "loss": 0.0001, "step": 20352 }, { "epoch": 19.57019230769231, "grad_norm": 0.003642204450443387, "learning_rate": 2.3809940251347396e-06, "loss": 0.0, "step": 20353 }, { "epoch": 19.571153846153845, "grad_norm": 0.0019760713912546635, "learning_rate": 2.3801872679843384e-06, "loss": 0.0, "step": 20354 }, { "epoch": 19.572115384615383, "grad_norm": 0.002427985891699791, "learning_rate": 2.3793806290710665e-06, "loss": 0.0, "step": 20355 }, { "epoch": 19.573076923076922, "grad_norm": 0.0037720876280218363, "learning_rate": 2.378574108407444e-06, "loss": 0.0, "step": 20356 }, { "epoch": 19.57403846153846, "grad_norm": 0.0035833774600178003, "learning_rate": 2.37776770600598e-06, "loss": 0.0, "step": 20357 }, { "epoch": 19.575, "grad_norm": 0.0029536744114011526, "learning_rate": 2.37696142187919e-06, "loss": 0.0, "step": 20358 }, { "epoch": 19.575961538461538, "grad_norm": 0.0042966995388269424, "learning_rate": 2.3761552560395883e-06, "loss": 0.0, "step": 20359 }, { "epoch": 19.576923076923077, "grad_norm": 0.001572354231029749, "learning_rate": 2.375349208499679e-06, "loss": 0.0, "step": 20360 }, { "epoch": 19.577884615384615, "grad_norm": 0.0017148935003206134, "learning_rate": 2.3745432792719715e-06, "loss": 0.0, "step": 20361 }, { "epoch": 19.578846153846154, "grad_norm": 0.0036351655144244432, "learning_rate": 2.373737468368975e-06, "loss": 0.0, "step": 20362 }, { "epoch": 19.579807692307693, "grad_norm": 0.0014860263327136636, "learning_rate": 2.3729317758031866e-06, "loss": 0.0, "step": 20363 }, { "epoch": 19.58076923076923, "grad_norm": 0.004128014203161001, "learning_rate": 2.3721262015871147e-06, "loss": 0.0, "step": 20364 }, { "epoch": 19.58173076923077, "grad_norm": 0.001381852081976831, "learning_rate": 2.3713207457332542e-06, "loss": 0.0, "step": 20365 }, { "epoch": 19.58269230769231, "grad_norm": 0.0027573732659220695, "learning_rate": 2.370515408254106e-06, "loss": 0.0, "step": 20366 }, { "epoch": 19.583653846153847, "grad_norm": 0.002640810329467058, "learning_rate": 2.36971018916217e-06, "loss": 0.0, "step": 20367 }, { "epoch": 19.584615384615386, "grad_norm": 0.004415872041136026, "learning_rate": 2.3689050884699327e-06, "loss": 0.0, "step": 20368 }, { "epoch": 19.585576923076925, "grad_norm": 0.0017082173144444823, "learning_rate": 2.368100106189892e-06, "loss": 0.0, "step": 20369 }, { "epoch": 19.58653846153846, "grad_norm": 0.6506190299987793, "learning_rate": 2.367295242334542e-06, "loss": 0.0015, "step": 20370 }, { "epoch": 19.5875, "grad_norm": 0.6431792974472046, "learning_rate": 2.366490496916364e-06, "loss": 0.0019, "step": 20371 }, { "epoch": 19.588461538461537, "grad_norm": 0.0019168449798598886, "learning_rate": 2.36568586994785e-06, "loss": 0.0, "step": 20372 }, { "epoch": 19.589423076923076, "grad_norm": 0.007903792895376682, "learning_rate": 2.364881361441488e-06, "loss": 0.0, "step": 20373 }, { "epoch": 19.590384615384615, "grad_norm": 0.002722811419516802, "learning_rate": 2.3640769714097555e-06, "loss": 0.0, "step": 20374 }, { "epoch": 19.591346153846153, "grad_norm": 0.003606517566367984, "learning_rate": 2.36327269986514e-06, "loss": 0.0, "step": 20375 }, { "epoch": 19.592307692307692, "grad_norm": 0.013154768384993076, "learning_rate": 2.362468546820116e-06, "loss": 0.0001, "step": 20376 }, { "epoch": 19.59326923076923, "grad_norm": 0.00325922598131001, "learning_rate": 2.361664512287163e-06, "loss": 0.0, "step": 20377 }, { "epoch": 19.59423076923077, "grad_norm": 0.0009753828635439277, "learning_rate": 2.3608605962787623e-06, "loss": 0.0, "step": 20378 }, { "epoch": 19.595192307692308, "grad_norm": 0.005635788198560476, "learning_rate": 2.360056798807382e-06, "loss": 0.0, "step": 20379 }, { "epoch": 19.596153846153847, "grad_norm": 0.0033348076976835728, "learning_rate": 2.3592531198854974e-06, "loss": 0.0, "step": 20380 }, { "epoch": 19.597115384615385, "grad_norm": 0.01523686945438385, "learning_rate": 2.3584495595255806e-06, "loss": 0.0, "step": 20381 }, { "epoch": 19.598076923076924, "grad_norm": 0.002184299984946847, "learning_rate": 2.3576461177400976e-06, "loss": 0.0, "step": 20382 }, { "epoch": 19.599038461538463, "grad_norm": 0.0022698170505464077, "learning_rate": 2.3568427945415163e-06, "loss": 0.0, "step": 20383 }, { "epoch": 19.6, "grad_norm": 0.5794857740402222, "learning_rate": 2.356039589942305e-06, "loss": 0.0017, "step": 20384 }, { "epoch": 19.60096153846154, "grad_norm": 0.004091938957571983, "learning_rate": 2.355236503954922e-06, "loss": 0.0, "step": 20385 }, { "epoch": 19.601923076923075, "grad_norm": 0.004768615588545799, "learning_rate": 2.3544335365918337e-06, "loss": 0.0, "step": 20386 }, { "epoch": 19.602884615384614, "grad_norm": 0.004165717866271734, "learning_rate": 2.3536306878654956e-06, "loss": 0.0, "step": 20387 }, { "epoch": 19.603846153846153, "grad_norm": 0.02647419646382332, "learning_rate": 2.352827957788366e-06, "loss": 0.0002, "step": 20388 }, { "epoch": 19.60480769230769, "grad_norm": 0.004182812292128801, "learning_rate": 2.3520253463729058e-06, "loss": 0.0, "step": 20389 }, { "epoch": 19.60576923076923, "grad_norm": 0.002022127853706479, "learning_rate": 2.351222853631563e-06, "loss": 0.0, "step": 20390 }, { "epoch": 19.60673076923077, "grad_norm": 0.007542117964476347, "learning_rate": 2.350420479576793e-06, "loss": 0.0, "step": 20391 }, { "epoch": 19.607692307692307, "grad_norm": 0.5066879987716675, "learning_rate": 2.3496182242210484e-06, "loss": 0.0016, "step": 20392 }, { "epoch": 19.608653846153846, "grad_norm": 0.048441845923662186, "learning_rate": 2.3488160875767717e-06, "loss": 0.0001, "step": 20393 }, { "epoch": 19.609615384615385, "grad_norm": 0.25235915184020996, "learning_rate": 2.348014069656415e-06, "loss": 0.0006, "step": 20394 }, { "epoch": 19.610576923076923, "grad_norm": 0.005081855691969395, "learning_rate": 2.3472121704724225e-06, "loss": 0.0, "step": 20395 }, { "epoch": 19.611538461538462, "grad_norm": 0.0025891605764627457, "learning_rate": 2.3464103900372346e-06, "loss": 0.0, "step": 20396 }, { "epoch": 19.6125, "grad_norm": 0.0007421809714287519, "learning_rate": 2.345608728363298e-06, "loss": 0.0, "step": 20397 }, { "epoch": 19.61346153846154, "grad_norm": 0.010381493717432022, "learning_rate": 2.344807185463045e-06, "loss": 0.0001, "step": 20398 }, { "epoch": 19.614423076923078, "grad_norm": 0.0021239439956843853, "learning_rate": 2.3440057613489165e-06, "loss": 0.0, "step": 20399 }, { "epoch": 19.615384615384617, "grad_norm": 0.007682673167437315, "learning_rate": 2.3432044560333523e-06, "loss": 0.0001, "step": 20400 }, { "epoch": 19.616346153846155, "grad_norm": 0.02238648384809494, "learning_rate": 2.342403269528779e-06, "loss": 0.0001, "step": 20401 }, { "epoch": 19.61730769230769, "grad_norm": 0.016177605837583542, "learning_rate": 2.3416022018476326e-06, "loss": 0.0001, "step": 20402 }, { "epoch": 19.61826923076923, "grad_norm": 0.001801745849661529, "learning_rate": 2.340801253002345e-06, "loss": 0.0, "step": 20403 }, { "epoch": 19.619230769230768, "grad_norm": 0.003273890120908618, "learning_rate": 2.3400004230053397e-06, "loss": 0.0, "step": 20404 }, { "epoch": 19.620192307692307, "grad_norm": 0.0022071674466133118, "learning_rate": 2.339199711869047e-06, "loss": 0.0, "step": 20405 }, { "epoch": 19.621153846153845, "grad_norm": 0.007091738283634186, "learning_rate": 2.3383991196058918e-06, "loss": 0.0001, "step": 20406 }, { "epoch": 19.622115384615384, "grad_norm": 0.002623689593747258, "learning_rate": 2.337598646228294e-06, "loss": 0.0, "step": 20407 }, { "epoch": 19.623076923076923, "grad_norm": 0.0005795325851067901, "learning_rate": 2.3367982917486764e-06, "loss": 0.0, "step": 20408 }, { "epoch": 19.62403846153846, "grad_norm": 0.003217417048290372, "learning_rate": 2.3359980561794603e-06, "loss": 0.0, "step": 20409 }, { "epoch": 19.625, "grad_norm": 0.002513284794986248, "learning_rate": 2.335197939533058e-06, "loss": 0.0, "step": 20410 }, { "epoch": 19.62596153846154, "grad_norm": 0.009842154569923878, "learning_rate": 2.3343979418218923e-06, "loss": 0.0001, "step": 20411 }, { "epoch": 19.626923076923077, "grad_norm": 0.0051484620198607445, "learning_rate": 2.3335980630583676e-06, "loss": 0.0, "step": 20412 }, { "epoch": 19.627884615384616, "grad_norm": 0.002677938435226679, "learning_rate": 2.332798303254901e-06, "loss": 0.0, "step": 20413 }, { "epoch": 19.628846153846155, "grad_norm": 0.029698926955461502, "learning_rate": 2.3319986624239043e-06, "loss": 0.0001, "step": 20414 }, { "epoch": 19.629807692307693, "grad_norm": 0.005802048370242119, "learning_rate": 2.33119914057778e-06, "loss": 0.0001, "step": 20415 }, { "epoch": 19.630769230769232, "grad_norm": 0.07931426167488098, "learning_rate": 2.330399737728938e-06, "loss": 0.0002, "step": 20416 }, { "epoch": 19.63173076923077, "grad_norm": 0.0018738741055130959, "learning_rate": 2.3296004538897843e-06, "loss": 0.0, "step": 20417 }, { "epoch": 19.63269230769231, "grad_norm": 0.004697802942246199, "learning_rate": 2.3288012890727165e-06, "loss": 0.0, "step": 20418 }, { "epoch": 19.633653846153845, "grad_norm": 0.005632183980196714, "learning_rate": 2.328002243290138e-06, "loss": 0.0, "step": 20419 }, { "epoch": 19.634615384615383, "grad_norm": 0.004793807864189148, "learning_rate": 2.3272033165544505e-06, "loss": 0.0, "step": 20420 }, { "epoch": 19.635576923076922, "grad_norm": 0.008039159700274467, "learning_rate": 2.3264045088780463e-06, "loss": 0.0, "step": 20421 }, { "epoch": 19.63653846153846, "grad_norm": 0.25405943393707275, "learning_rate": 2.3256058202733246e-06, "loss": 0.0005, "step": 20422 }, { "epoch": 19.6375, "grad_norm": 0.0021933834068477154, "learning_rate": 2.324807250752673e-06, "loss": 0.0, "step": 20423 }, { "epoch": 19.638461538461538, "grad_norm": 0.0008003590628504753, "learning_rate": 2.324008800328487e-06, "loss": 0.0, "step": 20424 }, { "epoch": 19.639423076923077, "grad_norm": 0.008564367890357971, "learning_rate": 2.3232104690131585e-06, "loss": 0.0, "step": 20425 }, { "epoch": 19.640384615384615, "grad_norm": 0.006508807651698589, "learning_rate": 2.322412256819069e-06, "loss": 0.0, "step": 20426 }, { "epoch": 19.641346153846154, "grad_norm": 0.011715898290276527, "learning_rate": 2.321614163758609e-06, "loss": 0.0001, "step": 20427 }, { "epoch": 19.642307692307693, "grad_norm": 0.0036976777482777834, "learning_rate": 2.320816189844163e-06, "loss": 0.0, "step": 20428 }, { "epoch": 19.64326923076923, "grad_norm": 0.0008547598263248801, "learning_rate": 2.3200183350881087e-06, "loss": 0.0, "step": 20429 }, { "epoch": 19.64423076923077, "grad_norm": 0.004686001688241959, "learning_rate": 2.3192205995028306e-06, "loss": 0.0, "step": 20430 }, { "epoch": 19.64519230769231, "grad_norm": 0.006251761224120855, "learning_rate": 2.3184229831007077e-06, "loss": 0.0, "step": 20431 }, { "epoch": 19.646153846153847, "grad_norm": 0.006038238760083914, "learning_rate": 2.317625485894113e-06, "loss": 0.0, "step": 20432 }, { "epoch": 19.647115384615386, "grad_norm": 0.004530883394181728, "learning_rate": 2.3168281078954256e-06, "loss": 0.0001, "step": 20433 }, { "epoch": 19.648076923076925, "grad_norm": 0.002882382832467556, "learning_rate": 2.3160308491170124e-06, "loss": 0.0, "step": 20434 }, { "epoch": 19.64903846153846, "grad_norm": 0.006345882546156645, "learning_rate": 2.3152337095712497e-06, "loss": 0.0, "step": 20435 }, { "epoch": 19.65, "grad_norm": 0.0029452252201735973, "learning_rate": 2.3144366892705074e-06, "loss": 0.0, "step": 20436 }, { "epoch": 19.650961538461537, "grad_norm": 0.0009361940319649875, "learning_rate": 2.313639788227149e-06, "loss": 0.0, "step": 20437 }, { "epoch": 19.651923076923076, "grad_norm": 0.0034110480919480324, "learning_rate": 2.31284300645354e-06, "loss": 0.0, "step": 20438 }, { "epoch": 19.652884615384615, "grad_norm": 0.015780536457896233, "learning_rate": 2.31204634396205e-06, "loss": 0.0001, "step": 20439 }, { "epoch": 19.653846153846153, "grad_norm": 0.002096261130645871, "learning_rate": 2.311249800765034e-06, "loss": 0.0, "step": 20440 }, { "epoch": 19.654807692307692, "grad_norm": 0.024398844689130783, "learning_rate": 2.310453376874855e-06, "loss": 0.0001, "step": 20441 }, { "epoch": 19.65576923076923, "grad_norm": 0.002872105687856674, "learning_rate": 2.3096570723038737e-06, "loss": 0.0, "step": 20442 }, { "epoch": 19.65673076923077, "grad_norm": 0.0021769979503005743, "learning_rate": 2.308860887064441e-06, "loss": 0.0, "step": 20443 }, { "epoch": 19.657692307692308, "grad_norm": 0.003420911729335785, "learning_rate": 2.308064821168917e-06, "loss": 0.0, "step": 20444 }, { "epoch": 19.658653846153847, "grad_norm": 0.0025772275403141975, "learning_rate": 2.307268874629649e-06, "loss": 0.0, "step": 20445 }, { "epoch": 19.659615384615385, "grad_norm": 0.037541463971138, "learning_rate": 2.306473047458989e-06, "loss": 0.0001, "step": 20446 }, { "epoch": 19.660576923076924, "grad_norm": 1.0304319858551025, "learning_rate": 2.305677339669291e-06, "loss": 0.0048, "step": 20447 }, { "epoch": 19.661538461538463, "grad_norm": 0.0016431845724582672, "learning_rate": 2.304881751272895e-06, "loss": 0.0, "step": 20448 }, { "epoch": 19.6625, "grad_norm": 0.0018383146962150931, "learning_rate": 2.3040862822821485e-06, "loss": 0.0, "step": 20449 }, { "epoch": 19.66346153846154, "grad_norm": 0.004428856074810028, "learning_rate": 2.3032909327093987e-06, "loss": 0.0, "step": 20450 }, { "epoch": 19.664423076923075, "grad_norm": 0.020639486610889435, "learning_rate": 2.3024957025669825e-06, "loss": 0.0001, "step": 20451 }, { "epoch": 19.665384615384614, "grad_norm": 0.005604552570730448, "learning_rate": 2.30170059186724e-06, "loss": 0.0, "step": 20452 }, { "epoch": 19.666346153846153, "grad_norm": 0.005296798888593912, "learning_rate": 2.3009056006225127e-06, "loss": 0.0, "step": 20453 }, { "epoch": 19.66730769230769, "grad_norm": 0.02405133843421936, "learning_rate": 2.300110728845132e-06, "loss": 0.0001, "step": 20454 }, { "epoch": 19.66826923076923, "grad_norm": 0.0023481121752411127, "learning_rate": 2.299315976547436e-06, "loss": 0.0, "step": 20455 }, { "epoch": 19.66923076923077, "grad_norm": 0.00392842898145318, "learning_rate": 2.2985213437417532e-06, "loss": 0.0, "step": 20456 }, { "epoch": 19.670192307692307, "grad_norm": 0.0035022443626075983, "learning_rate": 2.2977268304404145e-06, "loss": 0.0001, "step": 20457 }, { "epoch": 19.671153846153846, "grad_norm": 0.010112456977367401, "learning_rate": 2.296932436655752e-06, "loss": 0.0, "step": 20458 }, { "epoch": 19.672115384615385, "grad_norm": 0.003138376632705331, "learning_rate": 2.2961381624000876e-06, "loss": 0.0, "step": 20459 }, { "epoch": 19.673076923076923, "grad_norm": 0.00447184918448329, "learning_rate": 2.295344007685748e-06, "loss": 0.0, "step": 20460 }, { "epoch": 19.674038461538462, "grad_norm": 0.0024754845071583986, "learning_rate": 2.29454997252506e-06, "loss": 0.0, "step": 20461 }, { "epoch": 19.675, "grad_norm": 0.0008577553671784699, "learning_rate": 2.2937560569303376e-06, "loss": 0.0, "step": 20462 }, { "epoch": 19.67596153846154, "grad_norm": 0.0361386276781559, "learning_rate": 2.2929622609139035e-06, "loss": 0.0001, "step": 20463 }, { "epoch": 19.676923076923078, "grad_norm": 0.006345170084387064, "learning_rate": 2.2921685844880792e-06, "loss": 0.0, "step": 20464 }, { "epoch": 19.677884615384617, "grad_norm": 0.00651254178956151, "learning_rate": 2.2913750276651724e-06, "loss": 0.0, "step": 20465 }, { "epoch": 19.678846153846155, "grad_norm": 0.005631399806588888, "learning_rate": 2.2905815904575033e-06, "loss": 0.0, "step": 20466 }, { "epoch": 19.67980769230769, "grad_norm": 0.0023117808159440756, "learning_rate": 2.289788272877379e-06, "loss": 0.0, "step": 20467 }, { "epoch": 19.68076923076923, "grad_norm": 0.026405787095427513, "learning_rate": 2.2889950749371117e-06, "loss": 0.0001, "step": 20468 }, { "epoch": 19.681730769230768, "grad_norm": 0.003519219346344471, "learning_rate": 2.288201996649011e-06, "loss": 0.0, "step": 20469 }, { "epoch": 19.682692307692307, "grad_norm": 0.005923000164330006, "learning_rate": 2.28740903802538e-06, "loss": 0.0, "step": 20470 }, { "epoch": 19.683653846153845, "grad_norm": 0.004025659989565611, "learning_rate": 2.2866161990785228e-06, "loss": 0.0, "step": 20471 }, { "epoch": 19.684615384615384, "grad_norm": 0.0009348454768769443, "learning_rate": 2.2858234798207478e-06, "loss": 0.0, "step": 20472 }, { "epoch": 19.685576923076923, "grad_norm": 0.003984462469816208, "learning_rate": 2.2850308802643483e-06, "loss": 0.0, "step": 20473 }, { "epoch": 19.68653846153846, "grad_norm": 0.00616413401439786, "learning_rate": 2.2842384004216268e-06, "loss": 0.0001, "step": 20474 }, { "epoch": 19.6875, "grad_norm": 0.005952492356300354, "learning_rate": 2.283446040304883e-06, "loss": 0.0, "step": 20475 }, { "epoch": 19.68846153846154, "grad_norm": 0.0033181817270815372, "learning_rate": 2.2826537999264054e-06, "loss": 0.0, "step": 20476 }, { "epoch": 19.689423076923077, "grad_norm": 0.005730580072849989, "learning_rate": 2.281861679298493e-06, "loss": 0.0, "step": 20477 }, { "epoch": 19.690384615384616, "grad_norm": 0.009054703637957573, "learning_rate": 2.281069678433433e-06, "loss": 0.0, "step": 20478 }, { "epoch": 19.691346153846155, "grad_norm": 0.006541149225085974, "learning_rate": 2.2802777973435166e-06, "loss": 0.0, "step": 20479 }, { "epoch": 19.692307692307693, "grad_norm": 0.001518659177236259, "learning_rate": 2.279486036041034e-06, "loss": 0.0, "step": 20480 }, { "epoch": 19.693269230769232, "grad_norm": 0.00479148468002677, "learning_rate": 2.2786943945382654e-06, "loss": 0.0, "step": 20481 }, { "epoch": 19.69423076923077, "grad_norm": 0.004974078387022018, "learning_rate": 2.2779028728474993e-06, "loss": 0.0, "step": 20482 }, { "epoch": 19.69519230769231, "grad_norm": 0.0017850951990112662, "learning_rate": 2.2771114709810184e-06, "loss": 0.0, "step": 20483 }, { "epoch": 19.696153846153845, "grad_norm": 0.005474323872476816, "learning_rate": 2.2763201889510987e-06, "loss": 0.0, "step": 20484 }, { "epoch": 19.697115384615383, "grad_norm": 0.00633013341575861, "learning_rate": 2.2755290267700212e-06, "loss": 0.0, "step": 20485 }, { "epoch": 19.698076923076922, "grad_norm": 0.0024951326195150614, "learning_rate": 2.274737984450065e-06, "loss": 0.0, "step": 20486 }, { "epoch": 19.69903846153846, "grad_norm": 0.007300749886780977, "learning_rate": 2.273947062003499e-06, "loss": 0.0001, "step": 20487 }, { "epoch": 19.7, "grad_norm": 0.0029341697227209806, "learning_rate": 2.2731562594426013e-06, "loss": 0.0, "step": 20488 }, { "epoch": 19.700961538461538, "grad_norm": 0.004045846872031689, "learning_rate": 2.272365576779637e-06, "loss": 0.0, "step": 20489 }, { "epoch": 19.701923076923077, "grad_norm": 0.007316800765693188, "learning_rate": 2.27157501402688e-06, "loss": 0.0, "step": 20490 }, { "epoch": 19.702884615384615, "grad_norm": 0.006042154971510172, "learning_rate": 2.270784571196598e-06, "loss": 0.0001, "step": 20491 }, { "epoch": 19.703846153846154, "grad_norm": 0.005782610271126032, "learning_rate": 2.269994248301053e-06, "loss": 0.0001, "step": 20492 }, { "epoch": 19.704807692307693, "grad_norm": 0.0033326242119073868, "learning_rate": 2.269204045352509e-06, "loss": 0.0, "step": 20493 }, { "epoch": 19.70576923076923, "grad_norm": 0.08591152727603912, "learning_rate": 2.268413962363231e-06, "loss": 0.0002, "step": 20494 }, { "epoch": 19.70673076923077, "grad_norm": 0.004213077016174793, "learning_rate": 2.267623999345474e-06, "loss": 0.0, "step": 20495 }, { "epoch": 19.70769230769231, "grad_norm": 0.004735316149890423, "learning_rate": 2.2668341563114983e-06, "loss": 0.0, "step": 20496 }, { "epoch": 19.708653846153847, "grad_norm": 0.0017894321354106069, "learning_rate": 2.266044433273562e-06, "loss": 0.0, "step": 20497 }, { "epoch": 19.709615384615386, "grad_norm": 0.007414915133267641, "learning_rate": 2.265254830243916e-06, "loss": 0.0, "step": 20498 }, { "epoch": 19.710576923076925, "grad_norm": 0.016671326011419296, "learning_rate": 2.264465347234812e-06, "loss": 0.0001, "step": 20499 }, { "epoch": 19.71153846153846, "grad_norm": 0.0008717906894162297, "learning_rate": 2.2636759842585066e-06, "loss": 0.0, "step": 20500 }, { "epoch": 19.7125, "grad_norm": 0.0038379242178052664, "learning_rate": 2.2628867413272402e-06, "loss": 0.0, "step": 20501 }, { "epoch": 19.713461538461537, "grad_norm": 0.004844470880925655, "learning_rate": 2.262097618453267e-06, "loss": 0.0, "step": 20502 }, { "epoch": 19.714423076923076, "grad_norm": 0.013425854966044426, "learning_rate": 2.261308615648825e-06, "loss": 0.0001, "step": 20503 }, { "epoch": 19.715384615384615, "grad_norm": 0.0016350410878658295, "learning_rate": 2.260519732926161e-06, "loss": 0.0, "step": 20504 }, { "epoch": 19.716346153846153, "grad_norm": 0.0017331482376903296, "learning_rate": 2.259730970297518e-06, "loss": 0.0, "step": 20505 }, { "epoch": 19.717307692307692, "grad_norm": 0.0038776295259594917, "learning_rate": 2.25894232777513e-06, "loss": 0.0, "step": 20506 }, { "epoch": 19.71826923076923, "grad_norm": 0.0031945956870913506, "learning_rate": 2.2581538053712383e-06, "loss": 0.0, "step": 20507 }, { "epoch": 19.71923076923077, "grad_norm": 0.003471977077424526, "learning_rate": 2.25736540309808e-06, "loss": 0.0, "step": 20508 }, { "epoch": 19.720192307692308, "grad_norm": 0.6801972985267639, "learning_rate": 2.2565771209678822e-06, "loss": 0.0019, "step": 20509 }, { "epoch": 19.721153846153847, "grad_norm": 0.0026366168167442083, "learning_rate": 2.2557889589928815e-06, "loss": 0.0, "step": 20510 }, { "epoch": 19.722115384615385, "grad_norm": 0.0020084206480532885, "learning_rate": 2.2550009171853103e-06, "loss": 0.0, "step": 20511 }, { "epoch": 19.723076923076924, "grad_norm": 3.5612785816192627, "learning_rate": 2.254212995557391e-06, "loss": 0.0135, "step": 20512 }, { "epoch": 19.724038461538463, "grad_norm": 0.002574882237240672, "learning_rate": 2.253425194121355e-06, "loss": 0.0, "step": 20513 }, { "epoch": 19.725, "grad_norm": 0.0011529865441843867, "learning_rate": 2.2526375128894205e-06, "loss": 0.0, "step": 20514 }, { "epoch": 19.72596153846154, "grad_norm": 0.016373077407479286, "learning_rate": 2.2518499518738147e-06, "loss": 0.0001, "step": 20515 }, { "epoch": 19.726923076923075, "grad_norm": 0.0028309363406151533, "learning_rate": 2.251062511086759e-06, "loss": 0.0, "step": 20516 }, { "epoch": 19.727884615384614, "grad_norm": 0.13441920280456543, "learning_rate": 2.2502751905404675e-06, "loss": 0.0002, "step": 20517 }, { "epoch": 19.728846153846153, "grad_norm": 1.903530240058899, "learning_rate": 2.2494879902471613e-06, "loss": 0.0063, "step": 20518 }, { "epoch": 19.72980769230769, "grad_norm": 0.004595922771841288, "learning_rate": 2.248700910219055e-06, "loss": 0.0, "step": 20519 }, { "epoch": 19.73076923076923, "grad_norm": 0.003012410830706358, "learning_rate": 2.247913950468359e-06, "loss": 0.0, "step": 20520 }, { "epoch": 19.73173076923077, "grad_norm": 0.05161096900701523, "learning_rate": 2.2471271110072856e-06, "loss": 0.0002, "step": 20521 }, { "epoch": 19.732692307692307, "grad_norm": 0.003478188533335924, "learning_rate": 2.2463403918480486e-06, "loss": 0.0, "step": 20522 }, { "epoch": 19.733653846153846, "grad_norm": 0.003981659188866615, "learning_rate": 2.245553793002849e-06, "loss": 0.0, "step": 20523 }, { "epoch": 19.734615384615385, "grad_norm": 0.0026344726793468, "learning_rate": 2.2447673144838975e-06, "loss": 0.0, "step": 20524 }, { "epoch": 19.735576923076923, "grad_norm": 0.0076071722432971, "learning_rate": 2.2439809563033945e-06, "loss": 0.0001, "step": 20525 }, { "epoch": 19.736538461538462, "grad_norm": 0.003297606250271201, "learning_rate": 2.2431947184735427e-06, "loss": 0.0, "step": 20526 }, { "epoch": 19.7375, "grad_norm": 0.0014540620613843203, "learning_rate": 2.242408601006546e-06, "loss": 0.0, "step": 20527 }, { "epoch": 19.73846153846154, "grad_norm": 0.003166850656270981, "learning_rate": 2.241622603914596e-06, "loss": 0.0, "step": 20528 }, { "epoch": 19.739423076923078, "grad_norm": 0.013089430518448353, "learning_rate": 2.2408367272098928e-06, "loss": 0.0001, "step": 20529 }, { "epoch": 19.740384615384617, "grad_norm": 0.002873355755582452, "learning_rate": 2.2400509709046348e-06, "loss": 0.0, "step": 20530 }, { "epoch": 19.741346153846155, "grad_norm": 0.006865566596388817, "learning_rate": 2.2392653350110072e-06, "loss": 0.0001, "step": 20531 }, { "epoch": 19.74230769230769, "grad_norm": 0.005140594206750393, "learning_rate": 2.238479819541203e-06, "loss": 0.0001, "step": 20532 }, { "epoch": 19.74326923076923, "grad_norm": 0.002046840963885188, "learning_rate": 2.2376944245074162e-06, "loss": 0.0, "step": 20533 }, { "epoch": 19.744230769230768, "grad_norm": 0.002464100020006299, "learning_rate": 2.2369091499218266e-06, "loss": 0.0, "step": 20534 }, { "epoch": 19.745192307692307, "grad_norm": 0.009647957049310207, "learning_rate": 2.236123995796625e-06, "loss": 0.0001, "step": 20535 }, { "epoch": 19.746153846153845, "grad_norm": 0.008988686837255955, "learning_rate": 2.23533896214399e-06, "loss": 0.0, "step": 20536 }, { "epoch": 19.747115384615384, "grad_norm": 0.0030382988043129444, "learning_rate": 2.2345540489761054e-06, "loss": 0.0, "step": 20537 }, { "epoch": 19.748076923076923, "grad_norm": 0.005562958307564259, "learning_rate": 2.233769256305153e-06, "loss": 0.0, "step": 20538 }, { "epoch": 19.74903846153846, "grad_norm": 0.0025716836098581553, "learning_rate": 2.232984584143306e-06, "loss": 0.0, "step": 20539 }, { "epoch": 19.75, "grad_norm": 0.00450473977252841, "learning_rate": 2.2322000325027417e-06, "loss": 0.0, "step": 20540 }, { "epoch": 19.75096153846154, "grad_norm": 0.03127117082476616, "learning_rate": 2.2314156013956377e-06, "loss": 0.0001, "step": 20541 }, { "epoch": 19.751923076923077, "grad_norm": 0.0010263101430609822, "learning_rate": 2.23063129083416e-06, "loss": 0.0, "step": 20542 }, { "epoch": 19.752884615384616, "grad_norm": 0.004948430694639683, "learning_rate": 2.2298471008304823e-06, "loss": 0.0, "step": 20543 }, { "epoch": 19.753846153846155, "grad_norm": 0.0040513877756893635, "learning_rate": 2.2290630313967754e-06, "loss": 0.0, "step": 20544 }, { "epoch": 19.754807692307693, "grad_norm": 0.0018107594223693013, "learning_rate": 2.2282790825451996e-06, "loss": 0.0, "step": 20545 }, { "epoch": 19.755769230769232, "grad_norm": 0.004343831911683083, "learning_rate": 2.2274952542879258e-06, "loss": 0.0, "step": 20546 }, { "epoch": 19.75673076923077, "grad_norm": 0.010472024790942669, "learning_rate": 2.22671154663711e-06, "loss": 0.0001, "step": 20547 }, { "epoch": 19.75769230769231, "grad_norm": 0.011942085810005665, "learning_rate": 2.2259279596049175e-06, "loss": 0.0001, "step": 20548 }, { "epoch": 19.758653846153845, "grad_norm": 0.009119106456637383, "learning_rate": 2.2251444932035094e-06, "loss": 0.0001, "step": 20549 }, { "epoch": 19.759615384615383, "grad_norm": 0.001004184945486486, "learning_rate": 2.2243611474450366e-06, "loss": 0.0, "step": 20550 }, { "epoch": 19.760576923076922, "grad_norm": 0.002297248924151063, "learning_rate": 2.2235779223416575e-06, "loss": 0.0, "step": 20551 }, { "epoch": 19.76153846153846, "grad_norm": 0.06418811529874802, "learning_rate": 2.222794817905528e-06, "loss": 0.0001, "step": 20552 }, { "epoch": 19.7625, "grad_norm": 0.00394916906952858, "learning_rate": 2.222011834148795e-06, "loss": 0.0, "step": 20553 }, { "epoch": 19.763461538461538, "grad_norm": 0.0026763270143419504, "learning_rate": 2.22122897108361e-06, "loss": 0.0, "step": 20554 }, { "epoch": 19.764423076923077, "grad_norm": 0.0033697260078042746, "learning_rate": 2.220446228722123e-06, "loss": 0.0, "step": 20555 }, { "epoch": 19.765384615384615, "grad_norm": 0.0007374736596830189, "learning_rate": 2.2196636070764764e-06, "loss": 0.0, "step": 20556 }, { "epoch": 19.766346153846154, "grad_norm": 0.005104604177176952, "learning_rate": 2.2188811061588177e-06, "loss": 0.0, "step": 20557 }, { "epoch": 19.767307692307693, "grad_norm": 0.003036833368241787, "learning_rate": 2.2180987259812838e-06, "loss": 0.0, "step": 20558 }, { "epoch": 19.76826923076923, "grad_norm": 0.005156467203050852, "learning_rate": 2.217316466556019e-06, "loss": 0.0, "step": 20559 }, { "epoch": 19.76923076923077, "grad_norm": 0.004782624077051878, "learning_rate": 2.216534327895162e-06, "loss": 0.0, "step": 20560 }, { "epoch": 19.77019230769231, "grad_norm": 0.0035410327836871147, "learning_rate": 2.2157523100108467e-06, "loss": 0.0, "step": 20561 }, { "epoch": 19.771153846153847, "grad_norm": 0.006981491111218929, "learning_rate": 2.2149704129152083e-06, "loss": 0.0001, "step": 20562 }, { "epoch": 19.772115384615386, "grad_norm": 1.2951990365982056, "learning_rate": 2.2141886366203836e-06, "loss": 0.0041, "step": 20563 }, { "epoch": 19.773076923076925, "grad_norm": 0.03558618202805519, "learning_rate": 2.213406981138497e-06, "loss": 0.0001, "step": 20564 }, { "epoch": 19.77403846153846, "grad_norm": 0.0071605900302529335, "learning_rate": 2.212625446481681e-06, "loss": 0.0001, "step": 20565 }, { "epoch": 19.775, "grad_norm": 0.0034636803902685642, "learning_rate": 2.211844032662065e-06, "loss": 0.0, "step": 20566 }, { "epoch": 19.775961538461537, "grad_norm": 0.004406825173646212, "learning_rate": 2.2110627396917684e-06, "loss": 0.0, "step": 20567 }, { "epoch": 19.776923076923076, "grad_norm": 0.008036628365516663, "learning_rate": 2.2102815675829213e-06, "loss": 0.0, "step": 20568 }, { "epoch": 19.777884615384615, "grad_norm": 0.008879654109477997, "learning_rate": 2.209500516347639e-06, "loss": 0.0, "step": 20569 }, { "epoch": 19.778846153846153, "grad_norm": 0.0036276420578360558, "learning_rate": 2.2087195859980436e-06, "loss": 0.0, "step": 20570 }, { "epoch": 19.779807692307692, "grad_norm": 0.0009028841159306467, "learning_rate": 2.2079387765462555e-06, "loss": 0.0, "step": 20571 }, { "epoch": 19.78076923076923, "grad_norm": 0.0016859518364071846, "learning_rate": 2.2071580880043853e-06, "loss": 0.0, "step": 20572 }, { "epoch": 19.78173076923077, "grad_norm": 0.0980457216501236, "learning_rate": 2.2063775203845504e-06, "loss": 0.0002, "step": 20573 }, { "epoch": 19.782692307692308, "grad_norm": 0.0032291682437062263, "learning_rate": 2.2055970736988653e-06, "loss": 0.0, "step": 20574 }, { "epoch": 19.783653846153847, "grad_norm": 0.001527082989923656, "learning_rate": 2.204816747959434e-06, "loss": 0.0, "step": 20575 }, { "epoch": 19.784615384615385, "grad_norm": 0.16740064322948456, "learning_rate": 2.2040365431783683e-06, "loss": 0.0003, "step": 20576 }, { "epoch": 19.785576923076924, "grad_norm": 0.002328564878553152, "learning_rate": 2.2032564593677773e-06, "loss": 0.0, "step": 20577 }, { "epoch": 19.786538461538463, "grad_norm": 0.002839641412720084, "learning_rate": 2.2024764965397607e-06, "loss": 0.0, "step": 20578 }, { "epoch": 19.7875, "grad_norm": 0.015598008409142494, "learning_rate": 2.2016966547064254e-06, "loss": 0.0, "step": 20579 }, { "epoch": 19.78846153846154, "grad_norm": 0.0010667971801012754, "learning_rate": 2.2009169338798685e-06, "loss": 0.0, "step": 20580 }, { "epoch": 19.789423076923075, "grad_norm": 0.004972090478986502, "learning_rate": 2.2001373340721897e-06, "loss": 0.0001, "step": 20581 }, { "epoch": 19.790384615384614, "grad_norm": 0.0006480990559794009, "learning_rate": 2.199357855295491e-06, "loss": 0.0, "step": 20582 }, { "epoch": 19.791346153846153, "grad_norm": 0.001816388568840921, "learning_rate": 2.1985784975618596e-06, "loss": 0.0, "step": 20583 }, { "epoch": 19.79230769230769, "grad_norm": 0.03920347988605499, "learning_rate": 2.197799260883394e-06, "loss": 0.0001, "step": 20584 }, { "epoch": 19.79326923076923, "grad_norm": 0.0017981124110519886, "learning_rate": 2.1970201452721873e-06, "loss": 0.0, "step": 20585 }, { "epoch": 19.79423076923077, "grad_norm": 0.0005428448785096407, "learning_rate": 2.1962411507403235e-06, "loss": 0.0, "step": 20586 }, { "epoch": 19.795192307692307, "grad_norm": 0.007792527321726084, "learning_rate": 2.1954622772998935e-06, "loss": 0.0, "step": 20587 }, { "epoch": 19.796153846153846, "grad_norm": 0.003087395802140236, "learning_rate": 2.194683524962986e-06, "loss": 0.0, "step": 20588 }, { "epoch": 19.797115384615385, "grad_norm": 0.024660911411046982, "learning_rate": 2.1939048937416783e-06, "loss": 0.0001, "step": 20589 }, { "epoch": 19.798076923076923, "grad_norm": 0.009644575417041779, "learning_rate": 2.1931263836480563e-06, "loss": 0.0, "step": 20590 }, { "epoch": 19.799038461538462, "grad_norm": 0.002193091670051217, "learning_rate": 2.1923479946942028e-06, "loss": 0.0, "step": 20591 }, { "epoch": 19.8, "grad_norm": 0.00482352776452899, "learning_rate": 2.191569726892191e-06, "loss": 0.0, "step": 20592 }, { "epoch": 19.80096153846154, "grad_norm": 0.0031700225081294775, "learning_rate": 2.190791580254101e-06, "loss": 0.0, "step": 20593 }, { "epoch": 19.801923076923078, "grad_norm": 0.0028187718708068132, "learning_rate": 2.1900135547920042e-06, "loss": 0.0, "step": 20594 }, { "epoch": 19.802884615384617, "grad_norm": 0.0032276988495141268, "learning_rate": 2.1892356505179747e-06, "loss": 0.0, "step": 20595 }, { "epoch": 19.803846153846155, "grad_norm": 0.002071250230073929, "learning_rate": 2.188457867444087e-06, "loss": 0.0, "step": 20596 }, { "epoch": 19.80480769230769, "grad_norm": 0.0019792921375483274, "learning_rate": 2.1876802055824032e-06, "loss": 0.0, "step": 20597 }, { "epoch": 19.80576923076923, "grad_norm": 0.0011264216154813766, "learning_rate": 2.1869026649449933e-06, "loss": 0.0, "step": 20598 }, { "epoch": 19.806730769230768, "grad_norm": 0.001532376161776483, "learning_rate": 2.1861252455439266e-06, "loss": 0.0, "step": 20599 }, { "epoch": 19.807692307692307, "grad_norm": 1.1469722986221313, "learning_rate": 2.18534794739126e-06, "loss": 0.0023, "step": 20600 }, { "epoch": 19.808653846153845, "grad_norm": 0.0025280648842453957, "learning_rate": 2.184570770499056e-06, "loss": 0.0, "step": 20601 }, { "epoch": 19.809615384615384, "grad_norm": 0.004092046990990639, "learning_rate": 2.18379371487938e-06, "loss": 0.0, "step": 20602 }, { "epoch": 19.810576923076923, "grad_norm": 0.002812999999150634, "learning_rate": 2.1830167805442813e-06, "loss": 0.0, "step": 20603 }, { "epoch": 19.81153846153846, "grad_norm": 0.007158288732171059, "learning_rate": 2.1822399675058225e-06, "loss": 0.0001, "step": 20604 }, { "epoch": 19.8125, "grad_norm": 0.0027080425061285496, "learning_rate": 2.1814632757760525e-06, "loss": 0.0, "step": 20605 }, { "epoch": 19.81346153846154, "grad_norm": 0.0013102666707709432, "learning_rate": 2.1806867053670254e-06, "loss": 0.0, "step": 20606 }, { "epoch": 19.814423076923077, "grad_norm": 0.0007103762472979724, "learning_rate": 2.1799102562907936e-06, "loss": 0.0, "step": 20607 }, { "epoch": 19.815384615384616, "grad_norm": 0.0014714734861627221, "learning_rate": 2.1791339285593995e-06, "loss": 0.0, "step": 20608 }, { "epoch": 19.816346153846155, "grad_norm": 0.01403095107525587, "learning_rate": 2.178357722184894e-06, "loss": 0.0, "step": 20609 }, { "epoch": 19.817307692307693, "grad_norm": 0.0036579719744622707, "learning_rate": 2.1775816371793223e-06, "loss": 0.0, "step": 20610 }, { "epoch": 19.818269230769232, "grad_norm": 0.0018324399134144187, "learning_rate": 2.176805673554723e-06, "loss": 0.0, "step": 20611 }, { "epoch": 19.81923076923077, "grad_norm": 0.007334013935178518, "learning_rate": 2.1760298313231387e-06, "loss": 0.0, "step": 20612 }, { "epoch": 19.82019230769231, "grad_norm": 0.00492519699037075, "learning_rate": 2.1752541104966107e-06, "loss": 0.0, "step": 20613 }, { "epoch": 19.821153846153845, "grad_norm": 0.0018488038331270218, "learning_rate": 2.1744785110871713e-06, "loss": 0.0, "step": 20614 }, { "epoch": 19.822115384615383, "grad_norm": 0.0025187626015394926, "learning_rate": 2.17370303310686e-06, "loss": 0.0, "step": 20615 }, { "epoch": 19.823076923076922, "grad_norm": 0.002545429626479745, "learning_rate": 2.1729276765677056e-06, "loss": 0.0, "step": 20616 }, { "epoch": 19.82403846153846, "grad_norm": 0.0011256723664700985, "learning_rate": 2.1721524414817406e-06, "loss": 0.0, "step": 20617 }, { "epoch": 19.825, "grad_norm": 0.05244065076112747, "learning_rate": 2.1713773278609985e-06, "loss": 0.0002, "step": 20618 }, { "epoch": 19.825961538461538, "grad_norm": 0.004334016237407923, "learning_rate": 2.170602335717501e-06, "loss": 0.0, "step": 20619 }, { "epoch": 19.826923076923077, "grad_norm": 0.004090005531907082, "learning_rate": 2.1698274650632754e-06, "loss": 0.0, "step": 20620 }, { "epoch": 19.827884615384615, "grad_norm": 0.0038406162057071924, "learning_rate": 2.16905271591035e-06, "loss": 0.0, "step": 20621 }, { "epoch": 19.828846153846154, "grad_norm": 0.0058103566989302635, "learning_rate": 2.16827808827074e-06, "loss": 0.0, "step": 20622 }, { "epoch": 19.829807692307693, "grad_norm": 0.0012065356131643057, "learning_rate": 2.1675035821564682e-06, "loss": 0.0, "step": 20623 }, { "epoch": 19.83076923076923, "grad_norm": 0.0013704320881515741, "learning_rate": 2.1667291975795555e-06, "loss": 0.0, "step": 20624 }, { "epoch": 19.83173076923077, "grad_norm": 0.0018918960122391582, "learning_rate": 2.1659549345520125e-06, "loss": 0.0, "step": 20625 }, { "epoch": 19.83269230769231, "grad_norm": 0.003202728694304824, "learning_rate": 2.1651807930858584e-06, "loss": 0.0, "step": 20626 }, { "epoch": 19.833653846153847, "grad_norm": 0.0041472953744232655, "learning_rate": 2.1644067731931005e-06, "loss": 0.0, "step": 20627 }, { "epoch": 19.834615384615386, "grad_norm": 0.011890535242855549, "learning_rate": 2.1636328748857537e-06, "loss": 0.0, "step": 20628 }, { "epoch": 19.835576923076925, "grad_norm": 0.004217630252242088, "learning_rate": 2.162859098175827e-06, "loss": 0.0, "step": 20629 }, { "epoch": 19.83653846153846, "grad_norm": 0.00288186757825315, "learning_rate": 2.1620854430753222e-06, "loss": 0.0, "step": 20630 }, { "epoch": 19.8375, "grad_norm": 0.004361538682132959, "learning_rate": 2.161311909596249e-06, "loss": 0.0001, "step": 20631 }, { "epoch": 19.838461538461537, "grad_norm": 0.002595324767753482, "learning_rate": 2.1605384977506106e-06, "loss": 0.0, "step": 20632 }, { "epoch": 19.839423076923076, "grad_norm": 0.014000941999256611, "learning_rate": 2.1597652075504026e-06, "loss": 0.0, "step": 20633 }, { "epoch": 19.840384615384615, "grad_norm": 0.0012662389781326056, "learning_rate": 2.158992039007628e-06, "loss": 0.0, "step": 20634 }, { "epoch": 19.841346153846153, "grad_norm": 0.001225296058692038, "learning_rate": 2.1582189921342876e-06, "loss": 0.0, "step": 20635 }, { "epoch": 19.842307692307692, "grad_norm": 0.015437815338373184, "learning_rate": 2.157446066942369e-06, "loss": 0.0001, "step": 20636 }, { "epoch": 19.84326923076923, "grad_norm": 0.0036238757893443108, "learning_rate": 2.1566732634438738e-06, "loss": 0.0, "step": 20637 }, { "epoch": 19.84423076923077, "grad_norm": 0.0031294035725295544, "learning_rate": 2.1559005816507863e-06, "loss": 0.0, "step": 20638 }, { "epoch": 19.845192307692308, "grad_norm": 0.0024628317914903164, "learning_rate": 2.1551280215751005e-06, "loss": 0.0, "step": 20639 }, { "epoch": 19.846153846153847, "grad_norm": 0.011823249980807304, "learning_rate": 2.1543555832288056e-06, "loss": 0.0, "step": 20640 }, { "epoch": 19.847115384615385, "grad_norm": 0.003515405347570777, "learning_rate": 2.153583266623882e-06, "loss": 0.0, "step": 20641 }, { "epoch": 19.848076923076924, "grad_norm": 0.0027627302333712578, "learning_rate": 2.152811071772317e-06, "loss": 0.0, "step": 20642 }, { "epoch": 19.849038461538463, "grad_norm": 0.04782193899154663, "learning_rate": 2.152038998686097e-06, "loss": 0.0001, "step": 20643 }, { "epoch": 19.85, "grad_norm": 0.0016002284828573465, "learning_rate": 2.1512670473771944e-06, "loss": 0.0, "step": 20644 }, { "epoch": 19.85096153846154, "grad_norm": 0.07549490034580231, "learning_rate": 2.150495217857591e-06, "loss": 0.0002, "step": 20645 }, { "epoch": 19.851923076923075, "grad_norm": 0.0026359360199421644, "learning_rate": 2.1497235101392678e-06, "loss": 0.0, "step": 20646 }, { "epoch": 19.852884615384614, "grad_norm": 0.008719809353351593, "learning_rate": 2.148951924234193e-06, "loss": 0.0001, "step": 20647 }, { "epoch": 19.853846153846153, "grad_norm": 0.004016328603029251, "learning_rate": 2.1481804601543433e-06, "loss": 0.0, "step": 20648 }, { "epoch": 19.85480769230769, "grad_norm": 0.0030325378756970167, "learning_rate": 2.147409117911685e-06, "loss": 0.0, "step": 20649 }, { "epoch": 19.85576923076923, "grad_norm": 0.0014433326432481408, "learning_rate": 2.146637897518191e-06, "loss": 0.0, "step": 20650 }, { "epoch": 19.85673076923077, "grad_norm": 0.0066361259669065475, "learning_rate": 2.1458667989858304e-06, "loss": 0.0, "step": 20651 }, { "epoch": 19.857692307692307, "grad_norm": 0.0014765652595087886, "learning_rate": 2.145095822326563e-06, "loss": 0.0, "step": 20652 }, { "epoch": 19.858653846153846, "grad_norm": 0.002889722352847457, "learning_rate": 2.1443249675523536e-06, "loss": 0.0, "step": 20653 }, { "epoch": 19.859615384615385, "grad_norm": 0.0035786142107099295, "learning_rate": 2.143554234675168e-06, "loss": 0.0, "step": 20654 }, { "epoch": 19.860576923076923, "grad_norm": 0.0043365806341171265, "learning_rate": 2.1427836237069587e-06, "loss": 0.0, "step": 20655 }, { "epoch": 19.861538461538462, "grad_norm": 0.0015719123184680939, "learning_rate": 2.1420131346596883e-06, "loss": 0.0, "step": 20656 }, { "epoch": 19.8625, "grad_norm": 0.0011420809896662831, "learning_rate": 2.1412427675453128e-06, "loss": 0.0, "step": 20657 }, { "epoch": 19.86346153846154, "grad_norm": 0.00313668604940176, "learning_rate": 2.1404725223757817e-06, "loss": 0.0, "step": 20658 }, { "epoch": 19.864423076923078, "grad_norm": 0.02019163779914379, "learning_rate": 2.139702399163053e-06, "loss": 0.0001, "step": 20659 }, { "epoch": 19.865384615384617, "grad_norm": 0.011059126816689968, "learning_rate": 2.13893239791907e-06, "loss": 0.0, "step": 20660 }, { "epoch": 19.866346153846155, "grad_norm": 0.009125393815338612, "learning_rate": 2.1381625186557842e-06, "loss": 0.0, "step": 20661 }, { "epoch": 19.86730769230769, "grad_norm": 0.002592832315713167, "learning_rate": 2.1373927613851454e-06, "loss": 0.0, "step": 20662 }, { "epoch": 19.86826923076923, "grad_norm": 0.008143630810081959, "learning_rate": 2.1366231261190905e-06, "loss": 0.0, "step": 20663 }, { "epoch": 19.869230769230768, "grad_norm": 0.004937898833304644, "learning_rate": 2.135853612869566e-06, "loss": 0.0, "step": 20664 }, { "epoch": 19.870192307692307, "grad_norm": 0.005945300217717886, "learning_rate": 2.1350842216485167e-06, "loss": 0.0001, "step": 20665 }, { "epoch": 19.871153846153845, "grad_norm": 0.0008086762973107398, "learning_rate": 2.134314952467873e-06, "loss": 0.0, "step": 20666 }, { "epoch": 19.872115384615384, "grad_norm": 2.6971616744995117, "learning_rate": 2.1335458053395764e-06, "loss": 0.025, "step": 20667 }, { "epoch": 19.873076923076923, "grad_norm": 0.0031421298626810312, "learning_rate": 2.1327767802755637e-06, "loss": 0.0, "step": 20668 }, { "epoch": 19.87403846153846, "grad_norm": 0.0020743347704410553, "learning_rate": 2.132007877287762e-06, "loss": 0.0, "step": 20669 }, { "epoch": 19.875, "grad_norm": 0.002246546559035778, "learning_rate": 2.131239096388109e-06, "loss": 0.0, "step": 20670 }, { "epoch": 19.87596153846154, "grad_norm": 0.0013712762156501412, "learning_rate": 2.130470437588528e-06, "loss": 0.0, "step": 20671 }, { "epoch": 19.876923076923077, "grad_norm": 0.0018260752549394965, "learning_rate": 2.129701900900949e-06, "loss": 0.0, "step": 20672 }, { "epoch": 19.877884615384616, "grad_norm": 0.008028283715248108, "learning_rate": 2.1289334863373e-06, "loss": 0.0, "step": 20673 }, { "epoch": 19.878846153846155, "grad_norm": 0.0032565670553594828, "learning_rate": 2.1281651939094996e-06, "loss": 0.0, "step": 20674 }, { "epoch": 19.879807692307693, "grad_norm": 0.0019473546417430043, "learning_rate": 2.1273970236294717e-06, "loss": 0.0, "step": 20675 }, { "epoch": 19.880769230769232, "grad_norm": 0.0009522167965769768, "learning_rate": 2.1266289755091387e-06, "loss": 0.0, "step": 20676 }, { "epoch": 19.88173076923077, "grad_norm": 0.0017444901168346405, "learning_rate": 2.125861049560414e-06, "loss": 0.0, "step": 20677 }, { "epoch": 19.88269230769231, "grad_norm": 0.010114168748259544, "learning_rate": 2.1250932457952146e-06, "loss": 0.0001, "step": 20678 }, { "epoch": 19.883653846153845, "grad_norm": 0.001996062695980072, "learning_rate": 2.124325564225458e-06, "loss": 0.0, "step": 20679 }, { "epoch": 19.884615384615383, "grad_norm": 0.002972874790430069, "learning_rate": 2.123558004863051e-06, "loss": 0.0, "step": 20680 }, { "epoch": 19.885576923076922, "grad_norm": 0.018996890634298325, "learning_rate": 2.1227905677199077e-06, "loss": 0.0001, "step": 20681 }, { "epoch": 19.88653846153846, "grad_norm": 2.511662483215332, "learning_rate": 2.1220232528079375e-06, "loss": 0.0313, "step": 20682 }, { "epoch": 19.8875, "grad_norm": 0.0031158598139882088, "learning_rate": 2.121256060139042e-06, "loss": 0.0, "step": 20683 }, { "epoch": 19.888461538461538, "grad_norm": 0.01291575189679861, "learning_rate": 2.1204889897251322e-06, "loss": 0.0001, "step": 20684 }, { "epoch": 19.889423076923077, "grad_norm": 0.004276557359844446, "learning_rate": 2.119722041578104e-06, "loss": 0.0, "step": 20685 }, { "epoch": 19.890384615384615, "grad_norm": 0.011756517924368382, "learning_rate": 2.1189552157098613e-06, "loss": 0.0001, "step": 20686 }, { "epoch": 19.891346153846154, "grad_norm": 0.001966780284419656, "learning_rate": 2.1181885121323054e-06, "loss": 0.0, "step": 20687 }, { "epoch": 19.892307692307693, "grad_norm": 0.0015377671224996448, "learning_rate": 2.1174219308573286e-06, "loss": 0.0, "step": 20688 }, { "epoch": 19.89326923076923, "grad_norm": 0.002470281906425953, "learning_rate": 2.116655471896828e-06, "loss": 0.0, "step": 20689 }, { "epoch": 19.89423076923077, "grad_norm": 0.001187278889119625, "learning_rate": 2.115889135262701e-06, "loss": 0.0, "step": 20690 }, { "epoch": 19.89519230769231, "grad_norm": 0.0009411475621163845, "learning_rate": 2.115122920966831e-06, "loss": 0.0, "step": 20691 }, { "epoch": 19.896153846153847, "grad_norm": 0.009502619504928589, "learning_rate": 2.1143568290211115e-06, "loss": 0.0001, "step": 20692 }, { "epoch": 19.897115384615386, "grad_norm": 0.004695102572441101, "learning_rate": 2.113590859437433e-06, "loss": 0.0, "step": 20693 }, { "epoch": 19.898076923076925, "grad_norm": 0.00396706722676754, "learning_rate": 2.112825012227676e-06, "loss": 0.0, "step": 20694 }, { "epoch": 19.89903846153846, "grad_norm": 0.0021589212119579315, "learning_rate": 2.1120592874037284e-06, "loss": 0.0, "step": 20695 }, { "epoch": 19.9, "grad_norm": 0.0017546018352732062, "learning_rate": 2.1112936849774667e-06, "loss": 0.0, "step": 20696 }, { "epoch": 19.900961538461537, "grad_norm": 0.004834544379264116, "learning_rate": 2.1105282049607744e-06, "loss": 0.0, "step": 20697 }, { "epoch": 19.901923076923076, "grad_norm": 0.0024939526338130236, "learning_rate": 2.1097628473655317e-06, "loss": 0.0, "step": 20698 }, { "epoch": 19.902884615384615, "grad_norm": 0.003512678435072303, "learning_rate": 2.1089976122036092e-06, "loss": 0.0, "step": 20699 }, { "epoch": 19.903846153846153, "grad_norm": 0.0246890801936388, "learning_rate": 2.108232499486884e-06, "loss": 0.0001, "step": 20700 }, { "epoch": 19.904807692307692, "grad_norm": 0.006557278800755739, "learning_rate": 2.1074675092272313e-06, "loss": 0.0, "step": 20701 }, { "epoch": 19.90576923076923, "grad_norm": 0.00236929371021688, "learning_rate": 2.1067026414365153e-06, "loss": 0.0, "step": 20702 }, { "epoch": 19.90673076923077, "grad_norm": 0.002449609339237213, "learning_rate": 2.1059378961266076e-06, "loss": 0.0, "step": 20703 }, { "epoch": 19.907692307692308, "grad_norm": 0.004236323293298483, "learning_rate": 2.105173273309379e-06, "loss": 0.0, "step": 20704 }, { "epoch": 19.908653846153847, "grad_norm": 0.0004011522396467626, "learning_rate": 2.1044087729966856e-06, "loss": 0.0, "step": 20705 }, { "epoch": 19.909615384615385, "grad_norm": 0.0018759425729513168, "learning_rate": 2.1036443952003994e-06, "loss": 0.0, "step": 20706 }, { "epoch": 19.910576923076924, "grad_norm": 0.005278540309518576, "learning_rate": 2.1028801399323727e-06, "loss": 0.0001, "step": 20707 }, { "epoch": 19.911538461538463, "grad_norm": 0.004956730641424656, "learning_rate": 2.102116007204469e-06, "loss": 0.0, "step": 20708 }, { "epoch": 19.9125, "grad_norm": 0.11290360987186432, "learning_rate": 2.101351997028547e-06, "loss": 0.0003, "step": 20709 }, { "epoch": 19.91346153846154, "grad_norm": 0.009996356442570686, "learning_rate": 2.100588109416457e-06, "loss": 0.0, "step": 20710 }, { "epoch": 19.914423076923075, "grad_norm": 0.00423660222440958, "learning_rate": 2.099824344380056e-06, "loss": 0.0, "step": 20711 }, { "epoch": 19.915384615384614, "grad_norm": 0.012546458281576633, "learning_rate": 2.099060701931196e-06, "loss": 0.0001, "step": 20712 }, { "epoch": 19.916346153846153, "grad_norm": 0.004165343940258026, "learning_rate": 2.0982971820817235e-06, "loss": 0.0001, "step": 20713 }, { "epoch": 19.91730769230769, "grad_norm": 0.003938097506761551, "learning_rate": 2.0975337848434864e-06, "loss": 0.0, "step": 20714 }, { "epoch": 19.91826923076923, "grad_norm": 0.004190423525869846, "learning_rate": 2.096770510228334e-06, "loss": 0.0, "step": 20715 }, { "epoch": 19.91923076923077, "grad_norm": 0.008196250535547733, "learning_rate": 2.0960073582481054e-06, "loss": 0.0, "step": 20716 }, { "epoch": 19.920192307692307, "grad_norm": 0.0022395732812583447, "learning_rate": 2.0952443289146475e-06, "loss": 0.0, "step": 20717 }, { "epoch": 19.921153846153846, "grad_norm": 0.005299358628690243, "learning_rate": 2.0944814222397948e-06, "loss": 0.0, "step": 20718 }, { "epoch": 19.922115384615385, "grad_norm": 0.015431876294314861, "learning_rate": 2.093718638235388e-06, "loss": 0.0001, "step": 20719 }, { "epoch": 19.923076923076923, "grad_norm": 0.0033108710777014494, "learning_rate": 2.0929559769132655e-06, "loss": 0.0, "step": 20720 }, { "epoch": 19.924038461538462, "grad_norm": 0.017047185450792313, "learning_rate": 2.092193438285257e-06, "loss": 0.0001, "step": 20721 }, { "epoch": 19.925, "grad_norm": 0.005940124858170748, "learning_rate": 2.0914310223631972e-06, "loss": 0.0, "step": 20722 }, { "epoch": 19.92596153846154, "grad_norm": 0.007644659839570522, "learning_rate": 2.09066872915892e-06, "loss": 0.0, "step": 20723 }, { "epoch": 19.926923076923078, "grad_norm": 0.003519280580803752, "learning_rate": 2.089906558684247e-06, "loss": 0.0, "step": 20724 }, { "epoch": 19.927884615384617, "grad_norm": 0.0043039387091994286, "learning_rate": 2.089144510951008e-06, "loss": 0.0, "step": 20725 }, { "epoch": 19.928846153846155, "grad_norm": 0.0011798632331192493, "learning_rate": 2.088382585971032e-06, "loss": 0.0, "step": 20726 }, { "epoch": 19.92980769230769, "grad_norm": 0.0053007397800683975, "learning_rate": 2.0876207837561334e-06, "loss": 0.0, "step": 20727 }, { "epoch": 19.93076923076923, "grad_norm": 0.02324509248137474, "learning_rate": 2.086859104318142e-06, "loss": 0.0001, "step": 20728 }, { "epoch": 19.931730769230768, "grad_norm": 0.0016316595720127225, "learning_rate": 2.0860975476688692e-06, "loss": 0.0, "step": 20729 }, { "epoch": 19.932692307692307, "grad_norm": 0.007555053569376469, "learning_rate": 2.0853361138201356e-06, "loss": 0.0001, "step": 20730 }, { "epoch": 19.933653846153845, "grad_norm": 0.0023943784181028605, "learning_rate": 2.0845748027837585e-06, "loss": 0.0, "step": 20731 }, { "epoch": 19.934615384615384, "grad_norm": 0.6099779009819031, "learning_rate": 2.0838136145715472e-06, "loss": 0.0021, "step": 20732 }, { "epoch": 19.935576923076923, "grad_norm": 0.0024873872753232718, "learning_rate": 2.0830525491953137e-06, "loss": 0.0, "step": 20733 }, { "epoch": 19.93653846153846, "grad_norm": 0.006918932776898146, "learning_rate": 2.0822916066668718e-06, "loss": 0.0, "step": 20734 }, { "epoch": 19.9375, "grad_norm": 0.0016833189874887466, "learning_rate": 2.0815307869980237e-06, "loss": 0.0, "step": 20735 }, { "epoch": 19.93846153846154, "grad_norm": 0.0028195595368742943, "learning_rate": 2.0807700902005767e-06, "loss": 0.0, "step": 20736 }, { "epoch": 19.939423076923077, "grad_norm": 0.003727514762431383, "learning_rate": 2.080009516286339e-06, "loss": 0.0, "step": 20737 }, { "epoch": 19.940384615384616, "grad_norm": 0.0018572057597339153, "learning_rate": 2.079249065267105e-06, "loss": 0.0, "step": 20738 }, { "epoch": 19.941346153846155, "grad_norm": 0.0036356491036713123, "learning_rate": 2.0784887371546816e-06, "loss": 0.0, "step": 20739 }, { "epoch": 19.942307692307693, "grad_norm": 0.0028047901578247547, "learning_rate": 2.077728531960861e-06, "loss": 0.0, "step": 20740 }, { "epoch": 19.943269230769232, "grad_norm": 0.0029875426553189754, "learning_rate": 2.076968449697442e-06, "loss": 0.0, "step": 20741 }, { "epoch": 19.94423076923077, "grad_norm": 0.010875762440264225, "learning_rate": 2.0762084903762213e-06, "loss": 0.0, "step": 20742 }, { "epoch": 19.94519230769231, "grad_norm": 0.008292674086987972, "learning_rate": 2.075448654008986e-06, "loss": 0.0001, "step": 20743 }, { "epoch": 19.946153846153845, "grad_norm": 0.005085421726107597, "learning_rate": 2.074688940607529e-06, "loss": 0.0001, "step": 20744 }, { "epoch": 19.947115384615383, "grad_norm": 0.010304449126124382, "learning_rate": 2.0739293501836424e-06, "loss": 0.0001, "step": 20745 }, { "epoch": 19.948076923076922, "grad_norm": 0.0014773524599149823, "learning_rate": 2.0731698827491063e-06, "loss": 0.0, "step": 20746 }, { "epoch": 19.94903846153846, "grad_norm": 0.005958050489425659, "learning_rate": 2.072410538315709e-06, "loss": 0.0, "step": 20747 }, { "epoch": 19.95, "grad_norm": 0.003998256754130125, "learning_rate": 2.0716513168952355e-06, "loss": 0.0, "step": 20748 }, { "epoch": 19.950961538461538, "grad_norm": 0.004526173695921898, "learning_rate": 2.0708922184994617e-06, "loss": 0.0, "step": 20749 }, { "epoch": 19.951923076923077, "grad_norm": 0.0013257436221465468, "learning_rate": 2.070133243140172e-06, "loss": 0.0, "step": 20750 }, { "epoch": 19.952884615384615, "grad_norm": 0.004578039050102234, "learning_rate": 2.0693743908291374e-06, "loss": 0.0, "step": 20751 }, { "epoch": 19.953846153846154, "grad_norm": 0.0030380801763385534, "learning_rate": 2.068615661578136e-06, "loss": 0.0, "step": 20752 }, { "epoch": 19.954807692307693, "grad_norm": 0.00259737903252244, "learning_rate": 2.067857055398944e-06, "loss": 0.0, "step": 20753 }, { "epoch": 19.95576923076923, "grad_norm": 0.0008232527761720121, "learning_rate": 2.0670985723033276e-06, "loss": 0.0, "step": 20754 }, { "epoch": 19.95673076923077, "grad_norm": 0.0054510245099663734, "learning_rate": 2.0663402123030584e-06, "loss": 0.0, "step": 20755 }, { "epoch": 19.95769230769231, "grad_norm": 0.0041640871204435825, "learning_rate": 2.0655819754099073e-06, "loss": 0.0, "step": 20756 }, { "epoch": 19.958653846153847, "grad_norm": 0.0029349287506192923, "learning_rate": 2.064823861635633e-06, "loss": 0.0, "step": 20757 }, { "epoch": 19.959615384615386, "grad_norm": 0.0024592976551502943, "learning_rate": 2.0640658709920026e-06, "loss": 0.0, "step": 20758 }, { "epoch": 19.960576923076925, "grad_norm": 0.02677038498222828, "learning_rate": 2.0633080034907816e-06, "loss": 0.0001, "step": 20759 }, { "epoch": 19.96153846153846, "grad_norm": 0.0024883041623979807, "learning_rate": 2.062550259143724e-06, "loss": 0.0, "step": 20760 }, { "epoch": 19.9625, "grad_norm": 5.1314568519592285, "learning_rate": 2.0617926379625896e-06, "loss": 0.0263, "step": 20761 }, { "epoch": 19.963461538461537, "grad_norm": 0.004277081694453955, "learning_rate": 2.061035139959139e-06, "loss": 0.0001, "step": 20762 }, { "epoch": 19.964423076923076, "grad_norm": 0.002289634896442294, "learning_rate": 2.060277765145119e-06, "loss": 0.0, "step": 20763 }, { "epoch": 19.965384615384615, "grad_norm": 0.004197567235678434, "learning_rate": 2.059520513532287e-06, "loss": 0.0, "step": 20764 }, { "epoch": 19.966346153846153, "grad_norm": 0.08849547803401947, "learning_rate": 2.0587633851323906e-06, "loss": 0.0002, "step": 20765 }, { "epoch": 19.967307692307692, "grad_norm": 0.004389441572129726, "learning_rate": 2.058006379957178e-06, "loss": 0.0, "step": 20766 }, { "epoch": 19.96826923076923, "grad_norm": 0.061212942004203796, "learning_rate": 2.0572494980184e-06, "loss": 0.0002, "step": 20767 }, { "epoch": 19.96923076923077, "grad_norm": 0.0015365533763542771, "learning_rate": 2.0564927393277956e-06, "loss": 0.0, "step": 20768 }, { "epoch": 19.970192307692308, "grad_norm": 0.0016042400384321809, "learning_rate": 2.0557361038971102e-06, "loss": 0.0, "step": 20769 }, { "epoch": 19.971153846153847, "grad_norm": 0.004070356022566557, "learning_rate": 2.0549795917380867e-06, "loss": 0.0, "step": 20770 }, { "epoch": 19.972115384615385, "grad_norm": 0.0787447988986969, "learning_rate": 2.0542232028624585e-06, "loss": 0.0003, "step": 20771 }, { "epoch": 19.973076923076924, "grad_norm": 0.010473571717739105, "learning_rate": 2.053466937281966e-06, "loss": 0.0, "step": 20772 }, { "epoch": 19.974038461538463, "grad_norm": 0.009980708360671997, "learning_rate": 2.052710795008347e-06, "loss": 0.0001, "step": 20773 }, { "epoch": 19.975, "grad_norm": 0.005117856431752443, "learning_rate": 2.0519547760533278e-06, "loss": 0.0, "step": 20774 }, { "epoch": 19.97596153846154, "grad_norm": 0.004949488677084446, "learning_rate": 2.0511988804286466e-06, "loss": 0.0, "step": 20775 }, { "epoch": 19.976923076923075, "grad_norm": 0.0019307679031044245, "learning_rate": 2.0504431081460263e-06, "loss": 0.0, "step": 20776 }, { "epoch": 19.977884615384614, "grad_norm": 0.005491971503943205, "learning_rate": 2.0496874592171966e-06, "loss": 0.0, "step": 20777 }, { "epoch": 19.978846153846153, "grad_norm": 0.13983489573001862, "learning_rate": 2.048931933653887e-06, "loss": 0.0003, "step": 20778 }, { "epoch": 19.97980769230769, "grad_norm": 0.00247822399251163, "learning_rate": 2.048176531467816e-06, "loss": 0.0, "step": 20779 }, { "epoch": 19.98076923076923, "grad_norm": 0.014740925282239914, "learning_rate": 2.047421252670706e-06, "loss": 0.0001, "step": 20780 }, { "epoch": 19.98173076923077, "grad_norm": 0.010194764472544193, "learning_rate": 2.0466660972742803e-06, "loss": 0.0, "step": 20781 }, { "epoch": 19.982692307692307, "grad_norm": 0.004035592544823885, "learning_rate": 2.0459110652902515e-06, "loss": 0.0, "step": 20782 }, { "epoch": 19.983653846153846, "grad_norm": 0.03155667707324028, "learning_rate": 2.0451561567303378e-06, "loss": 0.0001, "step": 20783 }, { "epoch": 19.984615384615385, "grad_norm": 0.4414719045162201, "learning_rate": 2.044401371606256e-06, "loss": 0.0014, "step": 20784 }, { "epoch": 19.985576923076923, "grad_norm": 0.003204776905477047, "learning_rate": 2.043646709929713e-06, "loss": 0.0, "step": 20785 }, { "epoch": 19.986538461538462, "grad_norm": 0.34594714641571045, "learning_rate": 2.0428921717124238e-06, "loss": 0.001, "step": 20786 }, { "epoch": 19.9875, "grad_norm": 0.012708491645753384, "learning_rate": 2.0421377569660915e-06, "loss": 0.0001, "step": 20787 }, { "epoch": 19.98846153846154, "grad_norm": 0.004739735275506973, "learning_rate": 2.0413834657024255e-06, "loss": 0.0, "step": 20788 }, { "epoch": 19.989423076923078, "grad_norm": 0.005425538867712021, "learning_rate": 2.0406292979331333e-06, "loss": 0.0, "step": 20789 }, { "epoch": 19.990384615384617, "grad_norm": 0.003692424623295665, "learning_rate": 2.039875253669911e-06, "loss": 0.0, "step": 20790 }, { "epoch": 19.991346153846155, "grad_norm": 0.00956517830491066, "learning_rate": 2.0391213329244606e-06, "loss": 0.0, "step": 20791 }, { "epoch": 19.99230769230769, "grad_norm": 0.0403611846268177, "learning_rate": 2.0383675357084866e-06, "loss": 0.0002, "step": 20792 }, { "epoch": 19.99326923076923, "grad_norm": 0.01472366414964199, "learning_rate": 2.037613862033677e-06, "loss": 0.0, "step": 20793 }, { "epoch": 19.994230769230768, "grad_norm": 0.0025825570337474346, "learning_rate": 2.0368603119117323e-06, "loss": 0.0, "step": 20794 }, { "epoch": 19.995192307692307, "grad_norm": 0.7445372343063354, "learning_rate": 2.0361068853543462e-06, "loss": 0.0015, "step": 20795 }, { "epoch": 19.996153846153845, "grad_norm": 0.002278451807796955, "learning_rate": 2.0353535823732053e-06, "loss": 0.0, "step": 20796 }, { "epoch": 19.997115384615384, "grad_norm": 0.0026628205087035894, "learning_rate": 2.0346004029800027e-06, "loss": 0.0, "step": 20797 }, { "epoch": 19.998076923076923, "grad_norm": 0.003958308137953281, "learning_rate": 2.0338473471864216e-06, "loss": 0.0, "step": 20798 }, { "epoch": 19.99903846153846, "grad_norm": 0.004061559215188026, "learning_rate": 2.0330944150041486e-06, "loss": 0.0, "step": 20799 }, { "epoch": 20.0, "grad_norm": 0.01575739122927189, "learning_rate": 2.032341606444871e-06, "loss": 0.0001, "step": 20800 }, { "epoch": 20.00096153846154, "grad_norm": 0.0006348430761136115, "learning_rate": 2.0315889215202643e-06, "loss": 0.0, "step": 20801 }, { "epoch": 20.001923076923077, "grad_norm": 0.007450965698808432, "learning_rate": 2.03083636024201e-06, "loss": 0.0, "step": 20802 }, { "epoch": 20.002884615384616, "grad_norm": 0.00616405438631773, "learning_rate": 2.0300839226217893e-06, "loss": 0.0001, "step": 20803 }, { "epoch": 20.003846153846155, "grad_norm": 0.0033599548041820526, "learning_rate": 2.029331608671272e-06, "loss": 0.0, "step": 20804 }, { "epoch": 20.004807692307693, "grad_norm": 0.0025951042771339417, "learning_rate": 2.0285794184021344e-06, "loss": 0.0, "step": 20805 }, { "epoch": 20.005769230769232, "grad_norm": 0.0013601582031697035, "learning_rate": 2.0278273518260503e-06, "loss": 0.0, "step": 20806 }, { "epoch": 20.00673076923077, "grad_norm": 0.09638471156358719, "learning_rate": 2.0270754089546864e-06, "loss": 0.0004, "step": 20807 }, { "epoch": 20.00769230769231, "grad_norm": 0.0019910610280930996, "learning_rate": 2.0263235897997133e-06, "loss": 0.0, "step": 20808 }, { "epoch": 20.008653846153845, "grad_norm": 0.0025399718433618546, "learning_rate": 2.025571894372794e-06, "loss": 0.0, "step": 20809 }, { "epoch": 20.009615384615383, "grad_norm": 0.9712848663330078, "learning_rate": 2.0248203226855934e-06, "loss": 0.0045, "step": 20810 }, { "epoch": 20.010576923076922, "grad_norm": 0.001249363413080573, "learning_rate": 2.0240688747497772e-06, "loss": 0.0, "step": 20811 }, { "epoch": 20.01153846153846, "grad_norm": 0.002410456771031022, "learning_rate": 2.023317550577001e-06, "loss": 0.0, "step": 20812 }, { "epoch": 20.0125, "grad_norm": 0.0031459410674870014, "learning_rate": 2.022566350178925e-06, "loss": 0.0, "step": 20813 }, { "epoch": 20.013461538461538, "grad_norm": 0.004410081077367067, "learning_rate": 2.021815273567208e-06, "loss": 0.0, "step": 20814 }, { "epoch": 20.014423076923077, "grad_norm": 0.0023375716991722584, "learning_rate": 2.0210643207534995e-06, "loss": 0.0, "step": 20815 }, { "epoch": 20.015384615384615, "grad_norm": 0.0033839966636151075, "learning_rate": 2.020313491749456e-06, "loss": 0.0, "step": 20816 }, { "epoch": 20.016346153846154, "grad_norm": 0.003968417178839445, "learning_rate": 2.019562786566729e-06, "loss": 0.0, "step": 20817 }, { "epoch": 20.017307692307693, "grad_norm": 0.01165248453617096, "learning_rate": 2.018812205216962e-06, "loss": 0.0001, "step": 20818 }, { "epoch": 20.01826923076923, "grad_norm": 0.0025973410811275244, "learning_rate": 2.018061747711808e-06, "loss": 0.0, "step": 20819 }, { "epoch": 20.01923076923077, "grad_norm": 0.00274828914552927, "learning_rate": 2.017311414062907e-06, "loss": 0.0, "step": 20820 }, { "epoch": 20.02019230769231, "grad_norm": 0.0016074024606496096, "learning_rate": 2.016561204281903e-06, "loss": 0.0, "step": 20821 }, { "epoch": 20.021153846153847, "grad_norm": 0.0045113107189536095, "learning_rate": 2.0158111183804407e-06, "loss": 0.0, "step": 20822 }, { "epoch": 20.022115384615386, "grad_norm": 0.002432696521282196, "learning_rate": 2.0150611563701548e-06, "loss": 0.0, "step": 20823 }, { "epoch": 20.023076923076925, "grad_norm": 0.003556134644895792, "learning_rate": 2.014311318262684e-06, "loss": 0.0, "step": 20824 }, { "epoch": 20.02403846153846, "grad_norm": 0.00381374079734087, "learning_rate": 2.013561604069666e-06, "loss": 0.0, "step": 20825 }, { "epoch": 20.025, "grad_norm": 0.0034338103141635656, "learning_rate": 2.0128120138027296e-06, "loss": 0.0, "step": 20826 }, { "epoch": 20.025961538461537, "grad_norm": 0.004291360732167959, "learning_rate": 2.012062547473508e-06, "loss": 0.0, "step": 20827 }, { "epoch": 20.026923076923076, "grad_norm": 0.003926800098270178, "learning_rate": 2.0113132050936345e-06, "loss": 0.0, "step": 20828 }, { "epoch": 20.027884615384615, "grad_norm": 0.0022315143141895533, "learning_rate": 2.0105639866747306e-06, "loss": 0.0, "step": 20829 }, { "epoch": 20.028846153846153, "grad_norm": 0.0017799545312300324, "learning_rate": 2.009814892228428e-06, "loss": 0.0, "step": 20830 }, { "epoch": 20.029807692307692, "grad_norm": 0.002003601985052228, "learning_rate": 2.0090659217663445e-06, "loss": 0.0, "step": 20831 }, { "epoch": 20.03076923076923, "grad_norm": 0.0024603193160146475, "learning_rate": 2.0083170753001056e-06, "loss": 0.0, "step": 20832 }, { "epoch": 20.03173076923077, "grad_norm": 0.001393251121044159, "learning_rate": 2.0075683528413316e-06, "loss": 0.0, "step": 20833 }, { "epoch": 20.032692307692308, "grad_norm": 0.003993776626884937, "learning_rate": 2.0068197544016375e-06, "loss": 0.0, "step": 20834 }, { "epoch": 20.033653846153847, "grad_norm": 0.02032439038157463, "learning_rate": 2.0060712799926407e-06, "loss": 0.0001, "step": 20835 }, { "epoch": 20.034615384615385, "grad_norm": 0.0008896818617358804, "learning_rate": 2.005322929625959e-06, "loss": 0.0, "step": 20836 }, { "epoch": 20.035576923076924, "grad_norm": 0.002801906783133745, "learning_rate": 2.004574703313198e-06, "loss": 0.0, "step": 20837 }, { "epoch": 20.036538461538463, "grad_norm": 0.00540209049358964, "learning_rate": 2.0038266010659725e-06, "loss": 0.0, "step": 20838 }, { "epoch": 20.0375, "grad_norm": 0.0037458250299096107, "learning_rate": 2.0030786228958908e-06, "loss": 0.0, "step": 20839 }, { "epoch": 20.03846153846154, "grad_norm": 0.0019605227280408144, "learning_rate": 2.0023307688145565e-06, "loss": 0.0, "step": 20840 }, { "epoch": 20.039423076923075, "grad_norm": 0.002306055510416627, "learning_rate": 2.001583038833578e-06, "loss": 0.0, "step": 20841 }, { "epoch": 20.040384615384614, "grad_norm": 0.004988106433302164, "learning_rate": 2.000835432964553e-06, "loss": 0.0, "step": 20842 }, { "epoch": 20.041346153846153, "grad_norm": 0.001877867616713047, "learning_rate": 2.0000879512190852e-06, "loss": 0.0, "step": 20843 }, { "epoch": 20.04230769230769, "grad_norm": 0.003412666032090783, "learning_rate": 1.999340593608775e-06, "loss": 0.0, "step": 20844 }, { "epoch": 20.04326923076923, "grad_norm": 0.009268788620829582, "learning_rate": 1.9985933601452143e-06, "loss": 0.0001, "step": 20845 }, { "epoch": 20.04423076923077, "grad_norm": 0.001092426129616797, "learning_rate": 1.997846250840002e-06, "loss": 0.0, "step": 20846 }, { "epoch": 20.045192307692307, "grad_norm": 0.002155648311600089, "learning_rate": 1.9970992657047315e-06, "loss": 0.0, "step": 20847 }, { "epoch": 20.046153846153846, "grad_norm": 0.0014322944916784763, "learning_rate": 1.9963524047509898e-06, "loss": 0.0, "step": 20848 }, { "epoch": 20.047115384615385, "grad_norm": 0.005931348539888859, "learning_rate": 1.9956056679903678e-06, "loss": 0.0001, "step": 20849 }, { "epoch": 20.048076923076923, "grad_norm": 0.003528825007379055, "learning_rate": 1.994859055434457e-06, "loss": 0.0, "step": 20850 }, { "epoch": 20.049038461538462, "grad_norm": 0.003327704733237624, "learning_rate": 1.994112567094836e-06, "loss": 0.0, "step": 20851 }, { "epoch": 20.05, "grad_norm": 0.005476468242704868, "learning_rate": 1.9933662029830904e-06, "loss": 0.0, "step": 20852 }, { "epoch": 20.05096153846154, "grad_norm": 0.002506427001208067, "learning_rate": 1.9926199631108045e-06, "loss": 0.0, "step": 20853 }, { "epoch": 20.051923076923078, "grad_norm": 0.0018052859231829643, "learning_rate": 1.9918738474895537e-06, "loss": 0.0, "step": 20854 }, { "epoch": 20.052884615384617, "grad_norm": 0.010179605334997177, "learning_rate": 1.991127856130919e-06, "loss": 0.0, "step": 20855 }, { "epoch": 20.053846153846155, "grad_norm": 0.0024191990960389376, "learning_rate": 1.9903819890464727e-06, "loss": 0.0, "step": 20856 }, { "epoch": 20.05480769230769, "grad_norm": 0.006488620303571224, "learning_rate": 1.9896362462477893e-06, "loss": 0.0001, "step": 20857 }, { "epoch": 20.05576923076923, "grad_norm": 0.0014762264909222722, "learning_rate": 1.9888906277464437e-06, "loss": 0.0, "step": 20858 }, { "epoch": 20.056730769230768, "grad_norm": 0.002703560283407569, "learning_rate": 1.9881451335540004e-06, "loss": 0.0, "step": 20859 }, { "epoch": 20.057692307692307, "grad_norm": 0.002028465736657381, "learning_rate": 1.987399763682031e-06, "loss": 0.0, "step": 20860 }, { "epoch": 20.058653846153845, "grad_norm": 1.5691771507263184, "learning_rate": 1.9866545181421016e-06, "loss": 0.0065, "step": 20861 }, { "epoch": 20.059615384615384, "grad_norm": 0.0051691047847270966, "learning_rate": 1.9859093969457742e-06, "loss": 0.0001, "step": 20862 }, { "epoch": 20.060576923076923, "grad_norm": 0.008453008718788624, "learning_rate": 1.9851644001046112e-06, "loss": 0.0001, "step": 20863 }, { "epoch": 20.06153846153846, "grad_norm": 0.004330722149461508, "learning_rate": 1.984419527630176e-06, "loss": 0.0, "step": 20864 }, { "epoch": 20.0625, "grad_norm": 0.0021715520415455103, "learning_rate": 1.9836747795340215e-06, "loss": 0.0, "step": 20865 }, { "epoch": 20.06346153846154, "grad_norm": 0.00310710072517395, "learning_rate": 1.9829301558277103e-06, "loss": 0.0, "step": 20866 }, { "epoch": 20.064423076923077, "grad_norm": 0.1677682250738144, "learning_rate": 1.982185656522789e-06, "loss": 0.0006, "step": 20867 }, { "epoch": 20.065384615384616, "grad_norm": 0.0009551200782880187, "learning_rate": 1.981441281630816e-06, "loss": 0.0, "step": 20868 }, { "epoch": 20.066346153846155, "grad_norm": 0.0039832391776144505, "learning_rate": 1.980697031163342e-06, "loss": 0.0, "step": 20869 }, { "epoch": 20.067307692307693, "grad_norm": 0.0027608098462224007, "learning_rate": 1.9799529051319112e-06, "loss": 0.0, "step": 20870 }, { "epoch": 20.068269230769232, "grad_norm": 0.007139634806662798, "learning_rate": 1.9792089035480733e-06, "loss": 0.0001, "step": 20871 }, { "epoch": 20.06923076923077, "grad_norm": 0.0010360612068325281, "learning_rate": 1.978465026423374e-06, "loss": 0.0, "step": 20872 }, { "epoch": 20.07019230769231, "grad_norm": 0.001997123472392559, "learning_rate": 1.977721273769353e-06, "loss": 0.0, "step": 20873 }, { "epoch": 20.071153846153845, "grad_norm": 0.019988207146525383, "learning_rate": 1.976977645597552e-06, "loss": 0.0001, "step": 20874 }, { "epoch": 20.072115384615383, "grad_norm": 0.004089141730219126, "learning_rate": 1.976234141919513e-06, "loss": 0.0, "step": 20875 }, { "epoch": 20.073076923076922, "grad_norm": 0.006987735163420439, "learning_rate": 1.975490762746769e-06, "loss": 0.0, "step": 20876 }, { "epoch": 20.07403846153846, "grad_norm": 0.002696036593988538, "learning_rate": 1.9747475080908586e-06, "loss": 0.0, "step": 20877 }, { "epoch": 20.075, "grad_norm": 0.004249183926731348, "learning_rate": 1.9740043779633113e-06, "loss": 0.0, "step": 20878 }, { "epoch": 20.075961538461538, "grad_norm": 0.004156962037086487, "learning_rate": 1.973261372375659e-06, "loss": 0.0, "step": 20879 }, { "epoch": 20.076923076923077, "grad_norm": 0.0020541874691843987, "learning_rate": 1.9725184913394345e-06, "loss": 0.0, "step": 20880 }, { "epoch": 20.077884615384615, "grad_norm": 0.004237902816385031, "learning_rate": 1.9717757348661617e-06, "loss": 0.0, "step": 20881 }, { "epoch": 20.078846153846154, "grad_norm": 0.003553419141098857, "learning_rate": 1.9710331029673657e-06, "loss": 0.0, "step": 20882 }, { "epoch": 20.079807692307693, "grad_norm": 0.0015437493566423655, "learning_rate": 1.970290595654574e-06, "loss": 0.0, "step": 20883 }, { "epoch": 20.08076923076923, "grad_norm": 0.001217844896018505, "learning_rate": 1.969548212939302e-06, "loss": 0.0, "step": 20884 }, { "epoch": 20.08173076923077, "grad_norm": 0.002218714915215969, "learning_rate": 1.968805954833073e-06, "loss": 0.0, "step": 20885 }, { "epoch": 20.08269230769231, "grad_norm": 0.003547387197613716, "learning_rate": 1.968063821347408e-06, "loss": 0.0, "step": 20886 }, { "epoch": 20.083653846153847, "grad_norm": 0.02710043452680111, "learning_rate": 1.967321812493813e-06, "loss": 0.0001, "step": 20887 }, { "epoch": 20.084615384615386, "grad_norm": 0.011017554439604282, "learning_rate": 1.9665799282838128e-06, "loss": 0.0, "step": 20888 }, { "epoch": 20.085576923076925, "grad_norm": 0.019429676234722137, "learning_rate": 1.965838168728912e-06, "loss": 0.0001, "step": 20889 }, { "epoch": 20.08653846153846, "grad_norm": 0.0008947900496423244, "learning_rate": 1.9650965338406227e-06, "loss": 0.0, "step": 20890 }, { "epoch": 20.0875, "grad_norm": 0.003689269069582224, "learning_rate": 1.9643550236304553e-06, "loss": 0.0, "step": 20891 }, { "epoch": 20.088461538461537, "grad_norm": 0.0020937221124768257, "learning_rate": 1.9636136381099125e-06, "loss": 0.0, "step": 20892 }, { "epoch": 20.089423076923076, "grad_norm": 0.004123884253203869, "learning_rate": 1.9628723772905e-06, "loss": 0.0001, "step": 20893 }, { "epoch": 20.090384615384615, "grad_norm": 0.0047261579893529415, "learning_rate": 1.962131241183719e-06, "loss": 0.0, "step": 20894 }, { "epoch": 20.091346153846153, "grad_norm": 0.0021798613015562296, "learning_rate": 1.961390229801069e-06, "loss": 0.0, "step": 20895 }, { "epoch": 20.092307692307692, "grad_norm": 0.005324564874172211, "learning_rate": 1.960649343154052e-06, "loss": 0.0001, "step": 20896 }, { "epoch": 20.09326923076923, "grad_norm": 0.0031815359834581614, "learning_rate": 1.959908581254161e-06, "loss": 0.0, "step": 20897 }, { "epoch": 20.09423076923077, "grad_norm": 0.003311675973236561, "learning_rate": 1.9591679441128896e-06, "loss": 0.0, "step": 20898 }, { "epoch": 20.095192307692308, "grad_norm": 0.001243153354153037, "learning_rate": 1.958427431741735e-06, "loss": 0.0, "step": 20899 }, { "epoch": 20.096153846153847, "grad_norm": 0.005099473055452108, "learning_rate": 1.9576870441521834e-06, "loss": 0.0, "step": 20900 }, { "epoch": 20.097115384615385, "grad_norm": 0.0007238461985252798, "learning_rate": 1.9569467813557242e-06, "loss": 0.0, "step": 20901 }, { "epoch": 20.098076923076924, "grad_norm": 0.005645344499498606, "learning_rate": 1.9562066433638473e-06, "loss": 0.0, "step": 20902 }, { "epoch": 20.099038461538463, "grad_norm": 0.003944669384509325, "learning_rate": 1.9554666301880333e-06, "loss": 0.0, "step": 20903 }, { "epoch": 20.1, "grad_norm": 0.002660387894138694, "learning_rate": 1.954726741839769e-06, "loss": 0.0, "step": 20904 }, { "epoch": 20.10096153846154, "grad_norm": 0.0028146393597126007, "learning_rate": 1.95398697833053e-06, "loss": 0.0, "step": 20905 }, { "epoch": 20.101923076923075, "grad_norm": 0.007746910210698843, "learning_rate": 1.953247339671799e-06, "loss": 0.0, "step": 20906 }, { "epoch": 20.102884615384614, "grad_norm": 0.004051148425787687, "learning_rate": 1.952507825875054e-06, "loss": 0.0, "step": 20907 }, { "epoch": 20.103846153846153, "grad_norm": 0.001875942456535995, "learning_rate": 1.9517684369517664e-06, "loss": 0.0, "step": 20908 }, { "epoch": 20.10480769230769, "grad_norm": 0.0023669616784900427, "learning_rate": 1.951029172913411e-06, "loss": 0.0, "step": 20909 }, { "epoch": 20.10576923076923, "grad_norm": 0.008196595124900341, "learning_rate": 1.9502900337714627e-06, "loss": 0.0001, "step": 20910 }, { "epoch": 20.10673076923077, "grad_norm": 0.005989997182041407, "learning_rate": 1.9495510195373846e-06, "loss": 0.0, "step": 20911 }, { "epoch": 20.107692307692307, "grad_norm": 0.002736950060352683, "learning_rate": 1.948812130222647e-06, "loss": 0.0, "step": 20912 }, { "epoch": 20.108653846153846, "grad_norm": 0.0021815707441419363, "learning_rate": 1.9480733658387175e-06, "loss": 0.0, "step": 20913 }, { "epoch": 20.109615384615385, "grad_norm": 0.0012684004614129663, "learning_rate": 1.9473347263970555e-06, "loss": 0.0, "step": 20914 }, { "epoch": 20.110576923076923, "grad_norm": 0.002168933628126979, "learning_rate": 1.946596211909125e-06, "loss": 0.0, "step": 20915 }, { "epoch": 20.111538461538462, "grad_norm": 0.052685946226119995, "learning_rate": 1.9458578223863846e-06, "loss": 0.0001, "step": 20916 }, { "epoch": 20.1125, "grad_norm": 0.004059316124767065, "learning_rate": 1.9451195578402905e-06, "loss": 0.0, "step": 20917 }, { "epoch": 20.11346153846154, "grad_norm": 0.0022992868907749653, "learning_rate": 1.944381418282303e-06, "loss": 0.0, "step": 20918 }, { "epoch": 20.114423076923078, "grad_norm": 0.005594379268586636, "learning_rate": 1.9436434037238705e-06, "loss": 0.0, "step": 20919 }, { "epoch": 20.115384615384617, "grad_norm": 0.0010796593269333243, "learning_rate": 1.9429055141764475e-06, "loss": 0.0, "step": 20920 }, { "epoch": 20.116346153846155, "grad_norm": 0.0018421895802021027, "learning_rate": 1.9421677496514857e-06, "loss": 0.0, "step": 20921 }, { "epoch": 20.11730769230769, "grad_norm": 0.006522593088448048, "learning_rate": 1.9414301101604284e-06, "loss": 0.0, "step": 20922 }, { "epoch": 20.11826923076923, "grad_norm": 0.00434509664773941, "learning_rate": 1.940692595714725e-06, "loss": 0.0, "step": 20923 }, { "epoch": 20.119230769230768, "grad_norm": 0.007985123433172703, "learning_rate": 1.9399552063258197e-06, "loss": 0.0001, "step": 20924 }, { "epoch": 20.120192307692307, "grad_norm": 0.002068156376481056, "learning_rate": 1.9392179420051526e-06, "loss": 0.0, "step": 20925 }, { "epoch": 20.121153846153845, "grad_norm": 0.002845125738531351, "learning_rate": 1.9384808027641666e-06, "loss": 0.0, "step": 20926 }, { "epoch": 20.122115384615384, "grad_norm": 0.004493166226893663, "learning_rate": 1.937743788614296e-06, "loss": 0.0, "step": 20927 }, { "epoch": 20.123076923076923, "grad_norm": 0.0008234319975599647, "learning_rate": 1.93700689956698e-06, "loss": 0.0, "step": 20928 }, { "epoch": 20.12403846153846, "grad_norm": 0.0021576578728854656, "learning_rate": 1.9362701356336545e-06, "loss": 0.0, "step": 20929 }, { "epoch": 20.125, "grad_norm": 0.0023742064367979765, "learning_rate": 1.9355334968257465e-06, "loss": 0.0, "step": 20930 }, { "epoch": 20.12596153846154, "grad_norm": 0.0016942723887041211, "learning_rate": 1.9347969831546897e-06, "loss": 0.0, "step": 20931 }, { "epoch": 20.126923076923077, "grad_norm": 0.01354204025119543, "learning_rate": 1.9340605946319156e-06, "loss": 0.0001, "step": 20932 }, { "epoch": 20.127884615384616, "grad_norm": 0.006740895099937916, "learning_rate": 1.9333243312688453e-06, "loss": 0.0001, "step": 20933 }, { "epoch": 20.128846153846155, "grad_norm": 0.002992543624714017, "learning_rate": 1.9325881930769065e-06, "loss": 0.0, "step": 20934 }, { "epoch": 20.129807692307693, "grad_norm": 0.002136955736204982, "learning_rate": 1.931852180067524e-06, "loss": 0.0, "step": 20935 }, { "epoch": 20.130769230769232, "grad_norm": 0.003405995201319456, "learning_rate": 1.931116292252112e-06, "loss": 0.0, "step": 20936 }, { "epoch": 20.13173076923077, "grad_norm": 0.007235206663608551, "learning_rate": 1.9303805296420973e-06, "loss": 0.0, "step": 20937 }, { "epoch": 20.13269230769231, "grad_norm": 0.0029494971968233585, "learning_rate": 1.9296448922488895e-06, "loss": 0.0, "step": 20938 }, { "epoch": 20.133653846153845, "grad_norm": 0.003237340599298477, "learning_rate": 1.9289093800839067e-06, "loss": 0.0, "step": 20939 }, { "epoch": 20.134615384615383, "grad_norm": 0.006561793386936188, "learning_rate": 1.928173993158564e-06, "loss": 0.0, "step": 20940 }, { "epoch": 20.135576923076922, "grad_norm": 0.003717597806826234, "learning_rate": 1.927438731484269e-06, "loss": 0.0, "step": 20941 }, { "epoch": 20.13653846153846, "grad_norm": 0.002534877508878708, "learning_rate": 1.926703595072431e-06, "loss": 0.0, "step": 20942 }, { "epoch": 20.1375, "grad_norm": 0.00284944917075336, "learning_rate": 1.9259685839344623e-06, "loss": 0.0, "step": 20943 }, { "epoch": 20.138461538461538, "grad_norm": 0.0035958406515419483, "learning_rate": 1.925233698081761e-06, "loss": 0.0, "step": 20944 }, { "epoch": 20.139423076923077, "grad_norm": 0.0035492030438035727, "learning_rate": 1.924498937525733e-06, "loss": 0.0, "step": 20945 }, { "epoch": 20.140384615384615, "grad_norm": 0.001992409350350499, "learning_rate": 1.923764302277783e-06, "loss": 0.0, "step": 20946 }, { "epoch": 20.141346153846154, "grad_norm": 0.0027142995968461037, "learning_rate": 1.923029792349306e-06, "loss": 0.0, "step": 20947 }, { "epoch": 20.142307692307693, "grad_norm": 0.003884365549311042, "learning_rate": 1.922295407751702e-06, "loss": 0.0, "step": 20948 }, { "epoch": 20.14326923076923, "grad_norm": 0.005415678955614567, "learning_rate": 1.9215611484963635e-06, "loss": 0.0, "step": 20949 }, { "epoch": 20.14423076923077, "grad_norm": 0.004123739432543516, "learning_rate": 1.920827014594686e-06, "loss": 0.0, "step": 20950 }, { "epoch": 20.14519230769231, "grad_norm": 0.004490038380026817, "learning_rate": 1.920093006058065e-06, "loss": 0.0, "step": 20951 }, { "epoch": 20.146153846153847, "grad_norm": 0.0012591874692589045, "learning_rate": 1.9193591228978815e-06, "loss": 0.0, "step": 20952 }, { "epoch": 20.147115384615386, "grad_norm": 0.014917903579771519, "learning_rate": 1.918625365125529e-06, "loss": 0.0001, "step": 20953 }, { "epoch": 20.148076923076925, "grad_norm": 0.0022572576999664307, "learning_rate": 1.917891732752396e-06, "loss": 0.0, "step": 20954 }, { "epoch": 20.14903846153846, "grad_norm": 0.005868460517376661, "learning_rate": 1.917158225789858e-06, "loss": 0.0, "step": 20955 }, { "epoch": 20.15, "grad_norm": 0.004305918235331774, "learning_rate": 1.916424844249303e-06, "loss": 0.0, "step": 20956 }, { "epoch": 20.150961538461537, "grad_norm": 0.010539313778281212, "learning_rate": 1.915691588142111e-06, "loss": 0.0, "step": 20957 }, { "epoch": 20.151923076923076, "grad_norm": 0.009966355748474598, "learning_rate": 1.9149584574796564e-06, "loss": 0.0, "step": 20958 }, { "epoch": 20.152884615384615, "grad_norm": 0.005310840904712677, "learning_rate": 1.9142254522733204e-06, "loss": 0.0, "step": 20959 }, { "epoch": 20.153846153846153, "grad_norm": 0.0017990573542192578, "learning_rate": 1.91349257253447e-06, "loss": 0.0, "step": 20960 }, { "epoch": 20.154807692307692, "grad_norm": 0.016843371093273163, "learning_rate": 1.912759818274482e-06, "loss": 0.0001, "step": 20961 }, { "epoch": 20.15576923076923, "grad_norm": 0.0013798160944133997, "learning_rate": 1.9120271895047282e-06, "loss": 0.0, "step": 20962 }, { "epoch": 20.15673076923077, "grad_norm": 0.004187979269772768, "learning_rate": 1.9112946862365723e-06, "loss": 0.0, "step": 20963 }, { "epoch": 20.157692307692308, "grad_norm": 0.003403828013688326, "learning_rate": 1.910562308481383e-06, "loss": 0.0, "step": 20964 }, { "epoch": 20.158653846153847, "grad_norm": 0.0019493602449074388, "learning_rate": 1.9098300562505266e-06, "loss": 0.0, "step": 20965 }, { "epoch": 20.159615384615385, "grad_norm": 0.002251941943541169, "learning_rate": 1.90909792955536e-06, "loss": 0.0, "step": 20966 }, { "epoch": 20.160576923076924, "grad_norm": 0.0036630136892199516, "learning_rate": 1.9083659284072487e-06, "loss": 0.0, "step": 20967 }, { "epoch": 20.161538461538463, "grad_norm": 0.0028873097617179155, "learning_rate": 1.9076340528175507e-06, "loss": 0.0, "step": 20968 }, { "epoch": 20.1625, "grad_norm": 0.0054868715815246105, "learning_rate": 1.9069023027976196e-06, "loss": 0.0, "step": 20969 }, { "epoch": 20.16346153846154, "grad_norm": 0.0009477390558458865, "learning_rate": 1.9061706783588118e-06, "loss": 0.0, "step": 20970 }, { "epoch": 20.164423076923075, "grad_norm": 1.1220207214355469, "learning_rate": 1.9054391795124815e-06, "loss": 0.003, "step": 20971 }, { "epoch": 20.165384615384614, "grad_norm": 0.0007931681466288865, "learning_rate": 1.9047078062699764e-06, "loss": 0.0, "step": 20972 }, { "epoch": 20.166346153846153, "grad_norm": 0.007391452323645353, "learning_rate": 1.9039765586426496e-06, "loss": 0.0, "step": 20973 }, { "epoch": 20.16730769230769, "grad_norm": 0.003315894864499569, "learning_rate": 1.9032454366418419e-06, "loss": 0.0, "step": 20974 }, { "epoch": 20.16826923076923, "grad_norm": 0.0033540152944624424, "learning_rate": 1.902514440278901e-06, "loss": 0.0, "step": 20975 }, { "epoch": 20.16923076923077, "grad_norm": 0.0026321529876440763, "learning_rate": 1.9017835695651732e-06, "loss": 0.0, "step": 20976 }, { "epoch": 20.170192307692307, "grad_norm": 0.00486708665266633, "learning_rate": 1.9010528245119942e-06, "loss": 0.0, "step": 20977 }, { "epoch": 20.171153846153846, "grad_norm": 0.003338201204314828, "learning_rate": 1.9003222051307046e-06, "loss": 0.0, "step": 20978 }, { "epoch": 20.172115384615385, "grad_norm": 0.001428079791367054, "learning_rate": 1.8995917114326446e-06, "loss": 0.0, "step": 20979 }, { "epoch": 20.173076923076923, "grad_norm": 0.0035289390943944454, "learning_rate": 1.8988613434291448e-06, "loss": 0.0, "step": 20980 }, { "epoch": 20.174038461538462, "grad_norm": 0.004482213873416185, "learning_rate": 1.8981311011315395e-06, "loss": 0.0, "step": 20981 }, { "epoch": 20.175, "grad_norm": 0.0030500711873173714, "learning_rate": 1.8974009845511643e-06, "loss": 0.0, "step": 20982 }, { "epoch": 20.17596153846154, "grad_norm": 0.0013030016561970115, "learning_rate": 1.896670993699342e-06, "loss": 0.0, "step": 20983 }, { "epoch": 20.176923076923078, "grad_norm": 0.0032921277452260256, "learning_rate": 1.895941128587405e-06, "loss": 0.0, "step": 20984 }, { "epoch": 20.177884615384617, "grad_norm": 0.002495956141501665, "learning_rate": 1.8952113892266755e-06, "loss": 0.0, "step": 20985 }, { "epoch": 20.178846153846155, "grad_norm": 0.0016390111995860934, "learning_rate": 1.894481775628476e-06, "loss": 0.0, "step": 20986 }, { "epoch": 20.17980769230769, "grad_norm": 0.006805472541600466, "learning_rate": 1.8937522878041337e-06, "loss": 0.0, "step": 20987 }, { "epoch": 20.18076923076923, "grad_norm": 0.0073165567591786385, "learning_rate": 1.8930229257649613e-06, "loss": 0.0, "step": 20988 }, { "epoch": 20.181730769230768, "grad_norm": 0.006287571974098682, "learning_rate": 1.8922936895222798e-06, "loss": 0.0, "step": 20989 }, { "epoch": 20.182692307692307, "grad_norm": 0.00141626107506454, "learning_rate": 1.891564579087407e-06, "loss": 0.0, "step": 20990 }, { "epoch": 20.183653846153845, "grad_norm": 0.011377532966434956, "learning_rate": 1.8908355944716516e-06, "loss": 0.0, "step": 20991 }, { "epoch": 20.184615384615384, "grad_norm": 0.0016476165037602186, "learning_rate": 1.8901067356863268e-06, "loss": 0.0, "step": 20992 }, { "epoch": 20.185576923076923, "grad_norm": 0.0005337967886589468, "learning_rate": 1.8893780027427466e-06, "loss": 0.0, "step": 20993 }, { "epoch": 20.18653846153846, "grad_norm": 0.004892339464277029, "learning_rate": 1.8886493956522123e-06, "loss": 0.0001, "step": 20994 }, { "epoch": 20.1875, "grad_norm": 0.001513094175606966, "learning_rate": 1.8879209144260358e-06, "loss": 0.0, "step": 20995 }, { "epoch": 20.18846153846154, "grad_norm": 0.0018101815367117524, "learning_rate": 1.8871925590755159e-06, "loss": 0.0, "step": 20996 }, { "epoch": 20.189423076923077, "grad_norm": 0.0020938317757099867, "learning_rate": 1.8864643296119556e-06, "loss": 0.0, "step": 20997 }, { "epoch": 20.190384615384616, "grad_norm": 0.004276281222701073, "learning_rate": 1.8857362260466594e-06, "loss": 0.0, "step": 20998 }, { "epoch": 20.191346153846155, "grad_norm": 0.0021293633617460728, "learning_rate": 1.8850082483909182e-06, "loss": 0.0, "step": 20999 }, { "epoch": 20.192307692307693, "grad_norm": 0.0008641383028589189, "learning_rate": 1.884280396656033e-06, "loss": 0.0, "step": 21000 }, { "epoch": 20.193269230769232, "grad_norm": 0.004494199063628912, "learning_rate": 1.8835526708532981e-06, "loss": 0.0, "step": 21001 }, { "epoch": 20.19423076923077, "grad_norm": 0.004913065116852522, "learning_rate": 1.8828250709940032e-06, "loss": 0.0, "step": 21002 }, { "epoch": 20.19519230769231, "grad_norm": 0.003330957842990756, "learning_rate": 1.8820975970894384e-06, "loss": 0.0, "step": 21003 }, { "epoch": 20.196153846153845, "grad_norm": 0.003933675121515989, "learning_rate": 1.8813702491508956e-06, "loss": 0.0, "step": 21004 }, { "epoch": 20.197115384615383, "grad_norm": 0.003159050829708576, "learning_rate": 1.880643027189657e-06, "loss": 0.0, "step": 21005 }, { "epoch": 20.198076923076922, "grad_norm": 0.0046404143795371056, "learning_rate": 1.8799159312170101e-06, "loss": 0.0, "step": 21006 }, { "epoch": 20.19903846153846, "grad_norm": 0.0013771452941000462, "learning_rate": 1.8791889612442337e-06, "loss": 0.0, "step": 21007 }, { "epoch": 20.2, "grad_norm": 0.004254591651260853, "learning_rate": 1.8784621172826113e-06, "loss": 0.0, "step": 21008 }, { "epoch": 20.200961538461538, "grad_norm": 0.0019249053439125419, "learning_rate": 1.8777353993434221e-06, "loss": 0.0, "step": 21009 }, { "epoch": 20.201923076923077, "grad_norm": 0.003129121381789446, "learning_rate": 1.8770088074379377e-06, "loss": 0.0, "step": 21010 }, { "epoch": 20.202884615384615, "grad_norm": 0.002457976806908846, "learning_rate": 1.8762823415774367e-06, "loss": 0.0, "step": 21011 }, { "epoch": 20.203846153846154, "grad_norm": 0.006744074169546366, "learning_rate": 1.8755560017731933e-06, "loss": 0.0001, "step": 21012 }, { "epoch": 20.204807692307693, "grad_norm": 0.003201346844434738, "learning_rate": 1.8748297880364742e-06, "loss": 0.0, "step": 21013 }, { "epoch": 20.20576923076923, "grad_norm": 0.015966979786753654, "learning_rate": 1.8741037003785478e-06, "loss": 0.0002, "step": 21014 }, { "epoch": 20.20673076923077, "grad_norm": 0.008666220121085644, "learning_rate": 1.8733777388106866e-06, "loss": 0.0, "step": 21015 }, { "epoch": 20.20769230769231, "grad_norm": 0.003064373740926385, "learning_rate": 1.8726519033441493e-06, "loss": 0.0, "step": 21016 }, { "epoch": 20.208653846153847, "grad_norm": 0.0023247520439326763, "learning_rate": 1.8719261939902023e-06, "loss": 0.0, "step": 21017 }, { "epoch": 20.209615384615386, "grad_norm": 0.0021178829483687878, "learning_rate": 1.8712006107601033e-06, "loss": 0.0, "step": 21018 }, { "epoch": 20.210576923076925, "grad_norm": 0.0026163142174482346, "learning_rate": 1.8704751536651132e-06, "loss": 0.0, "step": 21019 }, { "epoch": 20.21153846153846, "grad_norm": 0.0013074526796117425, "learning_rate": 1.8697498227164912e-06, "loss": 0.0, "step": 21020 }, { "epoch": 20.2125, "grad_norm": 0.004377918783575296, "learning_rate": 1.8690246179254877e-06, "loss": 0.0, "step": 21021 }, { "epoch": 20.213461538461537, "grad_norm": 0.006016756407916546, "learning_rate": 1.8682995393033575e-06, "loss": 0.0, "step": 21022 }, { "epoch": 20.214423076923076, "grad_norm": 0.003483664710074663, "learning_rate": 1.8675745868613548e-06, "loss": 0.0, "step": 21023 }, { "epoch": 20.215384615384615, "grad_norm": 0.0038370138499885798, "learning_rate": 1.8668497606107238e-06, "loss": 0.0, "step": 21024 }, { "epoch": 20.216346153846153, "grad_norm": 0.0025130987633019686, "learning_rate": 1.8661250605627146e-06, "loss": 0.0, "step": 21025 }, { "epoch": 20.217307692307692, "grad_norm": 0.008107738569378853, "learning_rate": 1.8654004867285735e-06, "loss": 0.0, "step": 21026 }, { "epoch": 20.21826923076923, "grad_norm": 0.003831712296232581, "learning_rate": 1.8646760391195396e-06, "loss": 0.0, "step": 21027 }, { "epoch": 20.21923076923077, "grad_norm": 0.002755909226834774, "learning_rate": 1.8639517177468592e-06, "loss": 0.0, "step": 21028 }, { "epoch": 20.220192307692308, "grad_norm": 0.0024462654255330563, "learning_rate": 1.8632275226217665e-06, "loss": 0.0, "step": 21029 }, { "epoch": 20.221153846153847, "grad_norm": 0.004186154343187809, "learning_rate": 1.862503453755502e-06, "loss": 0.0, "step": 21030 }, { "epoch": 20.222115384615385, "grad_norm": 0.0023820747155696154, "learning_rate": 1.861779511159303e-06, "loss": 0.0, "step": 21031 }, { "epoch": 20.223076923076924, "grad_norm": 0.0028720882255584, "learning_rate": 1.8610556948443981e-06, "loss": 0.0, "step": 21032 }, { "epoch": 20.224038461538463, "grad_norm": 0.006543031893670559, "learning_rate": 1.8603320048220208e-06, "loss": 0.0001, "step": 21033 }, { "epoch": 20.225, "grad_norm": 0.002369366120547056, "learning_rate": 1.8596084411034043e-06, "loss": 0.0, "step": 21034 }, { "epoch": 20.22596153846154, "grad_norm": 0.0017968449974432588, "learning_rate": 1.8588850036997708e-06, "loss": 0.0, "step": 21035 }, { "epoch": 20.226923076923075, "grad_norm": 0.0015786774456501007, "learning_rate": 1.8581616926223477e-06, "loss": 0.0, "step": 21036 }, { "epoch": 20.227884615384614, "grad_norm": 0.003276652889326215, "learning_rate": 1.8574385078823608e-06, "loss": 0.0, "step": 21037 }, { "epoch": 20.228846153846153, "grad_norm": 0.0013333105016499758, "learning_rate": 1.8567154494910288e-06, "loss": 0.0, "step": 21038 }, { "epoch": 20.22980769230769, "grad_norm": 0.023041091859340668, "learning_rate": 1.855992517459575e-06, "loss": 0.0001, "step": 21039 }, { "epoch": 20.23076923076923, "grad_norm": 0.008699114434421062, "learning_rate": 1.8552697117992124e-06, "loss": 0.0, "step": 21040 }, { "epoch": 20.23173076923077, "grad_norm": 0.0024479564744979143, "learning_rate": 1.854547032521159e-06, "loss": 0.0, "step": 21041 }, { "epoch": 20.232692307692307, "grad_norm": 0.0011443676194176078, "learning_rate": 1.8538244796366322e-06, "loss": 0.0, "step": 21042 }, { "epoch": 20.233653846153846, "grad_norm": 0.0011327697429805994, "learning_rate": 1.8531020531568377e-06, "loss": 0.0, "step": 21043 }, { "epoch": 20.234615384615385, "grad_norm": 0.0036182317417114973, "learning_rate": 1.8523797530929887e-06, "loss": 0.0, "step": 21044 }, { "epoch": 20.235576923076923, "grad_norm": 0.0057405442930758, "learning_rate": 1.8516575794562951e-06, "loss": 0.0, "step": 21045 }, { "epoch": 20.236538461538462, "grad_norm": 0.0012380100088194013, "learning_rate": 1.850935532257959e-06, "loss": 0.0, "step": 21046 }, { "epoch": 20.2375, "grad_norm": 0.001141732675023377, "learning_rate": 1.8502136115091863e-06, "loss": 0.0, "step": 21047 }, { "epoch": 20.23846153846154, "grad_norm": 0.001458885963074863, "learning_rate": 1.8494918172211807e-06, "loss": 0.0, "step": 21048 }, { "epoch": 20.239423076923078, "grad_norm": 0.004916192032396793, "learning_rate": 1.848770149405138e-06, "loss": 0.0, "step": 21049 }, { "epoch": 20.240384615384617, "grad_norm": 0.001656253240071237, "learning_rate": 1.8480486080722626e-06, "loss": 0.0, "step": 21050 }, { "epoch": 20.241346153846155, "grad_norm": 0.001893728389404714, "learning_rate": 1.8473271932337433e-06, "loss": 0.0, "step": 21051 }, { "epoch": 20.24230769230769, "grad_norm": 0.0022913371212780476, "learning_rate": 1.846605904900779e-06, "loss": 0.0, "step": 21052 }, { "epoch": 20.24326923076923, "grad_norm": 0.009836144745349884, "learning_rate": 1.845884743084564e-06, "loss": 0.0, "step": 21053 }, { "epoch": 20.244230769230768, "grad_norm": 0.001946675474755466, "learning_rate": 1.8451637077962824e-06, "loss": 0.0, "step": 21054 }, { "epoch": 20.245192307692307, "grad_norm": 0.0012626376701518893, "learning_rate": 1.844442799047126e-06, "loss": 0.0, "step": 21055 }, { "epoch": 20.246153846153845, "grad_norm": 0.001115292077884078, "learning_rate": 1.8437220168482839e-06, "loss": 0.0, "step": 21056 }, { "epoch": 20.247115384615384, "grad_norm": 0.004311637952923775, "learning_rate": 1.8430013612109355e-06, "loss": 0.0, "step": 21057 }, { "epoch": 20.248076923076923, "grad_norm": 0.0010877113090828061, "learning_rate": 1.8422808321462649e-06, "loss": 0.0, "step": 21058 }, { "epoch": 20.24903846153846, "grad_norm": 0.002080735983327031, "learning_rate": 1.8415604296654556e-06, "loss": 0.0, "step": 21059 }, { "epoch": 20.25, "grad_norm": 0.0035170940682291985, "learning_rate": 1.8408401537796816e-06, "loss": 0.0, "step": 21060 }, { "epoch": 20.25096153846154, "grad_norm": 0.0025145295076072216, "learning_rate": 1.840120004500121e-06, "loss": 0.0, "step": 21061 }, { "epoch": 20.251923076923077, "grad_norm": 0.0029326039366424084, "learning_rate": 1.8393999818379527e-06, "loss": 0.0, "step": 21062 }, { "epoch": 20.252884615384616, "grad_norm": 0.008282387629151344, "learning_rate": 1.8386800858043418e-06, "loss": 0.0001, "step": 21063 }, { "epoch": 20.253846153846155, "grad_norm": 0.002338814316317439, "learning_rate": 1.8379603164104655e-06, "loss": 0.0, "step": 21064 }, { "epoch": 20.254807692307693, "grad_norm": 0.001860001590102911, "learning_rate": 1.8372406736674874e-06, "loss": 0.0, "step": 21065 }, { "epoch": 20.255769230769232, "grad_norm": 0.004711877554655075, "learning_rate": 1.8365211575865772e-06, "loss": 0.0, "step": 21066 }, { "epoch": 20.25673076923077, "grad_norm": 0.0031098774634301662, "learning_rate": 1.835801768178901e-06, "loss": 0.0, "step": 21067 }, { "epoch": 20.25769230769231, "grad_norm": 0.003571120323613286, "learning_rate": 1.8350825054556175e-06, "loss": 0.0, "step": 21068 }, { "epoch": 20.258653846153845, "grad_norm": 0.03915506228804588, "learning_rate": 1.8343633694278895e-06, "loss": 0.0001, "step": 21069 }, { "epoch": 20.259615384615383, "grad_norm": 0.0010822807671502233, "learning_rate": 1.8336443601068787e-06, "loss": 0.0, "step": 21070 }, { "epoch": 20.260576923076922, "grad_norm": 0.010397802107036114, "learning_rate": 1.8329254775037375e-06, "loss": 0.0001, "step": 21071 }, { "epoch": 20.26153846153846, "grad_norm": 0.0028034180868417025, "learning_rate": 1.8322067216296224e-06, "loss": 0.0, "step": 21072 }, { "epoch": 20.2625, "grad_norm": 0.004803511779755354, "learning_rate": 1.8314880924956891e-06, "loss": 0.0, "step": 21073 }, { "epoch": 20.263461538461538, "grad_norm": 0.214482381939888, "learning_rate": 1.830769590113084e-06, "loss": 0.0006, "step": 21074 }, { "epoch": 20.264423076923077, "grad_norm": 0.0017969858599826694, "learning_rate": 1.8300512144929616e-06, "loss": 0.0, "step": 21075 }, { "epoch": 20.265384615384615, "grad_norm": 0.0025870369281619787, "learning_rate": 1.8293329656464632e-06, "loss": 0.0, "step": 21076 }, { "epoch": 20.266346153846154, "grad_norm": 0.0026063870172947645, "learning_rate": 1.8286148435847351e-06, "loss": 0.0, "step": 21077 }, { "epoch": 20.267307692307693, "grad_norm": 0.004347546491771936, "learning_rate": 1.8278968483189252e-06, "loss": 0.0, "step": 21078 }, { "epoch": 20.26826923076923, "grad_norm": 0.002797819906845689, "learning_rate": 1.827178979860169e-06, "loss": 0.0, "step": 21079 }, { "epoch": 20.26923076923077, "grad_norm": 0.030125003308057785, "learning_rate": 1.826461238219609e-06, "loss": 0.0001, "step": 21080 }, { "epoch": 20.27019230769231, "grad_norm": 0.25322389602661133, "learning_rate": 1.8257436234083824e-06, "loss": 0.0007, "step": 21081 }, { "epoch": 20.271153846153847, "grad_norm": 0.0018510085064917803, "learning_rate": 1.825026135437622e-06, "loss": 0.0, "step": 21082 }, { "epoch": 20.272115384615386, "grad_norm": 0.0018224235391244292, "learning_rate": 1.824308774318463e-06, "loss": 0.0, "step": 21083 }, { "epoch": 20.273076923076925, "grad_norm": 0.003172952216118574, "learning_rate": 1.8235915400620386e-06, "loss": 0.0, "step": 21084 }, { "epoch": 20.27403846153846, "grad_norm": 0.0018134297570213675, "learning_rate": 1.8228744326794735e-06, "loss": 0.0, "step": 21085 }, { "epoch": 20.275, "grad_norm": 0.0032806256785988808, "learning_rate": 1.8221574521818996e-06, "loss": 0.0, "step": 21086 }, { "epoch": 20.275961538461537, "grad_norm": 0.007750533055514097, "learning_rate": 1.8214405985804384e-06, "loss": 0.0, "step": 21087 }, { "epoch": 20.276923076923076, "grad_norm": 0.003388024168089032, "learning_rate": 1.8207238718862153e-06, "loss": 0.0, "step": 21088 }, { "epoch": 20.277884615384615, "grad_norm": 0.0032455241307616234, "learning_rate": 1.8200072721103545e-06, "loss": 0.0, "step": 21089 }, { "epoch": 20.278846153846153, "grad_norm": 0.0022429712116718292, "learning_rate": 1.8192907992639707e-06, "loss": 0.0, "step": 21090 }, { "epoch": 20.279807692307692, "grad_norm": 0.003620157716795802, "learning_rate": 1.818574453358183e-06, "loss": 0.0, "step": 21091 }, { "epoch": 20.28076923076923, "grad_norm": 0.003219274338334799, "learning_rate": 1.8178582344041096e-06, "loss": 0.0, "step": 21092 }, { "epoch": 20.28173076923077, "grad_norm": 0.0017038077348843217, "learning_rate": 1.817142142412861e-06, "loss": 0.0, "step": 21093 }, { "epoch": 20.282692307692308, "grad_norm": 0.0023117871023714542, "learning_rate": 1.8164261773955484e-06, "loss": 0.0, "step": 21094 }, { "epoch": 20.283653846153847, "grad_norm": 0.0038323472253978252, "learning_rate": 1.8157103393632869e-06, "loss": 0.0, "step": 21095 }, { "epoch": 20.284615384615385, "grad_norm": 0.004214719403535128, "learning_rate": 1.8149946283271768e-06, "loss": 0.0, "step": 21096 }, { "epoch": 20.285576923076924, "grad_norm": 0.003274802817031741, "learning_rate": 1.8142790442983304e-06, "loss": 0.0, "step": 21097 }, { "epoch": 20.286538461538463, "grad_norm": 0.0036809712182730436, "learning_rate": 1.8135635872878453e-06, "loss": 0.0, "step": 21098 }, { "epoch": 20.2875, "grad_norm": 0.18544524908065796, "learning_rate": 1.8128482573068274e-06, "loss": 0.0005, "step": 21099 }, { "epoch": 20.28846153846154, "grad_norm": 0.0013818779261782765, "learning_rate": 1.8121330543663774e-06, "loss": 0.0, "step": 21100 }, { "epoch": 20.289423076923075, "grad_norm": 0.005022846162319183, "learning_rate": 1.8114179784775886e-06, "loss": 0.0, "step": 21101 }, { "epoch": 20.290384615384614, "grad_norm": 0.002932974835857749, "learning_rate": 1.8107030296515604e-06, "loss": 0.0, "step": 21102 }, { "epoch": 20.291346153846153, "grad_norm": 0.002888489281758666, "learning_rate": 1.809988207899388e-06, "loss": 0.0, "step": 21103 }, { "epoch": 20.29230769230769, "grad_norm": 0.0023006387054920197, "learning_rate": 1.8092735132321581e-06, "loss": 0.0, "step": 21104 }, { "epoch": 20.29326923076923, "grad_norm": 0.004970506299287081, "learning_rate": 1.8085589456609641e-06, "loss": 0.0, "step": 21105 }, { "epoch": 20.29423076923077, "grad_norm": 0.002014801139011979, "learning_rate": 1.8078445051968963e-06, "loss": 0.0, "step": 21106 }, { "epoch": 20.295192307692307, "grad_norm": 0.0013001712504774332, "learning_rate": 1.8071301918510364e-06, "loss": 0.0, "step": 21107 }, { "epoch": 20.296153846153846, "grad_norm": 0.00257793627679348, "learning_rate": 1.8064160056344714e-06, "loss": 0.0, "step": 21108 }, { "epoch": 20.297115384615385, "grad_norm": 0.008155369199812412, "learning_rate": 1.8057019465582803e-06, "loss": 0.0001, "step": 21109 }, { "epoch": 20.298076923076923, "grad_norm": 0.0028626194689422846, "learning_rate": 1.8049880146335453e-06, "loss": 0.0, "step": 21110 }, { "epoch": 20.299038461538462, "grad_norm": 0.001244754297658801, "learning_rate": 1.8042742098713461e-06, "loss": 0.0, "step": 21111 }, { "epoch": 20.3, "grad_norm": 0.00208850740455091, "learning_rate": 1.8035605322827544e-06, "loss": 0.0, "step": 21112 }, { "epoch": 20.30096153846154, "grad_norm": 0.004157882649451494, "learning_rate": 1.8028469818788475e-06, "loss": 0.0, "step": 21113 }, { "epoch": 20.301923076923078, "grad_norm": 0.0012186759850010276, "learning_rate": 1.802133558670699e-06, "loss": 0.0, "step": 21114 }, { "epoch": 20.302884615384617, "grad_norm": 0.005193053744733334, "learning_rate": 1.8014202626693744e-06, "loss": 0.0, "step": 21115 }, { "epoch": 20.303846153846155, "grad_norm": 0.03722977638244629, "learning_rate": 1.800707093885946e-06, "loss": 0.0001, "step": 21116 }, { "epoch": 20.30480769230769, "grad_norm": 0.0634632408618927, "learning_rate": 1.7999940523314808e-06, "loss": 0.0002, "step": 21117 }, { "epoch": 20.30576923076923, "grad_norm": 0.0035298350267112255, "learning_rate": 1.7992811380170383e-06, "loss": 0.0, "step": 21118 }, { "epoch": 20.306730769230768, "grad_norm": 0.00203137774951756, "learning_rate": 1.7985683509536867e-06, "loss": 0.0, "step": 21119 }, { "epoch": 20.307692307692307, "grad_norm": 0.003003632416948676, "learning_rate": 1.7978556911524815e-06, "loss": 0.0, "step": 21120 }, { "epoch": 20.308653846153845, "grad_norm": 0.0020387887489050627, "learning_rate": 1.7971431586244814e-06, "loss": 0.0, "step": 21121 }, { "epoch": 20.309615384615384, "grad_norm": 0.0019113717135041952, "learning_rate": 1.7964307533807478e-06, "loss": 0.0, "step": 21122 }, { "epoch": 20.310576923076923, "grad_norm": 0.0010698726400732994, "learning_rate": 1.7957184754323299e-06, "loss": 0.0, "step": 21123 }, { "epoch": 20.31153846153846, "grad_norm": 0.0031366560142487288, "learning_rate": 1.7950063247902805e-06, "loss": 0.0, "step": 21124 }, { "epoch": 20.3125, "grad_norm": 0.004633049480617046, "learning_rate": 1.7942943014656543e-06, "loss": 0.0001, "step": 21125 }, { "epoch": 20.31346153846154, "grad_norm": 0.0015983103075996041, "learning_rate": 1.793582405469496e-06, "loss": 0.0, "step": 21126 }, { "epoch": 20.314423076923077, "grad_norm": 0.005854408256709576, "learning_rate": 1.7928706368128524e-06, "loss": 0.0, "step": 21127 }, { "epoch": 20.315384615384616, "grad_norm": 0.0017349638510495424, "learning_rate": 1.792158995506771e-06, "loss": 0.0, "step": 21128 }, { "epoch": 20.316346153846155, "grad_norm": 0.001587431994266808, "learning_rate": 1.7914474815622896e-06, "loss": 0.0, "step": 21129 }, { "epoch": 20.317307692307693, "grad_norm": 0.0014245061902329326, "learning_rate": 1.790736094990455e-06, "loss": 0.0, "step": 21130 }, { "epoch": 20.318269230769232, "grad_norm": 0.0009697758941911161, "learning_rate": 1.7900248358022986e-06, "loss": 0.0, "step": 21131 }, { "epoch": 20.31923076923077, "grad_norm": 0.0017061048420146108, "learning_rate": 1.7893137040088616e-06, "loss": 0.0, "step": 21132 }, { "epoch": 20.32019230769231, "grad_norm": 0.00350808328948915, "learning_rate": 1.7886026996211804e-06, "loss": 0.0, "step": 21133 }, { "epoch": 20.321153846153845, "grad_norm": 0.00226572435349226, "learning_rate": 1.7878918226502816e-06, "loss": 0.0, "step": 21134 }, { "epoch": 20.322115384615383, "grad_norm": 0.0032941563986241817, "learning_rate": 1.7871810731072004e-06, "loss": 0.0, "step": 21135 }, { "epoch": 20.323076923076922, "grad_norm": 0.0018211555434390903, "learning_rate": 1.7864704510029674e-06, "loss": 0.0, "step": 21136 }, { "epoch": 20.32403846153846, "grad_norm": 0.002723037265241146, "learning_rate": 1.785759956348604e-06, "loss": 0.0, "step": 21137 }, { "epoch": 20.325, "grad_norm": 0.0016030221013352275, "learning_rate": 1.7850495891551367e-06, "loss": 0.0, "step": 21138 }, { "epoch": 20.325961538461538, "grad_norm": 0.0037532576825469732, "learning_rate": 1.7843393494335925e-06, "loss": 0.0, "step": 21139 }, { "epoch": 20.326923076923077, "grad_norm": 0.0020175373647361994, "learning_rate": 1.783629237194986e-06, "loss": 0.0, "step": 21140 }, { "epoch": 20.327884615384615, "grad_norm": 0.002764965407550335, "learning_rate": 1.7829192524503414e-06, "loss": 0.0, "step": 21141 }, { "epoch": 20.328846153846154, "grad_norm": 0.002327093854546547, "learning_rate": 1.78220939521067e-06, "loss": 0.0, "step": 21142 }, { "epoch": 20.329807692307693, "grad_norm": 0.002734794747084379, "learning_rate": 1.781499665486991e-06, "loss": 0.0, "step": 21143 }, { "epoch": 20.33076923076923, "grad_norm": 0.007381867151707411, "learning_rate": 1.7807900632903187e-06, "loss": 0.0001, "step": 21144 }, { "epoch": 20.33173076923077, "grad_norm": 0.003423533868044615, "learning_rate": 1.7800805886316585e-06, "loss": 0.0, "step": 21145 }, { "epoch": 20.33269230769231, "grad_norm": 0.005631833802908659, "learning_rate": 1.7793712415220233e-06, "loss": 0.0, "step": 21146 }, { "epoch": 20.333653846153847, "grad_norm": 0.003577758790925145, "learning_rate": 1.7786620219724205e-06, "loss": 0.0, "step": 21147 }, { "epoch": 20.334615384615386, "grad_norm": 0.0008861927781254053, "learning_rate": 1.7779529299938513e-06, "loss": 0.0, "step": 21148 }, { "epoch": 20.335576923076925, "grad_norm": 0.004120757803320885, "learning_rate": 1.7772439655973217e-06, "loss": 0.0, "step": 21149 }, { "epoch": 20.33653846153846, "grad_norm": 0.0040535712614655495, "learning_rate": 1.7765351287938348e-06, "loss": 0.0, "step": 21150 }, { "epoch": 20.3375, "grad_norm": 0.0021329394076019526, "learning_rate": 1.7758264195943842e-06, "loss": 0.0, "step": 21151 }, { "epoch": 20.338461538461537, "grad_norm": 0.001708649448119104, "learning_rate": 1.775117838009971e-06, "loss": 0.0, "step": 21152 }, { "epoch": 20.339423076923076, "grad_norm": 0.016359319910407066, "learning_rate": 1.7744093840515907e-06, "loss": 0.0001, "step": 21153 }, { "epoch": 20.340384615384615, "grad_norm": 0.006741569377481937, "learning_rate": 1.7737010577302328e-06, "loss": 0.0, "step": 21154 }, { "epoch": 20.341346153846153, "grad_norm": 0.003861300414428115, "learning_rate": 1.7729928590568923e-06, "loss": 0.0, "step": 21155 }, { "epoch": 20.342307692307692, "grad_norm": 0.002139593241736293, "learning_rate": 1.7722847880425554e-06, "loss": 0.0, "step": 21156 }, { "epoch": 20.34326923076923, "grad_norm": 0.0029151670169085264, "learning_rate": 1.7715768446982097e-06, "loss": 0.0, "step": 21157 }, { "epoch": 20.34423076923077, "grad_norm": 0.004048251546919346, "learning_rate": 1.770869029034844e-06, "loss": 0.0, "step": 21158 }, { "epoch": 20.345192307692308, "grad_norm": 0.002120721386745572, "learning_rate": 1.7701613410634367e-06, "loss": 0.0, "step": 21159 }, { "epoch": 20.346153846153847, "grad_norm": 0.0026692524552345276, "learning_rate": 1.7694537807949707e-06, "loss": 0.0, "step": 21160 }, { "epoch": 20.347115384615385, "grad_norm": 0.002504944335669279, "learning_rate": 1.7687463482404276e-06, "loss": 0.0, "step": 21161 }, { "epoch": 20.348076923076924, "grad_norm": 0.30435511469841003, "learning_rate": 1.7680390434107808e-06, "loss": 0.0005, "step": 21162 }, { "epoch": 20.349038461538463, "grad_norm": 0.004547474440187216, "learning_rate": 1.767331866317008e-06, "loss": 0.0, "step": 21163 }, { "epoch": 20.35, "grad_norm": 0.008678686805069447, "learning_rate": 1.766624816970084e-06, "loss": 0.0, "step": 21164 }, { "epoch": 20.35096153846154, "grad_norm": 0.002866809256374836, "learning_rate": 1.7659178953809752e-06, "loss": 0.0, "step": 21165 }, { "epoch": 20.351923076923075, "grad_norm": 0.0023352515418082476, "learning_rate": 1.7652111015606566e-06, "loss": 0.0, "step": 21166 }, { "epoch": 20.352884615384614, "grad_norm": 0.0037748462054878473, "learning_rate": 1.7645044355200913e-06, "loss": 0.0001, "step": 21167 }, { "epoch": 20.353846153846153, "grad_norm": 0.0015642300713807344, "learning_rate": 1.7637978972702453e-06, "loss": 0.0, "step": 21168 }, { "epoch": 20.35480769230769, "grad_norm": 0.0017888949951156974, "learning_rate": 1.763091486822086e-06, "loss": 0.0, "step": 21169 }, { "epoch": 20.35576923076923, "grad_norm": 0.0016009857645258307, "learning_rate": 1.7623852041865698e-06, "loss": 0.0, "step": 21170 }, { "epoch": 20.35673076923077, "grad_norm": 0.0028950239066034555, "learning_rate": 1.7616790493746572e-06, "loss": 0.0, "step": 21171 }, { "epoch": 20.357692307692307, "grad_norm": 0.0019979930948466063, "learning_rate": 1.7609730223973088e-06, "loss": 0.0, "step": 21172 }, { "epoch": 20.358653846153846, "grad_norm": 0.0014992705546319485, "learning_rate": 1.7602671232654755e-06, "loss": 0.0, "step": 21173 }, { "epoch": 20.359615384615385, "grad_norm": 0.001612178166396916, "learning_rate": 1.7595613519901145e-06, "loss": 0.0, "step": 21174 }, { "epoch": 20.360576923076923, "grad_norm": 0.0013463853392750025, "learning_rate": 1.7588557085821766e-06, "loss": 0.0, "step": 21175 }, { "epoch": 20.361538461538462, "grad_norm": 0.005862654186785221, "learning_rate": 1.758150193052609e-06, "loss": 0.0, "step": 21176 }, { "epoch": 20.3625, "grad_norm": 0.0007596382638439536, "learning_rate": 1.757444805412364e-06, "loss": 0.0, "step": 21177 }, { "epoch": 20.36346153846154, "grad_norm": 0.0011069668689742684, "learning_rate": 1.75673954567238e-06, "loss": 0.0, "step": 21178 }, { "epoch": 20.364423076923078, "grad_norm": 0.00690835528075695, "learning_rate": 1.7560344138436059e-06, "loss": 0.0001, "step": 21179 }, { "epoch": 20.365384615384617, "grad_norm": 0.002431815257295966, "learning_rate": 1.7553294099369834e-06, "loss": 0.0, "step": 21180 }, { "epoch": 20.366346153846155, "grad_norm": 0.002438324736431241, "learning_rate": 1.7546245339634494e-06, "loss": 0.0, "step": 21181 }, { "epoch": 20.36730769230769, "grad_norm": 0.0008082573185674846, "learning_rate": 1.7539197859339418e-06, "loss": 0.0, "step": 21182 }, { "epoch": 20.36826923076923, "grad_norm": 0.0021696367766708136, "learning_rate": 1.7532151658594e-06, "loss": 0.0, "step": 21183 }, { "epoch": 20.369230769230768, "grad_norm": 0.004632409196346998, "learning_rate": 1.7525106737507524e-06, "loss": 0.0001, "step": 21184 }, { "epoch": 20.370192307692307, "grad_norm": 0.016385294497013092, "learning_rate": 1.7518063096189319e-06, "loss": 0.0001, "step": 21185 }, { "epoch": 20.371153846153845, "grad_norm": 0.004071612376719713, "learning_rate": 1.751102073474873e-06, "loss": 0.0, "step": 21186 }, { "epoch": 20.372115384615384, "grad_norm": 0.020794691517949104, "learning_rate": 1.7503979653294966e-06, "loss": 0.0001, "step": 21187 }, { "epoch": 20.373076923076923, "grad_norm": 0.0028147492557764053, "learning_rate": 1.7496939851937345e-06, "loss": 0.0, "step": 21188 }, { "epoch": 20.37403846153846, "grad_norm": 0.0016966271214187145, "learning_rate": 1.7489901330785053e-06, "loss": 0.0, "step": 21189 }, { "epoch": 20.375, "grad_norm": 0.002938871504738927, "learning_rate": 1.7482864089947317e-06, "loss": 0.0, "step": 21190 }, { "epoch": 20.37596153846154, "grad_norm": 0.0027040319982916117, "learning_rate": 1.7475828129533378e-06, "loss": 0.0, "step": 21191 }, { "epoch": 20.376923076923077, "grad_norm": 0.0037079532630741596, "learning_rate": 1.7468793449652355e-06, "loss": 0.0, "step": 21192 }, { "epoch": 20.377884615384616, "grad_norm": 0.0027163843624293804, "learning_rate": 1.7461760050413435e-06, "loss": 0.0, "step": 21193 }, { "epoch": 20.378846153846155, "grad_norm": 0.0017886108253151178, "learning_rate": 1.7454727931925763e-06, "loss": 0.0, "step": 21194 }, { "epoch": 20.379807692307693, "grad_norm": 0.003886117599904537, "learning_rate": 1.7447697094298433e-06, "loss": 0.0, "step": 21195 }, { "epoch": 20.380769230769232, "grad_norm": 0.00282693631015718, "learning_rate": 1.7440667537640554e-06, "loss": 0.0, "step": 21196 }, { "epoch": 20.38173076923077, "grad_norm": 0.0031110483687371016, "learning_rate": 1.743363926206123e-06, "loss": 0.0, "step": 21197 }, { "epoch": 20.38269230769231, "grad_norm": 0.003216783981770277, "learning_rate": 1.7426612267669475e-06, "loss": 0.0, "step": 21198 }, { "epoch": 20.383653846153845, "grad_norm": 0.002125162398442626, "learning_rate": 1.7419586554574364e-06, "loss": 0.0, "step": 21199 }, { "epoch": 20.384615384615383, "grad_norm": 0.0028031780384480953, "learning_rate": 1.741256212288488e-06, "loss": 0.0, "step": 21200 }, { "epoch": 20.385576923076922, "grad_norm": 0.0010386188514530659, "learning_rate": 1.7405538972710044e-06, "loss": 0.0, "step": 21201 }, { "epoch": 20.38653846153846, "grad_norm": 0.007340006530284882, "learning_rate": 1.7398517104158851e-06, "loss": 0.0001, "step": 21202 }, { "epoch": 20.3875, "grad_norm": 0.011248613707721233, "learning_rate": 1.7391496517340212e-06, "loss": 0.0, "step": 21203 }, { "epoch": 20.388461538461538, "grad_norm": 0.0022544984240084887, "learning_rate": 1.7384477212363094e-06, "loss": 0.0, "step": 21204 }, { "epoch": 20.389423076923077, "grad_norm": 0.004861300345510244, "learning_rate": 1.7377459189336442e-06, "loss": 0.0001, "step": 21205 }, { "epoch": 20.390384615384615, "grad_norm": 0.010732139460742474, "learning_rate": 1.7370442448369118e-06, "loss": 0.0, "step": 21206 }, { "epoch": 20.391346153846154, "grad_norm": 0.0014429024886339903, "learning_rate": 1.7363426989569997e-06, "loss": 0.0, "step": 21207 }, { "epoch": 20.392307692307693, "grad_norm": 0.0010477915639057755, "learning_rate": 1.7356412813047985e-06, "loss": 0.0, "step": 21208 }, { "epoch": 20.39326923076923, "grad_norm": 0.002431390108540654, "learning_rate": 1.7349399918911858e-06, "loss": 0.0, "step": 21209 }, { "epoch": 20.39423076923077, "grad_norm": 0.003749649040400982, "learning_rate": 1.7342388307270498e-06, "loss": 0.0, "step": 21210 }, { "epoch": 20.39519230769231, "grad_norm": 0.004209036007523537, "learning_rate": 1.7335377978232648e-06, "loss": 0.0, "step": 21211 }, { "epoch": 20.396153846153847, "grad_norm": 0.0019742760341614485, "learning_rate": 1.7328368931907114e-06, "loss": 0.0, "step": 21212 }, { "epoch": 20.397115384615386, "grad_norm": 0.007055316586047411, "learning_rate": 1.7321361168402684e-06, "loss": 0.0, "step": 21213 }, { "epoch": 20.398076923076925, "grad_norm": 0.002279773121699691, "learning_rate": 1.7314354687828039e-06, "loss": 0.0, "step": 21214 }, { "epoch": 20.39903846153846, "grad_norm": 0.0010304589523002505, "learning_rate": 1.7307349490291937e-06, "loss": 0.0, "step": 21215 }, { "epoch": 20.4, "grad_norm": 0.0030882598366588354, "learning_rate": 1.7300345575903087e-06, "loss": 0.0, "step": 21216 }, { "epoch": 20.400961538461537, "grad_norm": 0.0007024621008895338, "learning_rate": 1.7293342944770142e-06, "loss": 0.0, "step": 21217 }, { "epoch": 20.401923076923076, "grad_norm": 0.003026890568435192, "learning_rate": 1.7286341597001765e-06, "loss": 0.0, "step": 21218 }, { "epoch": 20.402884615384615, "grad_norm": 0.002412448637187481, "learning_rate": 1.7279341532706627e-06, "loss": 0.0, "step": 21219 }, { "epoch": 20.403846153846153, "grad_norm": 0.0027483345475047827, "learning_rate": 1.7272342751993298e-06, "loss": 0.0, "step": 21220 }, { "epoch": 20.404807692307692, "grad_norm": 0.0007664650329388678, "learning_rate": 1.7265345254970445e-06, "loss": 0.0, "step": 21221 }, { "epoch": 20.40576923076923, "grad_norm": 0.004494604654610157, "learning_rate": 1.725834904174657e-06, "loss": 0.0, "step": 21222 }, { "epoch": 20.40673076923077, "grad_norm": 0.0019133242312818766, "learning_rate": 1.7251354112430286e-06, "loss": 0.0, "step": 21223 }, { "epoch": 20.407692307692308, "grad_norm": 0.016050999984145164, "learning_rate": 1.724436046713014e-06, "loss": 0.0001, "step": 21224 }, { "epoch": 20.408653846153847, "grad_norm": 0.0006393049843609333, "learning_rate": 1.723736810595461e-06, "loss": 0.0, "step": 21225 }, { "epoch": 20.409615384615385, "grad_norm": 0.0022204064298421144, "learning_rate": 1.7230377029012223e-06, "loss": 0.0, "step": 21226 }, { "epoch": 20.410576923076924, "grad_norm": 0.004955819342285395, "learning_rate": 1.7223387236411493e-06, "loss": 0.0, "step": 21227 }, { "epoch": 20.411538461538463, "grad_norm": 0.0029407963156700134, "learning_rate": 1.7216398728260807e-06, "loss": 0.0, "step": 21228 }, { "epoch": 20.4125, "grad_norm": 0.0033621562179178, "learning_rate": 1.720941150466865e-06, "loss": 0.0, "step": 21229 }, { "epoch": 20.41346153846154, "grad_norm": 0.002628074260428548, "learning_rate": 1.7202425565743464e-06, "loss": 0.0, "step": 21230 }, { "epoch": 20.414423076923075, "grad_norm": 0.0069170002825558186, "learning_rate": 1.7195440911593607e-06, "loss": 0.0, "step": 21231 }, { "epoch": 20.415384615384614, "grad_norm": 0.0015015235403552651, "learning_rate": 1.7188457542327497e-06, "loss": 0.0, "step": 21232 }, { "epoch": 20.416346153846153, "grad_norm": 0.0012403904693201184, "learning_rate": 1.7181475458053453e-06, "loss": 0.0, "step": 21233 }, { "epoch": 20.41730769230769, "grad_norm": 0.001162349944934249, "learning_rate": 1.7174494658879836e-06, "loss": 0.0, "step": 21234 }, { "epoch": 20.41826923076923, "grad_norm": 0.0022238697856664658, "learning_rate": 1.7167515144915003e-06, "loss": 0.0, "step": 21235 }, { "epoch": 20.41923076923077, "grad_norm": 0.0017942482372745872, "learning_rate": 1.7160536916267201e-06, "loss": 0.0, "step": 21236 }, { "epoch": 20.420192307692307, "grad_norm": 0.0014763345243409276, "learning_rate": 1.715355997304473e-06, "loss": 0.0, "step": 21237 }, { "epoch": 20.421153846153846, "grad_norm": 0.002623405307531357, "learning_rate": 1.7146584315355886e-06, "loss": 0.0, "step": 21238 }, { "epoch": 20.422115384615385, "grad_norm": 0.00031226061400957406, "learning_rate": 1.7139609943308856e-06, "loss": 0.0, "step": 21239 }, { "epoch": 20.423076923076923, "grad_norm": 0.00764519302174449, "learning_rate": 1.7132636857011885e-06, "loss": 0.0, "step": 21240 }, { "epoch": 20.424038461538462, "grad_norm": 0.0012730282032862306, "learning_rate": 1.7125665056573205e-06, "loss": 0.0, "step": 21241 }, { "epoch": 20.425, "grad_norm": 0.0010989714646711946, "learning_rate": 1.7118694542100945e-06, "loss": 0.0, "step": 21242 }, { "epoch": 20.42596153846154, "grad_norm": 0.0014180170837789774, "learning_rate": 1.7111725313703287e-06, "loss": 0.0, "step": 21243 }, { "epoch": 20.426923076923078, "grad_norm": 0.01876344531774521, "learning_rate": 1.7104757371488412e-06, "loss": 0.0, "step": 21244 }, { "epoch": 20.427884615384617, "grad_norm": 0.0023917655926197767, "learning_rate": 1.7097790715564377e-06, "loss": 0.0, "step": 21245 }, { "epoch": 20.428846153846155, "grad_norm": 0.0012686768313869834, "learning_rate": 1.709082534603934e-06, "loss": 0.0, "step": 21246 }, { "epoch": 20.42980769230769, "grad_norm": 0.0028636998031288385, "learning_rate": 1.708386126302133e-06, "loss": 0.0, "step": 21247 }, { "epoch": 20.43076923076923, "grad_norm": 0.002577162580564618, "learning_rate": 1.7076898466618442e-06, "loss": 0.0, "step": 21248 }, { "epoch": 20.431730769230768, "grad_norm": 0.0006116657168604434, "learning_rate": 1.7069936956938737e-06, "loss": 0.0, "step": 21249 }, { "epoch": 20.432692307692307, "grad_norm": 0.007299321703612804, "learning_rate": 1.7062976734090187e-06, "loss": 0.0, "step": 21250 }, { "epoch": 20.433653846153845, "grad_norm": 0.0021401720587164164, "learning_rate": 1.7056017798180824e-06, "loss": 0.0, "step": 21251 }, { "epoch": 20.434615384615384, "grad_norm": 0.0027852763887494802, "learning_rate": 1.7049060149318652e-06, "loss": 0.0, "step": 21252 }, { "epoch": 20.435576923076923, "grad_norm": 0.0014901469694450498, "learning_rate": 1.7042103787611585e-06, "loss": 0.0, "step": 21253 }, { "epoch": 20.43653846153846, "grad_norm": 0.0020293528214097023, "learning_rate": 1.7035148713167583e-06, "loss": 0.0, "step": 21254 }, { "epoch": 20.4375, "grad_norm": 0.002545523690059781, "learning_rate": 1.70281949260946e-06, "loss": 0.0, "step": 21255 }, { "epoch": 20.43846153846154, "grad_norm": 0.0007781580789014697, "learning_rate": 1.7021242426500495e-06, "loss": 0.0, "step": 21256 }, { "epoch": 20.439423076923077, "grad_norm": 0.006147816777229309, "learning_rate": 1.7014291214493184e-06, "loss": 0.0001, "step": 21257 }, { "epoch": 20.440384615384616, "grad_norm": 0.0010545500554144382, "learning_rate": 1.7007341290180491e-06, "loss": 0.0, "step": 21258 }, { "epoch": 20.441346153846155, "grad_norm": 0.0008494547219015658, "learning_rate": 1.7000392653670283e-06, "loss": 0.0, "step": 21259 }, { "epoch": 20.442307692307693, "grad_norm": 0.005890185479074717, "learning_rate": 1.69934453050704e-06, "loss": 0.0, "step": 21260 }, { "epoch": 20.443269230769232, "grad_norm": 0.002513156272470951, "learning_rate": 1.6986499244488607e-06, "loss": 0.0, "step": 21261 }, { "epoch": 20.44423076923077, "grad_norm": 0.0036179781891405582, "learning_rate": 1.69795544720327e-06, "loss": 0.0, "step": 21262 }, { "epoch": 20.44519230769231, "grad_norm": 0.000977467279881239, "learning_rate": 1.6972610987810467e-06, "loss": 0.0, "step": 21263 }, { "epoch": 20.446153846153845, "grad_norm": 0.004437005612999201, "learning_rate": 1.69656687919296e-06, "loss": 0.0, "step": 21264 }, { "epoch": 20.447115384615383, "grad_norm": 0.002235334599390626, "learning_rate": 1.695872788449786e-06, "loss": 0.0, "step": 21265 }, { "epoch": 20.448076923076922, "grad_norm": 0.002481581410393119, "learning_rate": 1.695178826562297e-06, "loss": 0.0, "step": 21266 }, { "epoch": 20.44903846153846, "grad_norm": 0.005893315654247999, "learning_rate": 1.6944849935412544e-06, "loss": 0.0, "step": 21267 }, { "epoch": 20.45, "grad_norm": 0.001504336716607213, "learning_rate": 1.6937912893974317e-06, "loss": 0.0, "step": 21268 }, { "epoch": 20.450961538461538, "grad_norm": 0.0024894620291888714, "learning_rate": 1.6930977141415872e-06, "loss": 0.0, "step": 21269 }, { "epoch": 20.451923076923077, "grad_norm": 0.0005720507469959557, "learning_rate": 1.6924042677844855e-06, "loss": 0.0, "step": 21270 }, { "epoch": 20.452884615384615, "grad_norm": 0.005508446600288153, "learning_rate": 1.6917109503368889e-06, "loss": 0.0, "step": 21271 }, { "epoch": 20.453846153846154, "grad_norm": 0.0020568426698446274, "learning_rate": 1.6910177618095525e-06, "loss": 0.0, "step": 21272 }, { "epoch": 20.454807692307693, "grad_norm": 0.003564188489690423, "learning_rate": 1.6903247022132329e-06, "loss": 0.0, "step": 21273 }, { "epoch": 20.45576923076923, "grad_norm": 0.002887031063437462, "learning_rate": 1.6896317715586886e-06, "loss": 0.0, "step": 21274 }, { "epoch": 20.45673076923077, "grad_norm": 0.0009232631418853998, "learning_rate": 1.688938969856666e-06, "loss": 0.0, "step": 21275 }, { "epoch": 20.45769230769231, "grad_norm": 0.0015114865964278579, "learning_rate": 1.688246297117917e-06, "loss": 0.0, "step": 21276 }, { "epoch": 20.458653846153847, "grad_norm": 0.002377445111051202, "learning_rate": 1.687553753353195e-06, "loss": 0.0, "step": 21277 }, { "epoch": 20.459615384615386, "grad_norm": 0.0008575806859880686, "learning_rate": 1.686861338573238e-06, "loss": 0.0, "step": 21278 }, { "epoch": 20.460576923076925, "grad_norm": 0.0006984166684560478, "learning_rate": 1.6861690527887975e-06, "loss": 0.0, "step": 21279 }, { "epoch": 20.46153846153846, "grad_norm": 0.002662062644958496, "learning_rate": 1.6854768960106093e-06, "loss": 0.0, "step": 21280 }, { "epoch": 20.4625, "grad_norm": 0.004600955173373222, "learning_rate": 1.6847848682494172e-06, "loss": 0.0, "step": 21281 }, { "epoch": 20.463461538461537, "grad_norm": 0.0034761822316795588, "learning_rate": 1.6840929695159613e-06, "loss": 0.0, "step": 21282 }, { "epoch": 20.464423076923076, "grad_norm": 2.55522084236145, "learning_rate": 1.6834011998209732e-06, "loss": 0.0101, "step": 21283 }, { "epoch": 20.465384615384615, "grad_norm": 0.0009606425301171839, "learning_rate": 1.6827095591751908e-06, "loss": 0.0, "step": 21284 }, { "epoch": 20.466346153846153, "grad_norm": 0.002949732355773449, "learning_rate": 1.6820180475893455e-06, "loss": 0.0, "step": 21285 }, { "epoch": 20.467307692307692, "grad_norm": 0.0020869411528110504, "learning_rate": 1.6813266650741666e-06, "loss": 0.0, "step": 21286 }, { "epoch": 20.46826923076923, "grad_norm": 0.0025250716134905815, "learning_rate": 1.6806354116403811e-06, "loss": 0.0, "step": 21287 }, { "epoch": 20.46923076923077, "grad_norm": 0.0034309651236981153, "learning_rate": 1.6799442872987204e-06, "loss": 0.0, "step": 21288 }, { "epoch": 20.470192307692308, "grad_norm": 0.0004989822627976537, "learning_rate": 1.6792532920599025e-06, "loss": 0.0, "step": 21289 }, { "epoch": 20.471153846153847, "grad_norm": 0.0036395173519849777, "learning_rate": 1.6785624259346556e-06, "loss": 0.0, "step": 21290 }, { "epoch": 20.472115384615385, "grad_norm": 0.0016267179744318128, "learning_rate": 1.6778716889336932e-06, "loss": 0.0, "step": 21291 }, { "epoch": 20.473076923076924, "grad_norm": 0.0018418116960674524, "learning_rate": 1.677181081067738e-06, "loss": 0.0, "step": 21292 }, { "epoch": 20.474038461538463, "grad_norm": 0.0010644212597981095, "learning_rate": 1.6764906023475069e-06, "loss": 0.0, "step": 21293 }, { "epoch": 20.475, "grad_norm": 0.002284439280629158, "learning_rate": 1.6758002527837102e-06, "loss": 0.0, "step": 21294 }, { "epoch": 20.47596153846154, "grad_norm": 0.0002990230859722942, "learning_rate": 1.6751100323870627e-06, "loss": 0.0, "step": 21295 }, { "epoch": 20.476923076923075, "grad_norm": 0.002364224288612604, "learning_rate": 1.674419941168277e-06, "loss": 0.0, "step": 21296 }, { "epoch": 20.477884615384614, "grad_norm": 0.0028101655188947916, "learning_rate": 1.6737299791380558e-06, "loss": 0.0, "step": 21297 }, { "epoch": 20.478846153846153, "grad_norm": 0.0026933480985462666, "learning_rate": 1.6730401463071077e-06, "loss": 0.0, "step": 21298 }, { "epoch": 20.47980769230769, "grad_norm": 0.0024165655486285686, "learning_rate": 1.6723504426861391e-06, "loss": 0.0, "step": 21299 }, { "epoch": 20.48076923076923, "grad_norm": 0.0023996587842702866, "learning_rate": 1.6716608682858482e-06, "loss": 0.0, "step": 21300 }, { "epoch": 20.48173076923077, "grad_norm": 0.0028853213880211115, "learning_rate": 1.6709714231169405e-06, "loss": 0.0, "step": 21301 }, { "epoch": 20.482692307692307, "grad_norm": 0.0009857562836259604, "learning_rate": 1.6702821071901076e-06, "loss": 0.0, "step": 21302 }, { "epoch": 20.483653846153846, "grad_norm": 0.0041174269281327724, "learning_rate": 1.669592920516049e-06, "loss": 0.0, "step": 21303 }, { "epoch": 20.484615384615385, "grad_norm": 0.0036145763006061316, "learning_rate": 1.6689038631054621e-06, "loss": 0.0, "step": 21304 }, { "epoch": 20.485576923076923, "grad_norm": 0.0006939966115169227, "learning_rate": 1.6682149349690325e-06, "loss": 0.0, "step": 21305 }, { "epoch": 20.486538461538462, "grad_norm": 0.0029901114758104086, "learning_rate": 1.6675261361174543e-06, "loss": 0.0, "step": 21306 }, { "epoch": 20.4875, "grad_norm": 0.002133790636435151, "learning_rate": 1.6668374665614162e-06, "loss": 0.0, "step": 21307 }, { "epoch": 20.48846153846154, "grad_norm": 0.002194236498326063, "learning_rate": 1.6661489263116014e-06, "loss": 0.0, "step": 21308 }, { "epoch": 20.489423076923078, "grad_norm": 0.0016945678507909179, "learning_rate": 1.6654605153786961e-06, "loss": 0.0, "step": 21309 }, { "epoch": 20.490384615384617, "grad_norm": 0.00295640854164958, "learning_rate": 1.664772233773384e-06, "loss": 0.0, "step": 21310 }, { "epoch": 20.491346153846155, "grad_norm": 0.0015874903183430433, "learning_rate": 1.6640840815063409e-06, "loss": 0.0, "step": 21311 }, { "epoch": 20.49230769230769, "grad_norm": 0.00273619731888175, "learning_rate": 1.663396058588249e-06, "loss": 0.0, "step": 21312 }, { "epoch": 20.49326923076923, "grad_norm": 0.00276742922142148, "learning_rate": 1.6627081650297815e-06, "loss": 0.0, "step": 21313 }, { "epoch": 20.494230769230768, "grad_norm": 0.0069277300499379635, "learning_rate": 1.662020400841613e-06, "loss": 0.0, "step": 21314 }, { "epoch": 20.495192307692307, "grad_norm": 0.039713840931653976, "learning_rate": 1.661332766034418e-06, "loss": 0.0003, "step": 21315 }, { "epoch": 20.496153846153845, "grad_norm": 0.0020762064959853888, "learning_rate": 1.660645260618864e-06, "loss": 0.0, "step": 21316 }, { "epoch": 20.497115384615384, "grad_norm": 0.005242569837719202, "learning_rate": 1.6599578846056185e-06, "loss": 0.0, "step": 21317 }, { "epoch": 20.498076923076923, "grad_norm": 0.0013008950045332313, "learning_rate": 1.6592706380053513e-06, "loss": 0.0, "step": 21318 }, { "epoch": 20.49903846153846, "grad_norm": 0.0026217163540422916, "learning_rate": 1.6585835208287216e-06, "loss": 0.0, "step": 21319 }, { "epoch": 20.5, "grad_norm": 0.005452543497085571, "learning_rate": 1.6578965330863938e-06, "loss": 0.0, "step": 21320 }, { "epoch": 20.50096153846154, "grad_norm": 0.00209391419775784, "learning_rate": 1.657209674789031e-06, "loss": 0.0, "step": 21321 }, { "epoch": 20.501923076923077, "grad_norm": 0.0052092717960476875, "learning_rate": 1.6565229459472866e-06, "loss": 0.0, "step": 21322 }, { "epoch": 20.502884615384616, "grad_norm": 0.004594829864799976, "learning_rate": 1.6558363465718197e-06, "loss": 0.0, "step": 21323 }, { "epoch": 20.503846153846155, "grad_norm": 0.0023638505954295397, "learning_rate": 1.65514987667328e-06, "loss": 0.0, "step": 21324 }, { "epoch": 20.504807692307693, "grad_norm": 0.401480108499527, "learning_rate": 1.654463536262323e-06, "loss": 0.0059, "step": 21325 }, { "epoch": 20.505769230769232, "grad_norm": 0.0015317347133532166, "learning_rate": 1.6537773253496003e-06, "loss": 0.0, "step": 21326 }, { "epoch": 20.50673076923077, "grad_norm": 0.0005105735035613179, "learning_rate": 1.653091243945756e-06, "loss": 0.0, "step": 21327 }, { "epoch": 20.50769230769231, "grad_norm": 0.0027017102111130953, "learning_rate": 1.6524052920614363e-06, "loss": 0.0, "step": 21328 }, { "epoch": 20.508653846153845, "grad_norm": 0.0018539719749242067, "learning_rate": 1.6517194697072903e-06, "loss": 0.0, "step": 21329 }, { "epoch": 20.509615384615383, "grad_norm": 0.005965398158878088, "learning_rate": 1.6510337768939534e-06, "loss": 0.0, "step": 21330 }, { "epoch": 20.510576923076922, "grad_norm": 0.0038368029054254293, "learning_rate": 1.650348213632068e-06, "loss": 0.0, "step": 21331 }, { "epoch": 20.51153846153846, "grad_norm": 0.0015797760570421815, "learning_rate": 1.649662779932275e-06, "loss": 0.0, "step": 21332 }, { "epoch": 20.5125, "grad_norm": 0.0009553413256071508, "learning_rate": 1.6489774758052047e-06, "loss": 0.0, "step": 21333 }, { "epoch": 20.513461538461538, "grad_norm": 0.001612223801203072, "learning_rate": 1.6482923012614948e-06, "loss": 0.0, "step": 21334 }, { "epoch": 20.514423076923077, "grad_norm": 0.0007175652426667511, "learning_rate": 1.6476072563117784e-06, "loss": 0.0, "step": 21335 }, { "epoch": 20.515384615384615, "grad_norm": 0.0006054051336832345, "learning_rate": 1.6469223409666812e-06, "loss": 0.0, "step": 21336 }, { "epoch": 20.516346153846154, "grad_norm": 0.00146236561704427, "learning_rate": 1.6462375552368359e-06, "loss": 0.0, "step": 21337 }, { "epoch": 20.517307692307693, "grad_norm": 0.0011895978823304176, "learning_rate": 1.645552899132863e-06, "loss": 0.0, "step": 21338 }, { "epoch": 20.51826923076923, "grad_norm": 0.0016456465236842632, "learning_rate": 1.6448683726653892e-06, "loss": 0.0, "step": 21339 }, { "epoch": 20.51923076923077, "grad_norm": 0.0011469206074252725, "learning_rate": 1.6441839758450385e-06, "loss": 0.0, "step": 21340 }, { "epoch": 20.52019230769231, "grad_norm": 0.002823077840730548, "learning_rate": 1.6434997086824267e-06, "loss": 0.0, "step": 21341 }, { "epoch": 20.521153846153847, "grad_norm": 0.001537881325930357, "learning_rate": 1.6428155711881722e-06, "loss": 0.0, "step": 21342 }, { "epoch": 20.522115384615386, "grad_norm": 0.0036668903194367886, "learning_rate": 1.6421315633728952e-06, "loss": 0.0, "step": 21343 }, { "epoch": 20.523076923076925, "grad_norm": 0.007761342450976372, "learning_rate": 1.641447685247204e-06, "loss": 0.0001, "step": 21344 }, { "epoch": 20.52403846153846, "grad_norm": 0.0022078705951571465, "learning_rate": 1.6407639368217133e-06, "loss": 0.0, "step": 21345 }, { "epoch": 20.525, "grad_norm": 0.0027201524935662746, "learning_rate": 1.6400803181070336e-06, "loss": 0.0, "step": 21346 }, { "epoch": 20.525961538461537, "grad_norm": 0.011260459199547768, "learning_rate": 1.6393968291137686e-06, "loss": 0.0, "step": 21347 }, { "epoch": 20.526923076923076, "grad_norm": 0.003078130539506674, "learning_rate": 1.6387134698525297e-06, "loss": 0.0, "step": 21348 }, { "epoch": 20.527884615384615, "grad_norm": 0.001514892908744514, "learning_rate": 1.6380302403339155e-06, "loss": 0.0, "step": 21349 }, { "epoch": 20.528846153846153, "grad_norm": 0.0019511443097144365, "learning_rate": 1.6373471405685292e-06, "loss": 0.0, "step": 21350 }, { "epoch": 20.529807692307692, "grad_norm": 0.004809049889445305, "learning_rate": 1.636664170566974e-06, "loss": 0.0, "step": 21351 }, { "epoch": 20.53076923076923, "grad_norm": 0.0008935577934607863, "learning_rate": 1.635981330339842e-06, "loss": 0.0, "step": 21352 }, { "epoch": 20.53173076923077, "grad_norm": 0.0028956118039786816, "learning_rate": 1.6352986198977327e-06, "loss": 0.0, "step": 21353 }, { "epoch": 20.532692307692308, "grad_norm": 0.06808678060770035, "learning_rate": 1.63461603925124e-06, "loss": 0.0002, "step": 21354 }, { "epoch": 20.533653846153847, "grad_norm": 0.007339752744883299, "learning_rate": 1.633933588410952e-06, "loss": 0.0001, "step": 21355 }, { "epoch": 20.534615384615385, "grad_norm": 0.007187849376350641, "learning_rate": 1.6332512673874613e-06, "loss": 0.0, "step": 21356 }, { "epoch": 20.535576923076924, "grad_norm": 0.002956200623884797, "learning_rate": 1.6325690761913571e-06, "loss": 0.0, "step": 21357 }, { "epoch": 20.536538461538463, "grad_norm": 0.0025722840800881386, "learning_rate": 1.63188701483322e-06, "loss": 0.0, "step": 21358 }, { "epoch": 20.5375, "grad_norm": 0.0017792681464925408, "learning_rate": 1.6312050833236382e-06, "loss": 0.0, "step": 21359 }, { "epoch": 20.53846153846154, "grad_norm": 0.005689375568181276, "learning_rate": 1.6305232816731898e-06, "loss": 0.0001, "step": 21360 }, { "epoch": 20.539423076923075, "grad_norm": 0.002350586699321866, "learning_rate": 1.6298416098924552e-06, "loss": 0.0, "step": 21361 }, { "epoch": 20.540384615384614, "grad_norm": 0.0017831064760684967, "learning_rate": 1.629160067992016e-06, "loss": 0.0, "step": 21362 }, { "epoch": 20.541346153846153, "grad_norm": 0.0017492742044851184, "learning_rate": 1.6284786559824417e-06, "loss": 0.0, "step": 21363 }, { "epoch": 20.54230769230769, "grad_norm": 0.0030261280480772257, "learning_rate": 1.627797373874308e-06, "loss": 0.0, "step": 21364 }, { "epoch": 20.54326923076923, "grad_norm": 0.0009869667701423168, "learning_rate": 1.62711622167819e-06, "loss": 0.0, "step": 21365 }, { "epoch": 20.54423076923077, "grad_norm": 0.0022424429189413786, "learning_rate": 1.6264351994046512e-06, "loss": 0.0, "step": 21366 }, { "epoch": 20.545192307692307, "grad_norm": 0.0012085686903446913, "learning_rate": 1.6257543070642624e-06, "loss": 0.0, "step": 21367 }, { "epoch": 20.546153846153846, "grad_norm": 0.002494341228157282, "learning_rate": 1.6250735446675914e-06, "loss": 0.0, "step": 21368 }, { "epoch": 20.547115384615385, "grad_norm": 0.0015049353241920471, "learning_rate": 1.6243929122251955e-06, "loss": 0.0, "step": 21369 }, { "epoch": 20.548076923076923, "grad_norm": 0.001378842513076961, "learning_rate": 1.6237124097476432e-06, "loss": 0.0, "step": 21370 }, { "epoch": 20.549038461538462, "grad_norm": 0.003164303954690695, "learning_rate": 1.6230320372454867e-06, "loss": 0.0, "step": 21371 }, { "epoch": 20.55, "grad_norm": 0.0013525537215173244, "learning_rate": 1.6223517947292866e-06, "loss": 0.0, "step": 21372 }, { "epoch": 20.55096153846154, "grad_norm": 0.0008117431425489485, "learning_rate": 1.6216716822096013e-06, "loss": 0.0, "step": 21373 }, { "epoch": 20.551923076923078, "grad_norm": 0.0030035539530217648, "learning_rate": 1.6209916996969799e-06, "loss": 0.0, "step": 21374 }, { "epoch": 20.552884615384617, "grad_norm": 0.001194861950352788, "learning_rate": 1.620311847201974e-06, "loss": 0.0, "step": 21375 }, { "epoch": 20.553846153846155, "grad_norm": 0.002745675388723612, "learning_rate": 1.6196321247351375e-06, "loss": 0.0, "step": 21376 }, { "epoch": 20.55480769230769, "grad_norm": 0.0010802141623571515, "learning_rate": 1.618952532307011e-06, "loss": 0.0, "step": 21377 }, { "epoch": 20.55576923076923, "grad_norm": 0.004231361206620932, "learning_rate": 1.6182730699281434e-06, "loss": 0.0, "step": 21378 }, { "epoch": 20.556730769230768, "grad_norm": 0.0027231902349740267, "learning_rate": 1.617593737609081e-06, "loss": 0.0, "step": 21379 }, { "epoch": 20.557692307692307, "grad_norm": 0.002386776963248849, "learning_rate": 1.6169145353603588e-06, "loss": 0.0, "step": 21380 }, { "epoch": 20.558653846153845, "grad_norm": 0.005747729912400246, "learning_rate": 1.6162354631925203e-06, "loss": 0.0, "step": 21381 }, { "epoch": 20.559615384615384, "grad_norm": 0.0018192082643508911, "learning_rate": 1.6155565211161007e-06, "loss": 0.0, "step": 21382 }, { "epoch": 20.560576923076923, "grad_norm": 0.001050850609317422, "learning_rate": 1.6148777091416346e-06, "loss": 0.0, "step": 21383 }, { "epoch": 20.56153846153846, "grad_norm": 0.0012056393316015601, "learning_rate": 1.6141990272796593e-06, "loss": 0.0, "step": 21384 }, { "epoch": 20.5625, "grad_norm": 0.0010870021069422364, "learning_rate": 1.6135204755407009e-06, "loss": 0.0, "step": 21385 }, { "epoch": 20.56346153846154, "grad_norm": 0.0032448796555399895, "learning_rate": 1.6128420539352896e-06, "loss": 0.0, "step": 21386 }, { "epoch": 20.564423076923077, "grad_norm": 0.0013951450819149613, "learning_rate": 1.6121637624739573e-06, "loss": 0.0, "step": 21387 }, { "epoch": 20.565384615384616, "grad_norm": 0.025443941354751587, "learning_rate": 1.611485601167222e-06, "loss": 0.0001, "step": 21388 }, { "epoch": 20.566346153846155, "grad_norm": 0.009047528728842735, "learning_rate": 1.610807570025611e-06, "loss": 0.0, "step": 21389 }, { "epoch": 20.567307692307693, "grad_norm": 0.0014105411246418953, "learning_rate": 1.6101296690596457e-06, "loss": 0.0, "step": 21390 }, { "epoch": 20.568269230769232, "grad_norm": 0.0042555429972708225, "learning_rate": 1.6094518982798423e-06, "loss": 0.0, "step": 21391 }, { "epoch": 20.56923076923077, "grad_norm": 0.0030487750191241503, "learning_rate": 1.6087742576967226e-06, "loss": 0.0, "step": 21392 }, { "epoch": 20.57019230769231, "grad_norm": 0.005017859861254692, "learning_rate": 1.6080967473207954e-06, "loss": 0.0, "step": 21393 }, { "epoch": 20.571153846153845, "grad_norm": 0.0015477630076929927, "learning_rate": 1.607419367162577e-06, "loss": 0.0, "step": 21394 }, { "epoch": 20.572115384615383, "grad_norm": 0.0012449098285287619, "learning_rate": 1.6067421172325804e-06, "loss": 0.0, "step": 21395 }, { "epoch": 20.573076923076922, "grad_norm": 0.0023465820122510195, "learning_rate": 1.606064997541309e-06, "loss": 0.0, "step": 21396 }, { "epoch": 20.57403846153846, "grad_norm": 0.002215343527495861, "learning_rate": 1.6053880080992746e-06, "loss": 0.0, "step": 21397 }, { "epoch": 20.575, "grad_norm": 0.0014509957982227206, "learning_rate": 1.6047111489169808e-06, "loss": 0.0, "step": 21398 }, { "epoch": 20.575961538461538, "grad_norm": 0.002483628923073411, "learning_rate": 1.6040344200049297e-06, "loss": 0.0, "step": 21399 }, { "epoch": 20.576923076923077, "grad_norm": 0.0021215653978288174, "learning_rate": 1.6033578213736211e-06, "loss": 0.0, "step": 21400 }, { "epoch": 20.577884615384615, "grad_norm": 0.0020837346091866493, "learning_rate": 1.6026813530335583e-06, "loss": 0.0, "step": 21401 }, { "epoch": 20.578846153846154, "grad_norm": 0.0010081109357997775, "learning_rate": 1.6020050149952326e-06, "loss": 0.0, "step": 21402 }, { "epoch": 20.579807692307693, "grad_norm": 0.005628527142107487, "learning_rate": 1.6013288072691435e-06, "loss": 0.0, "step": 21403 }, { "epoch": 20.58076923076923, "grad_norm": 0.002632343675941229, "learning_rate": 1.6006527298657792e-06, "loss": 0.0, "step": 21404 }, { "epoch": 20.58173076923077, "grad_norm": 0.0031259506940841675, "learning_rate": 1.5999767827956324e-06, "loss": 0.0, "step": 21405 }, { "epoch": 20.58269230769231, "grad_norm": 0.0026961921248584986, "learning_rate": 1.599300966069195e-06, "loss": 0.0, "step": 21406 }, { "epoch": 20.583653846153847, "grad_norm": 0.0022779181599617004, "learning_rate": 1.5986252796969482e-06, "loss": 0.0, "step": 21407 }, { "epoch": 20.584615384615386, "grad_norm": 0.002980510937049985, "learning_rate": 1.5979497236893805e-06, "loss": 0.0, "step": 21408 }, { "epoch": 20.585576923076925, "grad_norm": 0.003994896076619625, "learning_rate": 1.5972742980569745e-06, "loss": 0.0, "step": 21409 }, { "epoch": 20.58653846153846, "grad_norm": 0.003529532114043832, "learning_rate": 1.5965990028102074e-06, "loss": 0.0, "step": 21410 }, { "epoch": 20.5875, "grad_norm": 0.0015883255982771516, "learning_rate": 1.5959238379595598e-06, "loss": 0.0, "step": 21411 }, { "epoch": 20.588461538461537, "grad_norm": 0.00030565843917429447, "learning_rate": 1.5952488035155111e-06, "loss": 0.0, "step": 21412 }, { "epoch": 20.589423076923076, "grad_norm": 0.001036057947203517, "learning_rate": 1.5945738994885318e-06, "loss": 0.0, "step": 21413 }, { "epoch": 20.590384615384615, "grad_norm": 0.0019797191489487886, "learning_rate": 1.5938991258890945e-06, "loss": 0.0, "step": 21414 }, { "epoch": 20.591346153846153, "grad_norm": 0.0007736993720754981, "learning_rate": 1.5932244827276756e-06, "loss": 0.0, "step": 21415 }, { "epoch": 20.592307692307692, "grad_norm": 0.0030106515623629093, "learning_rate": 1.5925499700147352e-06, "loss": 0.0, "step": 21416 }, { "epoch": 20.59326923076923, "grad_norm": 0.0012531541287899017, "learning_rate": 1.5918755877607461e-06, "loss": 0.0, "step": 21417 }, { "epoch": 20.59423076923077, "grad_norm": 0.007578104734420776, "learning_rate": 1.591201335976168e-06, "loss": 0.0, "step": 21418 }, { "epoch": 20.595192307692308, "grad_norm": 0.00190210179425776, "learning_rate": 1.5905272146714668e-06, "loss": 0.0, "step": 21419 }, { "epoch": 20.596153846153847, "grad_norm": 0.0016177984653040767, "learning_rate": 1.589853223857103e-06, "loss": 0.0, "step": 21420 }, { "epoch": 20.597115384615385, "grad_norm": 0.00462685851380229, "learning_rate": 1.5891793635435315e-06, "loss": 0.0, "step": 21421 }, { "epoch": 20.598076923076924, "grad_norm": 0.0013954972382634878, "learning_rate": 1.5885056337412107e-06, "loss": 0.0, "step": 21422 }, { "epoch": 20.599038461538463, "grad_norm": 0.0010309257777407765, "learning_rate": 1.5878320344605968e-06, "loss": 0.0, "step": 21423 }, { "epoch": 20.6, "grad_norm": 0.002129531465470791, "learning_rate": 1.587158565712139e-06, "loss": 0.0, "step": 21424 }, { "epoch": 20.60096153846154, "grad_norm": 0.0010980559745803475, "learning_rate": 1.5864852275062892e-06, "loss": 0.0, "step": 21425 }, { "epoch": 20.601923076923075, "grad_norm": 0.006507406011223793, "learning_rate": 1.5858120198534964e-06, "loss": 0.0001, "step": 21426 }, { "epoch": 20.602884615384614, "grad_norm": 0.004121317993849516, "learning_rate": 1.5851389427642038e-06, "loss": 0.0, "step": 21427 }, { "epoch": 20.603846153846153, "grad_norm": 0.0007202527485787868, "learning_rate": 1.5844659962488607e-06, "loss": 0.0, "step": 21428 }, { "epoch": 20.60480769230769, "grad_norm": 0.002652846509590745, "learning_rate": 1.583793180317903e-06, "loss": 0.0, "step": 21429 }, { "epoch": 20.60576923076923, "grad_norm": 0.041121628135442734, "learning_rate": 1.5831204949817736e-06, "loss": 0.0001, "step": 21430 }, { "epoch": 20.60673076923077, "grad_norm": 0.0012006120523437858, "learning_rate": 1.582447940250913e-06, "loss": 0.0, "step": 21431 }, { "epoch": 20.607692307692307, "grad_norm": 0.000723600503988564, "learning_rate": 1.581775516135754e-06, "loss": 0.0, "step": 21432 }, { "epoch": 20.608653846153846, "grad_norm": 0.0015127321239560843, "learning_rate": 1.5811032226467304e-06, "loss": 0.0, "step": 21433 }, { "epoch": 20.609615384615385, "grad_norm": 0.0018099965527653694, "learning_rate": 1.5804310597942785e-06, "loss": 0.0, "step": 21434 }, { "epoch": 20.610576923076923, "grad_norm": 0.003032778622582555, "learning_rate": 1.579759027588823e-06, "loss": 0.0, "step": 21435 }, { "epoch": 20.611538461538462, "grad_norm": 0.002086587017402053, "learning_rate": 1.5790871260407947e-06, "loss": 0.0, "step": 21436 }, { "epoch": 20.6125, "grad_norm": 0.0012736535863950849, "learning_rate": 1.578415355160622e-06, "loss": 0.0, "step": 21437 }, { "epoch": 20.61346153846154, "grad_norm": 0.00312380725517869, "learning_rate": 1.5777437149587226e-06, "loss": 0.0, "step": 21438 }, { "epoch": 20.614423076923078, "grad_norm": 0.0003994836297351867, "learning_rate": 1.5770722054455234e-06, "loss": 0.0, "step": 21439 }, { "epoch": 20.615384615384617, "grad_norm": 0.0031866852659732103, "learning_rate": 1.5764008266314411e-06, "loss": 0.0, "step": 21440 }, { "epoch": 20.616346153846155, "grad_norm": 0.0007751669036224484, "learning_rate": 1.5757295785268955e-06, "loss": 0.0, "step": 21441 }, { "epoch": 20.61730769230769, "grad_norm": 0.004694754723459482, "learning_rate": 1.5750584611423036e-06, "loss": 0.0, "step": 21442 }, { "epoch": 20.61826923076923, "grad_norm": 0.001955245388671756, "learning_rate": 1.574387474488076e-06, "loss": 0.0, "step": 21443 }, { "epoch": 20.619230769230768, "grad_norm": 0.04682133346796036, "learning_rate": 1.5737166185746244e-06, "loss": 0.0002, "step": 21444 }, { "epoch": 20.620192307692307, "grad_norm": 0.0015927485655993223, "learning_rate": 1.5730458934123637e-06, "loss": 0.0, "step": 21445 }, { "epoch": 20.621153846153845, "grad_norm": 0.009631674736738205, "learning_rate": 1.5723752990116948e-06, "loss": 0.0, "step": 21446 }, { "epoch": 20.622115384615384, "grad_norm": 0.0008565197931602597, "learning_rate": 1.5717048353830266e-06, "loss": 0.0, "step": 21447 }, { "epoch": 20.623076923076923, "grad_norm": 0.0012452423106878996, "learning_rate": 1.5710345025367657e-06, "loss": 0.0, "step": 21448 }, { "epoch": 20.62403846153846, "grad_norm": 0.0006064248736947775, "learning_rate": 1.570364300483307e-06, "loss": 0.0, "step": 21449 }, { "epoch": 20.625, "grad_norm": 0.0033953499514609575, "learning_rate": 1.5696942292330574e-06, "loss": 0.0, "step": 21450 }, { "epoch": 20.62596153846154, "grad_norm": 0.0011379887582734227, "learning_rate": 1.5690242887964068e-06, "loss": 0.0, "step": 21451 }, { "epoch": 20.626923076923077, "grad_norm": 0.0017717380542308092, "learning_rate": 1.5683544791837558e-06, "loss": 0.0, "step": 21452 }, { "epoch": 20.627884615384616, "grad_norm": 0.006192093715071678, "learning_rate": 1.567684800405499e-06, "loss": 0.0, "step": 21453 }, { "epoch": 20.628846153846155, "grad_norm": 0.0018599830800667405, "learning_rate": 1.5670152524720227e-06, "loss": 0.0, "step": 21454 }, { "epoch": 20.629807692307693, "grad_norm": 0.000860510568600148, "learning_rate": 1.56634583539372e-06, "loss": 0.0, "step": 21455 }, { "epoch": 20.630769230769232, "grad_norm": 0.0015081438468769193, "learning_rate": 1.5656765491809788e-06, "loss": 0.0, "step": 21456 }, { "epoch": 20.63173076923077, "grad_norm": 0.0017105351435020566, "learning_rate": 1.5650073938441813e-06, "loss": 0.0, "step": 21457 }, { "epoch": 20.63269230769231, "grad_norm": 0.0005929481121711433, "learning_rate": 1.5643383693937119e-06, "loss": 0.0, "step": 21458 }, { "epoch": 20.633653846153845, "grad_norm": 0.0021263635717332363, "learning_rate": 1.5636694758399563e-06, "loss": 0.0, "step": 21459 }, { "epoch": 20.634615384615383, "grad_norm": 0.0007948594284243882, "learning_rate": 1.5630007131932866e-06, "loss": 0.0, "step": 21460 }, { "epoch": 20.635576923076922, "grad_norm": 0.0014768448891118169, "learning_rate": 1.5623320814640862e-06, "loss": 0.0, "step": 21461 }, { "epoch": 20.63653846153846, "grad_norm": 0.0029772052075713873, "learning_rate": 1.5616635806627256e-06, "loss": 0.0, "step": 21462 }, { "epoch": 20.6375, "grad_norm": 0.0010332848178222775, "learning_rate": 1.5609952107995796e-06, "loss": 0.0, "step": 21463 }, { "epoch": 20.638461538461538, "grad_norm": 0.0033954763785004616, "learning_rate": 1.5603269718850222e-06, "loss": 0.0, "step": 21464 }, { "epoch": 20.639423076923077, "grad_norm": 0.0006730170571245253, "learning_rate": 1.5596588639294186e-06, "loss": 0.0, "step": 21465 }, { "epoch": 20.640384615384615, "grad_norm": 0.001852234359830618, "learning_rate": 1.5589908869431358e-06, "loss": 0.0, "step": 21466 }, { "epoch": 20.641346153846154, "grad_norm": 0.0027945751789957285, "learning_rate": 1.5583230409365435e-06, "loss": 0.0, "step": 21467 }, { "epoch": 20.642307692307693, "grad_norm": 0.002281616674736142, "learning_rate": 1.5576553259199989e-06, "loss": 0.0, "step": 21468 }, { "epoch": 20.64326923076923, "grad_norm": 0.010103474371135235, "learning_rate": 1.5569877419038659e-06, "loss": 0.0, "step": 21469 }, { "epoch": 20.64423076923077, "grad_norm": 0.002428478794172406, "learning_rate": 1.5563202888985062e-06, "loss": 0.0, "step": 21470 }, { "epoch": 20.64519230769231, "grad_norm": 0.3499446213245392, "learning_rate": 1.5556529669142707e-06, "loss": 0.0011, "step": 21471 }, { "epoch": 20.646153846153847, "grad_norm": 0.0068104020319879055, "learning_rate": 1.5549857759615195e-06, "loss": 0.0, "step": 21472 }, { "epoch": 20.647115384615386, "grad_norm": 0.0012056088307872415, "learning_rate": 1.5543187160506013e-06, "loss": 0.0, "step": 21473 }, { "epoch": 20.648076923076925, "grad_norm": 0.0026825026143342257, "learning_rate": 1.5536517871918688e-06, "loss": 0.0, "step": 21474 }, { "epoch": 20.64903846153846, "grad_norm": 0.002653085393831134, "learning_rate": 1.5529849893956727e-06, "loss": 0.0, "step": 21475 }, { "epoch": 20.65, "grad_norm": 0.0015238524647429585, "learning_rate": 1.552318322672356e-06, "loss": 0.0, "step": 21476 }, { "epoch": 20.650961538461537, "grad_norm": 0.0031613516621291637, "learning_rate": 1.5516517870322656e-06, "loss": 0.0, "step": 21477 }, { "epoch": 20.651923076923076, "grad_norm": 0.004561434034258127, "learning_rate": 1.5509853824857456e-06, "loss": 0.0, "step": 21478 }, { "epoch": 20.652884615384615, "grad_norm": 0.01342550665140152, "learning_rate": 1.5503191090431324e-06, "loss": 0.0001, "step": 21479 }, { "epoch": 20.653846153846153, "grad_norm": 0.0008874281775206327, "learning_rate": 1.5496529667147675e-06, "loss": 0.0, "step": 21480 }, { "epoch": 20.654807692307692, "grad_norm": 0.0023742534685879946, "learning_rate": 1.5489869555109893e-06, "loss": 0.0, "step": 21481 }, { "epoch": 20.65576923076923, "grad_norm": 0.0015136189758777618, "learning_rate": 1.5483210754421274e-06, "loss": 0.0, "step": 21482 }, { "epoch": 20.65673076923077, "grad_norm": 0.0018091854872182012, "learning_rate": 1.54765532651852e-06, "loss": 0.0, "step": 21483 }, { "epoch": 20.657692307692308, "grad_norm": 0.0038810954429209232, "learning_rate": 1.5469897087504927e-06, "loss": 0.0, "step": 21484 }, { "epoch": 20.658653846153847, "grad_norm": 0.0012292341561987996, "learning_rate": 1.5463242221483742e-06, "loss": 0.0, "step": 21485 }, { "epoch": 20.659615384615385, "grad_norm": 0.001248903339728713, "learning_rate": 1.545658866722496e-06, "loss": 0.0, "step": 21486 }, { "epoch": 20.660576923076924, "grad_norm": 0.005528457462787628, "learning_rate": 1.5449936424831769e-06, "loss": 0.0, "step": 21487 }, { "epoch": 20.661538461538463, "grad_norm": 0.0018567355582490563, "learning_rate": 1.5443285494407412e-06, "loss": 0.0, "step": 21488 }, { "epoch": 20.6625, "grad_norm": 0.002265139017254114, "learning_rate": 1.5436635876055107e-06, "loss": 0.0, "step": 21489 }, { "epoch": 20.66346153846154, "grad_norm": 0.002450524363666773, "learning_rate": 1.5429987569878014e-06, "loss": 0.0, "step": 21490 }, { "epoch": 20.664423076923075, "grad_norm": 0.0012587652308866382, "learning_rate": 1.5423340575979295e-06, "loss": 0.0, "step": 21491 }, { "epoch": 20.665384615384614, "grad_norm": 0.002169953193515539, "learning_rate": 1.5416694894462125e-06, "loss": 0.0, "step": 21492 }, { "epoch": 20.666346153846153, "grad_norm": 0.007849260233342648, "learning_rate": 1.5410050525429577e-06, "loss": 0.0, "step": 21493 }, { "epoch": 20.66730769230769, "grad_norm": 0.0016938523622229695, "learning_rate": 1.54034074689848e-06, "loss": 0.0, "step": 21494 }, { "epoch": 20.66826923076923, "grad_norm": 0.004479848314076662, "learning_rate": 1.5396765725230822e-06, "loss": 0.0, "step": 21495 }, { "epoch": 20.66923076923077, "grad_norm": 0.004210622049868107, "learning_rate": 1.539012529427074e-06, "loss": 0.0, "step": 21496 }, { "epoch": 20.670192307692307, "grad_norm": 0.000754829787183553, "learning_rate": 1.5383486176207606e-06, "loss": 0.0, "step": 21497 }, { "epoch": 20.671153846153846, "grad_norm": 0.0014294835273176432, "learning_rate": 1.5376848371144404e-06, "loss": 0.0, "step": 21498 }, { "epoch": 20.672115384615385, "grad_norm": 0.0013301068684086204, "learning_rate": 1.537021187918415e-06, "loss": 0.0, "step": 21499 }, { "epoch": 20.673076923076923, "grad_norm": 0.002396018709987402, "learning_rate": 1.536357670042984e-06, "loss": 0.0, "step": 21500 }, { "epoch": 20.674038461538462, "grad_norm": 0.003716756822541356, "learning_rate": 1.535694283498439e-06, "loss": 0.0, "step": 21501 }, { "epoch": 20.675, "grad_norm": 0.0008092269417829812, "learning_rate": 1.5350310282950764e-06, "loss": 0.0, "step": 21502 }, { "epoch": 20.67596153846154, "grad_norm": 0.006759030744433403, "learning_rate": 1.5343679044431902e-06, "loss": 0.0, "step": 21503 }, { "epoch": 20.676923076923078, "grad_norm": 0.002319697057828307, "learning_rate": 1.5337049119530655e-06, "loss": 0.0, "step": 21504 }, { "epoch": 20.677884615384617, "grad_norm": 0.0027657141909003258, "learning_rate": 1.533042050834992e-06, "loss": 0.0, "step": 21505 }, { "epoch": 20.678846153846155, "grad_norm": 0.0019736632239073515, "learning_rate": 1.5323793210992576e-06, "loss": 0.0, "step": 21506 }, { "epoch": 20.67980769230769, "grad_norm": 0.0018791417824104428, "learning_rate": 1.5317167227561425e-06, "loss": 0.0, "step": 21507 }, { "epoch": 20.68076923076923, "grad_norm": 0.0023803377989679575, "learning_rate": 1.5310542558159325e-06, "loss": 0.0, "step": 21508 }, { "epoch": 20.681730769230768, "grad_norm": 0.00244748592376709, "learning_rate": 1.5303919202889006e-06, "loss": 0.0, "step": 21509 }, { "epoch": 20.682692307692307, "grad_norm": 0.0017566891619935632, "learning_rate": 1.5297297161853297e-06, "loss": 0.0, "step": 21510 }, { "epoch": 20.683653846153845, "grad_norm": 0.0019875895231962204, "learning_rate": 1.5290676435154949e-06, "loss": 0.0, "step": 21511 }, { "epoch": 20.684615384615384, "grad_norm": 0.101919986307621, "learning_rate": 1.528405702289666e-06, "loss": 0.0002, "step": 21512 }, { "epoch": 20.685576923076923, "grad_norm": 0.0018513501854613423, "learning_rate": 1.5277438925181175e-06, "loss": 0.0, "step": 21513 }, { "epoch": 20.68653846153846, "grad_norm": 0.0024708083365112543, "learning_rate": 1.5270822142111196e-06, "loss": 0.0, "step": 21514 }, { "epoch": 20.6875, "grad_norm": 0.005735183134675026, "learning_rate": 1.526420667378936e-06, "loss": 0.0, "step": 21515 }, { "epoch": 20.68846153846154, "grad_norm": 0.0021235281601548195, "learning_rate": 1.5257592520318343e-06, "loss": 0.0, "step": 21516 }, { "epoch": 20.689423076923077, "grad_norm": 0.00126928451936692, "learning_rate": 1.5250979681800793e-06, "loss": 0.0, "step": 21517 }, { "epoch": 20.690384615384616, "grad_norm": 0.19736778736114502, "learning_rate": 1.5244368158339285e-06, "loss": 0.0005, "step": 21518 }, { "epoch": 20.691346153846155, "grad_norm": 0.001004573656246066, "learning_rate": 1.5237757950036447e-06, "loss": 0.0, "step": 21519 }, { "epoch": 20.692307692307693, "grad_norm": 0.1961522400379181, "learning_rate": 1.523114905699481e-06, "loss": 0.0006, "step": 21520 }, { "epoch": 20.693269230769232, "grad_norm": 0.0018443684093654156, "learning_rate": 1.5224541479316946e-06, "loss": 0.0, "step": 21521 }, { "epoch": 20.69423076923077, "grad_norm": 0.0025135406758636236, "learning_rate": 1.5217935217105407e-06, "loss": 0.0, "step": 21522 }, { "epoch": 20.69519230769231, "grad_norm": 0.0009975830325856805, "learning_rate": 1.5211330270462666e-06, "loss": 0.0, "step": 21523 }, { "epoch": 20.696153846153845, "grad_norm": 0.0014635635307058692, "learning_rate": 1.520472663949122e-06, "loss": 0.0, "step": 21524 }, { "epoch": 20.697115384615383, "grad_norm": 0.0012611155398190022, "learning_rate": 1.5198124324293562e-06, "loss": 0.0, "step": 21525 }, { "epoch": 20.698076923076922, "grad_norm": 0.0013447650708258152, "learning_rate": 1.5191523324972112e-06, "loss": 0.0, "step": 21526 }, { "epoch": 20.69903846153846, "grad_norm": 0.003147450741380453, "learning_rate": 1.5184923641629302e-06, "loss": 0.0, "step": 21527 }, { "epoch": 20.7, "grad_norm": 0.0025828974321484566, "learning_rate": 1.5178325274367578e-06, "loss": 0.0, "step": 21528 }, { "epoch": 20.700961538461538, "grad_norm": 0.0029475323390215635, "learning_rate": 1.5171728223289261e-06, "loss": 0.0, "step": 21529 }, { "epoch": 20.701923076923077, "grad_norm": 0.0011012525064870715, "learning_rate": 1.5165132488496793e-06, "loss": 0.0, "step": 21530 }, { "epoch": 20.702884615384615, "grad_norm": 0.000994338421151042, "learning_rate": 1.5158538070092454e-06, "loss": 0.0, "step": 21531 }, { "epoch": 20.703846153846154, "grad_norm": 0.002848832868039608, "learning_rate": 1.5151944968178589e-06, "loss": 0.0, "step": 21532 }, { "epoch": 20.704807692307693, "grad_norm": 0.007938404567539692, "learning_rate": 1.5145353182857547e-06, "loss": 0.0, "step": 21533 }, { "epoch": 20.70576923076923, "grad_norm": 0.002776860259473324, "learning_rate": 1.5138762714231548e-06, "loss": 0.0, "step": 21534 }, { "epoch": 20.70673076923077, "grad_norm": 0.0016849818639457226, "learning_rate": 1.5132173562402885e-06, "loss": 0.0, "step": 21535 }, { "epoch": 20.70769230769231, "grad_norm": 0.002110287779942155, "learning_rate": 1.5125585727473835e-06, "loss": 0.0, "step": 21536 }, { "epoch": 20.708653846153847, "grad_norm": 0.0021283733658492565, "learning_rate": 1.511899920954656e-06, "loss": 0.0, "step": 21537 }, { "epoch": 20.709615384615386, "grad_norm": 0.0013152540195733309, "learning_rate": 1.5112414008723297e-06, "loss": 0.0, "step": 21538 }, { "epoch": 20.710576923076925, "grad_norm": 0.006584057118743658, "learning_rate": 1.5105830125106258e-06, "loss": 0.0001, "step": 21539 }, { "epoch": 20.71153846153846, "grad_norm": 0.03207262605428696, "learning_rate": 1.5099247558797547e-06, "loss": 0.0001, "step": 21540 }, { "epoch": 20.7125, "grad_norm": 0.005528372712433338, "learning_rate": 1.509266630989935e-06, "loss": 0.0, "step": 21541 }, { "epoch": 20.713461538461537, "grad_norm": 0.0007619365933351219, "learning_rate": 1.5086086378513754e-06, "loss": 0.0, "step": 21542 }, { "epoch": 20.714423076923076, "grad_norm": 0.010916911996901035, "learning_rate": 1.5079507764742874e-06, "loss": 0.0, "step": 21543 }, { "epoch": 20.715384615384615, "grad_norm": 0.0062073213048279285, "learning_rate": 1.5072930468688819e-06, "loss": 0.0, "step": 21544 }, { "epoch": 20.716346153846153, "grad_norm": 0.0018948569195345044, "learning_rate": 1.5066354490453606e-06, "loss": 0.0, "step": 21545 }, { "epoch": 20.717307692307692, "grad_norm": 1.8682843446731567, "learning_rate": 1.5059779830139287e-06, "loss": 0.0161, "step": 21546 }, { "epoch": 20.71826923076923, "grad_norm": 0.001044564414769411, "learning_rate": 1.5053206487847916e-06, "loss": 0.0, "step": 21547 }, { "epoch": 20.71923076923077, "grad_norm": 0.001158202881924808, "learning_rate": 1.5046634463681442e-06, "loss": 0.0, "step": 21548 }, { "epoch": 20.720192307692308, "grad_norm": 0.0013313207309693098, "learning_rate": 1.5040063757741863e-06, "loss": 0.0, "step": 21549 }, { "epoch": 20.721153846153847, "grad_norm": 0.0021682686638087034, "learning_rate": 1.5033494370131162e-06, "loss": 0.0, "step": 21550 }, { "epoch": 20.722115384615385, "grad_norm": 0.0017387320986017585, "learning_rate": 1.5026926300951239e-06, "loss": 0.0, "step": 21551 }, { "epoch": 20.723076923076924, "grad_norm": 0.002463886048644781, "learning_rate": 1.5020359550304043e-06, "loss": 0.0, "step": 21552 }, { "epoch": 20.724038461538463, "grad_norm": 0.011481964960694313, "learning_rate": 1.5013794118291435e-06, "loss": 0.0, "step": 21553 }, { "epoch": 20.725, "grad_norm": 0.0012802319834008813, "learning_rate": 1.5007230005015317e-06, "loss": 0.0, "step": 21554 }, { "epoch": 20.72596153846154, "grad_norm": 0.5089502930641174, "learning_rate": 1.5000667210577547e-06, "loss": 0.0095, "step": 21555 }, { "epoch": 20.726923076923075, "grad_norm": 0.008186398074030876, "learning_rate": 1.4994105735079933e-06, "loss": 0.0, "step": 21556 }, { "epoch": 20.727884615384614, "grad_norm": 0.0037637162022292614, "learning_rate": 1.4987545578624318e-06, "loss": 0.0, "step": 21557 }, { "epoch": 20.728846153846153, "grad_norm": 0.022617459297180176, "learning_rate": 1.4980986741312509e-06, "loss": 0.0001, "step": 21558 }, { "epoch": 20.72980769230769, "grad_norm": 0.0019625478889793158, "learning_rate": 1.4974429223246223e-06, "loss": 0.0, "step": 21559 }, { "epoch": 20.73076923076923, "grad_norm": 0.010629561729729176, "learning_rate": 1.4967873024527257e-06, "loss": 0.0, "step": 21560 }, { "epoch": 20.73173076923077, "grad_norm": 0.001409948687069118, "learning_rate": 1.4961318145257353e-06, "loss": 0.0, "step": 21561 }, { "epoch": 20.732692307692307, "grad_norm": 0.0030466860625892878, "learning_rate": 1.4954764585538194e-06, "loss": 0.0, "step": 21562 }, { "epoch": 20.733653846153846, "grad_norm": 0.003447960363700986, "learning_rate": 1.4948212345471492e-06, "loss": 0.0, "step": 21563 }, { "epoch": 20.734615384615385, "grad_norm": 0.001705906935967505, "learning_rate": 1.4941661425158893e-06, "loss": 0.0, "step": 21564 }, { "epoch": 20.735576923076923, "grad_norm": 0.0014374525053426623, "learning_rate": 1.4935111824702076e-06, "loss": 0.0, "step": 21565 }, { "epoch": 20.736538461538462, "grad_norm": 0.004290128592401743, "learning_rate": 1.4928563544202668e-06, "loss": 0.0, "step": 21566 }, { "epoch": 20.7375, "grad_norm": 0.0021286483388394117, "learning_rate": 1.4922016583762255e-06, "loss": 0.0, "step": 21567 }, { "epoch": 20.73846153846154, "grad_norm": 0.0017588903428986669, "learning_rate": 1.4915470943482447e-06, "loss": 0.0, "step": 21568 }, { "epoch": 20.739423076923078, "grad_norm": 0.005696927662938833, "learning_rate": 1.4908926623464838e-06, "loss": 0.0, "step": 21569 }, { "epoch": 20.740384615384617, "grad_norm": 0.0014900697860866785, "learning_rate": 1.490238362381091e-06, "loss": 0.0, "step": 21570 }, { "epoch": 20.741346153846155, "grad_norm": 0.001486975234001875, "learning_rate": 1.4895841944622248e-06, "loss": 0.0, "step": 21571 }, { "epoch": 20.74230769230769, "grad_norm": 0.0017084653954952955, "learning_rate": 1.488930158600035e-06, "loss": 0.0, "step": 21572 }, { "epoch": 20.74326923076923, "grad_norm": 0.0009671017178334296, "learning_rate": 1.488276254804668e-06, "loss": 0.0, "step": 21573 }, { "epoch": 20.744230769230768, "grad_norm": 0.0016870758263394237, "learning_rate": 1.487622483086273e-06, "loss": 0.0, "step": 21574 }, { "epoch": 20.745192307692307, "grad_norm": 0.0014693454140797257, "learning_rate": 1.4869688434549923e-06, "loss": 0.0, "step": 21575 }, { "epoch": 20.746153846153845, "grad_norm": 0.0005653685657307506, "learning_rate": 1.4863153359209693e-06, "loss": 0.0, "step": 21576 }, { "epoch": 20.747115384615384, "grad_norm": 0.0834924727678299, "learning_rate": 1.4856619604943478e-06, "loss": 0.0002, "step": 21577 }, { "epoch": 20.748076923076923, "grad_norm": 0.0016124803805723786, "learning_rate": 1.48500871718526e-06, "loss": 0.0, "step": 21578 }, { "epoch": 20.74903846153846, "grad_norm": 0.0026006754487752914, "learning_rate": 1.4843556060038466e-06, "loss": 0.0, "step": 21579 }, { "epoch": 20.75, "grad_norm": 0.005865676328539848, "learning_rate": 1.4837026269602439e-06, "loss": 0.0001, "step": 21580 }, { "epoch": 20.75096153846154, "grad_norm": 0.0022025159560143948, "learning_rate": 1.4830497800645782e-06, "loss": 0.0, "step": 21581 }, { "epoch": 20.751923076923077, "grad_norm": 0.0005335173918865621, "learning_rate": 1.4823970653269825e-06, "loss": 0.0, "step": 21582 }, { "epoch": 20.752884615384616, "grad_norm": 0.002243400551378727, "learning_rate": 1.481744482757589e-06, "loss": 0.0, "step": 21583 }, { "epoch": 20.753846153846155, "grad_norm": 0.001794771058484912, "learning_rate": 1.4810920323665169e-06, "loss": 0.0, "step": 21584 }, { "epoch": 20.754807692307693, "grad_norm": 0.0007547484710812569, "learning_rate": 1.4804397141638972e-06, "loss": 0.0, "step": 21585 }, { "epoch": 20.755769230769232, "grad_norm": 0.004279881715774536, "learning_rate": 1.4797875281598451e-06, "loss": 0.0, "step": 21586 }, { "epoch": 20.75673076923077, "grad_norm": 0.0020947905723005533, "learning_rate": 1.4791354743644847e-06, "loss": 0.0, "step": 21587 }, { "epoch": 20.75769230769231, "grad_norm": 0.004737321753054857, "learning_rate": 1.478483552787936e-06, "loss": 0.0, "step": 21588 }, { "epoch": 20.758653846153845, "grad_norm": 0.0023500726092606783, "learning_rate": 1.4778317634403082e-06, "loss": 0.0, "step": 21589 }, { "epoch": 20.759615384615383, "grad_norm": 0.020795557647943497, "learning_rate": 1.4771801063317215e-06, "loss": 0.0001, "step": 21590 }, { "epoch": 20.760576923076922, "grad_norm": 0.0019948233384639025, "learning_rate": 1.4765285814722863e-06, "loss": 0.0, "step": 21591 }, { "epoch": 20.76153846153846, "grad_norm": 0.0016769616631790996, "learning_rate": 1.4758771888721091e-06, "loss": 0.0, "step": 21592 }, { "epoch": 20.7625, "grad_norm": 0.0009202416986227036, "learning_rate": 1.4752259285412995e-06, "loss": 0.0, "step": 21593 }, { "epoch": 20.763461538461538, "grad_norm": 0.0006408430635929108, "learning_rate": 1.4745748004899674e-06, "loss": 0.0, "step": 21594 }, { "epoch": 20.764423076923077, "grad_norm": 0.0036111767403781414, "learning_rate": 1.473923804728209e-06, "loss": 0.0, "step": 21595 }, { "epoch": 20.765384615384615, "grad_norm": 0.0033404852729290724, "learning_rate": 1.4732729412661294e-06, "loss": 0.0, "step": 21596 }, { "epoch": 20.766346153846154, "grad_norm": 0.4675072729587555, "learning_rate": 1.472622210113831e-06, "loss": 0.0019, "step": 21597 }, { "epoch": 20.767307692307693, "grad_norm": 0.0132560720667243, "learning_rate": 1.4719716112814065e-06, "loss": 0.0001, "step": 21598 }, { "epoch": 20.76826923076923, "grad_norm": 0.0028879425954073668, "learning_rate": 1.4713211447789544e-06, "loss": 0.0, "step": 21599 }, { "epoch": 20.76923076923077, "grad_norm": 0.04073620215058327, "learning_rate": 1.4706708106165645e-06, "loss": 0.0003, "step": 21600 }, { "epoch": 20.77019230769231, "grad_norm": 0.025932610034942627, "learning_rate": 1.4700206088043312e-06, "loss": 0.0, "step": 21601 }, { "epoch": 20.771153846153847, "grad_norm": 0.001709286472760141, "learning_rate": 1.469370539352345e-06, "loss": 0.0, "step": 21602 }, { "epoch": 20.772115384615386, "grad_norm": 0.005950343795120716, "learning_rate": 1.468720602270688e-06, "loss": 0.0, "step": 21603 }, { "epoch": 20.773076923076925, "grad_norm": 0.020184094086289406, "learning_rate": 1.4680707975694486e-06, "loss": 0.0001, "step": 21604 }, { "epoch": 20.77403846153846, "grad_norm": 0.00474517373368144, "learning_rate": 1.4674211252587122e-06, "loss": 0.0, "step": 21605 }, { "epoch": 20.775, "grad_norm": 0.00182066997513175, "learning_rate": 1.4667715853485543e-06, "loss": 0.0, "step": 21606 }, { "epoch": 20.775961538461537, "grad_norm": 0.002311808755621314, "learning_rate": 1.4661221778490564e-06, "loss": 0.0, "step": 21607 }, { "epoch": 20.776923076923076, "grad_norm": 0.011600167490541935, "learning_rate": 1.4654729027702985e-06, "loss": 0.0, "step": 21608 }, { "epoch": 20.777884615384615, "grad_norm": 0.0008941764244809747, "learning_rate": 1.4648237601223502e-06, "loss": 0.0, "step": 21609 }, { "epoch": 20.778846153846153, "grad_norm": 0.060866158455610275, "learning_rate": 1.4641747499152893e-06, "loss": 0.0001, "step": 21610 }, { "epoch": 20.779807692307692, "grad_norm": 0.0016023629577830434, "learning_rate": 1.4635258721591805e-06, "loss": 0.0, "step": 21611 }, { "epoch": 20.78076923076923, "grad_norm": 0.0005560561548918486, "learning_rate": 1.4628771268640973e-06, "loss": 0.0, "step": 21612 }, { "epoch": 20.78173076923077, "grad_norm": 0.0017367205582559109, "learning_rate": 1.462228514040106e-06, "loss": 0.0, "step": 21613 }, { "epoch": 20.782692307692308, "grad_norm": 0.0025238131638616323, "learning_rate": 1.4615800336972686e-06, "loss": 0.0, "step": 21614 }, { "epoch": 20.783653846153847, "grad_norm": 0.0007708853227086365, "learning_rate": 1.460931685845649e-06, "loss": 0.0, "step": 21615 }, { "epoch": 20.784615384615385, "grad_norm": 0.0011167442426085472, "learning_rate": 1.4602834704953105e-06, "loss": 0.0, "step": 21616 }, { "epoch": 20.785576923076924, "grad_norm": 0.0029319634195417166, "learning_rate": 1.4596353876563052e-06, "loss": 0.0, "step": 21617 }, { "epoch": 20.786538461538463, "grad_norm": 0.0014877987559884787, "learning_rate": 1.4589874373386937e-06, "loss": 0.0, "step": 21618 }, { "epoch": 20.7875, "grad_norm": 0.002011343836784363, "learning_rate": 1.4583396195525324e-06, "loss": 0.0, "step": 21619 }, { "epoch": 20.78846153846154, "grad_norm": 0.008109446614980698, "learning_rate": 1.4576919343078677e-06, "loss": 0.0, "step": 21620 }, { "epoch": 20.789423076923075, "grad_norm": 0.006352399010211229, "learning_rate": 1.4570443816147561e-06, "loss": 0.0, "step": 21621 }, { "epoch": 20.790384615384614, "grad_norm": 0.0003977472078986466, "learning_rate": 1.4563969614832395e-06, "loss": 0.0, "step": 21622 }, { "epoch": 20.791346153846153, "grad_norm": 0.0014775078743696213, "learning_rate": 1.4557496739233667e-06, "loss": 0.0, "step": 21623 }, { "epoch": 20.79230769230769, "grad_norm": 0.0005676032742485404, "learning_rate": 1.455102518945184e-06, "loss": 0.0, "step": 21624 }, { "epoch": 20.79326923076923, "grad_norm": 0.0032325079664587975, "learning_rate": 1.45445549655873e-06, "loss": 0.0, "step": 21625 }, { "epoch": 20.79423076923077, "grad_norm": 0.0051580467261374, "learning_rate": 1.4538086067740442e-06, "loss": 0.0, "step": 21626 }, { "epoch": 20.795192307692307, "grad_norm": 0.00520977983251214, "learning_rate": 1.453161849601169e-06, "loss": 0.0, "step": 21627 }, { "epoch": 20.796153846153846, "grad_norm": 0.003641996532678604, "learning_rate": 1.4525152250501362e-06, "loss": 0.0, "step": 21628 }, { "epoch": 20.797115384615385, "grad_norm": 0.0018465860048308969, "learning_rate": 1.4518687331309788e-06, "loss": 0.0, "step": 21629 }, { "epoch": 20.798076923076923, "grad_norm": 0.021342910826206207, "learning_rate": 1.4512223738537335e-06, "loss": 0.0001, "step": 21630 }, { "epoch": 20.799038461538462, "grad_norm": 0.0004200309922453016, "learning_rate": 1.4505761472284241e-06, "loss": 0.0, "step": 21631 }, { "epoch": 20.8, "grad_norm": 0.004039679653942585, "learning_rate": 1.449930053265083e-06, "loss": 0.0, "step": 21632 }, { "epoch": 20.80096153846154, "grad_norm": 0.005602442659437656, "learning_rate": 1.4492840919737305e-06, "loss": 0.0, "step": 21633 }, { "epoch": 20.801923076923078, "grad_norm": 0.0022861328907310963, "learning_rate": 1.4486382633643936e-06, "loss": 0.0, "step": 21634 }, { "epoch": 20.802884615384617, "grad_norm": 0.0014611640945076942, "learning_rate": 1.447992567447094e-06, "loss": 0.0, "step": 21635 }, { "epoch": 20.803846153846155, "grad_norm": 0.0014043956762179732, "learning_rate": 1.4473470042318483e-06, "loss": 0.0, "step": 21636 }, { "epoch": 20.80480769230769, "grad_norm": 0.0022171640302985907, "learning_rate": 1.4467015737286761e-06, "loss": 0.0, "step": 21637 }, { "epoch": 20.80576923076923, "grad_norm": 0.001731773721985519, "learning_rate": 1.4460562759475939e-06, "loss": 0.0, "step": 21638 }, { "epoch": 20.806730769230768, "grad_norm": 0.003017440205439925, "learning_rate": 1.445411110898609e-06, "loss": 0.0, "step": 21639 }, { "epoch": 20.807692307692307, "grad_norm": 0.0008993212832137942, "learning_rate": 1.4447660785917384e-06, "loss": 0.0, "step": 21640 }, { "epoch": 20.808653846153845, "grad_norm": 0.006243428215384483, "learning_rate": 1.4441211790369892e-06, "loss": 0.0, "step": 21641 }, { "epoch": 20.809615384615384, "grad_norm": 0.0005868972511962056, "learning_rate": 1.4434764122443667e-06, "loss": 0.0, "step": 21642 }, { "epoch": 20.810576923076923, "grad_norm": 0.0013983830576762557, "learning_rate": 1.44283177822388e-06, "loss": 0.0, "step": 21643 }, { "epoch": 20.81153846153846, "grad_norm": 0.0025044186040759087, "learning_rate": 1.4421872769855262e-06, "loss": 0.0, "step": 21644 }, { "epoch": 20.8125, "grad_norm": 0.010869316756725311, "learning_rate": 1.4415429085393096e-06, "loss": 0.0, "step": 21645 }, { "epoch": 20.81346153846154, "grad_norm": 0.0027960590086877346, "learning_rate": 1.44089867289523e-06, "loss": 0.0, "step": 21646 }, { "epoch": 20.814423076923077, "grad_norm": 0.0023719591554254293, "learning_rate": 1.4402545700632797e-06, "loss": 0.0, "step": 21647 }, { "epoch": 20.815384615384616, "grad_norm": 0.0010572398314252496, "learning_rate": 1.4396106000534572e-06, "loss": 0.0, "step": 21648 }, { "epoch": 20.816346153846155, "grad_norm": 0.0010555521585047245, "learning_rate": 1.4389667628757552e-06, "loss": 0.0, "step": 21649 }, { "epoch": 20.817307692307693, "grad_norm": 0.001804162166081369, "learning_rate": 1.4383230585401597e-06, "loss": 0.0, "step": 21650 }, { "epoch": 20.818269230769232, "grad_norm": 0.0022831587120890617, "learning_rate": 1.4376794870566636e-06, "loss": 0.0, "step": 21651 }, { "epoch": 20.81923076923077, "grad_norm": 0.0040074605494737625, "learning_rate": 1.437036048435253e-06, "loss": 0.0, "step": 21652 }, { "epoch": 20.82019230769231, "grad_norm": 0.0020950811449438334, "learning_rate": 1.4363927426859091e-06, "loss": 0.0, "step": 21653 }, { "epoch": 20.821153846153845, "grad_norm": 0.0017737285234034061, "learning_rate": 1.4357495698186186e-06, "loss": 0.0, "step": 21654 }, { "epoch": 20.822115384615383, "grad_norm": 0.001538748387247324, "learning_rate": 1.4351065298433552e-06, "loss": 0.0, "step": 21655 }, { "epoch": 20.823076923076922, "grad_norm": 0.002131466520950198, "learning_rate": 1.4344636227701026e-06, "loss": 0.0, "step": 21656 }, { "epoch": 20.82403846153846, "grad_norm": 0.002990943845361471, "learning_rate": 1.4338208486088356e-06, "loss": 0.0, "step": 21657 }, { "epoch": 20.825, "grad_norm": 0.004706876818090677, "learning_rate": 1.4331782073695265e-06, "loss": 0.0, "step": 21658 }, { "epoch": 20.825961538461538, "grad_norm": 0.002819864545017481, "learning_rate": 1.4325356990621486e-06, "loss": 0.0, "step": 21659 }, { "epoch": 20.826923076923077, "grad_norm": 0.0009076736168935895, "learning_rate": 1.4318933236966726e-06, "loss": 0.0, "step": 21660 }, { "epoch": 20.827884615384615, "grad_norm": 0.00853290781378746, "learning_rate": 1.4312510812830639e-06, "loss": 0.0001, "step": 21661 }, { "epoch": 20.828846153846154, "grad_norm": 0.0007519663195125759, "learning_rate": 1.4306089718312889e-06, "loss": 0.0, "step": 21662 }, { "epoch": 20.829807692307693, "grad_norm": 0.0030741761438548565, "learning_rate": 1.429966995351314e-06, "loss": 0.0, "step": 21663 }, { "epoch": 20.83076923076923, "grad_norm": 0.0009407374891452491, "learning_rate": 1.4293251518530959e-06, "loss": 0.0, "step": 21664 }, { "epoch": 20.83173076923077, "grad_norm": 0.0005784992245025933, "learning_rate": 1.4286834413465999e-06, "loss": 0.0, "step": 21665 }, { "epoch": 20.83269230769231, "grad_norm": 0.001647208584472537, "learning_rate": 1.4280418638417782e-06, "loss": 0.0, "step": 21666 }, { "epoch": 20.833653846153847, "grad_norm": 0.004716027993708849, "learning_rate": 1.427400419348588e-06, "loss": 0.0, "step": 21667 }, { "epoch": 20.834615384615386, "grad_norm": 0.0005649374215863645, "learning_rate": 1.426759107876985e-06, "loss": 0.0, "step": 21668 }, { "epoch": 20.835576923076925, "grad_norm": 0.0012678380589932203, "learning_rate": 1.4261179294369164e-06, "loss": 0.0, "step": 21669 }, { "epoch": 20.83653846153846, "grad_norm": 0.030875222757458687, "learning_rate": 1.4254768840383337e-06, "loss": 0.0002, "step": 21670 }, { "epoch": 20.8375, "grad_norm": 0.003711145604029298, "learning_rate": 1.4248359716911863e-06, "loss": 0.0, "step": 21671 }, { "epoch": 20.838461538461537, "grad_norm": 0.0007785349152982235, "learning_rate": 1.4241951924054143e-06, "loss": 0.0, "step": 21672 }, { "epoch": 20.839423076923076, "grad_norm": 0.0015782637055963278, "learning_rate": 1.4235545461909628e-06, "loss": 0.0, "step": 21673 }, { "epoch": 20.840384615384615, "grad_norm": 0.01298171654343605, "learning_rate": 1.4229140330577763e-06, "loss": 0.0001, "step": 21674 }, { "epoch": 20.841346153846153, "grad_norm": 0.03659864515066147, "learning_rate": 1.4222736530157855e-06, "loss": 0.0001, "step": 21675 }, { "epoch": 20.842307692307692, "grad_norm": 0.002316773869097233, "learning_rate": 1.4216334060749359e-06, "loss": 0.0, "step": 21676 }, { "epoch": 20.84326923076923, "grad_norm": 0.0012209973065182567, "learning_rate": 1.4209932922451574e-06, "loss": 0.0, "step": 21677 }, { "epoch": 20.84423076923077, "grad_norm": 0.002244877628982067, "learning_rate": 1.4203533115363844e-06, "loss": 0.0, "step": 21678 }, { "epoch": 20.845192307692308, "grad_norm": 0.0069172922521829605, "learning_rate": 1.4197134639585486e-06, "loss": 0.0, "step": 21679 }, { "epoch": 20.846153846153847, "grad_norm": 0.0014263670891523361, "learning_rate": 1.4190737495215746e-06, "loss": 0.0, "step": 21680 }, { "epoch": 20.847115384615385, "grad_norm": 0.0017406464321538806, "learning_rate": 1.4184341682353942e-06, "loss": 0.0, "step": 21681 }, { "epoch": 20.848076923076924, "grad_norm": 0.01525692455470562, "learning_rate": 1.4177947201099262e-06, "loss": 0.0, "step": 21682 }, { "epoch": 20.849038461538463, "grad_norm": 0.0008965844754129648, "learning_rate": 1.4171554051550963e-06, "loss": 0.0, "step": 21683 }, { "epoch": 20.85, "grad_norm": 0.005758144427090883, "learning_rate": 1.416516223380825e-06, "loss": 0.0, "step": 21684 }, { "epoch": 20.85096153846154, "grad_norm": 0.0023259420413523912, "learning_rate": 1.415877174797029e-06, "loss": 0.0, "step": 21685 }, { "epoch": 20.851923076923075, "grad_norm": 0.0035998006351292133, "learning_rate": 1.4152382594136238e-06, "loss": 0.0, "step": 21686 }, { "epoch": 20.852884615384614, "grad_norm": 0.004471052438020706, "learning_rate": 1.414599477240528e-06, "loss": 0.0, "step": 21687 }, { "epoch": 20.853846153846153, "grad_norm": 0.0020630487706512213, "learning_rate": 1.4139608282876483e-06, "loss": 0.0, "step": 21688 }, { "epoch": 20.85480769230769, "grad_norm": 0.002604900626465678, "learning_rate": 1.4133223125648966e-06, "loss": 0.0, "step": 21689 }, { "epoch": 20.85576923076923, "grad_norm": 0.002184706274420023, "learning_rate": 1.4126839300821827e-06, "loss": 0.0, "step": 21690 }, { "epoch": 20.85673076923077, "grad_norm": 0.002352695446461439, "learning_rate": 1.4120456808494088e-06, "loss": 0.0, "step": 21691 }, { "epoch": 20.857692307692307, "grad_norm": 0.002794354921206832, "learning_rate": 1.4114075648764835e-06, "loss": 0.0, "step": 21692 }, { "epoch": 20.858653846153846, "grad_norm": 0.0006411182112060487, "learning_rate": 1.4107695821733026e-06, "loss": 0.0, "step": 21693 }, { "epoch": 20.859615384615385, "grad_norm": 0.0012825170997530222, "learning_rate": 1.4101317327497688e-06, "loss": 0.0, "step": 21694 }, { "epoch": 20.860576923076923, "grad_norm": 0.0008779884083196521, "learning_rate": 1.4094940166157823e-06, "loss": 0.0, "step": 21695 }, { "epoch": 20.861538461538462, "grad_norm": 0.0019056032178923488, "learning_rate": 1.4088564337812327e-06, "loss": 0.0, "step": 21696 }, { "epoch": 20.8625, "grad_norm": 0.0009652962326072156, "learning_rate": 1.4082189842560178e-06, "loss": 0.0, "step": 21697 }, { "epoch": 20.86346153846154, "grad_norm": 0.000796176609583199, "learning_rate": 1.4075816680500288e-06, "loss": 0.0, "step": 21698 }, { "epoch": 20.864423076923078, "grad_norm": 0.0006342444103211164, "learning_rate": 1.4069444851731517e-06, "loss": 0.0, "step": 21699 }, { "epoch": 20.865384615384617, "grad_norm": 0.0021019158884882927, "learning_rate": 1.4063074356352768e-06, "loss": 0.0, "step": 21700 }, { "epoch": 20.866346153846155, "grad_norm": 0.0017114548245444894, "learning_rate": 1.4056705194462894e-06, "loss": 0.0, "step": 21701 }, { "epoch": 20.86730769230769, "grad_norm": 0.001739444094710052, "learning_rate": 1.4050337366160694e-06, "loss": 0.0, "step": 21702 }, { "epoch": 20.86826923076923, "grad_norm": 0.001343495212495327, "learning_rate": 1.4043970871545032e-06, "loss": 0.0, "step": 21703 }, { "epoch": 20.869230769230768, "grad_norm": 0.005351670552045107, "learning_rate": 1.403760571071463e-06, "loss": 0.0, "step": 21704 }, { "epoch": 20.870192307692307, "grad_norm": 0.0023760423064231873, "learning_rate": 1.4031241883768297e-06, "loss": 0.0, "step": 21705 }, { "epoch": 20.871153846153845, "grad_norm": 0.0028512815479189157, "learning_rate": 1.402487939080479e-06, "loss": 0.0, "step": 21706 }, { "epoch": 20.872115384615384, "grad_norm": 0.0019019856117665768, "learning_rate": 1.4018518231922806e-06, "loss": 0.0, "step": 21707 }, { "epoch": 20.873076923076923, "grad_norm": 0.001627174555324018, "learning_rate": 1.4012158407221065e-06, "loss": 0.0, "step": 21708 }, { "epoch": 20.87403846153846, "grad_norm": 0.0016514492454007268, "learning_rate": 1.400579991679828e-06, "loss": 0.0, "step": 21709 }, { "epoch": 20.875, "grad_norm": 0.004878408275544643, "learning_rate": 1.3999442760753068e-06, "loss": 0.0, "step": 21710 }, { "epoch": 20.87596153846154, "grad_norm": 0.001506995758973062, "learning_rate": 1.3993086939184086e-06, "loss": 0.0, "step": 21711 }, { "epoch": 20.876923076923077, "grad_norm": 0.003078462090343237, "learning_rate": 1.398673245219e-06, "loss": 0.0, "step": 21712 }, { "epoch": 20.877884615384616, "grad_norm": 0.002515532309189439, "learning_rate": 1.398037929986935e-06, "loss": 0.0, "step": 21713 }, { "epoch": 20.878846153846155, "grad_norm": 0.0012524218764156103, "learning_rate": 1.3974027482320751e-06, "loss": 0.0, "step": 21714 }, { "epoch": 20.879807692307693, "grad_norm": 0.0008751244749873877, "learning_rate": 1.3967676999642788e-06, "loss": 0.0, "step": 21715 }, { "epoch": 20.880769230769232, "grad_norm": 0.0022568630520254374, "learning_rate": 1.3961327851933949e-06, "loss": 0.0, "step": 21716 }, { "epoch": 20.88173076923077, "grad_norm": 0.0035599705297499895, "learning_rate": 1.395498003929281e-06, "loss": 0.0, "step": 21717 }, { "epoch": 20.88269230769231, "grad_norm": 0.006733942311257124, "learning_rate": 1.3948633561817816e-06, "loss": 0.0, "step": 21718 }, { "epoch": 20.883653846153845, "grad_norm": 0.0023089395835995674, "learning_rate": 1.3942288419607476e-06, "loss": 0.0, "step": 21719 }, { "epoch": 20.884615384615383, "grad_norm": 0.001368507742881775, "learning_rate": 1.3935944612760254e-06, "loss": 0.0, "step": 21720 }, { "epoch": 20.885576923076922, "grad_norm": 0.007850166410207748, "learning_rate": 1.3929602141374564e-06, "loss": 0.0, "step": 21721 }, { "epoch": 20.88653846153846, "grad_norm": 0.0007607849547639489, "learning_rate": 1.3923261005548826e-06, "loss": 0.0, "step": 21722 }, { "epoch": 20.8875, "grad_norm": 0.0024157448206096888, "learning_rate": 1.3916921205381473e-06, "loss": 0.0, "step": 21723 }, { "epoch": 20.888461538461538, "grad_norm": 0.0009621524368412793, "learning_rate": 1.3910582740970835e-06, "loss": 0.0, "step": 21724 }, { "epoch": 20.889423076923077, "grad_norm": 0.006803522352129221, "learning_rate": 1.390424561241528e-06, "loss": 0.0, "step": 21725 }, { "epoch": 20.890384615384615, "grad_norm": 0.0005368650308810174, "learning_rate": 1.389790981981316e-06, "loss": 0.0, "step": 21726 }, { "epoch": 20.891346153846154, "grad_norm": 0.003232406685128808, "learning_rate": 1.3891575363262766e-06, "loss": 0.0, "step": 21727 }, { "epoch": 20.892307692307693, "grad_norm": 0.004962776321917772, "learning_rate": 1.3885242242862407e-06, "loss": 0.0, "step": 21728 }, { "epoch": 20.89326923076923, "grad_norm": 0.0006121491896919906, "learning_rate": 1.3878910458710327e-06, "loss": 0.0, "step": 21729 }, { "epoch": 20.89423076923077, "grad_norm": 0.0013134853215888143, "learning_rate": 1.387258001090479e-06, "loss": 0.0, "step": 21730 }, { "epoch": 20.89519230769231, "grad_norm": 0.0008042511763051152, "learning_rate": 1.3866250899544054e-06, "loss": 0.0, "step": 21731 }, { "epoch": 20.896153846153847, "grad_norm": 0.007151401601731777, "learning_rate": 1.3859923124726283e-06, "loss": 0.0, "step": 21732 }, { "epoch": 20.897115384615386, "grad_norm": 0.0016910642152652144, "learning_rate": 1.3853596686549686e-06, "loss": 0.0, "step": 21733 }, { "epoch": 20.898076923076925, "grad_norm": 0.002615101868286729, "learning_rate": 1.3847271585112465e-06, "loss": 0.0, "step": 21734 }, { "epoch": 20.89903846153846, "grad_norm": 0.0024004403967410326, "learning_rate": 1.3840947820512696e-06, "loss": 0.0, "step": 21735 }, { "epoch": 20.9, "grad_norm": 0.0015162224881350994, "learning_rate": 1.3834625392848556e-06, "loss": 0.0, "step": 21736 }, { "epoch": 20.900961538461537, "grad_norm": 0.0009249842260032892, "learning_rate": 1.3828304302218142e-06, "loss": 0.0, "step": 21737 }, { "epoch": 20.901923076923076, "grad_norm": 0.0016381271416321397, "learning_rate": 1.3821984548719524e-06, "loss": 0.0, "step": 21738 }, { "epoch": 20.902884615384615, "grad_norm": 0.0011935087386518717, "learning_rate": 1.38156661324508e-06, "loss": 0.0, "step": 21739 }, { "epoch": 20.903846153846153, "grad_norm": 0.002960198326036334, "learning_rate": 1.3809349053509968e-06, "loss": 0.0, "step": 21740 }, { "epoch": 20.904807692307692, "grad_norm": 0.007382804993540049, "learning_rate": 1.3803033311995072e-06, "loss": 0.0, "step": 21741 }, { "epoch": 20.90576923076923, "grad_norm": 0.0028537637554109097, "learning_rate": 1.3796718908004137e-06, "loss": 0.0, "step": 21742 }, { "epoch": 20.90673076923077, "grad_norm": 0.0013994068140164018, "learning_rate": 1.379040584163509e-06, "loss": 0.0, "step": 21743 }, { "epoch": 20.907692307692308, "grad_norm": 0.005091653671115637, "learning_rate": 1.3784094112985924e-06, "loss": 0.0, "step": 21744 }, { "epoch": 20.908653846153847, "grad_norm": 0.0016650600591674447, "learning_rate": 1.3777783722154603e-06, "loss": 0.0, "step": 21745 }, { "epoch": 20.909615384615385, "grad_norm": 0.0014772722497582436, "learning_rate": 1.3771474669238994e-06, "loss": 0.0, "step": 21746 }, { "epoch": 20.910576923076924, "grad_norm": 0.0009122209739871323, "learning_rate": 1.3765166954337029e-06, "loss": 0.0, "step": 21747 }, { "epoch": 20.911538461538463, "grad_norm": 0.0045611318200826645, "learning_rate": 1.3758860577546595e-06, "loss": 0.0, "step": 21748 }, { "epoch": 20.9125, "grad_norm": 0.002994221867993474, "learning_rate": 1.3752555538965507e-06, "loss": 0.0, "step": 21749 }, { "epoch": 20.91346153846154, "grad_norm": 0.00806878600269556, "learning_rate": 1.3746251838691637e-06, "loss": 0.0, "step": 21750 }, { "epoch": 20.914423076923075, "grad_norm": 0.0023741358891129494, "learning_rate": 1.3739949476822777e-06, "loss": 0.0, "step": 21751 }, { "epoch": 20.915384615384614, "grad_norm": 0.0014864085242152214, "learning_rate": 1.3733648453456728e-06, "loss": 0.0, "step": 21752 }, { "epoch": 20.916346153846153, "grad_norm": 0.0007582420948892832, "learning_rate": 1.3727348768691285e-06, "loss": 0.0, "step": 21753 }, { "epoch": 20.91730769230769, "grad_norm": 0.0020804270170629025, "learning_rate": 1.372105042262416e-06, "loss": 0.0, "step": 21754 }, { "epoch": 20.91826923076923, "grad_norm": 0.002778747584670782, "learning_rate": 1.371475341535311e-06, "loss": 0.0, "step": 21755 }, { "epoch": 20.91923076923077, "grad_norm": 0.0027310624718666077, "learning_rate": 1.3708457746975868e-06, "loss": 0.0, "step": 21756 }, { "epoch": 20.920192307692307, "grad_norm": 0.003649679012596607, "learning_rate": 1.3702163417590064e-06, "loss": 0.0, "step": 21757 }, { "epoch": 20.921153846153846, "grad_norm": 0.0015001692809164524, "learning_rate": 1.369587042729341e-06, "loss": 0.0, "step": 21758 }, { "epoch": 20.922115384615385, "grad_norm": 0.002140572527423501, "learning_rate": 1.3689578776183577e-06, "loss": 0.0, "step": 21759 }, { "epoch": 20.923076923076923, "grad_norm": 0.003993362654000521, "learning_rate": 1.3683288464358136e-06, "loss": 0.0, "step": 21760 }, { "epoch": 20.924038461538462, "grad_norm": 0.0025841761380434036, "learning_rate": 1.3676999491914745e-06, "loss": 0.0, "step": 21761 }, { "epoch": 20.925, "grad_norm": 0.0012040126603096724, "learning_rate": 1.3670711858950936e-06, "loss": 0.0, "step": 21762 }, { "epoch": 20.92596153846154, "grad_norm": 0.001504325307905674, "learning_rate": 1.366442556556431e-06, "loss": 0.0, "step": 21763 }, { "epoch": 20.926923076923078, "grad_norm": 0.06477738916873932, "learning_rate": 1.3658140611852433e-06, "loss": 0.0001, "step": 21764 }, { "epoch": 20.927884615384617, "grad_norm": 0.0018350250320509076, "learning_rate": 1.3651856997912782e-06, "loss": 0.0, "step": 21765 }, { "epoch": 20.928846153846155, "grad_norm": 0.004932253621518612, "learning_rate": 1.364557472384288e-06, "loss": 0.0, "step": 21766 }, { "epoch": 20.92980769230769, "grad_norm": 0.0035391165874898434, "learning_rate": 1.3639293789740227e-06, "loss": 0.0, "step": 21767 }, { "epoch": 20.93076923076923, "grad_norm": 0.0021473027300089598, "learning_rate": 1.3633014195702254e-06, "loss": 0.0, "step": 21768 }, { "epoch": 20.931730769230768, "grad_norm": 0.0021184314973652363, "learning_rate": 1.362673594182642e-06, "loss": 0.0, "step": 21769 }, { "epoch": 20.932692307692307, "grad_norm": 0.0006625207024626434, "learning_rate": 1.3620459028210154e-06, "loss": 0.0, "step": 21770 }, { "epoch": 20.933653846153845, "grad_norm": 0.002547386335209012, "learning_rate": 1.3614183454950824e-06, "loss": 0.0, "step": 21771 }, { "epoch": 20.934615384615384, "grad_norm": 0.002747496822848916, "learning_rate": 1.3607909222145855e-06, "loss": 0.0, "step": 21772 }, { "epoch": 20.935576923076923, "grad_norm": 0.0016543088713660836, "learning_rate": 1.3601636329892543e-06, "loss": 0.0, "step": 21773 }, { "epoch": 20.93653846153846, "grad_norm": 0.0024767774157226086, "learning_rate": 1.3595364778288268e-06, "loss": 0.0, "step": 21774 }, { "epoch": 20.9375, "grad_norm": 0.0014320090413093567, "learning_rate": 1.3589094567430362e-06, "loss": 0.0, "step": 21775 }, { "epoch": 20.93846153846154, "grad_norm": 0.059633973985910416, "learning_rate": 1.358282569741607e-06, "loss": 0.0004, "step": 21776 }, { "epoch": 20.939423076923077, "grad_norm": 0.0036083688028156757, "learning_rate": 1.357655816834269e-06, "loss": 0.0, "step": 21777 }, { "epoch": 20.940384615384616, "grad_norm": 0.004023299552500248, "learning_rate": 1.3570291980307504e-06, "loss": 0.0, "step": 21778 }, { "epoch": 20.941346153846155, "grad_norm": 0.0013088395353406668, "learning_rate": 1.3564027133407686e-06, "loss": 0.0, "step": 21779 }, { "epoch": 20.942307692307693, "grad_norm": 0.001508014160208404, "learning_rate": 1.3557763627740484e-06, "loss": 0.0, "step": 21780 }, { "epoch": 20.943269230769232, "grad_norm": 0.011865046806633472, "learning_rate": 1.3551501463403115e-06, "loss": 0.0001, "step": 21781 }, { "epoch": 20.94423076923077, "grad_norm": 0.001413889229297638, "learning_rate": 1.3545240640492695e-06, "loss": 0.0, "step": 21782 }, { "epoch": 20.94519230769231, "grad_norm": 0.00483746500685811, "learning_rate": 1.3538981159106423e-06, "loss": 0.0, "step": 21783 }, { "epoch": 20.946153846153845, "grad_norm": 0.0021079981233924627, "learning_rate": 1.3532723019341376e-06, "loss": 0.0, "step": 21784 }, { "epoch": 20.947115384615383, "grad_norm": 0.001142160384915769, "learning_rate": 1.3526466221294687e-06, "loss": 0.0, "step": 21785 }, { "epoch": 20.948076923076922, "grad_norm": 0.002678623655810952, "learning_rate": 1.352021076506347e-06, "loss": 0.0, "step": 21786 }, { "epoch": 20.94903846153846, "grad_norm": 0.002086097840219736, "learning_rate": 1.3513956650744753e-06, "loss": 0.0, "step": 21787 }, { "epoch": 20.95, "grad_norm": 0.0021902690641582012, "learning_rate": 1.3507703878435585e-06, "loss": 0.0, "step": 21788 }, { "epoch": 20.950961538461538, "grad_norm": 0.002298360923305154, "learning_rate": 1.350145244823302e-06, "loss": 0.0, "step": 21789 }, { "epoch": 20.951923076923077, "grad_norm": 0.003827804932370782, "learning_rate": 1.3495202360234028e-06, "loss": 0.0, "step": 21790 }, { "epoch": 20.952884615384615, "grad_norm": 0.028897833079099655, "learning_rate": 1.3488953614535615e-06, "loss": 0.0001, "step": 21791 }, { "epoch": 20.953846153846154, "grad_norm": 0.0022671204060316086, "learning_rate": 1.3482706211234742e-06, "loss": 0.0, "step": 21792 }, { "epoch": 20.954807692307693, "grad_norm": 0.002743433229625225, "learning_rate": 1.3476460150428327e-06, "loss": 0.0, "step": 21793 }, { "epoch": 20.95576923076923, "grad_norm": 0.0018340572714805603, "learning_rate": 1.347021543221333e-06, "loss": 0.0, "step": 21794 }, { "epoch": 20.95673076923077, "grad_norm": 0.004270898178219795, "learning_rate": 1.3463972056686604e-06, "loss": 0.0001, "step": 21795 }, { "epoch": 20.95769230769231, "grad_norm": 0.0014641020679846406, "learning_rate": 1.345773002394506e-06, "loss": 0.0, "step": 21796 }, { "epoch": 20.958653846153847, "grad_norm": 0.0013371185632422566, "learning_rate": 1.3451489334085555e-06, "loss": 0.0, "step": 21797 }, { "epoch": 20.959615384615386, "grad_norm": 0.0015528068179264665, "learning_rate": 1.3445249987204911e-06, "loss": 0.0, "step": 21798 }, { "epoch": 20.960576923076925, "grad_norm": 0.0011446099961176515, "learning_rate": 1.343901198339994e-06, "loss": 0.0, "step": 21799 }, { "epoch": 20.96153846153846, "grad_norm": 0.0036610066890716553, "learning_rate": 1.3432775322767478e-06, "loss": 0.0, "step": 21800 }, { "epoch": 20.9625, "grad_norm": 0.002567944349721074, "learning_rate": 1.3426540005404243e-06, "loss": 0.0, "step": 21801 }, { "epoch": 20.963461538461537, "grad_norm": 0.0019406249048188329, "learning_rate": 1.3420306031407027e-06, "loss": 0.0, "step": 21802 }, { "epoch": 20.964423076923076, "grad_norm": 0.0018566632643342018, "learning_rate": 1.3414073400872574e-06, "loss": 0.0, "step": 21803 }, { "epoch": 20.965384615384615, "grad_norm": 0.003479521721601486, "learning_rate": 1.3407842113897541e-06, "loss": 0.0, "step": 21804 }, { "epoch": 20.966346153846153, "grad_norm": 0.0016358274733647704, "learning_rate": 1.340161217057866e-06, "loss": 0.0, "step": 21805 }, { "epoch": 20.967307692307692, "grad_norm": 0.0008487019804306328, "learning_rate": 1.3395383571012621e-06, "loss": 0.0, "step": 21806 }, { "epoch": 20.96826923076923, "grad_norm": 0.0012068521464243531, "learning_rate": 1.3389156315296025e-06, "loss": 0.0, "step": 21807 }, { "epoch": 20.96923076923077, "grad_norm": 0.0020063992124050856, "learning_rate": 1.338293040352554e-06, "loss": 0.0, "step": 21808 }, { "epoch": 20.970192307692308, "grad_norm": 0.0015609225956723094, "learning_rate": 1.3376705835797744e-06, "loss": 0.0, "step": 21809 }, { "epoch": 20.971153846153847, "grad_norm": 0.00203959783539176, "learning_rate": 1.3370482612209224e-06, "loss": 0.0, "step": 21810 }, { "epoch": 20.972115384615385, "grad_norm": 0.00113110791426152, "learning_rate": 1.3364260732856594e-06, "loss": 0.0, "step": 21811 }, { "epoch": 20.973076923076924, "grad_norm": 0.002828397788107395, "learning_rate": 1.335804019783633e-06, "loss": 0.0, "step": 21812 }, { "epoch": 20.974038461538463, "grad_norm": 0.0015437380643561482, "learning_rate": 1.3351821007245003e-06, "loss": 0.0, "step": 21813 }, { "epoch": 20.975, "grad_norm": 0.1903027445077896, "learning_rate": 1.3345603161179122e-06, "loss": 0.0003, "step": 21814 }, { "epoch": 20.97596153846154, "grad_norm": 0.0017776694148778915, "learning_rate": 1.3339386659735132e-06, "loss": 0.0, "step": 21815 }, { "epoch": 20.976923076923075, "grad_norm": 0.0009050912340171635, "learning_rate": 1.3333171503009512e-06, "loss": 0.0, "step": 21816 }, { "epoch": 20.977884615384614, "grad_norm": 0.002394818002358079, "learning_rate": 1.332695769109873e-06, "loss": 0.0, "step": 21817 }, { "epoch": 20.978846153846153, "grad_norm": 0.0023634699173271656, "learning_rate": 1.3320745224099153e-06, "loss": 0.0, "step": 21818 }, { "epoch": 20.97980769230769, "grad_norm": 0.0017750116530805826, "learning_rate": 1.3314534102107247e-06, "loss": 0.0, "step": 21819 }, { "epoch": 20.98076923076923, "grad_norm": 0.0028765429742634296, "learning_rate": 1.3308324325219313e-06, "loss": 0.0, "step": 21820 }, { "epoch": 20.98173076923077, "grad_norm": 0.0019224623683840036, "learning_rate": 1.3302115893531763e-06, "loss": 0.0, "step": 21821 }, { "epoch": 20.982692307692307, "grad_norm": 0.0018598100868985057, "learning_rate": 1.3295908807140933e-06, "loss": 0.0, "step": 21822 }, { "epoch": 20.983653846153846, "grad_norm": 0.0009297248907387257, "learning_rate": 1.3289703066143112e-06, "loss": 0.0, "step": 21823 }, { "epoch": 20.984615384615385, "grad_norm": 0.0028427077922970057, "learning_rate": 1.3283498670634597e-06, "loss": 0.0, "step": 21824 }, { "epoch": 20.985576923076923, "grad_norm": 0.0022526357788592577, "learning_rate": 1.3277295620711695e-06, "loss": 0.0, "step": 21825 }, { "epoch": 20.986538461538462, "grad_norm": 0.012356243096292019, "learning_rate": 1.3271093916470623e-06, "loss": 0.0, "step": 21826 }, { "epoch": 20.9875, "grad_norm": 0.0027836530935019255, "learning_rate": 1.3264893558007618e-06, "loss": 0.0, "step": 21827 }, { "epoch": 20.98846153846154, "grad_norm": 0.0013905754312872887, "learning_rate": 1.3258694545418927e-06, "loss": 0.0, "step": 21828 }, { "epoch": 20.989423076923078, "grad_norm": 0.0012007370823994279, "learning_rate": 1.3252496878800702e-06, "loss": 0.0, "step": 21829 }, { "epoch": 20.990384615384617, "grad_norm": 0.0045573897659778595, "learning_rate": 1.3246300558249136e-06, "loss": 0.0, "step": 21830 }, { "epoch": 20.991346153846155, "grad_norm": 0.001989982323721051, "learning_rate": 1.3240105583860341e-06, "loss": 0.0, "step": 21831 }, { "epoch": 20.99230769230769, "grad_norm": 0.11722374707460403, "learning_rate": 1.3233911955730473e-06, "loss": 0.0002, "step": 21832 }, { "epoch": 20.99326923076923, "grad_norm": 0.0038691696245223284, "learning_rate": 1.3227719673955664e-06, "loss": 0.0, "step": 21833 }, { "epoch": 20.994230769230768, "grad_norm": 0.0014605458127334714, "learning_rate": 1.322152873863194e-06, "loss": 0.0, "step": 21834 }, { "epoch": 20.995192307692307, "grad_norm": 0.002127218758687377, "learning_rate": 1.321533914985541e-06, "loss": 0.0, "step": 21835 }, { "epoch": 20.996153846153845, "grad_norm": 0.0010100069921463728, "learning_rate": 1.3209150907722124e-06, "loss": 0.0, "step": 21836 }, { "epoch": 20.997115384615384, "grad_norm": 0.0013853914570063353, "learning_rate": 1.3202964012328056e-06, "loss": 0.0, "step": 21837 }, { "epoch": 20.998076923076923, "grad_norm": 0.0009862990118563175, "learning_rate": 1.3196778463769256e-06, "loss": 0.0, "step": 21838 }, { "epoch": 20.99903846153846, "grad_norm": 0.0009822419378906488, "learning_rate": 1.3190594262141697e-06, "loss": 0.0, "step": 21839 }, { "epoch": 21.0, "grad_norm": 0.00159155682194978, "learning_rate": 1.318441140754132e-06, "loss": 0.0, "step": 21840 }, { "epoch": 21.00096153846154, "grad_norm": 0.0025703604333102703, "learning_rate": 1.31782299000641e-06, "loss": 0.0, "step": 21841 }, { "epoch": 21.001923076923077, "grad_norm": 0.0008636096608825028, "learning_rate": 1.3172049739805904e-06, "loss": 0.0, "step": 21842 }, { "epoch": 21.002884615384616, "grad_norm": 0.0012067847419530153, "learning_rate": 1.3165870926862667e-06, "loss": 0.0, "step": 21843 }, { "epoch": 21.003846153846155, "grad_norm": 0.0027594289276748896, "learning_rate": 1.315969346133027e-06, "loss": 0.0, "step": 21844 }, { "epoch": 21.004807692307693, "grad_norm": 0.0023593942169100046, "learning_rate": 1.3153517343304544e-06, "loss": 0.0, "step": 21845 }, { "epoch": 21.005769230769232, "grad_norm": 0.0013324739411473274, "learning_rate": 1.3147342572881339e-06, "loss": 0.0, "step": 21846 }, { "epoch": 21.00673076923077, "grad_norm": 0.0008156998082995415, "learning_rate": 1.3141169150156486e-06, "loss": 0.0, "step": 21847 }, { "epoch": 21.00769230769231, "grad_norm": 0.0013255317462608218, "learning_rate": 1.313499707522573e-06, "loss": 0.0, "step": 21848 }, { "epoch": 21.008653846153845, "grad_norm": 0.0015963444020599127, "learning_rate": 1.3128826348184886e-06, "loss": 0.0, "step": 21849 }, { "epoch": 21.009615384615383, "grad_norm": 0.0009876699186861515, "learning_rate": 1.312265696912971e-06, "loss": 0.0, "step": 21850 }, { "epoch": 21.010576923076922, "grad_norm": 0.0009207157418131828, "learning_rate": 1.311648893815589e-06, "loss": 0.0, "step": 21851 }, { "epoch": 21.01153846153846, "grad_norm": 0.0014984009321779013, "learning_rate": 1.3110322255359198e-06, "loss": 0.0, "step": 21852 }, { "epoch": 21.0125, "grad_norm": 0.0030560798477381468, "learning_rate": 1.3104156920835254e-06, "loss": 0.0, "step": 21853 }, { "epoch": 21.013461538461538, "grad_norm": 0.0011208669748157263, "learning_rate": 1.309799293467976e-06, "loss": 0.0, "step": 21854 }, { "epoch": 21.014423076923077, "grad_norm": 0.0014777005417272449, "learning_rate": 1.3091830296988384e-06, "loss": 0.0, "step": 21855 }, { "epoch": 21.015384615384615, "grad_norm": 0.0005803397507406771, "learning_rate": 1.3085669007856705e-06, "loss": 0.0, "step": 21856 }, { "epoch": 21.016346153846154, "grad_norm": 0.0008075839723460376, "learning_rate": 1.3079509067380346e-06, "loss": 0.0, "step": 21857 }, { "epoch": 21.017307692307693, "grad_norm": 0.0010192019399255514, "learning_rate": 1.3073350475654933e-06, "loss": 0.0, "step": 21858 }, { "epoch": 21.01826923076923, "grad_norm": 0.002690386725589633, "learning_rate": 1.3067193232775964e-06, "loss": 0.0, "step": 21859 }, { "epoch": 21.01923076923077, "grad_norm": 0.0034976848401129246, "learning_rate": 1.306103733883901e-06, "loss": 0.0, "step": 21860 }, { "epoch": 21.02019230769231, "grad_norm": 0.012913200072944164, "learning_rate": 1.3054882793939616e-06, "loss": 0.0001, "step": 21861 }, { "epoch": 21.021153846153847, "grad_norm": 0.0006127911037765443, "learning_rate": 1.3048729598173248e-06, "loss": 0.0, "step": 21862 }, { "epoch": 21.022115384615386, "grad_norm": 0.008649519644677639, "learning_rate": 1.304257775163541e-06, "loss": 0.0, "step": 21863 }, { "epoch": 21.023076923076925, "grad_norm": 0.0019095015013590455, "learning_rate": 1.3036427254421524e-06, "loss": 0.0, "step": 21864 }, { "epoch": 21.02403846153846, "grad_norm": 0.0020212174858897924, "learning_rate": 1.3030278106627058e-06, "loss": 0.0, "step": 21865 }, { "epoch": 21.025, "grad_norm": 0.008075885474681854, "learning_rate": 1.3024130308347449e-06, "loss": 0.0, "step": 21866 }, { "epoch": 21.025961538461537, "grad_norm": 0.001995204482227564, "learning_rate": 1.3017983859678041e-06, "loss": 0.0, "step": 21867 }, { "epoch": 21.026923076923076, "grad_norm": 0.002946115331724286, "learning_rate": 1.3011838760714234e-06, "loss": 0.0, "step": 21868 }, { "epoch": 21.027884615384615, "grad_norm": 0.0017754411092028022, "learning_rate": 1.3005695011551401e-06, "loss": 0.0, "step": 21869 }, { "epoch": 21.028846153846153, "grad_norm": 0.0012941776076331735, "learning_rate": 1.2999552612284838e-06, "loss": 0.0, "step": 21870 }, { "epoch": 21.029807692307692, "grad_norm": 0.0009714002371765673, "learning_rate": 1.2993411563009872e-06, "loss": 0.0, "step": 21871 }, { "epoch": 21.03076923076923, "grad_norm": 0.0011736280284821987, "learning_rate": 1.2987271863821816e-06, "loss": 0.0, "step": 21872 }, { "epoch": 21.03173076923077, "grad_norm": 0.0010634318459779024, "learning_rate": 1.2981133514815903e-06, "loss": 0.0, "step": 21873 }, { "epoch": 21.032692307692308, "grad_norm": 0.0008901262190192938, "learning_rate": 1.2974996516087424e-06, "loss": 0.0, "step": 21874 }, { "epoch": 21.033653846153847, "grad_norm": 0.0011993714142590761, "learning_rate": 1.296886086773157e-06, "loss": 0.0, "step": 21875 }, { "epoch": 21.034615384615385, "grad_norm": 0.0015166746452450752, "learning_rate": 1.2962726569843553e-06, "loss": 0.0, "step": 21876 }, { "epoch": 21.035576923076924, "grad_norm": 0.0035573116037994623, "learning_rate": 1.2956593622518588e-06, "loss": 0.0, "step": 21877 }, { "epoch": 21.036538461538463, "grad_norm": 0.0022710321936756372, "learning_rate": 1.2950462025851795e-06, "loss": 0.0, "step": 21878 }, { "epoch": 21.0375, "grad_norm": 0.0019650692120194435, "learning_rate": 1.2944331779938356e-06, "loss": 0.0, "step": 21879 }, { "epoch": 21.03846153846154, "grad_norm": 0.002121690893545747, "learning_rate": 1.2938202884873397e-06, "loss": 0.0, "step": 21880 }, { "epoch": 21.039423076923075, "grad_norm": 0.0012462949380278587, "learning_rate": 1.293207534075198e-06, "loss": 0.0, "step": 21881 }, { "epoch": 21.040384615384614, "grad_norm": 0.0016113019082695246, "learning_rate": 1.2925949147669226e-06, "loss": 0.0, "step": 21882 }, { "epoch": 21.041346153846153, "grad_norm": 0.0017929482273757458, "learning_rate": 1.2919824305720196e-06, "loss": 0.0, "step": 21883 }, { "epoch": 21.04230769230769, "grad_norm": 0.000701127399224788, "learning_rate": 1.2913700814999885e-06, "loss": 0.0, "step": 21884 }, { "epoch": 21.04326923076923, "grad_norm": 0.0021701701916754246, "learning_rate": 1.2907578675603383e-06, "loss": 0.0, "step": 21885 }, { "epoch": 21.04423076923077, "grad_norm": 0.0022625543642789125, "learning_rate": 1.2901457887625613e-06, "loss": 0.0, "step": 21886 }, { "epoch": 21.045192307692307, "grad_norm": 0.0014254929265007377, "learning_rate": 1.2895338451161587e-06, "loss": 0.0, "step": 21887 }, { "epoch": 21.046153846153846, "grad_norm": 0.0032399699557572603, "learning_rate": 1.2889220366306276e-06, "loss": 0.0, "step": 21888 }, { "epoch": 21.047115384615385, "grad_norm": 0.001545248436741531, "learning_rate": 1.2883103633154582e-06, "loss": 0.0, "step": 21889 }, { "epoch": 21.048076923076923, "grad_norm": 0.0029627562034875154, "learning_rate": 1.2876988251801436e-06, "loss": 0.0, "step": 21890 }, { "epoch": 21.049038461538462, "grad_norm": 0.000967731059063226, "learning_rate": 1.2870874222341746e-06, "loss": 0.0, "step": 21891 }, { "epoch": 21.05, "grad_norm": 0.0020421489607542753, "learning_rate": 1.2864761544870353e-06, "loss": 0.0, "step": 21892 }, { "epoch": 21.05096153846154, "grad_norm": 0.0017837820341810584, "learning_rate": 1.2858650219482116e-06, "loss": 0.0, "step": 21893 }, { "epoch": 21.051923076923078, "grad_norm": 0.0018041424918919802, "learning_rate": 1.2852540246271894e-06, "loss": 0.0, "step": 21894 }, { "epoch": 21.052884615384617, "grad_norm": 0.0012929579243063927, "learning_rate": 1.2846431625334444e-06, "loss": 0.0, "step": 21895 }, { "epoch": 21.053846153846155, "grad_norm": 0.0005715508013963699, "learning_rate": 1.2840324356764589e-06, "loss": 0.0, "step": 21896 }, { "epoch": 21.05480769230769, "grad_norm": 0.0037032689433544874, "learning_rate": 1.2834218440657108e-06, "loss": 0.0, "step": 21897 }, { "epoch": 21.05576923076923, "grad_norm": 0.0011345602106302977, "learning_rate": 1.2828113877106718e-06, "loss": 0.0, "step": 21898 }, { "epoch": 21.056730769230768, "grad_norm": 0.0010083410888910294, "learning_rate": 1.2822010666208163e-06, "loss": 0.0, "step": 21899 }, { "epoch": 21.057692307692307, "grad_norm": 0.0014878828078508377, "learning_rate": 1.281590880805612e-06, "loss": 0.0, "step": 21900 }, { "epoch": 21.058653846153845, "grad_norm": 0.00270923157222569, "learning_rate": 1.2809808302745298e-06, "loss": 0.0, "step": 21901 }, { "epoch": 21.059615384615384, "grad_norm": 0.0016482645878568292, "learning_rate": 1.2803709150370358e-06, "loss": 0.0, "step": 21902 }, { "epoch": 21.060576923076923, "grad_norm": 0.0021724002435803413, "learning_rate": 1.2797611351025917e-06, "loss": 0.0, "step": 21903 }, { "epoch": 21.06153846153846, "grad_norm": 0.001495151431299746, "learning_rate": 1.2791514904806624e-06, "loss": 0.0, "step": 21904 }, { "epoch": 21.0625, "grad_norm": 0.002972227754071355, "learning_rate": 1.2785419811807075e-06, "loss": 0.0, "step": 21905 }, { "epoch": 21.06346153846154, "grad_norm": 0.003172881668433547, "learning_rate": 1.2779326072121823e-06, "loss": 0.0, "step": 21906 }, { "epoch": 21.064423076923077, "grad_norm": 0.0014932549092918634, "learning_rate": 1.2773233685845433e-06, "loss": 0.0, "step": 21907 }, { "epoch": 21.065384615384616, "grad_norm": 0.0016017016023397446, "learning_rate": 1.2767142653072485e-06, "loss": 0.0, "step": 21908 }, { "epoch": 21.066346153846155, "grad_norm": 0.0029205912724137306, "learning_rate": 1.2761052973897425e-06, "loss": 0.0, "step": 21909 }, { "epoch": 21.067307692307693, "grad_norm": 0.008734708651900291, "learning_rate": 1.2754964648414802e-06, "loss": 0.0001, "step": 21910 }, { "epoch": 21.068269230769232, "grad_norm": 0.002654917538166046, "learning_rate": 1.274887767671904e-06, "loss": 0.0, "step": 21911 }, { "epoch": 21.06923076923077, "grad_norm": 0.0009792700875550508, "learning_rate": 1.274279205890463e-06, "loss": 0.0, "step": 21912 }, { "epoch": 21.07019230769231, "grad_norm": 0.0015272916061803699, "learning_rate": 1.2736707795066006e-06, "loss": 0.0, "step": 21913 }, { "epoch": 21.071153846153845, "grad_norm": 0.0005252036498859525, "learning_rate": 1.2730624885297537e-06, "loss": 0.0, "step": 21914 }, { "epoch": 21.072115384615383, "grad_norm": 0.001550251618027687, "learning_rate": 1.272454332969364e-06, "loss": 0.0, "step": 21915 }, { "epoch": 21.073076923076922, "grad_norm": 0.0013615087373182178, "learning_rate": 1.27184631283487e-06, "loss": 0.0, "step": 21916 }, { "epoch": 21.07403846153846, "grad_norm": 0.421690434217453, "learning_rate": 1.2712384281357026e-06, "loss": 0.0046, "step": 21917 }, { "epoch": 21.075, "grad_norm": 0.0006917575956322253, "learning_rate": 1.2706306788812961e-06, "loss": 0.0, "step": 21918 }, { "epoch": 21.075961538461538, "grad_norm": 0.0018146111397072673, "learning_rate": 1.270023065081083e-06, "loss": 0.0, "step": 21919 }, { "epoch": 21.076923076923077, "grad_norm": 0.0009797007078304887, "learning_rate": 1.269415586744488e-06, "loss": 0.0, "step": 21920 }, { "epoch": 21.077884615384615, "grad_norm": 0.0016569501021876931, "learning_rate": 1.2688082438809413e-06, "loss": 0.0, "step": 21921 }, { "epoch": 21.078846153846154, "grad_norm": 0.0024070509243756533, "learning_rate": 1.268201036499862e-06, "loss": 0.0, "step": 21922 }, { "epoch": 21.079807692307693, "grad_norm": 0.0009350949549116194, "learning_rate": 1.267593964610675e-06, "loss": 0.0, "step": 21923 }, { "epoch": 21.08076923076923, "grad_norm": 0.0015387574676424265, "learning_rate": 1.2669870282228036e-06, "loss": 0.0, "step": 21924 }, { "epoch": 21.08173076923077, "grad_norm": 0.0023987358435988426, "learning_rate": 1.2663802273456593e-06, "loss": 0.0, "step": 21925 }, { "epoch": 21.08269230769231, "grad_norm": 0.0024929798673838377, "learning_rate": 1.2657735619886625e-06, "loss": 0.0, "step": 21926 }, { "epoch": 21.083653846153847, "grad_norm": 0.002858757507055998, "learning_rate": 1.2651670321612264e-06, "loss": 0.0, "step": 21927 }, { "epoch": 21.084615384615386, "grad_norm": 0.001517338678240776, "learning_rate": 1.2645606378727592e-06, "loss": 0.0, "step": 21928 }, { "epoch": 21.085576923076925, "grad_norm": 0.001105283503420651, "learning_rate": 1.2639543791326726e-06, "loss": 0.0, "step": 21929 }, { "epoch": 21.08653846153846, "grad_norm": 0.0010465001687407494, "learning_rate": 1.2633482559503774e-06, "loss": 0.0, "step": 21930 }, { "epoch": 21.0875, "grad_norm": 0.0030660154297947884, "learning_rate": 1.262742268335272e-06, "loss": 0.0, "step": 21931 }, { "epoch": 21.088461538461537, "grad_norm": 0.0018229987472295761, "learning_rate": 1.2621364162967664e-06, "loss": 0.0, "step": 21932 }, { "epoch": 21.089423076923076, "grad_norm": 0.00021351322357077152, "learning_rate": 1.2615306998442545e-06, "loss": 0.0, "step": 21933 }, { "epoch": 21.090384615384615, "grad_norm": 0.0019788616336882114, "learning_rate": 1.2609251189871408e-06, "loss": 0.0, "step": 21934 }, { "epoch": 21.091346153846153, "grad_norm": 0.001744094188325107, "learning_rate": 1.2603196737348211e-06, "loss": 0.0, "step": 21935 }, { "epoch": 21.092307692307692, "grad_norm": 0.0009486018097959459, "learning_rate": 1.259714364096688e-06, "loss": 0.0, "step": 21936 }, { "epoch": 21.09326923076923, "grad_norm": 0.0009711309103295207, "learning_rate": 1.2591091900821362e-06, "loss": 0.0, "step": 21937 }, { "epoch": 21.09423076923077, "grad_norm": 0.001319073955528438, "learning_rate": 1.258504151700557e-06, "loss": 0.0, "step": 21938 }, { "epoch": 21.095192307692308, "grad_norm": 0.0012422798899933696, "learning_rate": 1.257899248961335e-06, "loss": 0.0, "step": 21939 }, { "epoch": 21.096153846153847, "grad_norm": 0.0014734485885128379, "learning_rate": 1.2572944818738587e-06, "loss": 0.0, "step": 21940 }, { "epoch": 21.097115384615385, "grad_norm": 0.0006502367905341089, "learning_rate": 1.2566898504475157e-06, "loss": 0.0, "step": 21941 }, { "epoch": 21.098076923076924, "grad_norm": 0.0007947906851768494, "learning_rate": 1.2560853546916818e-06, "loss": 0.0, "step": 21942 }, { "epoch": 21.099038461538463, "grad_norm": 0.001337156631052494, "learning_rate": 1.255480994615742e-06, "loss": 0.0, "step": 21943 }, { "epoch": 21.1, "grad_norm": 0.0018901345320045948, "learning_rate": 1.254876770229071e-06, "loss": 0.0, "step": 21944 }, { "epoch": 21.10096153846154, "grad_norm": 0.0021683129016309977, "learning_rate": 1.2542726815410456e-06, "loss": 0.0, "step": 21945 }, { "epoch": 21.101923076923075, "grad_norm": 0.0029605343006551266, "learning_rate": 1.2536687285610427e-06, "loss": 0.0, "step": 21946 }, { "epoch": 21.102884615384614, "grad_norm": 0.0008739913464523852, "learning_rate": 1.2530649112984273e-06, "loss": 0.0, "step": 21947 }, { "epoch": 21.103846153846153, "grad_norm": 0.0014977286336943507, "learning_rate": 1.2524612297625737e-06, "loss": 0.0, "step": 21948 }, { "epoch": 21.10480769230769, "grad_norm": 0.21191006898880005, "learning_rate": 1.2518576839628494e-06, "loss": 0.0004, "step": 21949 }, { "epoch": 21.10576923076923, "grad_norm": 0.0008336796890944242, "learning_rate": 1.2512542739086176e-06, "loss": 0.0, "step": 21950 }, { "epoch": 21.10673076923077, "grad_norm": 0.0010998235084116459, "learning_rate": 1.250650999609241e-06, "loss": 0.0, "step": 21951 }, { "epoch": 21.107692307692307, "grad_norm": 0.0027029355987906456, "learning_rate": 1.250047861074084e-06, "loss": 0.0, "step": 21952 }, { "epoch": 21.108653846153846, "grad_norm": 0.0021142305340617895, "learning_rate": 1.249444858312502e-06, "loss": 0.0, "step": 21953 }, { "epoch": 21.109615384615385, "grad_norm": 0.0019064589869230986, "learning_rate": 1.2488419913338546e-06, "loss": 0.0, "step": 21954 }, { "epoch": 21.110576923076923, "grad_norm": 0.0016475875163450837, "learning_rate": 1.2482392601474935e-06, "loss": 0.0, "step": 21955 }, { "epoch": 21.111538461538462, "grad_norm": 0.001992053585126996, "learning_rate": 1.2476366647627724e-06, "loss": 0.0, "step": 21956 }, { "epoch": 21.1125, "grad_norm": 0.002080463571473956, "learning_rate": 1.247034205189045e-06, "loss": 0.0, "step": 21957 }, { "epoch": 21.11346153846154, "grad_norm": 0.0025382007006555796, "learning_rate": 1.2464318814356546e-06, "loss": 0.0, "step": 21958 }, { "epoch": 21.114423076923078, "grad_norm": 0.0017966293962672353, "learning_rate": 1.2458296935119508e-06, "loss": 0.0, "step": 21959 }, { "epoch": 21.115384615384617, "grad_norm": 0.000994455418549478, "learning_rate": 1.2452276414272779e-06, "loss": 0.0, "step": 21960 }, { "epoch": 21.116346153846155, "grad_norm": 0.0027054306119680405, "learning_rate": 1.2446257251909754e-06, "loss": 0.0, "step": 21961 }, { "epoch": 21.11730769230769, "grad_norm": 0.0014915080973878503, "learning_rate": 1.2440239448123858e-06, "loss": 0.0, "step": 21962 }, { "epoch": 21.11826923076923, "grad_norm": 0.0021915296092629433, "learning_rate": 1.2434223003008473e-06, "loss": 0.0, "step": 21963 }, { "epoch": 21.119230769230768, "grad_norm": 0.0036517572589218616, "learning_rate": 1.242820791665692e-06, "loss": 0.0, "step": 21964 }, { "epoch": 21.120192307692307, "grad_norm": 0.0011471753241494298, "learning_rate": 1.2422194189162595e-06, "loss": 0.0, "step": 21965 }, { "epoch": 21.121153846153845, "grad_norm": 0.0008534429362043738, "learning_rate": 1.2416181820618745e-06, "loss": 0.0, "step": 21966 }, { "epoch": 21.122115384615384, "grad_norm": 0.0024367752484977245, "learning_rate": 1.2410170811118705e-06, "loss": 0.0, "step": 21967 }, { "epoch": 21.123076923076923, "grad_norm": 0.001172941061668098, "learning_rate": 1.2404161160755757e-06, "loss": 0.0, "step": 21968 }, { "epoch": 21.12403846153846, "grad_norm": 0.002038302831351757, "learning_rate": 1.2398152869623115e-06, "loss": 0.0, "step": 21969 }, { "epoch": 21.125, "grad_norm": 0.0004762061289511621, "learning_rate": 1.2392145937814038e-06, "loss": 0.0, "step": 21970 }, { "epoch": 21.12596153846154, "grad_norm": 0.0028597847558557987, "learning_rate": 1.238614036542175e-06, "loss": 0.0, "step": 21971 }, { "epoch": 21.126923076923077, "grad_norm": 0.00117824028711766, "learning_rate": 1.238013615253939e-06, "loss": 0.0, "step": 21972 }, { "epoch": 21.127884615384616, "grad_norm": 0.0011032819747924805, "learning_rate": 1.2374133299260173e-06, "loss": 0.0, "step": 21973 }, { "epoch": 21.128846153846155, "grad_norm": 0.0012091731186956167, "learning_rate": 1.2368131805677231e-06, "loss": 0.0, "step": 21974 }, { "epoch": 21.129807692307693, "grad_norm": 0.0018317123176530004, "learning_rate": 1.2362131671883671e-06, "loss": 0.0, "step": 21975 }, { "epoch": 21.130769230769232, "grad_norm": 0.000927403976675123, "learning_rate": 1.235613289797264e-06, "loss": 0.0, "step": 21976 }, { "epoch": 21.13173076923077, "grad_norm": 0.0015080712037160993, "learning_rate": 1.2350135484037173e-06, "loss": 0.0, "step": 21977 }, { "epoch": 21.13269230769231, "grad_norm": 0.0016255812952294946, "learning_rate": 1.2344139430170354e-06, "loss": 0.0, "step": 21978 }, { "epoch": 21.133653846153845, "grad_norm": 0.002458190079778433, "learning_rate": 1.233814473646524e-06, "loss": 0.0, "step": 21979 }, { "epoch": 21.134615384615383, "grad_norm": 0.0014702932676300406, "learning_rate": 1.2332151403014814e-06, "loss": 0.0, "step": 21980 }, { "epoch": 21.135576923076922, "grad_norm": 0.0013567754067480564, "learning_rate": 1.23261594299121e-06, "loss": 0.0, "step": 21981 }, { "epoch": 21.13653846153846, "grad_norm": 0.003357707755640149, "learning_rate": 1.23201688172501e-06, "loss": 0.0, "step": 21982 }, { "epoch": 21.1375, "grad_norm": 0.0025421930477023125, "learning_rate": 1.2314179565121709e-06, "loss": 0.0, "step": 21983 }, { "epoch": 21.138461538461538, "grad_norm": 0.0024254266172647476, "learning_rate": 1.2308191673619906e-06, "loss": 0.0, "step": 21984 }, { "epoch": 21.139423076923077, "grad_norm": 0.0005483550485223532, "learning_rate": 1.2302205142837609e-06, "loss": 0.0, "step": 21985 }, { "epoch": 21.140384615384615, "grad_norm": 0.0009289829176850617, "learning_rate": 1.2296219972867684e-06, "loss": 0.0, "step": 21986 }, { "epoch": 21.141346153846154, "grad_norm": 0.0027133969124406576, "learning_rate": 1.2290236163803016e-06, "loss": 0.0, "step": 21987 }, { "epoch": 21.142307692307693, "grad_norm": 0.001467582187615335, "learning_rate": 1.2284253715736471e-06, "loss": 0.0, "step": 21988 }, { "epoch": 21.14326923076923, "grad_norm": 0.001102639944292605, "learning_rate": 1.2278272628760857e-06, "loss": 0.0, "step": 21989 }, { "epoch": 21.14423076923077, "grad_norm": 0.0015783318085595965, "learning_rate": 1.2272292902969007e-06, "loss": 0.0, "step": 21990 }, { "epoch": 21.14519230769231, "grad_norm": 0.0017161959549412131, "learning_rate": 1.2266314538453671e-06, "loss": 0.0, "step": 21991 }, { "epoch": 21.146153846153847, "grad_norm": 0.0008353078737854958, "learning_rate": 1.226033753530763e-06, "loss": 0.0, "step": 21992 }, { "epoch": 21.147115384615386, "grad_norm": 0.0025354393292218447, "learning_rate": 1.225436189362368e-06, "loss": 0.0, "step": 21993 }, { "epoch": 21.148076923076925, "grad_norm": 0.0048569319769740105, "learning_rate": 1.2248387613494462e-06, "loss": 0.0, "step": 21994 }, { "epoch": 21.14903846153846, "grad_norm": 0.0014468930894508958, "learning_rate": 1.2242414695012727e-06, "loss": 0.0, "step": 21995 }, { "epoch": 21.15, "grad_norm": 0.0011266282526776195, "learning_rate": 1.223644313827117e-06, "loss": 0.0, "step": 21996 }, { "epoch": 21.150961538461537, "grad_norm": 0.0014964333968237042, "learning_rate": 1.2230472943362414e-06, "loss": 0.0, "step": 21997 }, { "epoch": 21.151923076923076, "grad_norm": 0.0021680686622858047, "learning_rate": 1.222450411037912e-06, "loss": 0.0, "step": 21998 }, { "epoch": 21.152884615384615, "grad_norm": 0.001814994728192687, "learning_rate": 1.2218536639413925e-06, "loss": 0.0, "step": 21999 }, { "epoch": 21.153846153846153, "grad_norm": 0.0038482891395688057, "learning_rate": 1.2212570530559377e-06, "loss": 0.0, "step": 22000 }, { "epoch": 21.154807692307692, "grad_norm": 0.45312368869781494, "learning_rate": 1.220660578390811e-06, "loss": 0.0031, "step": 22001 }, { "epoch": 21.15576923076923, "grad_norm": 0.002072339178994298, "learning_rate": 1.2200642399552632e-06, "loss": 0.0, "step": 22002 }, { "epoch": 21.15673076923077, "grad_norm": 0.0017889471491798759, "learning_rate": 1.2194680377585488e-06, "loss": 0.0, "step": 22003 }, { "epoch": 21.157692307692308, "grad_norm": 0.002185034565627575, "learning_rate": 1.218871971809923e-06, "loss": 0.0, "step": 22004 }, { "epoch": 21.158653846153847, "grad_norm": 0.007839388214051723, "learning_rate": 1.218276042118629e-06, "loss": 0.0, "step": 22005 }, { "epoch": 21.159615384615385, "grad_norm": 0.0018939345609396696, "learning_rate": 1.2176802486939166e-06, "loss": 0.0, "step": 22006 }, { "epoch": 21.160576923076924, "grad_norm": 0.0008595241815783083, "learning_rate": 1.2170845915450336e-06, "loss": 0.0, "step": 22007 }, { "epoch": 21.161538461538463, "grad_norm": 0.001406008959747851, "learning_rate": 1.2164890706812172e-06, "loss": 0.0, "step": 22008 }, { "epoch": 21.1625, "grad_norm": 0.0014144068118184805, "learning_rate": 1.215893686111712e-06, "loss": 0.0, "step": 22009 }, { "epoch": 21.16346153846154, "grad_norm": 0.0007838515448383987, "learning_rate": 1.2152984378457577e-06, "loss": 0.0, "step": 22010 }, { "epoch": 21.164423076923075, "grad_norm": 0.0010946199763566256, "learning_rate": 1.2147033258925867e-06, "loss": 0.0, "step": 22011 }, { "epoch": 21.165384615384614, "grad_norm": 0.000999639742076397, "learning_rate": 1.214108350261437e-06, "loss": 0.0, "step": 22012 }, { "epoch": 21.166346153846153, "grad_norm": 0.0005799263599328697, "learning_rate": 1.213513510961538e-06, "loss": 0.0, "step": 22013 }, { "epoch": 21.16730769230769, "grad_norm": 0.0015114463167265058, "learning_rate": 1.2129188080021215e-06, "loss": 0.0, "step": 22014 }, { "epoch": 21.16826923076923, "grad_norm": 0.0011457893997430801, "learning_rate": 1.2123242413924174e-06, "loss": 0.0, "step": 22015 }, { "epoch": 21.16923076923077, "grad_norm": 0.004910652060061693, "learning_rate": 1.2117298111416476e-06, "loss": 0.0, "step": 22016 }, { "epoch": 21.170192307692307, "grad_norm": 0.0018544086487963796, "learning_rate": 1.2111355172590366e-06, "loss": 0.0, "step": 22017 }, { "epoch": 21.171153846153846, "grad_norm": 0.0011240958701819181, "learning_rate": 1.2105413597538107e-06, "loss": 0.0, "step": 22018 }, { "epoch": 21.172115384615385, "grad_norm": 0.0025290101766586304, "learning_rate": 1.2099473386351835e-06, "loss": 0.0, "step": 22019 }, { "epoch": 21.173076923076923, "grad_norm": 0.0013511208817362785, "learning_rate": 1.2093534539123752e-06, "loss": 0.0, "step": 22020 }, { "epoch": 21.174038461538462, "grad_norm": 0.0007086530677042902, "learning_rate": 1.2087597055946032e-06, "loss": 0.0, "step": 22021 }, { "epoch": 21.175, "grad_norm": 0.0008825117256492376, "learning_rate": 1.2081660936910767e-06, "loss": 0.0, "step": 22022 }, { "epoch": 21.17596153846154, "grad_norm": 0.0013407751685008407, "learning_rate": 1.2075726182110114e-06, "loss": 0.0, "step": 22023 }, { "epoch": 21.176923076923078, "grad_norm": 0.0036466189194470644, "learning_rate": 1.2069792791636103e-06, "loss": 0.0, "step": 22024 }, { "epoch": 21.177884615384617, "grad_norm": 0.0025046980008482933, "learning_rate": 1.2063860765580836e-06, "loss": 0.0, "step": 22025 }, { "epoch": 21.178846153846155, "grad_norm": 0.002437453716993332, "learning_rate": 1.2057930104036385e-06, "loss": 0.0, "step": 22026 }, { "epoch": 21.17980769230769, "grad_norm": 0.0017369706183671951, "learning_rate": 1.2052000807094733e-06, "loss": 0.0, "step": 22027 }, { "epoch": 21.18076923076923, "grad_norm": 0.0027714006137102842, "learning_rate": 1.2046072874847904e-06, "loss": 0.0, "step": 22028 }, { "epoch": 21.181730769230768, "grad_norm": 0.001431903219781816, "learning_rate": 1.2040146307387901e-06, "loss": 0.0, "step": 22029 }, { "epoch": 21.182692307692307, "grad_norm": 0.002147197024896741, "learning_rate": 1.2034221104806653e-06, "loss": 0.0, "step": 22030 }, { "epoch": 21.183653846153845, "grad_norm": 0.001274718320928514, "learning_rate": 1.202829726719611e-06, "loss": 0.0, "step": 22031 }, { "epoch": 21.184615384615384, "grad_norm": 0.002790863858535886, "learning_rate": 1.2022374794648229e-06, "loss": 0.0, "step": 22032 }, { "epoch": 21.185576923076923, "grad_norm": 0.0013178110821172595, "learning_rate": 1.2016453687254847e-06, "loss": 0.0, "step": 22033 }, { "epoch": 21.18653846153846, "grad_norm": 0.0007425501244142652, "learning_rate": 1.2010533945107905e-06, "loss": 0.0, "step": 22034 }, { "epoch": 21.1875, "grad_norm": 0.0017011001473292708, "learning_rate": 1.2004615568299204e-06, "loss": 0.0, "step": 22035 }, { "epoch": 21.18846153846154, "grad_norm": 0.0007085521938279271, "learning_rate": 1.1998698556920608e-06, "loss": 0.0, "step": 22036 }, { "epoch": 21.189423076923077, "grad_norm": 0.0026525079738348722, "learning_rate": 1.199278291106395e-06, "loss": 0.0, "step": 22037 }, { "epoch": 21.190384615384616, "grad_norm": 0.0013068626867607236, "learning_rate": 1.198686863082098e-06, "loss": 0.0, "step": 22038 }, { "epoch": 21.191346153846155, "grad_norm": 0.0032464840915054083, "learning_rate": 1.1980955716283504e-06, "loss": 0.0, "step": 22039 }, { "epoch": 21.192307692307693, "grad_norm": 0.002428135136142373, "learning_rate": 1.197504416754327e-06, "loss": 0.0, "step": 22040 }, { "epoch": 21.193269230769232, "grad_norm": 0.0005262363119982183, "learning_rate": 1.1969133984691983e-06, "loss": 0.0, "step": 22041 }, { "epoch": 21.19423076923077, "grad_norm": 0.002037956379354, "learning_rate": 1.196322516782138e-06, "loss": 0.0, "step": 22042 }, { "epoch": 21.19519230769231, "grad_norm": 0.002391290385276079, "learning_rate": 1.1957317717023142e-06, "loss": 0.0, "step": 22043 }, { "epoch": 21.196153846153845, "grad_norm": 0.0008152807131409645, "learning_rate": 1.195141163238892e-06, "loss": 0.0, "step": 22044 }, { "epoch": 21.197115384615383, "grad_norm": 0.0018344096606597304, "learning_rate": 1.1945506914010385e-06, "loss": 0.0, "step": 22045 }, { "epoch": 21.198076923076922, "grad_norm": 0.0026916901115328074, "learning_rate": 1.193960356197913e-06, "loss": 0.0, "step": 22046 }, { "epoch": 21.19903846153846, "grad_norm": 0.0022302581928670406, "learning_rate": 1.193370157638677e-06, "loss": 0.0, "step": 22047 }, { "epoch": 21.2, "grad_norm": 0.0014131220523267984, "learning_rate": 1.192780095732492e-06, "loss": 0.0, "step": 22048 }, { "epoch": 21.200961538461538, "grad_norm": 0.0023682634346187115, "learning_rate": 1.1921901704885075e-06, "loss": 0.0, "step": 22049 }, { "epoch": 21.201923076923077, "grad_norm": 0.0008759599877521396, "learning_rate": 1.1916003819158816e-06, "loss": 0.0, "step": 22050 }, { "epoch": 21.202884615384615, "grad_norm": 0.002463076263666153, "learning_rate": 1.1910107300237682e-06, "loss": 0.0, "step": 22051 }, { "epoch": 21.203846153846154, "grad_norm": 0.0016222518170252442, "learning_rate": 1.1904212148213113e-06, "loss": 0.0, "step": 22052 }, { "epoch": 21.204807692307693, "grad_norm": 0.0011689208913594484, "learning_rate": 1.189831836317662e-06, "loss": 0.0, "step": 22053 }, { "epoch": 21.20576923076923, "grad_norm": 0.003809761954471469, "learning_rate": 1.1892425945219666e-06, "loss": 0.0, "step": 22054 }, { "epoch": 21.20673076923077, "grad_norm": 0.0020104260183870792, "learning_rate": 1.1886534894433643e-06, "loss": 0.0, "step": 22055 }, { "epoch": 21.20769230769231, "grad_norm": 0.0012670831056311727, "learning_rate": 1.1880645210910025e-06, "loss": 0.0, "step": 22056 }, { "epoch": 21.208653846153847, "grad_norm": 0.0012013086816295981, "learning_rate": 1.1874756894740137e-06, "loss": 0.0, "step": 22057 }, { "epoch": 21.209615384615386, "grad_norm": 0.0013894130242988467, "learning_rate": 1.186886994601537e-06, "loss": 0.0, "step": 22058 }, { "epoch": 21.210576923076925, "grad_norm": 0.002763062948361039, "learning_rate": 1.1862984364827101e-06, "loss": 0.0, "step": 22059 }, { "epoch": 21.21153846153846, "grad_norm": 0.0014825474936515093, "learning_rate": 1.185710015126662e-06, "loss": 0.0, "step": 22060 }, { "epoch": 21.2125, "grad_norm": 0.001054868334904313, "learning_rate": 1.1851217305425256e-06, "loss": 0.0, "step": 22061 }, { "epoch": 21.213461538461537, "grad_norm": 0.0036486885510385036, "learning_rate": 1.18453358273943e-06, "loss": 0.0, "step": 22062 }, { "epoch": 21.214423076923076, "grad_norm": 0.0033160638995468616, "learning_rate": 1.183945571726498e-06, "loss": 0.0, "step": 22063 }, { "epoch": 21.215384615384615, "grad_norm": 0.008008509874343872, "learning_rate": 1.1833576975128557e-06, "loss": 0.0, "step": 22064 }, { "epoch": 21.216346153846153, "grad_norm": 0.00180308788549155, "learning_rate": 1.182769960107628e-06, "loss": 0.0, "step": 22065 }, { "epoch": 21.217307692307692, "grad_norm": 0.0008075057994574308, "learning_rate": 1.18218235951993e-06, "loss": 0.0, "step": 22066 }, { "epoch": 21.21826923076923, "grad_norm": 0.0010143696563318372, "learning_rate": 1.181594895758883e-06, "loss": 0.0, "step": 22067 }, { "epoch": 21.21923076923077, "grad_norm": 0.0008112846990115941, "learning_rate": 1.1810075688336008e-06, "loss": 0.0, "step": 22068 }, { "epoch": 21.220192307692308, "grad_norm": 0.001607084646821022, "learning_rate": 1.1804203787531965e-06, "loss": 0.0, "step": 22069 }, { "epoch": 21.221153846153847, "grad_norm": 0.0012185120722278953, "learning_rate": 1.1798333255267857e-06, "loss": 0.0, "step": 22070 }, { "epoch": 21.222115384615385, "grad_norm": 0.0011159841669723392, "learning_rate": 1.1792464091634725e-06, "loss": 0.0, "step": 22071 }, { "epoch": 21.223076923076924, "grad_norm": 0.0005706195952370763, "learning_rate": 1.1786596296723663e-06, "loss": 0.0, "step": 22072 }, { "epoch": 21.224038461538463, "grad_norm": 0.004741874057799578, "learning_rate": 1.178072987062574e-06, "loss": 0.0, "step": 22073 }, { "epoch": 21.225, "grad_norm": 0.0020331197883933783, "learning_rate": 1.1774864813431952e-06, "loss": 0.0, "step": 22074 }, { "epoch": 21.22596153846154, "grad_norm": 0.0009775826474651694, "learning_rate": 1.1769001125233326e-06, "loss": 0.0, "step": 22075 }, { "epoch": 21.226923076923075, "grad_norm": 0.0006002157460898161, "learning_rate": 1.1763138806120866e-06, "loss": 0.0, "step": 22076 }, { "epoch": 21.227884615384614, "grad_norm": 0.0020411584991961718, "learning_rate": 1.17572778561855e-06, "loss": 0.0, "step": 22077 }, { "epoch": 21.228846153846153, "grad_norm": 0.002169410465285182, "learning_rate": 1.1751418275518201e-06, "loss": 0.0, "step": 22078 }, { "epoch": 21.22980769230769, "grad_norm": 0.0026539822574704885, "learning_rate": 1.1745560064209894e-06, "loss": 0.0, "step": 22079 }, { "epoch": 21.23076923076923, "grad_norm": 0.00430638762190938, "learning_rate": 1.173970322235145e-06, "loss": 0.0, "step": 22080 }, { "epoch": 21.23173076923077, "grad_norm": 0.0019059651531279087, "learning_rate": 1.17338477500338e-06, "loss": 0.0, "step": 22081 }, { "epoch": 21.232692307692307, "grad_norm": 0.0021203982178121805, "learning_rate": 1.1727993647347747e-06, "loss": 0.0, "step": 22082 }, { "epoch": 21.233653846153846, "grad_norm": 0.0026421311777085066, "learning_rate": 1.1722140914384162e-06, "loss": 0.0, "step": 22083 }, { "epoch": 21.234615384615385, "grad_norm": 0.002121842000633478, "learning_rate": 1.1716289551233873e-06, "loss": 0.0, "step": 22084 }, { "epoch": 21.235576923076923, "grad_norm": 0.0014740488259121776, "learning_rate": 1.1710439557987641e-06, "loss": 0.0, "step": 22085 }, { "epoch": 21.236538461538462, "grad_norm": 0.0014831789303570986, "learning_rate": 1.1704590934736259e-06, "loss": 0.0, "step": 22086 }, { "epoch": 21.2375, "grad_norm": 0.00130185775924474, "learning_rate": 1.1698743681570513e-06, "loss": 0.0, "step": 22087 }, { "epoch": 21.23846153846154, "grad_norm": 0.001609968370757997, "learning_rate": 1.1692897798581071e-06, "loss": 0.0, "step": 22088 }, { "epoch": 21.239423076923078, "grad_norm": 0.0010186592116951942, "learning_rate": 1.1687053285858685e-06, "loss": 0.0, "step": 22089 }, { "epoch": 21.240384615384617, "grad_norm": 0.0007280500722117722, "learning_rate": 1.1681210143494048e-06, "loss": 0.0, "step": 22090 }, { "epoch": 21.241346153846155, "grad_norm": 0.003423972986638546, "learning_rate": 1.16753683715778e-06, "loss": 0.0, "step": 22091 }, { "epoch": 21.24230769230769, "grad_norm": 0.0011009827721863985, "learning_rate": 1.1669527970200634e-06, "loss": 0.0, "step": 22092 }, { "epoch": 21.24326923076923, "grad_norm": 0.0008988805930130184, "learning_rate": 1.1663688939453122e-06, "loss": 0.0, "step": 22093 }, { "epoch": 21.244230769230768, "grad_norm": 0.0011261504841968417, "learning_rate": 1.1657851279425891e-06, "loss": 0.0, "step": 22094 }, { "epoch": 21.245192307692307, "grad_norm": 0.0015393126523122191, "learning_rate": 1.1652014990209549e-06, "loss": 0.0, "step": 22095 }, { "epoch": 21.246153846153845, "grad_norm": 0.0011404744582250714, "learning_rate": 1.1646180071894608e-06, "loss": 0.0, "step": 22096 }, { "epoch": 21.247115384615384, "grad_norm": 0.0009368694736622274, "learning_rate": 1.1640346524571643e-06, "loss": 0.0, "step": 22097 }, { "epoch": 21.248076923076923, "grad_norm": 0.0006756207440048456, "learning_rate": 1.1634514348331193e-06, "loss": 0.0, "step": 22098 }, { "epoch": 21.24903846153846, "grad_norm": 0.0020813532173633575, "learning_rate": 1.1628683543263708e-06, "loss": 0.0, "step": 22099 }, { "epoch": 21.25, "grad_norm": 0.0019831620156764984, "learning_rate": 1.1622854109459692e-06, "loss": 0.0, "step": 22100 }, { "epoch": 21.25096153846154, "grad_norm": 0.001172183663584292, "learning_rate": 1.1617026047009606e-06, "loss": 0.0, "step": 22101 }, { "epoch": 21.251923076923077, "grad_norm": 0.0010610601166263223, "learning_rate": 1.1611199356003854e-06, "loss": 0.0, "step": 22102 }, { "epoch": 21.252884615384616, "grad_norm": 0.0012132399715483189, "learning_rate": 1.1605374036532902e-06, "loss": 0.0, "step": 22103 }, { "epoch": 21.253846153846155, "grad_norm": 0.0024243646766990423, "learning_rate": 1.1599550088687083e-06, "loss": 0.0, "step": 22104 }, { "epoch": 21.254807692307693, "grad_norm": 0.0006475320551544428, "learning_rate": 1.1593727512556797e-06, "loss": 0.0, "step": 22105 }, { "epoch": 21.255769230769232, "grad_norm": 0.0007944042445160449, "learning_rate": 1.158790630823241e-06, "loss": 0.0, "step": 22106 }, { "epoch": 21.25673076923077, "grad_norm": 0.005493944510817528, "learning_rate": 1.15820864758042e-06, "loss": 0.0, "step": 22107 }, { "epoch": 21.25769230769231, "grad_norm": 0.0006578059401363134, "learning_rate": 1.1576268015362513e-06, "loss": 0.0, "step": 22108 }, { "epoch": 21.258653846153845, "grad_norm": 0.000589050876442343, "learning_rate": 1.1570450926997657e-06, "loss": 0.0, "step": 22109 }, { "epoch": 21.259615384615383, "grad_norm": 0.001454424811527133, "learning_rate": 1.1564635210799834e-06, "loss": 0.0, "step": 22110 }, { "epoch": 21.260576923076922, "grad_norm": 0.0006603115471079946, "learning_rate": 1.1558820866859311e-06, "loss": 0.0, "step": 22111 }, { "epoch": 21.26153846153846, "grad_norm": 0.0024081203155219555, "learning_rate": 1.1553007895266345e-06, "loss": 0.0, "step": 22112 }, { "epoch": 21.2625, "grad_norm": 0.0010500357020646334, "learning_rate": 1.1547196296111085e-06, "loss": 0.0, "step": 22113 }, { "epoch": 21.263461538461538, "grad_norm": 0.0013186748838052154, "learning_rate": 1.1541386069483762e-06, "loss": 0.0, "step": 22114 }, { "epoch": 21.264423076923077, "grad_norm": 0.0030162897892296314, "learning_rate": 1.153557721547447e-06, "loss": 0.0, "step": 22115 }, { "epoch": 21.265384615384615, "grad_norm": 0.004130674060434103, "learning_rate": 1.152976973417339e-06, "loss": 0.0, "step": 22116 }, { "epoch": 21.266346153846154, "grad_norm": 0.0010501883225515485, "learning_rate": 1.1523963625670643e-06, "loss": 0.0, "step": 22117 }, { "epoch": 21.267307692307693, "grad_norm": 0.0009940873133018613, "learning_rate": 1.1518158890056286e-06, "loss": 0.0, "step": 22118 }, { "epoch": 21.26826923076923, "grad_norm": 0.0006502529722638428, "learning_rate": 1.1512355527420406e-06, "loss": 0.0, "step": 22119 }, { "epoch": 21.26923076923077, "grad_norm": 0.00181962875649333, "learning_rate": 1.1506553537853094e-06, "loss": 0.0, "step": 22120 }, { "epoch": 21.27019230769231, "grad_norm": 0.0029124876018613577, "learning_rate": 1.1500752921444314e-06, "loss": 0.0, "step": 22121 }, { "epoch": 21.271153846153847, "grad_norm": 0.000785986369010061, "learning_rate": 1.1494953678284105e-06, "loss": 0.0, "step": 22122 }, { "epoch": 21.272115384615386, "grad_norm": 0.009706109762191772, "learning_rate": 1.1489155808462483e-06, "loss": 0.0001, "step": 22123 }, { "epoch": 21.273076923076925, "grad_norm": 0.0026495237834751606, "learning_rate": 1.1483359312069364e-06, "loss": 0.0, "step": 22124 }, { "epoch": 21.27403846153846, "grad_norm": 0.0024501534644514322, "learning_rate": 1.1477564189194745e-06, "loss": 0.0, "step": 22125 }, { "epoch": 21.275, "grad_norm": 0.0010555757908150554, "learning_rate": 1.1471770439928487e-06, "loss": 0.0, "step": 22126 }, { "epoch": 21.275961538461537, "grad_norm": 0.005151731893420219, "learning_rate": 1.1465978064360528e-06, "loss": 0.0, "step": 22127 }, { "epoch": 21.276923076923076, "grad_norm": 0.476993590593338, "learning_rate": 1.1460187062580763e-06, "loss": 0.0011, "step": 22128 }, { "epoch": 21.277884615384615, "grad_norm": 0.007344768848270178, "learning_rate": 1.1454397434679022e-06, "loss": 0.0001, "step": 22129 }, { "epoch": 21.278846153846153, "grad_norm": 0.0015609703259542584, "learning_rate": 1.144860918074514e-06, "loss": 0.0, "step": 22130 }, { "epoch": 21.279807692307692, "grad_norm": 0.0007220601546578109, "learning_rate": 1.1442822300868983e-06, "loss": 0.0, "step": 22131 }, { "epoch": 21.28076923076923, "grad_norm": 0.0008147076587192714, "learning_rate": 1.1437036795140288e-06, "loss": 0.0, "step": 22132 }, { "epoch": 21.28173076923077, "grad_norm": 0.012233896180987358, "learning_rate": 1.1431252663648851e-06, "loss": 0.0, "step": 22133 }, { "epoch": 21.282692307692308, "grad_norm": 0.002226068638265133, "learning_rate": 1.1425469906484442e-06, "loss": 0.0, "step": 22134 }, { "epoch": 21.283653846153847, "grad_norm": 0.0047509195283055305, "learning_rate": 1.1419688523736761e-06, "loss": 0.0, "step": 22135 }, { "epoch": 21.284615384615385, "grad_norm": 0.002035250188782811, "learning_rate": 1.1413908515495553e-06, "loss": 0.0, "step": 22136 }, { "epoch": 21.285576923076924, "grad_norm": 0.0022060233168303967, "learning_rate": 1.140812988185047e-06, "loss": 0.0, "step": 22137 }, { "epoch": 21.286538461538463, "grad_norm": 0.0018095806008204818, "learning_rate": 1.140235262289119e-06, "loss": 0.0, "step": 22138 }, { "epoch": 21.2875, "grad_norm": 0.0005383284296840429, "learning_rate": 1.1396576738707399e-06, "loss": 0.0, "step": 22139 }, { "epoch": 21.28846153846154, "grad_norm": 0.0018028623890131712, "learning_rate": 1.1390802229388664e-06, "loss": 0.0, "step": 22140 }, { "epoch": 21.289423076923075, "grad_norm": 0.0009052067762240767, "learning_rate": 1.1385029095024602e-06, "loss": 0.0, "step": 22141 }, { "epoch": 21.290384615384614, "grad_norm": 0.0018053915118798614, "learning_rate": 1.1379257335704841e-06, "loss": 0.0, "step": 22142 }, { "epoch": 21.291346153846153, "grad_norm": 0.0027342606335878372, "learning_rate": 1.1373486951518887e-06, "loss": 0.0, "step": 22143 }, { "epoch": 21.29230769230769, "grad_norm": 0.0012843032600358129, "learning_rate": 1.1367717942556289e-06, "loss": 0.0, "step": 22144 }, { "epoch": 21.29326923076923, "grad_norm": 0.0006446658517234027, "learning_rate": 1.13619503089066e-06, "loss": 0.0, "step": 22145 }, { "epoch": 21.29423076923077, "grad_norm": 0.0011958800023421645, "learning_rate": 1.1356184050659281e-06, "loss": 0.0, "step": 22146 }, { "epoch": 21.295192307692307, "grad_norm": 0.0012648970587179065, "learning_rate": 1.1350419167903837e-06, "loss": 0.0, "step": 22147 }, { "epoch": 21.296153846153846, "grad_norm": 0.0015447454061359167, "learning_rate": 1.1344655660729676e-06, "loss": 0.0, "step": 22148 }, { "epoch": 21.297115384615385, "grad_norm": 0.0014312025159597397, "learning_rate": 1.1338893529226257e-06, "loss": 0.0, "step": 22149 }, { "epoch": 21.298076923076923, "grad_norm": 0.0017684708582237363, "learning_rate": 1.1333132773483025e-06, "loss": 0.0, "step": 22150 }, { "epoch": 21.299038461538462, "grad_norm": 0.0010238849790766835, "learning_rate": 1.1327373393589315e-06, "loss": 0.0, "step": 22151 }, { "epoch": 21.3, "grad_norm": 0.0020663810428231955, "learning_rate": 1.132161538963451e-06, "loss": 0.0, "step": 22152 }, { "epoch": 21.30096153846154, "grad_norm": 0.001021426753140986, "learning_rate": 1.1315858761707999e-06, "loss": 0.0, "step": 22153 }, { "epoch": 21.301923076923078, "grad_norm": 0.0005072149797342718, "learning_rate": 1.131010350989905e-06, "loss": 0.0, "step": 22154 }, { "epoch": 21.302884615384617, "grad_norm": 0.0014669442316517234, "learning_rate": 1.1304349634296984e-06, "loss": 0.0, "step": 22155 }, { "epoch": 21.303846153846155, "grad_norm": 0.000983410864137113, "learning_rate": 1.129859713499113e-06, "loss": 0.0, "step": 22156 }, { "epoch": 21.30480769230769, "grad_norm": 0.0007740879082120955, "learning_rate": 1.1292846012070679e-06, "loss": 0.0, "step": 22157 }, { "epoch": 21.30576923076923, "grad_norm": 0.0012998535530641675, "learning_rate": 1.128709626562491e-06, "loss": 0.0, "step": 22158 }, { "epoch": 21.306730769230768, "grad_norm": 0.004024486523121595, "learning_rate": 1.1281347895743056e-06, "loss": 0.0, "step": 22159 }, { "epoch": 21.307692307692307, "grad_norm": 0.0015153700951486826, "learning_rate": 1.1275600902514283e-06, "loss": 0.0, "step": 22160 }, { "epoch": 21.308653846153845, "grad_norm": 0.0010243675205856562, "learning_rate": 1.1269855286027798e-06, "loss": 0.0, "step": 22161 }, { "epoch": 21.309615384615384, "grad_norm": 0.0010543747339397669, "learning_rate": 1.1264111046372716e-06, "loss": 0.0, "step": 22162 }, { "epoch": 21.310576923076923, "grad_norm": 0.0004675918316934258, "learning_rate": 1.125836818363819e-06, "loss": 0.0, "step": 22163 }, { "epoch": 21.31153846153846, "grad_norm": 0.001644719042815268, "learning_rate": 1.125262669791336e-06, "loss": 0.0, "step": 22164 }, { "epoch": 21.3125, "grad_norm": 0.0009902039309963584, "learning_rate": 1.1246886589287275e-06, "loss": 0.0, "step": 22165 }, { "epoch": 21.31346153846154, "grad_norm": 0.0012558542657643557, "learning_rate": 1.1241147857849021e-06, "loss": 0.0, "step": 22166 }, { "epoch": 21.314423076923077, "grad_norm": 0.004222522955387831, "learning_rate": 1.1235410503687672e-06, "loss": 0.0, "step": 22167 }, { "epoch": 21.315384615384616, "grad_norm": 0.000634486903436482, "learning_rate": 1.122967452689221e-06, "loss": 0.0, "step": 22168 }, { "epoch": 21.316346153846155, "grad_norm": 0.0015223500085994601, "learning_rate": 1.1223939927551664e-06, "loss": 0.0, "step": 22169 }, { "epoch": 21.317307692307693, "grad_norm": 0.002308591967448592, "learning_rate": 1.121820670575503e-06, "loss": 0.0, "step": 22170 }, { "epoch": 21.318269230769232, "grad_norm": 0.004567859228700399, "learning_rate": 1.1212474861591239e-06, "loss": 0.0001, "step": 22171 }, { "epoch": 21.31923076923077, "grad_norm": 0.0012688908027485013, "learning_rate": 1.1206744395149273e-06, "loss": 0.0, "step": 22172 }, { "epoch": 21.32019230769231, "grad_norm": 0.0012920115841552615, "learning_rate": 1.1201015306518005e-06, "loss": 0.0, "step": 22173 }, { "epoch": 21.321153846153845, "grad_norm": 0.0007318991702049971, "learning_rate": 1.1195287595786352e-06, "loss": 0.0, "step": 22174 }, { "epoch": 21.322115384615383, "grad_norm": 0.0007654766668565571, "learning_rate": 1.1189561263043225e-06, "loss": 0.0, "step": 22175 }, { "epoch": 21.323076923076922, "grad_norm": 0.001196869183331728, "learning_rate": 1.1183836308377428e-06, "loss": 0.0, "step": 22176 }, { "epoch": 21.32403846153846, "grad_norm": 0.0011730147525668144, "learning_rate": 1.1178112731877822e-06, "loss": 0.0, "step": 22177 }, { "epoch": 21.325, "grad_norm": 0.000940893602091819, "learning_rate": 1.1172390533633237e-06, "loss": 0.0, "step": 22178 }, { "epoch": 21.325961538461538, "grad_norm": 0.0019567531999200583, "learning_rate": 1.1166669713732425e-06, "loss": 0.0, "step": 22179 }, { "epoch": 21.326923076923077, "grad_norm": 0.0026469635777175426, "learning_rate": 1.116095027226417e-06, "loss": 0.0, "step": 22180 }, { "epoch": 21.327884615384615, "grad_norm": 0.0009402793366461992, "learning_rate": 1.1155232209317258e-06, "loss": 0.0, "step": 22181 }, { "epoch": 21.328846153846154, "grad_norm": 0.002132631139829755, "learning_rate": 1.1149515524980358e-06, "loss": 0.0, "step": 22182 }, { "epoch": 21.329807692307693, "grad_norm": 0.0013658382231369615, "learning_rate": 1.1143800219342226e-06, "loss": 0.0, "step": 22183 }, { "epoch": 21.33076923076923, "grad_norm": 0.0030655355658382177, "learning_rate": 1.1138086292491502e-06, "loss": 0.0, "step": 22184 }, { "epoch": 21.33173076923077, "grad_norm": 0.0018539706943556666, "learning_rate": 1.1132373744516878e-06, "loss": 0.0, "step": 22185 }, { "epoch": 21.33269230769231, "grad_norm": 0.0010683831060305238, "learning_rate": 1.1126662575507009e-06, "loss": 0.0, "step": 22186 }, { "epoch": 21.333653846153847, "grad_norm": 1.087026834487915, "learning_rate": 1.1120952785550477e-06, "loss": 0.005, "step": 22187 }, { "epoch": 21.334615384615386, "grad_norm": 0.001472757663577795, "learning_rate": 1.11152443747359e-06, "loss": 0.0, "step": 22188 }, { "epoch": 21.335576923076925, "grad_norm": 0.0009628087864257395, "learning_rate": 1.1109537343151877e-06, "loss": 0.0, "step": 22189 }, { "epoch": 21.33653846153846, "grad_norm": 0.004008375108242035, "learning_rate": 1.1103831690886912e-06, "loss": 0.0, "step": 22190 }, { "epoch": 21.3375, "grad_norm": 0.0013834470883011818, "learning_rate": 1.1098127418029592e-06, "loss": 0.0, "step": 22191 }, { "epoch": 21.338461538461537, "grad_norm": 0.0005804047104902565, "learning_rate": 1.1092424524668422e-06, "loss": 0.0, "step": 22192 }, { "epoch": 21.339423076923076, "grad_norm": 0.0016574043547734618, "learning_rate": 1.1086723010891852e-06, "loss": 0.0, "step": 22193 }, { "epoch": 21.340384615384615, "grad_norm": 0.0017810299759730697, "learning_rate": 1.1081022876788416e-06, "loss": 0.0, "step": 22194 }, { "epoch": 21.341346153846153, "grad_norm": 0.002142286626622081, "learning_rate": 1.1075324122446517e-06, "loss": 0.0, "step": 22195 }, { "epoch": 21.342307692307692, "grad_norm": 0.001798731042072177, "learning_rate": 1.1069626747954587e-06, "loss": 0.0, "step": 22196 }, { "epoch": 21.34326923076923, "grad_norm": 0.0012381370179355145, "learning_rate": 1.1063930753401065e-06, "loss": 0.0, "step": 22197 }, { "epoch": 21.34423076923077, "grad_norm": 0.005081928335130215, "learning_rate": 1.105823613887429e-06, "loss": 0.0001, "step": 22198 }, { "epoch": 21.345192307692308, "grad_norm": 0.0017252740217372775, "learning_rate": 1.1052542904462648e-06, "loss": 0.0, "step": 22199 }, { "epoch": 21.346153846153847, "grad_norm": 0.0025633827317506075, "learning_rate": 1.1046851050254504e-06, "loss": 0.0, "step": 22200 }, { "epoch": 21.347115384615385, "grad_norm": 0.0014821685617789626, "learning_rate": 1.1041160576338139e-06, "loss": 0.0, "step": 22201 }, { "epoch": 21.348076923076924, "grad_norm": 0.003394585568457842, "learning_rate": 1.1035471482801873e-06, "loss": 0.0, "step": 22202 }, { "epoch": 21.349038461538463, "grad_norm": 0.0020937819499522448, "learning_rate": 1.1029783769734003e-06, "loss": 0.0, "step": 22203 }, { "epoch": 21.35, "grad_norm": 0.3516179323196411, "learning_rate": 1.1024097437222736e-06, "loss": 0.0007, "step": 22204 }, { "epoch": 21.35096153846154, "grad_norm": 0.0016511069843545556, "learning_rate": 1.1018412485356366e-06, "loss": 0.0, "step": 22205 }, { "epoch": 21.351923076923075, "grad_norm": 0.0011983781587332487, "learning_rate": 1.1012728914223048e-06, "loss": 0.0, "step": 22206 }, { "epoch": 21.352884615384614, "grad_norm": 0.0011782444780692458, "learning_rate": 1.1007046723911009e-06, "loss": 0.0, "step": 22207 }, { "epoch": 21.353846153846153, "grad_norm": 0.0013588910223916173, "learning_rate": 1.1001365914508434e-06, "loss": 0.0, "step": 22208 }, { "epoch": 21.35480769230769, "grad_norm": 0.0020867539569735527, "learning_rate": 1.099568648610343e-06, "loss": 0.0, "step": 22209 }, { "epoch": 21.35576923076923, "grad_norm": 0.0010843350319191813, "learning_rate": 1.0990008438784138e-06, "loss": 0.0, "step": 22210 }, { "epoch": 21.35673076923077, "grad_norm": 0.0005909845931455493, "learning_rate": 1.0984331772638701e-06, "loss": 0.0, "step": 22211 }, { "epoch": 21.357692307692307, "grad_norm": 0.0024447196628898382, "learning_rate": 1.0978656487755156e-06, "loss": 0.0, "step": 22212 }, { "epoch": 21.358653846153846, "grad_norm": 0.003726630937308073, "learning_rate": 1.0972982584221592e-06, "loss": 0.0, "step": 22213 }, { "epoch": 21.359615384615385, "grad_norm": 0.0021950011141598225, "learning_rate": 1.0967310062126068e-06, "loss": 0.0, "step": 22214 }, { "epoch": 21.360576923076923, "grad_norm": 0.0005343161756172776, "learning_rate": 1.0961638921556561e-06, "loss": 0.0, "step": 22215 }, { "epoch": 21.361538461538462, "grad_norm": 0.001423424226231873, "learning_rate": 1.095596916260111e-06, "loss": 0.0, "step": 22216 }, { "epoch": 21.3625, "grad_norm": 0.0007797836442478001, "learning_rate": 1.0950300785347668e-06, "loss": 0.0, "step": 22217 }, { "epoch": 21.36346153846154, "grad_norm": 0.0019582747481763363, "learning_rate": 1.0944633789884184e-06, "loss": 0.0, "step": 22218 }, { "epoch": 21.364423076923078, "grad_norm": 0.002187219448387623, "learning_rate": 1.0938968176298637e-06, "loss": 0.0, "step": 22219 }, { "epoch": 21.365384615384617, "grad_norm": 0.00047898420598357916, "learning_rate": 1.0933303944678897e-06, "loss": 0.0, "step": 22220 }, { "epoch": 21.366346153846155, "grad_norm": 0.0014595647808164358, "learning_rate": 1.0927641095112874e-06, "loss": 0.0, "step": 22221 }, { "epoch": 21.36730769230769, "grad_norm": 0.011847822926938534, "learning_rate": 1.092197962768845e-06, "loss": 0.0, "step": 22222 }, { "epoch": 21.36826923076923, "grad_norm": 0.0019058875041082501, "learning_rate": 1.0916319542493436e-06, "loss": 0.0, "step": 22223 }, { "epoch": 21.369230769230768, "grad_norm": 0.002020298270508647, "learning_rate": 1.0910660839615695e-06, "loss": 0.0, "step": 22224 }, { "epoch": 21.370192307692307, "grad_norm": 0.0008196308044716716, "learning_rate": 1.0905003519143042e-06, "loss": 0.0, "step": 22225 }, { "epoch": 21.371153846153845, "grad_norm": 0.0028373750392347574, "learning_rate": 1.0899347581163222e-06, "loss": 0.0, "step": 22226 }, { "epoch": 21.372115384615384, "grad_norm": 0.0009640381904318929, "learning_rate": 1.0893693025764041e-06, "loss": 0.0, "step": 22227 }, { "epoch": 21.373076923076923, "grad_norm": 0.0008411793387494981, "learning_rate": 1.0888039853033193e-06, "loss": 0.0, "step": 22228 }, { "epoch": 21.37403846153846, "grad_norm": 0.0012152064591646194, "learning_rate": 1.0882388063058437e-06, "loss": 0.0, "step": 22229 }, { "epoch": 21.375, "grad_norm": 0.003888217732310295, "learning_rate": 1.087673765592747e-06, "loss": 0.0, "step": 22230 }, { "epoch": 21.37596153846154, "grad_norm": 0.0013033393770456314, "learning_rate": 1.0871088631727956e-06, "loss": 0.0, "step": 22231 }, { "epoch": 21.376923076923077, "grad_norm": 0.0018195932498201728, "learning_rate": 1.0865440990547548e-06, "loss": 0.0, "step": 22232 }, { "epoch": 21.377884615384616, "grad_norm": 0.0011897659860551357, "learning_rate": 1.0859794732473904e-06, "loss": 0.0, "step": 22233 }, { "epoch": 21.378846153846155, "grad_norm": 0.0009592318092472851, "learning_rate": 1.0854149857594608e-06, "loss": 0.0, "step": 22234 }, { "epoch": 21.379807692307693, "grad_norm": 0.0009481104789301753, "learning_rate": 1.084850636599727e-06, "loss": 0.0, "step": 22235 }, { "epoch": 21.380769230769232, "grad_norm": 0.002078915247693658, "learning_rate": 1.084286425776947e-06, "loss": 0.0, "step": 22236 }, { "epoch": 21.38173076923077, "grad_norm": 0.0013878809986636043, "learning_rate": 1.0837223532998731e-06, "loss": 0.0, "step": 22237 }, { "epoch": 21.38269230769231, "grad_norm": 0.0013107508420944214, "learning_rate": 1.0831584191772604e-06, "loss": 0.0, "step": 22238 }, { "epoch": 21.383653846153845, "grad_norm": 0.0013218383537605405, "learning_rate": 1.0825946234178575e-06, "loss": 0.0, "step": 22239 }, { "epoch": 21.384615384615383, "grad_norm": 0.0010547678684815764, "learning_rate": 1.082030966030413e-06, "loss": 0.0, "step": 22240 }, { "epoch": 21.385576923076922, "grad_norm": 0.001503856503404677, "learning_rate": 1.0814674470236753e-06, "loss": 0.0, "step": 22241 }, { "epoch": 21.38653846153846, "grad_norm": 0.0008109982591122389, "learning_rate": 1.0809040664063863e-06, "loss": 0.0, "step": 22242 }, { "epoch": 21.3875, "grad_norm": 0.0003240800288040191, "learning_rate": 1.0803408241872893e-06, "loss": 0.0, "step": 22243 }, { "epoch": 21.388461538461538, "grad_norm": 0.0028959207702428102, "learning_rate": 1.0797777203751247e-06, "loss": 0.0, "step": 22244 }, { "epoch": 21.389423076923077, "grad_norm": 0.0004466300015337765, "learning_rate": 1.0792147549786281e-06, "loss": 0.0, "step": 22245 }, { "epoch": 21.390384615384615, "grad_norm": 0.009487152099609375, "learning_rate": 1.0786519280065355e-06, "loss": 0.0001, "step": 22246 }, { "epoch": 21.391346153846154, "grad_norm": 0.0011122396681457758, "learning_rate": 1.0780892394675846e-06, "loss": 0.0, "step": 22247 }, { "epoch": 21.392307692307693, "grad_norm": 0.0012398172402754426, "learning_rate": 1.0775266893704994e-06, "loss": 0.0, "step": 22248 }, { "epoch": 21.39326923076923, "grad_norm": 0.0005453343619592488, "learning_rate": 1.076964277724014e-06, "loss": 0.0, "step": 22249 }, { "epoch": 21.39423076923077, "grad_norm": 0.0005427966825664043, "learning_rate": 1.076402004536856e-06, "loss": 0.0, "step": 22250 }, { "epoch": 21.39519230769231, "grad_norm": 0.026951443403959274, "learning_rate": 1.0758398698177464e-06, "loss": 0.0001, "step": 22251 }, { "epoch": 21.396153846153847, "grad_norm": 0.0015933209797367454, "learning_rate": 1.0752778735754121e-06, "loss": 0.0, "step": 22252 }, { "epoch": 21.397115384615386, "grad_norm": 0.001351388986222446, "learning_rate": 1.07471601581857e-06, "loss": 0.0, "step": 22253 }, { "epoch": 21.398076923076925, "grad_norm": 0.000968524138443172, "learning_rate": 1.0741542965559393e-06, "loss": 0.0, "step": 22254 }, { "epoch": 21.39903846153846, "grad_norm": 0.0015974640846252441, "learning_rate": 1.0735927157962378e-06, "loss": 0.0, "step": 22255 }, { "epoch": 21.4, "grad_norm": 0.002519880421459675, "learning_rate": 1.0730312735481785e-06, "loss": 0.0, "step": 22256 }, { "epoch": 21.400961538461537, "grad_norm": 0.01581893302500248, "learning_rate": 1.0724699698204722e-06, "loss": 0.0, "step": 22257 }, { "epoch": 21.401923076923076, "grad_norm": 0.001066537806764245, "learning_rate": 1.071908804621833e-06, "loss": 0.0, "step": 22258 }, { "epoch": 21.402884615384615, "grad_norm": 0.0011554703814908862, "learning_rate": 1.0713477779609627e-06, "loss": 0.0, "step": 22259 }, { "epoch": 21.403846153846153, "grad_norm": 0.0015222304500639439, "learning_rate": 1.07078688984657e-06, "loss": 0.0, "step": 22260 }, { "epoch": 21.404807692307692, "grad_norm": 0.0058359368704259396, "learning_rate": 1.0702261402873592e-06, "loss": 0.0, "step": 22261 }, { "epoch": 21.40576923076923, "grad_norm": 0.0015740996459499002, "learning_rate": 1.069665529292029e-06, "loss": 0.0, "step": 22262 }, { "epoch": 21.40673076923077, "grad_norm": 0.005510641261935234, "learning_rate": 1.0691050568692807e-06, "loss": 0.0, "step": 22263 }, { "epoch": 21.407692307692308, "grad_norm": 0.0012126225046813488, "learning_rate": 1.068544723027809e-06, "loss": 0.0, "step": 22264 }, { "epoch": 21.408653846153847, "grad_norm": 0.0036129276268184185, "learning_rate": 1.067984527776309e-06, "loss": 0.0, "step": 22265 }, { "epoch": 21.409615384615385, "grad_norm": 0.00037496883305720985, "learning_rate": 1.067424471123476e-06, "loss": 0.0, "step": 22266 }, { "epoch": 21.410576923076924, "grad_norm": 0.0007920266361907125, "learning_rate": 1.0668645530779975e-06, "loss": 0.0, "step": 22267 }, { "epoch": 21.411538461538463, "grad_norm": 0.0013844178756698966, "learning_rate": 1.066304773648561e-06, "loss": 0.0, "step": 22268 }, { "epoch": 21.4125, "grad_norm": 0.004645740147680044, "learning_rate": 1.0657451328438573e-06, "loss": 0.0, "step": 22269 }, { "epoch": 21.41346153846154, "grad_norm": 0.000746876175981015, "learning_rate": 1.0651856306725661e-06, "loss": 0.0, "step": 22270 }, { "epoch": 21.414423076923075, "grad_norm": 0.002881850814446807, "learning_rate": 1.0646262671433695e-06, "loss": 0.0, "step": 22271 }, { "epoch": 21.415384615384614, "grad_norm": 0.000638669531326741, "learning_rate": 1.0640670422649502e-06, "loss": 0.0, "step": 22272 }, { "epoch": 21.416346153846153, "grad_norm": 0.0009870203211903572, "learning_rate": 1.0635079560459826e-06, "loss": 0.0, "step": 22273 }, { "epoch": 21.41730769230769, "grad_norm": 0.001090599806047976, "learning_rate": 1.0629490084951445e-06, "loss": 0.0, "step": 22274 }, { "epoch": 21.41826923076923, "grad_norm": 0.00156666140537709, "learning_rate": 1.062390199621106e-06, "loss": 0.0, "step": 22275 }, { "epoch": 21.41923076923077, "grad_norm": 0.001176761114038527, "learning_rate": 1.0618315294325398e-06, "loss": 0.0, "step": 22276 }, { "epoch": 21.420192307692307, "grad_norm": 0.0016163851832970977, "learning_rate": 1.0612729979381175e-06, "loss": 0.0, "step": 22277 }, { "epoch": 21.421153846153846, "grad_norm": 0.00260986783541739, "learning_rate": 1.0607146051465011e-06, "loss": 0.0, "step": 22278 }, { "epoch": 21.422115384615385, "grad_norm": 0.0011099620023742318, "learning_rate": 1.0601563510663582e-06, "loss": 0.0, "step": 22279 }, { "epoch": 21.423076923076923, "grad_norm": 0.0013831999385729432, "learning_rate": 1.0595982357063516e-06, "loss": 0.0, "step": 22280 }, { "epoch": 21.424038461538462, "grad_norm": 0.0015353048220276833, "learning_rate": 1.0590402590751392e-06, "loss": 0.0, "step": 22281 }, { "epoch": 21.425, "grad_norm": 0.0013244429137557745, "learning_rate": 1.0584824211813804e-06, "loss": 0.0, "step": 22282 }, { "epoch": 21.42596153846154, "grad_norm": 0.001897653448395431, "learning_rate": 1.057924722033733e-06, "loss": 0.0, "step": 22283 }, { "epoch": 21.426923076923078, "grad_norm": 0.0009856245014816523, "learning_rate": 1.0573671616408476e-06, "loss": 0.0, "step": 22284 }, { "epoch": 21.427884615384617, "grad_norm": 0.0005969231715425849, "learning_rate": 1.0568097400113796e-06, "loss": 0.0, "step": 22285 }, { "epoch": 21.428846153846155, "grad_norm": 0.001731951953843236, "learning_rate": 1.0562524571539745e-06, "loss": 0.0, "step": 22286 }, { "epoch": 21.42980769230769, "grad_norm": 0.0007555357296951115, "learning_rate": 1.0556953130772818e-06, "loss": 0.0, "step": 22287 }, { "epoch": 21.43076923076923, "grad_norm": 0.0010992602910846472, "learning_rate": 1.055138307789949e-06, "loss": 0.0, "step": 22288 }, { "epoch": 21.431730769230768, "grad_norm": 0.0006899591535329819, "learning_rate": 1.0545814413006138e-06, "loss": 0.0, "step": 22289 }, { "epoch": 21.432692307692307, "grad_norm": 0.0012315618805587292, "learning_rate": 1.0540247136179215e-06, "loss": 0.0, "step": 22290 }, { "epoch": 21.433653846153845, "grad_norm": 0.0012783035635948181, "learning_rate": 1.0534681247505107e-06, "loss": 0.0, "step": 22291 }, { "epoch": 21.434615384615384, "grad_norm": 0.0009912445675581694, "learning_rate": 1.0529116747070145e-06, "loss": 0.0, "step": 22292 }, { "epoch": 21.435576923076923, "grad_norm": 0.0008745189406909049, "learning_rate": 1.0523553634960704e-06, "loss": 0.0, "step": 22293 }, { "epoch": 21.43653846153846, "grad_norm": 0.0012337584048509598, "learning_rate": 1.0517991911263114e-06, "loss": 0.0, "step": 22294 }, { "epoch": 21.4375, "grad_norm": 0.0005031016189604998, "learning_rate": 1.0512431576063642e-06, "loss": 0.0, "step": 22295 }, { "epoch": 21.43846153846154, "grad_norm": 0.0013846470974385738, "learning_rate": 1.0506872629448618e-06, "loss": 0.0, "step": 22296 }, { "epoch": 21.439423076923077, "grad_norm": 0.0005268270615488291, "learning_rate": 1.0501315071504247e-06, "loss": 0.0, "step": 22297 }, { "epoch": 21.440384615384616, "grad_norm": 0.0006499704322777689, "learning_rate": 1.0495758902316788e-06, "loss": 0.0, "step": 22298 }, { "epoch": 21.441346153846155, "grad_norm": 0.0009486275957897305, "learning_rate": 1.049020412197248e-06, "loss": 0.0, "step": 22299 }, { "epoch": 21.442307692307693, "grad_norm": 0.003165161469951272, "learning_rate": 1.0484650730557477e-06, "loss": 0.0, "step": 22300 }, { "epoch": 21.443269230769232, "grad_norm": 0.0011780449422076344, "learning_rate": 1.0479098728157977e-06, "loss": 0.0, "step": 22301 }, { "epoch": 21.44423076923077, "grad_norm": 0.001190123613923788, "learning_rate": 1.0473548114860133e-06, "loss": 0.0, "step": 22302 }, { "epoch": 21.44519230769231, "grad_norm": 0.00074257783126086, "learning_rate": 1.0467998890750052e-06, "loss": 0.0, "step": 22303 }, { "epoch": 21.446153846153845, "grad_norm": 0.0007129762088879943, "learning_rate": 1.0462451055913847e-06, "loss": 0.0, "step": 22304 }, { "epoch": 21.447115384615383, "grad_norm": 0.0012636918108910322, "learning_rate": 1.0456904610437645e-06, "loss": 0.0, "step": 22305 }, { "epoch": 21.448076923076922, "grad_norm": 0.0017461131792515516, "learning_rate": 1.0451359554407447e-06, "loss": 0.0, "step": 22306 }, { "epoch": 21.44903846153846, "grad_norm": 0.0006092882831580937, "learning_rate": 1.044581588790935e-06, "loss": 0.0, "step": 22307 }, { "epoch": 21.45, "grad_norm": 0.0006267917924560606, "learning_rate": 1.0440273611029328e-06, "loss": 0.0, "step": 22308 }, { "epoch": 21.450961538461538, "grad_norm": 0.0008058820967562497, "learning_rate": 1.0434732723853414e-06, "loss": 0.0, "step": 22309 }, { "epoch": 21.451923076923077, "grad_norm": 0.002739949384704232, "learning_rate": 1.0429193226467594e-06, "loss": 0.0, "step": 22310 }, { "epoch": 21.452884615384615, "grad_norm": 0.0008085730951279402, "learning_rate": 1.0423655118957787e-06, "loss": 0.0, "step": 22311 }, { "epoch": 21.453846153846154, "grad_norm": 0.0015766238793730736, "learning_rate": 1.0418118401409949e-06, "loss": 0.0, "step": 22312 }, { "epoch": 21.454807692307693, "grad_norm": 0.0006500417366623878, "learning_rate": 1.041258307391001e-06, "loss": 0.0, "step": 22313 }, { "epoch": 21.45576923076923, "grad_norm": 0.008629443123936653, "learning_rate": 1.0407049136543823e-06, "loss": 0.0, "step": 22314 }, { "epoch": 21.45673076923077, "grad_norm": 0.0013163731200620532, "learning_rate": 1.0401516589397286e-06, "loss": 0.0, "step": 22315 }, { "epoch": 21.45769230769231, "grad_norm": 0.0008586301701143384, "learning_rate": 1.0395985432556266e-06, "loss": 0.0, "step": 22316 }, { "epoch": 21.458653846153847, "grad_norm": 0.0021362234838306904, "learning_rate": 1.0390455666106547e-06, "loss": 0.0, "step": 22317 }, { "epoch": 21.459615384615386, "grad_norm": 0.0017354085575789213, "learning_rate": 1.0384927290133972e-06, "loss": 0.0, "step": 22318 }, { "epoch": 21.460576923076925, "grad_norm": 0.001278202049434185, "learning_rate": 1.0379400304724297e-06, "loss": 0.0, "step": 22319 }, { "epoch": 21.46153846153846, "grad_norm": 0.006386212073266506, "learning_rate": 1.0373874709963283e-06, "loss": 0.0, "step": 22320 }, { "epoch": 21.4625, "grad_norm": 0.0006918426370248199, "learning_rate": 1.0368350505936708e-06, "loss": 0.0, "step": 22321 }, { "epoch": 21.463461538461537, "grad_norm": 0.0024315237533301115, "learning_rate": 1.036282769273026e-06, "loss": 0.0, "step": 22322 }, { "epoch": 21.464423076923076, "grad_norm": 0.004341526888310909, "learning_rate": 1.0357306270429623e-06, "loss": 0.0, "step": 22323 }, { "epoch": 21.465384615384615, "grad_norm": 0.0017907997826114297, "learning_rate": 1.0351786239120532e-06, "loss": 0.0, "step": 22324 }, { "epoch": 21.466346153846153, "grad_norm": 0.0017095590010285378, "learning_rate": 1.034626759888857e-06, "loss": 0.0, "step": 22325 }, { "epoch": 21.467307692307692, "grad_norm": 0.0012105440255254507, "learning_rate": 1.0340750349819416e-06, "loss": 0.0, "step": 22326 }, { "epoch": 21.46826923076923, "grad_norm": 0.005300987046211958, "learning_rate": 1.033523449199869e-06, "loss": 0.0, "step": 22327 }, { "epoch": 21.46923076923077, "grad_norm": 0.0008557598339393735, "learning_rate": 1.0329720025511935e-06, "loss": 0.0, "step": 22328 }, { "epoch": 21.470192307692308, "grad_norm": 0.0016444065840914845, "learning_rate": 1.0324206950444771e-06, "loss": 0.0, "step": 22329 }, { "epoch": 21.471153846153847, "grad_norm": 0.0016163383843377233, "learning_rate": 1.0318695266882696e-06, "loss": 0.0, "step": 22330 }, { "epoch": 21.472115384615385, "grad_norm": 0.00102688604965806, "learning_rate": 1.031318497491126e-06, "loss": 0.0, "step": 22331 }, { "epoch": 21.473076923076924, "grad_norm": 0.000692417030222714, "learning_rate": 1.030767607461598e-06, "loss": 0.0, "step": 22332 }, { "epoch": 21.474038461538463, "grad_norm": 0.00092484918422997, "learning_rate": 1.0302168566082315e-06, "loss": 0.0, "step": 22333 }, { "epoch": 21.475, "grad_norm": 0.0015035313554108143, "learning_rate": 1.0296662449395722e-06, "loss": 0.0, "step": 22334 }, { "epoch": 21.47596153846154, "grad_norm": 0.0008458750671707094, "learning_rate": 1.0291157724641686e-06, "loss": 0.0, "step": 22335 }, { "epoch": 21.476923076923075, "grad_norm": 0.0005066985613666475, "learning_rate": 1.0285654391905564e-06, "loss": 0.0, "step": 22336 }, { "epoch": 21.477884615384614, "grad_norm": 0.001446246518753469, "learning_rate": 1.0280152451272774e-06, "loss": 0.0, "step": 22337 }, { "epoch": 21.478846153846153, "grad_norm": 0.0010122217936441302, "learning_rate": 1.0274651902828714e-06, "loss": 0.0, "step": 22338 }, { "epoch": 21.47980769230769, "grad_norm": 0.0036638411693274975, "learning_rate": 1.0269152746658695e-06, "loss": 0.0, "step": 22339 }, { "epoch": 21.48076923076923, "grad_norm": 0.0008271439583040774, "learning_rate": 1.026365498284807e-06, "loss": 0.0, "step": 22340 }, { "epoch": 21.48173076923077, "grad_norm": 0.0029660991858690977, "learning_rate": 1.025815861148216e-06, "loss": 0.0, "step": 22341 }, { "epoch": 21.482692307692307, "grad_norm": 0.002636128570884466, "learning_rate": 1.025266363264623e-06, "loss": 0.0, "step": 22342 }, { "epoch": 21.483653846153846, "grad_norm": 0.0011567645706236362, "learning_rate": 1.024717004642557e-06, "loss": 0.0, "step": 22343 }, { "epoch": 21.484615384615385, "grad_norm": 0.000908436079043895, "learning_rate": 1.0241677852905384e-06, "loss": 0.0, "step": 22344 }, { "epoch": 21.485576923076923, "grad_norm": 0.0020058555528521538, "learning_rate": 1.023618705217093e-06, "loss": 0.0, "step": 22345 }, { "epoch": 21.486538461538462, "grad_norm": 0.0011859608348459005, "learning_rate": 1.0230697644307419e-06, "loss": 0.0, "step": 22346 }, { "epoch": 21.4875, "grad_norm": 0.0016453859861940145, "learning_rate": 1.022520962939999e-06, "loss": 0.0, "step": 22347 }, { "epoch": 21.48846153846154, "grad_norm": 0.0014281291514635086, "learning_rate": 1.0219723007533822e-06, "loss": 0.0, "step": 22348 }, { "epoch": 21.489423076923078, "grad_norm": 0.0014302601339295506, "learning_rate": 1.0214237778794068e-06, "loss": 0.0, "step": 22349 }, { "epoch": 21.490384615384617, "grad_norm": 0.0006032210658304393, "learning_rate": 1.0208753943265803e-06, "loss": 0.0, "step": 22350 }, { "epoch": 21.491346153846155, "grad_norm": 0.003994848113507032, "learning_rate": 1.020327150103414e-06, "loss": 0.0, "step": 22351 }, { "epoch": 21.49230769230769, "grad_norm": 0.0015414315275847912, "learning_rate": 1.0197790452184187e-06, "loss": 0.0, "step": 22352 }, { "epoch": 21.49326923076923, "grad_norm": 0.0014382239896804094, "learning_rate": 1.019231079680093e-06, "loss": 0.0, "step": 22353 }, { "epoch": 21.494230769230768, "grad_norm": 0.002203993033617735, "learning_rate": 1.018683253496945e-06, "loss": 0.0, "step": 22354 }, { "epoch": 21.495192307692307, "grad_norm": 0.001144222216680646, "learning_rate": 1.0181355666774706e-06, "loss": 0.0, "step": 22355 }, { "epoch": 21.496153846153845, "grad_norm": 0.0011910314206033945, "learning_rate": 1.0175880192301713e-06, "loss": 0.0, "step": 22356 }, { "epoch": 21.497115384615384, "grad_norm": 0.0018939337460324168, "learning_rate": 1.0170406111635445e-06, "loss": 0.0, "step": 22357 }, { "epoch": 21.498076923076923, "grad_norm": 0.0007879119948484004, "learning_rate": 1.0164933424860801e-06, "loss": 0.0, "step": 22358 }, { "epoch": 21.49903846153846, "grad_norm": 0.0040602595545351505, "learning_rate": 1.0159462132062736e-06, "loss": 0.0, "step": 22359 }, { "epoch": 21.5, "grad_norm": 0.001318406662903726, "learning_rate": 1.015399223332616e-06, "loss": 0.0, "step": 22360 }, { "epoch": 21.50096153846154, "grad_norm": 0.0006114220595918596, "learning_rate": 1.0148523728735915e-06, "loss": 0.0, "step": 22361 }, { "epoch": 21.501923076923077, "grad_norm": 0.0013608684530481696, "learning_rate": 1.0143056618376856e-06, "loss": 0.0, "step": 22362 }, { "epoch": 21.502884615384616, "grad_norm": 0.0006256106426008046, "learning_rate": 1.0137590902333861e-06, "loss": 0.0, "step": 22363 }, { "epoch": 21.503846153846155, "grad_norm": 0.0006175205926410854, "learning_rate": 1.0132126580691692e-06, "loss": 0.0, "step": 22364 }, { "epoch": 21.504807692307693, "grad_norm": 0.0015931824455037713, "learning_rate": 1.0126663653535184e-06, "loss": 0.0, "step": 22365 }, { "epoch": 21.505769230769232, "grad_norm": 0.0011232636170461774, "learning_rate": 1.0121202120949048e-06, "loss": 0.0, "step": 22366 }, { "epoch": 21.50673076923077, "grad_norm": 0.0020179080311208963, "learning_rate": 1.011574198301808e-06, "loss": 0.0, "step": 22367 }, { "epoch": 21.50769230769231, "grad_norm": 0.0012646486284211278, "learning_rate": 1.0110283239827002e-06, "loss": 0.0, "step": 22368 }, { "epoch": 21.508653846153845, "grad_norm": 0.0024843208957463503, "learning_rate": 1.010482589146048e-06, "loss": 0.0, "step": 22369 }, { "epoch": 21.509615384615383, "grad_norm": 0.000438313843915239, "learning_rate": 1.0099369938003224e-06, "loss": 0.0, "step": 22370 }, { "epoch": 21.510576923076922, "grad_norm": 0.0008147300104610622, "learning_rate": 1.009391537953991e-06, "loss": 0.0, "step": 22371 }, { "epoch": 21.51153846153846, "grad_norm": 0.0010481603676453233, "learning_rate": 1.0088462216155137e-06, "loss": 0.0, "step": 22372 }, { "epoch": 21.5125, "grad_norm": 1.545990228652954, "learning_rate": 1.008301044793355e-06, "loss": 0.0047, "step": 22373 }, { "epoch": 21.513461538461538, "grad_norm": 0.0020148635376244783, "learning_rate": 1.0077560074959758e-06, "loss": 0.0, "step": 22374 }, { "epoch": 21.514423076923077, "grad_norm": 0.0020843353122472763, "learning_rate": 1.0072111097318282e-06, "loss": 0.0, "step": 22375 }, { "epoch": 21.515384615384615, "grad_norm": 0.0011544027365744114, "learning_rate": 1.0066663515093743e-06, "loss": 0.0, "step": 22376 }, { "epoch": 21.516346153846154, "grad_norm": 0.0007608390296809375, "learning_rate": 1.0061217328370609e-06, "loss": 0.0, "step": 22377 }, { "epoch": 21.517307692307693, "grad_norm": 0.0010368638904765248, "learning_rate": 1.005577253723341e-06, "loss": 0.0, "step": 22378 }, { "epoch": 21.51826923076923, "grad_norm": 0.0007033250876702368, "learning_rate": 1.0050329141766657e-06, "loss": 0.0, "step": 22379 }, { "epoch": 21.51923076923077, "grad_norm": 0.00036978957359679043, "learning_rate": 1.0044887142054781e-06, "loss": 0.0, "step": 22380 }, { "epoch": 21.52019230769231, "grad_norm": 0.0029149523470550776, "learning_rate": 1.003944653818224e-06, "loss": 0.0, "step": 22381 }, { "epoch": 21.521153846153847, "grad_norm": 0.001337378635071218, "learning_rate": 1.0034007330233487e-06, "loss": 0.0, "step": 22382 }, { "epoch": 21.522115384615386, "grad_norm": 0.0007137152715586126, "learning_rate": 1.0028569518292863e-06, "loss": 0.0, "step": 22383 }, { "epoch": 21.523076923076925, "grad_norm": 0.0018780543468892574, "learning_rate": 1.0023133102444782e-06, "loss": 0.0, "step": 22384 }, { "epoch": 21.52403846153846, "grad_norm": 0.0019749593921005726, "learning_rate": 1.0017698082773608e-06, "loss": 0.0, "step": 22385 }, { "epoch": 21.525, "grad_norm": 0.00040439158328808844, "learning_rate": 1.0012264459363652e-06, "loss": 0.0, "step": 22386 }, { "epoch": 21.525961538461537, "grad_norm": 0.0010341753950342536, "learning_rate": 1.0006832232299256e-06, "loss": 0.0, "step": 22387 }, { "epoch": 21.526923076923076, "grad_norm": 0.0019348972709849477, "learning_rate": 1.000140140166468e-06, "loss": 0.0, "step": 22388 }, { "epoch": 21.527884615384615, "grad_norm": 0.0011787198018282652, "learning_rate": 9.995971967544217e-07, "loss": 0.0, "step": 22389 }, { "epoch": 21.528846153846153, "grad_norm": 0.001594453933648765, "learning_rate": 9.990543930022124e-07, "loss": 0.0, "step": 22390 }, { "epoch": 21.529807692307692, "grad_norm": 0.0013948807027190924, "learning_rate": 9.98511728918259e-07, "loss": 0.0, "step": 22391 }, { "epoch": 21.53076923076923, "grad_norm": 0.0010497192852199078, "learning_rate": 9.979692045109857e-07, "loss": 0.0, "step": 22392 }, { "epoch": 21.53173076923077, "grad_norm": 0.0009882452432066202, "learning_rate": 9.974268197888104e-07, "loss": 0.0, "step": 22393 }, { "epoch": 21.532692307692308, "grad_norm": 0.0006048829527571797, "learning_rate": 9.968845747601474e-07, "loss": 0.0, "step": 22394 }, { "epoch": 21.533653846153847, "grad_norm": 0.0012380314292386174, "learning_rate": 9.963424694334122e-07, "loss": 0.0, "step": 22395 }, { "epoch": 21.534615384615385, "grad_norm": 0.00185396708548069, "learning_rate": 9.958005038170182e-07, "loss": 0.0, "step": 22396 }, { "epoch": 21.535576923076924, "grad_norm": 0.0008923744317144156, "learning_rate": 9.952586779193718e-07, "loss": 0.0, "step": 22397 }, { "epoch": 21.536538461538463, "grad_norm": 0.0011490870965644717, "learning_rate": 9.94716991748883e-07, "loss": 0.0, "step": 22398 }, { "epoch": 21.5375, "grad_norm": 0.0009523687185719609, "learning_rate": 9.941754453139552e-07, "loss": 0.0, "step": 22399 }, { "epoch": 21.53846153846154, "grad_norm": 0.000760587805416435, "learning_rate": 9.936340386229926e-07, "loss": 0.0, "step": 22400 }, { "epoch": 21.539423076923075, "grad_norm": 0.0013851700350642204, "learning_rate": 9.930927716843975e-07, "loss": 0.0, "step": 22401 }, { "epoch": 21.540384615384614, "grad_norm": 0.0009194600861519575, "learning_rate": 9.925516445065663e-07, "loss": 0.0, "step": 22402 }, { "epoch": 21.541346153846153, "grad_norm": 0.002559748012572527, "learning_rate": 9.92010657097897e-07, "loss": 0.0, "step": 22403 }, { "epoch": 21.54230769230769, "grad_norm": 0.0008055958314798772, "learning_rate": 9.914698094667863e-07, "loss": 0.0, "step": 22404 }, { "epoch": 21.54326923076923, "grad_norm": 0.0008290184196084738, "learning_rate": 9.909291016216228e-07, "loss": 0.0, "step": 22405 }, { "epoch": 21.54423076923077, "grad_norm": 0.0009042149176821113, "learning_rate": 9.903885335707974e-07, "loss": 0.0, "step": 22406 }, { "epoch": 21.545192307692307, "grad_norm": 0.001866840641014278, "learning_rate": 9.898481053227015e-07, "loss": 0.0, "step": 22407 }, { "epoch": 21.546153846153846, "grad_norm": 0.001165969530120492, "learning_rate": 9.893078168857173e-07, "loss": 0.0, "step": 22408 }, { "epoch": 21.547115384615385, "grad_norm": 0.0017006206326186657, "learning_rate": 9.887676682682312e-07, "loss": 0.0, "step": 22409 }, { "epoch": 21.548076923076923, "grad_norm": 0.003733863355591893, "learning_rate": 9.88227659478621e-07, "loss": 0.0, "step": 22410 }, { "epoch": 21.549038461538462, "grad_norm": 0.0005119474371895194, "learning_rate": 9.87687790525268e-07, "loss": 0.0, "step": 22411 }, { "epoch": 21.55, "grad_norm": 0.0025568983983248472, "learning_rate": 9.871480614165517e-07, "loss": 0.0, "step": 22412 }, { "epoch": 21.55096153846154, "grad_norm": 0.0014578299596905708, "learning_rate": 9.866084721608438e-07, "loss": 0.0, "step": 22413 }, { "epoch": 21.551923076923078, "grad_norm": 0.0017747068777680397, "learning_rate": 9.860690227665183e-07, "loss": 0.0, "step": 22414 }, { "epoch": 21.552884615384617, "grad_norm": 0.002443841891363263, "learning_rate": 9.855297132419472e-07, "loss": 0.0, "step": 22415 }, { "epoch": 21.553846153846155, "grad_norm": 0.002294978592544794, "learning_rate": 9.849905435954966e-07, "loss": 0.0, "step": 22416 }, { "epoch": 21.55480769230769, "grad_norm": 0.0015068695647642016, "learning_rate": 9.844515138355337e-07, "loss": 0.0, "step": 22417 }, { "epoch": 21.55576923076923, "grad_norm": 0.0014688782393932343, "learning_rate": 9.839126239704244e-07, "loss": 0.0, "step": 22418 }, { "epoch": 21.556730769230768, "grad_norm": 0.0013411702821031213, "learning_rate": 9.833738740085286e-07, "loss": 0.0, "step": 22419 }, { "epoch": 21.557692307692307, "grad_norm": 0.001715341117233038, "learning_rate": 9.828352639582073e-07, "loss": 0.0, "step": 22420 }, { "epoch": 21.558653846153845, "grad_norm": 0.002242073882371187, "learning_rate": 9.822967938278172e-07, "loss": 0.0, "step": 22421 }, { "epoch": 21.559615384615384, "grad_norm": 0.002024848246946931, "learning_rate": 9.817584636257126e-07, "loss": 0.0, "step": 22422 }, { "epoch": 21.560576923076923, "grad_norm": 0.0012027252232655883, "learning_rate": 9.812202733602516e-07, "loss": 0.0, "step": 22423 }, { "epoch": 21.56153846153846, "grad_norm": 0.0010781941236928105, "learning_rate": 9.806822230397805e-07, "loss": 0.0, "step": 22424 }, { "epoch": 21.5625, "grad_norm": 0.0008386333356611431, "learning_rate": 9.801443126726485e-07, "loss": 0.0, "step": 22425 }, { "epoch": 21.56346153846154, "grad_norm": 0.0006690422305837274, "learning_rate": 9.796065422672075e-07, "loss": 0.0, "step": 22426 }, { "epoch": 21.564423076923077, "grad_norm": 0.0012266072444617748, "learning_rate": 9.790689118317953e-07, "loss": 0.0, "step": 22427 }, { "epoch": 21.565384615384616, "grad_norm": 0.0013827108778059483, "learning_rate": 9.785314213747576e-07, "loss": 0.0, "step": 22428 }, { "epoch": 21.566346153846155, "grad_norm": 0.001197837758809328, "learning_rate": 9.779940709044367e-07, "loss": 0.0, "step": 22429 }, { "epoch": 21.567307692307693, "grad_norm": 0.0018697200575843453, "learning_rate": 9.774568604291657e-07, "loss": 0.0, "step": 22430 }, { "epoch": 21.568269230769232, "grad_norm": 0.002845857059583068, "learning_rate": 9.769197899572847e-07, "loss": 0.0, "step": 22431 }, { "epoch": 21.56923076923077, "grad_norm": 0.0008109689224511385, "learning_rate": 9.763828594971269e-07, "loss": 0.0, "step": 22432 }, { "epoch": 21.57019230769231, "grad_norm": 0.0007617324590682983, "learning_rate": 9.758460690570227e-07, "loss": 0.0, "step": 22433 }, { "epoch": 21.571153846153845, "grad_norm": 0.0008480894612148404, "learning_rate": 9.753094186453028e-07, "loss": 0.0, "step": 22434 }, { "epoch": 21.572115384615383, "grad_norm": 0.0014755419688299298, "learning_rate": 9.747729082702918e-07, "loss": 0.0, "step": 22435 }, { "epoch": 21.573076923076922, "grad_norm": 0.0015810118056833744, "learning_rate": 9.742365379403163e-07, "loss": 0.0, "step": 22436 }, { "epoch": 21.57403846153846, "grad_norm": 0.0006520937313325703, "learning_rate": 9.73700307663702e-07, "loss": 0.0, "step": 22437 }, { "epoch": 21.575, "grad_norm": 0.001702679437585175, "learning_rate": 9.731642174487644e-07, "loss": 0.0, "step": 22438 }, { "epoch": 21.575961538461538, "grad_norm": 0.0017705715727061033, "learning_rate": 9.726282673038245e-07, "loss": 0.0, "step": 22439 }, { "epoch": 21.576923076923077, "grad_norm": 0.0007934150635264814, "learning_rate": 9.720924572372014e-07, "loss": 0.0, "step": 22440 }, { "epoch": 21.577884615384615, "grad_norm": 0.0010959247592836618, "learning_rate": 9.715567872572051e-07, "loss": 0.0, "step": 22441 }, { "epoch": 21.578846153846154, "grad_norm": 0.001256326213479042, "learning_rate": 9.710212573721488e-07, "loss": 0.0, "step": 22442 }, { "epoch": 21.579807692307693, "grad_norm": 2.680111885070801, "learning_rate": 9.70485867590345e-07, "loss": 0.0123, "step": 22443 }, { "epoch": 21.58076923076923, "grad_norm": 0.001505814609117806, "learning_rate": 9.69950617920098e-07, "loss": 0.0, "step": 22444 }, { "epoch": 21.58173076923077, "grad_norm": 0.0031474134884774685, "learning_rate": 9.694155083697154e-07, "loss": 0.0, "step": 22445 }, { "epoch": 21.58269230769231, "grad_norm": 0.001197775942273438, "learning_rate": 9.688805389474975e-07, "loss": 0.0, "step": 22446 }, { "epoch": 21.583653846153847, "grad_norm": 0.0006469202926382422, "learning_rate": 9.683457096617487e-07, "loss": 0.0, "step": 22447 }, { "epoch": 21.584615384615386, "grad_norm": 0.0004836744046770036, "learning_rate": 9.67811020520768e-07, "loss": 0.0, "step": 22448 }, { "epoch": 21.585576923076925, "grad_norm": 0.001021327800117433, "learning_rate": 9.672764715328497e-07, "loss": 0.0, "step": 22449 }, { "epoch": 21.58653846153846, "grad_norm": 0.003516727825626731, "learning_rate": 9.667420627062896e-07, "loss": 0.0, "step": 22450 }, { "epoch": 21.5875, "grad_norm": 0.0007138408836908638, "learning_rate": 9.66207794049383e-07, "loss": 0.0, "step": 22451 }, { "epoch": 21.588461538461537, "grad_norm": 0.002679758006706834, "learning_rate": 9.656736655704146e-07, "loss": 0.0, "step": 22452 }, { "epoch": 21.589423076923076, "grad_norm": 0.0015832135686650872, "learning_rate": 9.651396772776755e-07, "loss": 0.0, "step": 22453 }, { "epoch": 21.590384615384615, "grad_norm": 0.0013827859656885266, "learning_rate": 9.646058291794546e-07, "loss": 0.0, "step": 22454 }, { "epoch": 21.591346153846153, "grad_norm": 0.0022577447816729546, "learning_rate": 9.640721212840298e-07, "loss": 0.0, "step": 22455 }, { "epoch": 21.592307692307692, "grad_norm": 0.0015059519791975617, "learning_rate": 9.635385535996876e-07, "loss": 0.0, "step": 22456 }, { "epoch": 21.59326923076923, "grad_norm": 0.0022664302960038185, "learning_rate": 9.63005126134704e-07, "loss": 0.0, "step": 22457 }, { "epoch": 21.59423076923077, "grad_norm": 0.0015951236709952354, "learning_rate": 9.624718388973565e-07, "loss": 0.0, "step": 22458 }, { "epoch": 21.595192307692308, "grad_norm": 0.0034948692191392183, "learning_rate": 9.61938691895924e-07, "loss": 0.0, "step": 22459 }, { "epoch": 21.596153846153847, "grad_norm": 0.0011899513192474842, "learning_rate": 9.614056851386743e-07, "loss": 0.0, "step": 22460 }, { "epoch": 21.597115384615385, "grad_norm": 0.0011743898503482342, "learning_rate": 9.608728186338812e-07, "loss": 0.0, "step": 22461 }, { "epoch": 21.598076923076924, "grad_norm": 0.001651837839744985, "learning_rate": 9.603400923898131e-07, "loss": 0.0, "step": 22462 }, { "epoch": 21.599038461538463, "grad_norm": 0.001470543909817934, "learning_rate": 9.598075064147328e-07, "loss": 0.0, "step": 22463 }, { "epoch": 21.6, "grad_norm": 0.0005983719020150602, "learning_rate": 9.59275060716911e-07, "loss": 0.0, "step": 22464 }, { "epoch": 21.60096153846154, "grad_norm": 0.0016214506467804313, "learning_rate": 9.587427553046036e-07, "loss": 0.0, "step": 22465 }, { "epoch": 21.601923076923075, "grad_norm": 0.0015688100829720497, "learning_rate": 9.58210590186074e-07, "loss": 0.0, "step": 22466 }, { "epoch": 21.602884615384614, "grad_norm": 0.000861142878420651, "learning_rate": 9.576785653695798e-07, "loss": 0.0, "step": 22467 }, { "epoch": 21.603846153846153, "grad_norm": 0.001219729776494205, "learning_rate": 9.571466808633733e-07, "loss": 0.0, "step": 22468 }, { "epoch": 21.60480769230769, "grad_norm": 0.0008614245452918112, "learning_rate": 9.566149366757104e-07, "loss": 0.0, "step": 22469 }, { "epoch": 21.60576923076923, "grad_norm": 0.0011536440579220653, "learning_rate": 9.560833328148433e-07, "loss": 0.0, "step": 22470 }, { "epoch": 21.60673076923077, "grad_norm": 0.0024243576917797327, "learning_rate": 9.555518692890165e-07, "loss": 0.0, "step": 22471 }, { "epoch": 21.607692307692307, "grad_norm": 0.0017746686935424805, "learning_rate": 9.550205461064832e-07, "loss": 0.0, "step": 22472 }, { "epoch": 21.608653846153846, "grad_norm": 0.0032686956692487, "learning_rate": 9.544893632754816e-07, "loss": 0.0, "step": 22473 }, { "epoch": 21.609615384615385, "grad_norm": 0.0018303717952221632, "learning_rate": 9.539583208042568e-07, "loss": 0.0, "step": 22474 }, { "epoch": 21.610576923076923, "grad_norm": 0.002711145207285881, "learning_rate": 9.534274187010506e-07, "loss": 0.0, "step": 22475 }, { "epoch": 21.611538461538462, "grad_norm": 0.0008488903986290097, "learning_rate": 9.528966569740983e-07, "loss": 0.0, "step": 22476 }, { "epoch": 21.6125, "grad_norm": 0.0021419909317046404, "learning_rate": 9.523660356316366e-07, "loss": 0.0, "step": 22477 }, { "epoch": 21.61346153846154, "grad_norm": 0.0008098274120129645, "learning_rate": 9.518355546819014e-07, "loss": 0.0, "step": 22478 }, { "epoch": 21.614423076923078, "grad_norm": 0.0006038116989657283, "learning_rate": 9.513052141331203e-07, "loss": 0.0, "step": 22479 }, { "epoch": 21.615384615384617, "grad_norm": 0.00125275703612715, "learning_rate": 9.507750139935246e-07, "loss": 0.0, "step": 22480 }, { "epoch": 21.616346153846155, "grad_norm": 0.0026906479615718126, "learning_rate": 9.502449542713432e-07, "loss": 0.0, "step": 22481 }, { "epoch": 21.61730769230769, "grad_norm": 0.00228579668328166, "learning_rate": 9.497150349747985e-07, "loss": 0.0, "step": 22482 }, { "epoch": 21.61826923076923, "grad_norm": 0.0016888021491467953, "learning_rate": 9.491852561121151e-07, "loss": 0.0, "step": 22483 }, { "epoch": 21.619230769230768, "grad_norm": 0.0016462217317894101, "learning_rate": 9.486556176915118e-07, "loss": 0.0, "step": 22484 }, { "epoch": 21.620192307692307, "grad_norm": 0.0004984189290553331, "learning_rate": 9.481261197212077e-07, "loss": 0.0, "step": 22485 }, { "epoch": 21.621153846153845, "grad_norm": 0.0022272744681686163, "learning_rate": 9.475967622094207e-07, "loss": 0.0, "step": 22486 }, { "epoch": 21.622115384615384, "grad_norm": 0.0011306606465950608, "learning_rate": 9.470675451643619e-07, "loss": 0.0, "step": 22487 }, { "epoch": 21.623076923076923, "grad_norm": 0.0016347168711945415, "learning_rate": 9.465384685942435e-07, "loss": 0.0, "step": 22488 }, { "epoch": 21.62403846153846, "grad_norm": 0.0011746386298909783, "learning_rate": 9.460095325072805e-07, "loss": 0.0, "step": 22489 }, { "epoch": 21.625, "grad_norm": 0.0006488747894763947, "learning_rate": 9.454807369116726e-07, "loss": 0.0, "step": 22490 }, { "epoch": 21.62596153846154, "grad_norm": 0.0005515827797353268, "learning_rate": 9.449520818156299e-07, "loss": 0.0, "step": 22491 }, { "epoch": 21.626923076923077, "grad_norm": 0.001850678352639079, "learning_rate": 9.444235672273571e-07, "loss": 0.0, "step": 22492 }, { "epoch": 21.627884615384616, "grad_norm": 0.0012126578949391842, "learning_rate": 9.438951931550499e-07, "loss": 0.0, "step": 22493 }, { "epoch": 21.628846153846155, "grad_norm": 0.0007385185454040766, "learning_rate": 9.433669596069128e-07, "loss": 0.0, "step": 22494 }, { "epoch": 21.629807692307693, "grad_norm": 0.0013759679859504104, "learning_rate": 9.428388665911369e-07, "loss": 0.0, "step": 22495 }, { "epoch": 21.630769230769232, "grad_norm": 0.0014943750575184822, "learning_rate": 9.423109141159193e-07, "loss": 0.0, "step": 22496 }, { "epoch": 21.63173076923077, "grad_norm": 0.000987751642242074, "learning_rate": 9.417831021894541e-07, "loss": 0.0, "step": 22497 }, { "epoch": 21.63269230769231, "grad_norm": 0.000789428420830518, "learning_rate": 9.412554308199284e-07, "loss": 0.0, "step": 22498 }, { "epoch": 21.633653846153845, "grad_norm": 0.0010208365274593234, "learning_rate": 9.407279000155311e-07, "loss": 0.0, "step": 22499 }, { "epoch": 21.634615384615383, "grad_norm": 0.0020634850952774286, "learning_rate": 9.402005097844502e-07, "loss": 0.0, "step": 22500 }, { "epoch": 21.635576923076922, "grad_norm": 0.0004088428395334631, "learning_rate": 9.396732601348645e-07, "loss": 0.0, "step": 22501 }, { "epoch": 21.63653846153846, "grad_norm": 0.002319647930562496, "learning_rate": 9.391461510749589e-07, "loss": 0.0, "step": 22502 }, { "epoch": 21.6375, "grad_norm": 0.0028052201960235834, "learning_rate": 9.386191826129132e-07, "loss": 0.0, "step": 22503 }, { "epoch": 21.638461538461538, "grad_norm": 0.0030428047757595778, "learning_rate": 9.38092354756901e-07, "loss": 0.0, "step": 22504 }, { "epoch": 21.639423076923077, "grad_norm": 0.0015042077284306288, "learning_rate": 9.375656675151013e-07, "loss": 0.0, "step": 22505 }, { "epoch": 21.640384615384615, "grad_norm": 0.003209540154784918, "learning_rate": 9.37039120895682e-07, "loss": 0.0, "step": 22506 }, { "epoch": 21.641346153846154, "grad_norm": 0.0010960521176457405, "learning_rate": 9.365127149068165e-07, "loss": 0.0, "step": 22507 }, { "epoch": 21.642307692307693, "grad_norm": 0.000936189666390419, "learning_rate": 9.35986449556675e-07, "loss": 0.0, "step": 22508 }, { "epoch": 21.64326923076923, "grad_norm": 0.0012160942424088717, "learning_rate": 9.354603248534189e-07, "loss": 0.0, "step": 22509 }, { "epoch": 21.64423076923077, "grad_norm": 0.0006343814893625677, "learning_rate": 9.349343408052147e-07, "loss": 0.0, "step": 22510 }, { "epoch": 21.64519230769231, "grad_norm": 0.0010381626198068261, "learning_rate": 9.34408497420225e-07, "loss": 0.0, "step": 22511 }, { "epoch": 21.646153846153847, "grad_norm": 0.00020758355094585568, "learning_rate": 9.338827947066076e-07, "loss": 0.0, "step": 22512 }, { "epoch": 21.647115384615386, "grad_norm": 0.0015818941174075007, "learning_rate": 9.333572326725193e-07, "loss": 0.0, "step": 22513 }, { "epoch": 21.648076923076925, "grad_norm": 0.0030200860928744078, "learning_rate": 9.328318113261192e-07, "loss": 0.0, "step": 22514 }, { "epoch": 21.64903846153846, "grad_norm": 0.0015532119432464242, "learning_rate": 9.323065306755552e-07, "loss": 0.0, "step": 22515 }, { "epoch": 21.65, "grad_norm": 0.0014879360096529126, "learning_rate": 9.317813907289819e-07, "loss": 0.0, "step": 22516 }, { "epoch": 21.650961538461537, "grad_norm": 0.002036181278526783, "learning_rate": 9.312563914945461e-07, "loss": 0.0, "step": 22517 }, { "epoch": 21.651923076923076, "grad_norm": 0.0008802211377769709, "learning_rate": 9.307315329803934e-07, "loss": 0.0, "step": 22518 }, { "epoch": 21.652884615384615, "grad_norm": 0.0031717324163764715, "learning_rate": 9.302068151946719e-07, "loss": 0.0, "step": 22519 }, { "epoch": 21.653846153846153, "grad_norm": 0.0007647423772141337, "learning_rate": 9.296822381455184e-07, "loss": 0.0, "step": 22520 }, { "epoch": 21.654807692307692, "grad_norm": 0.002774103544652462, "learning_rate": 9.291578018410762e-07, "loss": 0.0, "step": 22521 }, { "epoch": 21.65576923076923, "grad_norm": 0.0005188568611629307, "learning_rate": 9.286335062894836e-07, "loss": 0.0, "step": 22522 }, { "epoch": 21.65673076923077, "grad_norm": 0.0012079336447641253, "learning_rate": 9.281093514988737e-07, "loss": 0.0, "step": 22523 }, { "epoch": 21.657692307692308, "grad_norm": 0.0011454977793619037, "learning_rate": 9.275853374773802e-07, "loss": 0.0, "step": 22524 }, { "epoch": 21.658653846153847, "grad_norm": 0.0020735678263008595, "learning_rate": 9.270614642331377e-07, "loss": 0.0, "step": 22525 }, { "epoch": 21.659615384615385, "grad_norm": 0.0006604280788451433, "learning_rate": 9.265377317742707e-07, "loss": 0.0, "step": 22526 }, { "epoch": 21.660576923076924, "grad_norm": 0.0019275613594800234, "learning_rate": 9.260141401089095e-07, "loss": 0.0, "step": 22527 }, { "epoch": 21.661538461538463, "grad_norm": 0.007856171578168869, "learning_rate": 9.254906892451743e-07, "loss": 0.0001, "step": 22528 }, { "epoch": 21.6625, "grad_norm": 0.0013724631862714887, "learning_rate": 9.249673791911907e-07, "loss": 0.0, "step": 22529 }, { "epoch": 21.66346153846154, "grad_norm": 0.0022075006272643805, "learning_rate": 9.24444209955081e-07, "loss": 0.0, "step": 22530 }, { "epoch": 21.664423076923075, "grad_norm": 0.0008839106885716319, "learning_rate": 9.239211815449578e-07, "loss": 0.0, "step": 22531 }, { "epoch": 21.665384615384614, "grad_norm": 0.005540275014936924, "learning_rate": 9.233982939689412e-07, "loss": 0.0, "step": 22532 }, { "epoch": 21.666346153846153, "grad_norm": 0.0015175940934568644, "learning_rate": 9.228755472351447e-07, "loss": 0.0, "step": 22533 }, { "epoch": 21.66730769230769, "grad_norm": 0.0012955296551808715, "learning_rate": 9.223529413516764e-07, "loss": 0.0, "step": 22534 }, { "epoch": 21.66826923076923, "grad_norm": 0.0009547575027681887, "learning_rate": 9.218304763266483e-07, "loss": 0.0, "step": 22535 }, { "epoch": 21.66923076923077, "grad_norm": 0.0012948947260156274, "learning_rate": 9.213081521681689e-07, "loss": 0.0, "step": 22536 }, { "epoch": 21.670192307692307, "grad_norm": 0.0012753752525895834, "learning_rate": 9.207859688843402e-07, "loss": 0.0, "step": 22537 }, { "epoch": 21.671153846153846, "grad_norm": 0.0011606797343119979, "learning_rate": 9.202639264832669e-07, "loss": 0.0, "step": 22538 }, { "epoch": 21.672115384615385, "grad_norm": 0.0038670606445521116, "learning_rate": 9.197420249730471e-07, "loss": 0.0, "step": 22539 }, { "epoch": 21.673076923076923, "grad_norm": 0.0008355767349712551, "learning_rate": 9.19220264361781e-07, "loss": 0.0, "step": 22540 }, { "epoch": 21.674038461538462, "grad_norm": 0.001545147504657507, "learning_rate": 9.186986446575663e-07, "loss": 0.0, "step": 22541 }, { "epoch": 21.675, "grad_norm": 0.0008199464064091444, "learning_rate": 9.181771658684935e-07, "loss": 0.0, "step": 22542 }, { "epoch": 21.67596153846154, "grad_norm": 0.0004989883746020496, "learning_rate": 9.17655828002657e-07, "loss": 0.0, "step": 22543 }, { "epoch": 21.676923076923078, "grad_norm": 0.001422531553544104, "learning_rate": 9.171346310681472e-07, "loss": 0.0, "step": 22544 }, { "epoch": 21.677884615384617, "grad_norm": 0.0013371934182941914, "learning_rate": 9.166135750730476e-07, "loss": 0.0, "step": 22545 }, { "epoch": 21.678846153846155, "grad_norm": 0.0009698990033939481, "learning_rate": 9.160926600254461e-07, "loss": 0.0, "step": 22546 }, { "epoch": 21.67980769230769, "grad_norm": 0.0007830469403415918, "learning_rate": 9.155718859334273e-07, "loss": 0.0, "step": 22547 }, { "epoch": 21.68076923076923, "grad_norm": 0.0011955337831750512, "learning_rate": 9.150512528050693e-07, "loss": 0.0, "step": 22548 }, { "epoch": 21.681730769230768, "grad_norm": 0.02220255881547928, "learning_rate": 9.14530760648451e-07, "loss": 0.0001, "step": 22549 }, { "epoch": 21.682692307692307, "grad_norm": 0.002971168840304017, "learning_rate": 9.140104094716518e-07, "loss": 0.0, "step": 22550 }, { "epoch": 21.683653846153845, "grad_norm": 0.0006631508585996926, "learning_rate": 9.134901992827427e-07, "loss": 0.0, "step": 22551 }, { "epoch": 21.684615384615384, "grad_norm": 0.0006353050121106207, "learning_rate": 9.129701300897987e-07, "loss": 0.0, "step": 22552 }, { "epoch": 21.685576923076923, "grad_norm": 0.0004447548126336187, "learning_rate": 9.124502019008863e-07, "loss": 0.0, "step": 22553 }, { "epoch": 21.68653846153846, "grad_norm": 0.0020869423169642687, "learning_rate": 9.119304147240748e-07, "loss": 0.0, "step": 22554 }, { "epoch": 21.6875, "grad_norm": 0.0011622384190559387, "learning_rate": 9.114107685674323e-07, "loss": 0.0, "step": 22555 }, { "epoch": 21.68846153846154, "grad_norm": 0.0021966418717056513, "learning_rate": 9.108912634390177e-07, "loss": 0.0, "step": 22556 }, { "epoch": 21.689423076923077, "grad_norm": 0.0009018155978992581, "learning_rate": 9.103718993468946e-07, "loss": 0.0, "step": 22557 }, { "epoch": 21.690384615384616, "grad_norm": 0.004151888657361269, "learning_rate": 9.098526762991234e-07, "loss": 0.0, "step": 22558 }, { "epoch": 21.691346153846155, "grad_norm": 0.0019111294532194734, "learning_rate": 9.093335943037584e-07, "loss": 0.0, "step": 22559 }, { "epoch": 21.692307692307693, "grad_norm": 0.001104532042518258, "learning_rate": 9.088146533688547e-07, "loss": 0.0, "step": 22560 }, { "epoch": 21.693269230769232, "grad_norm": 0.0010405683424323797, "learning_rate": 9.082958535024677e-07, "loss": 0.0, "step": 22561 }, { "epoch": 21.69423076923077, "grad_norm": 0.0032981839030981064, "learning_rate": 9.077771947126424e-07, "loss": 0.0, "step": 22562 }, { "epoch": 21.69519230769231, "grad_norm": 0.000848091091029346, "learning_rate": 9.072586770074321e-07, "loss": 0.0, "step": 22563 }, { "epoch": 21.696153846153845, "grad_norm": 0.0008264639182016253, "learning_rate": 9.067403003948783e-07, "loss": 0.0, "step": 22564 }, { "epoch": 21.697115384615383, "grad_norm": 0.000916667515411973, "learning_rate": 9.062220648830267e-07, "loss": 0.0, "step": 22565 }, { "epoch": 21.698076923076922, "grad_norm": 0.0010319271823391318, "learning_rate": 9.057039704799209e-07, "loss": 0.0, "step": 22566 }, { "epoch": 21.69903846153846, "grad_norm": 0.0019203913398087025, "learning_rate": 9.051860171935955e-07, "loss": 0.0, "step": 22567 }, { "epoch": 21.7, "grad_norm": 0.0008664875640533864, "learning_rate": 9.04668205032091e-07, "loss": 0.0, "step": 22568 }, { "epoch": 21.700961538461538, "grad_norm": 0.0009906601626425982, "learning_rate": 9.041505340034429e-07, "loss": 0.0, "step": 22569 }, { "epoch": 21.701923076923077, "grad_norm": 0.005247403867542744, "learning_rate": 9.036330041156804e-07, "loss": 0.0, "step": 22570 }, { "epoch": 21.702884615384615, "grad_norm": 0.0012093785917386413, "learning_rate": 9.031156153768361e-07, "loss": 0.0, "step": 22571 }, { "epoch": 21.703846153846154, "grad_norm": 0.0018883516313508153, "learning_rate": 9.025983677949412e-07, "loss": 0.0, "step": 22572 }, { "epoch": 21.704807692307693, "grad_norm": 0.0005871729808859527, "learning_rate": 9.020812613780161e-07, "loss": 0.0, "step": 22573 }, { "epoch": 21.70576923076923, "grad_norm": 0.0016331103397533298, "learning_rate": 9.015642961340909e-07, "loss": 0.0, "step": 22574 }, { "epoch": 21.70673076923077, "grad_norm": 0.003433810779824853, "learning_rate": 9.010474720711814e-07, "loss": 0.0, "step": 22575 }, { "epoch": 21.70769230769231, "grad_norm": 0.0005162517190910876, "learning_rate": 9.005307891973103e-07, "loss": 0.0, "step": 22576 }, { "epoch": 21.708653846153847, "grad_norm": 0.0009953927947208285, "learning_rate": 9.000142475204965e-07, "loss": 0.0, "step": 22577 }, { "epoch": 21.709615384615386, "grad_norm": 0.001134435529820621, "learning_rate": 8.994978470487515e-07, "loss": 0.0, "step": 22578 }, { "epoch": 21.710576923076925, "grad_norm": 0.0011111089261248708, "learning_rate": 8.989815877900898e-07, "loss": 0.0, "step": 22579 }, { "epoch": 21.71153846153846, "grad_norm": 0.0006614441517740488, "learning_rate": 8.984654697525252e-07, "loss": 0.0, "step": 22580 }, { "epoch": 21.7125, "grad_norm": 0.0009810886112973094, "learning_rate": 8.979494929440625e-07, "loss": 0.0, "step": 22581 }, { "epoch": 21.713461538461537, "grad_norm": 0.0008873805054463446, "learning_rate": 8.974336573727082e-07, "loss": 0.0, "step": 22582 }, { "epoch": 21.714423076923076, "grad_norm": 0.0009279008954763412, "learning_rate": 8.969179630464709e-07, "loss": 0.0, "step": 22583 }, { "epoch": 21.715384615384615, "grad_norm": 0.00079864397412166, "learning_rate": 8.96402409973347e-07, "loss": 0.0, "step": 22584 }, { "epoch": 21.716346153846153, "grad_norm": 0.0012447914341464639, "learning_rate": 8.958869981613405e-07, "loss": 0.0, "step": 22585 }, { "epoch": 21.717307692307692, "grad_norm": 0.0020997226238250732, "learning_rate": 8.953717276184459e-07, "loss": 0.0, "step": 22586 }, { "epoch": 21.71826923076923, "grad_norm": 0.0015227339463308454, "learning_rate": 8.948565983526613e-07, "loss": 0.0, "step": 22587 }, { "epoch": 21.71923076923077, "grad_norm": 0.001227938337251544, "learning_rate": 8.943416103719805e-07, "loss": 0.0, "step": 22588 }, { "epoch": 21.720192307692308, "grad_norm": 0.002172954147681594, "learning_rate": 8.9382676368439e-07, "loss": 0.0, "step": 22589 }, { "epoch": 21.721153846153847, "grad_norm": 0.0015453877858817577, "learning_rate": 8.933120582978827e-07, "loss": 0.0, "step": 22590 }, { "epoch": 21.722115384615385, "grad_norm": 0.0028460780158638954, "learning_rate": 8.927974942204464e-07, "loss": 0.0, "step": 22591 }, { "epoch": 21.723076923076924, "grad_norm": 0.0016514266608282924, "learning_rate": 8.922830714600616e-07, "loss": 0.0, "step": 22592 }, { "epoch": 21.724038461538463, "grad_norm": 0.0024772747419774532, "learning_rate": 8.91768790024713e-07, "loss": 0.0, "step": 22593 }, { "epoch": 21.725, "grad_norm": 0.0006792847998440266, "learning_rate": 8.912546499223828e-07, "loss": 0.0, "step": 22594 }, { "epoch": 21.72596153846154, "grad_norm": 0.0008962912252172828, "learning_rate": 8.907406511610439e-07, "loss": 0.0, "step": 22595 }, { "epoch": 21.726923076923075, "grad_norm": 0.0007445730152539909, "learning_rate": 8.902267937486764e-07, "loss": 0.0, "step": 22596 }, { "epoch": 21.727884615384614, "grad_norm": 0.0010179008822888136, "learning_rate": 8.897130776932506e-07, "loss": 0.0, "step": 22597 }, { "epoch": 21.728846153846153, "grad_norm": 0.004839334636926651, "learning_rate": 8.8919950300274e-07, "loss": 0.0, "step": 22598 }, { "epoch": 21.72980769230769, "grad_norm": 0.001037698588334024, "learning_rate": 8.886860696851141e-07, "loss": 0.0, "step": 22599 }, { "epoch": 21.73076923076923, "grad_norm": 0.0017422650707885623, "learning_rate": 8.881727777483384e-07, "loss": 0.0, "step": 22600 }, { "epoch": 21.73173076923077, "grad_norm": 0.00203651818446815, "learning_rate": 8.876596272003768e-07, "loss": 0.0, "step": 22601 }, { "epoch": 21.732692307692307, "grad_norm": 0.0024335538037121296, "learning_rate": 8.871466180491961e-07, "loss": 0.0, "step": 22602 }, { "epoch": 21.733653846153846, "grad_norm": 0.0016142093809321523, "learning_rate": 8.866337503027523e-07, "loss": 0.0, "step": 22603 }, { "epoch": 21.734615384615385, "grad_norm": 0.0013376398710533977, "learning_rate": 8.861210239690055e-07, "loss": 0.0, "step": 22604 }, { "epoch": 21.735576923076923, "grad_norm": 0.002012006239965558, "learning_rate": 8.856084390559128e-07, "loss": 0.0, "step": 22605 }, { "epoch": 21.736538461538462, "grad_norm": 0.0025752249639481306, "learning_rate": 8.850959955714245e-07, "loss": 0.0, "step": 22606 }, { "epoch": 21.7375, "grad_norm": 0.006578514352440834, "learning_rate": 8.845836935234975e-07, "loss": 0.0, "step": 22607 }, { "epoch": 21.73846153846154, "grad_norm": 0.002200900809839368, "learning_rate": 8.840715329200756e-07, "loss": 0.0, "step": 22608 }, { "epoch": 21.739423076923078, "grad_norm": 0.001943085459060967, "learning_rate": 8.835595137691078e-07, "loss": 0.0, "step": 22609 }, { "epoch": 21.740384615384617, "grad_norm": 0.0035587346646934748, "learning_rate": 8.830476360785434e-07, "loss": 0.0, "step": 22610 }, { "epoch": 21.741346153846155, "grad_norm": 0.0031103675719350576, "learning_rate": 8.825358998563182e-07, "loss": 0.0, "step": 22611 }, { "epoch": 21.74230769230769, "grad_norm": 0.0012527992948889732, "learning_rate": 8.820243051103771e-07, "loss": 0.0, "step": 22612 }, { "epoch": 21.74326923076923, "grad_norm": 0.00215578218922019, "learning_rate": 8.815128518486604e-07, "loss": 0.0, "step": 22613 }, { "epoch": 21.744230769230768, "grad_norm": 0.00040041402098722756, "learning_rate": 8.810015400790994e-07, "loss": 0.0, "step": 22614 }, { "epoch": 21.745192307692307, "grad_norm": 0.0004480471252463758, "learning_rate": 8.804903698096301e-07, "loss": 0.0, "step": 22615 }, { "epoch": 21.746153846153845, "grad_norm": 0.001288450788706541, "learning_rate": 8.799793410481871e-07, "loss": 0.0, "step": 22616 }, { "epoch": 21.747115384615384, "grad_norm": 0.00111985148396343, "learning_rate": 8.794684538026965e-07, "loss": 0.0, "step": 22617 }, { "epoch": 21.748076923076923, "grad_norm": 0.0005046332953497767, "learning_rate": 8.789577080810874e-07, "loss": 0.0, "step": 22618 }, { "epoch": 21.74903846153846, "grad_norm": 0.0005249542882665992, "learning_rate": 8.784471038912845e-07, "loss": 0.0, "step": 22619 }, { "epoch": 21.75, "grad_norm": 0.00048302768846042454, "learning_rate": 8.779366412412104e-07, "loss": 0.0, "step": 22620 }, { "epoch": 21.75096153846154, "grad_norm": 0.0009529190720058978, "learning_rate": 8.774263201387889e-07, "loss": 0.0, "step": 22621 }, { "epoch": 21.751923076923077, "grad_norm": 0.0016130636213347316, "learning_rate": 8.769161405919335e-07, "loss": 0.0, "step": 22622 }, { "epoch": 21.752884615384616, "grad_norm": 0.000835384416859597, "learning_rate": 8.764061026085646e-07, "loss": 0.0, "step": 22623 }, { "epoch": 21.753846153846155, "grad_norm": 0.0007945545366965234, "learning_rate": 8.75896206196597e-07, "loss": 0.0, "step": 22624 }, { "epoch": 21.754807692307693, "grad_norm": 0.0008032438345253468, "learning_rate": 8.753864513639399e-07, "loss": 0.0, "step": 22625 }, { "epoch": 21.755769230769232, "grad_norm": 0.0008981486898846924, "learning_rate": 8.748768381185047e-07, "loss": 0.0, "step": 22626 }, { "epoch": 21.75673076923077, "grad_norm": 0.005505684297531843, "learning_rate": 8.743673664682007e-07, "loss": 0.0, "step": 22627 }, { "epoch": 21.75769230769231, "grad_norm": 0.0015792838530614972, "learning_rate": 8.738580364209304e-07, "loss": 0.0, "step": 22628 }, { "epoch": 21.758653846153845, "grad_norm": 0.0035641472786664963, "learning_rate": 8.733488479845997e-07, "loss": 0.0, "step": 22629 }, { "epoch": 21.759615384615383, "grad_norm": 0.0014339913614094257, "learning_rate": 8.728398011671069e-07, "loss": 0.0, "step": 22630 }, { "epoch": 21.760576923076922, "grad_norm": 0.0008750579436309636, "learning_rate": 8.723308959763522e-07, "loss": 0.0, "step": 22631 }, { "epoch": 21.76153846153846, "grad_norm": 0.0007653943030163646, "learning_rate": 8.718221324202347e-07, "loss": 0.0, "step": 22632 }, { "epoch": 21.7625, "grad_norm": 0.0012172145070508122, "learning_rate": 8.71313510506645e-07, "loss": 0.0, "step": 22633 }, { "epoch": 21.763461538461538, "grad_norm": 0.0024543702602386475, "learning_rate": 8.708050302434768e-07, "loss": 0.0, "step": 22634 }, { "epoch": 21.764423076923077, "grad_norm": 0.0006576994201168418, "learning_rate": 8.702966916386235e-07, "loss": 0.0, "step": 22635 }, { "epoch": 21.765384615384615, "grad_norm": 0.0008872383623383939, "learning_rate": 8.697884946999668e-07, "loss": 0.0, "step": 22636 }, { "epoch": 21.766346153846154, "grad_norm": 0.0013194880448281765, "learning_rate": 8.692804394353971e-07, "loss": 0.0, "step": 22637 }, { "epoch": 21.767307692307693, "grad_norm": 0.0013686579186469316, "learning_rate": 8.687725258527979e-07, "loss": 0.0, "step": 22638 }, { "epoch": 21.76826923076923, "grad_norm": 0.00038030024734325707, "learning_rate": 8.682647539600475e-07, "loss": 0.0, "step": 22639 }, { "epoch": 21.76923076923077, "grad_norm": 0.0009011868387460709, "learning_rate": 8.677571237650273e-07, "loss": 0.0, "step": 22640 }, { "epoch": 21.77019230769231, "grad_norm": 0.0013880552724003792, "learning_rate": 8.672496352756154e-07, "loss": 0.0, "step": 22641 }, { "epoch": 21.771153846153847, "grad_norm": 0.004641864914447069, "learning_rate": 8.667422884996823e-07, "loss": 0.0, "step": 22642 }, { "epoch": 21.772115384615386, "grad_norm": 0.0017475091153755784, "learning_rate": 8.662350834451061e-07, "loss": 0.0, "step": 22643 }, { "epoch": 21.773076923076925, "grad_norm": 0.003408443881198764, "learning_rate": 8.657280201197515e-07, "loss": 0.0, "step": 22644 }, { "epoch": 21.77403846153846, "grad_norm": 0.0015629836125299335, "learning_rate": 8.65221098531489e-07, "loss": 0.0, "step": 22645 }, { "epoch": 21.775, "grad_norm": 0.0012608785182237625, "learning_rate": 8.647143186881868e-07, "loss": 0.0, "step": 22646 }, { "epoch": 21.775961538461537, "grad_norm": 0.00047881732461974025, "learning_rate": 8.642076805977051e-07, "loss": 0.0, "step": 22647 }, { "epoch": 21.776923076923076, "grad_norm": 0.00036700916825793684, "learning_rate": 8.637011842679077e-07, "loss": 0.0, "step": 22648 }, { "epoch": 21.777884615384615, "grad_norm": 0.0010111643932759762, "learning_rate": 8.631948297066539e-07, "loss": 0.0, "step": 22649 }, { "epoch": 21.778846153846153, "grad_norm": 0.004728866741061211, "learning_rate": 8.626886169217985e-07, "loss": 0.0, "step": 22650 }, { "epoch": 21.779807692307692, "grad_norm": 0.0012463731691241264, "learning_rate": 8.621825459211985e-07, "loss": 0.0, "step": 22651 }, { "epoch": 21.78076923076923, "grad_norm": 0.00209277612157166, "learning_rate": 8.616766167127077e-07, "loss": 0.0, "step": 22652 }, { "epoch": 21.78173076923077, "grad_norm": 0.0024098875001072884, "learning_rate": 8.611708293041732e-07, "loss": 0.0, "step": 22653 }, { "epoch": 21.782692307692308, "grad_norm": 0.0018349871970713139, "learning_rate": 8.606651837034474e-07, "loss": 0.0, "step": 22654 }, { "epoch": 21.783653846153847, "grad_norm": 0.0008114472148008645, "learning_rate": 8.60159679918372e-07, "loss": 0.0, "step": 22655 }, { "epoch": 21.784615384615385, "grad_norm": 0.0010239641414955258, "learning_rate": 8.59654317956794e-07, "loss": 0.0, "step": 22656 }, { "epoch": 21.785576923076924, "grad_norm": 0.001469828188419342, "learning_rate": 8.59149097826556e-07, "loss": 0.0, "step": 22657 }, { "epoch": 21.786538461538463, "grad_norm": 0.0032417504116892815, "learning_rate": 8.58644019535494e-07, "loss": 0.0, "step": 22658 }, { "epoch": 21.7875, "grad_norm": 0.0014277801383286715, "learning_rate": 8.581390830914471e-07, "loss": 0.0, "step": 22659 }, { "epoch": 21.78846153846154, "grad_norm": 0.001365155098028481, "learning_rate": 8.576342885022527e-07, "loss": 0.0, "step": 22660 }, { "epoch": 21.789423076923075, "grad_norm": 0.0035253793466836214, "learning_rate": 8.571296357757397e-07, "loss": 0.0, "step": 22661 }, { "epoch": 21.790384615384614, "grad_norm": 0.0011135851964354515, "learning_rate": 8.56625124919741e-07, "loss": 0.0, "step": 22662 }, { "epoch": 21.791346153846153, "grad_norm": 0.002133755013346672, "learning_rate": 8.561207559420859e-07, "loss": 0.0, "step": 22663 }, { "epoch": 21.79230769230769, "grad_norm": 0.000731235952116549, "learning_rate": 8.556165288505979e-07, "loss": 0.0, "step": 22664 }, { "epoch": 21.79326923076923, "grad_norm": 0.0013619237579405308, "learning_rate": 8.551124436531055e-07, "loss": 0.0, "step": 22665 }, { "epoch": 21.79423076923077, "grad_norm": 0.0014625496696680784, "learning_rate": 8.546085003574267e-07, "loss": 0.0, "step": 22666 }, { "epoch": 21.795192307692307, "grad_norm": 0.0014618259156122804, "learning_rate": 8.541046989713808e-07, "loss": 0.0, "step": 22667 }, { "epoch": 21.796153846153846, "grad_norm": 0.0009011377114802599, "learning_rate": 8.536010395027905e-07, "loss": 0.0, "step": 22668 }, { "epoch": 21.797115384615385, "grad_norm": 0.0007379436865448952, "learning_rate": 8.530975219594651e-07, "loss": 0.0, "step": 22669 }, { "epoch": 21.798076923076923, "grad_norm": 0.0011878447839990258, "learning_rate": 8.525941463492204e-07, "loss": 0.0, "step": 22670 }, { "epoch": 21.799038461538462, "grad_norm": 0.0008999858982861042, "learning_rate": 8.520909126798693e-07, "loss": 0.0, "step": 22671 }, { "epoch": 21.8, "grad_norm": 0.0019825221970677376, "learning_rate": 8.515878209592165e-07, "loss": 0.0, "step": 22672 }, { "epoch": 21.80096153846154, "grad_norm": 0.0007958218338899314, "learning_rate": 8.510848711950704e-07, "loss": 0.0, "step": 22673 }, { "epoch": 21.801923076923078, "grad_norm": 0.0039975945837795734, "learning_rate": 8.505820633952378e-07, "loss": 0.0, "step": 22674 }, { "epoch": 21.802884615384617, "grad_norm": 0.00606071762740612, "learning_rate": 8.500793975675159e-07, "loss": 0.0, "step": 22675 }, { "epoch": 21.803846153846155, "grad_norm": 0.0021790119353681803, "learning_rate": 8.495768737197085e-07, "loss": 0.0, "step": 22676 }, { "epoch": 21.80480769230769, "grad_norm": 0.0017918808152899146, "learning_rate": 8.490744918596106e-07, "loss": 0.0, "step": 22677 }, { "epoch": 21.80576923076923, "grad_norm": 0.004984820727258921, "learning_rate": 8.485722519950191e-07, "loss": 0.0, "step": 22678 }, { "epoch": 21.806730769230768, "grad_norm": 0.0013202857226133347, "learning_rate": 8.480701541337288e-07, "loss": 0.0, "step": 22679 }, { "epoch": 21.807692307692307, "grad_norm": 0.0056720091961324215, "learning_rate": 8.475681982835271e-07, "loss": 0.0, "step": 22680 }, { "epoch": 21.808653846153845, "grad_norm": 0.0012414746452122927, "learning_rate": 8.470663844522053e-07, "loss": 0.0, "step": 22681 }, { "epoch": 21.809615384615384, "grad_norm": 0.002547026379033923, "learning_rate": 8.465647126475507e-07, "loss": 0.0, "step": 22682 }, { "epoch": 21.810576923076923, "grad_norm": 0.0034026596695184708, "learning_rate": 8.460631828773458e-07, "loss": 0.0, "step": 22683 }, { "epoch": 21.81153846153846, "grad_norm": 0.0010466863168403506, "learning_rate": 8.455617951493733e-07, "loss": 0.0, "step": 22684 }, { "epoch": 21.8125, "grad_norm": 0.0015590626280754805, "learning_rate": 8.450605494714159e-07, "loss": 0.0, "step": 22685 }, { "epoch": 21.81346153846154, "grad_norm": 0.0010891000274568796, "learning_rate": 8.445594458512473e-07, "loss": 0.0, "step": 22686 }, { "epoch": 21.814423076923077, "grad_norm": 0.0009529832168482244, "learning_rate": 8.440584842966471e-07, "loss": 0.0, "step": 22687 }, { "epoch": 21.815384615384616, "grad_norm": 0.0014353106962516904, "learning_rate": 8.435576648153854e-07, "loss": 0.0, "step": 22688 }, { "epoch": 21.816346153846155, "grad_norm": 0.0005190617521293461, "learning_rate": 8.430569874152339e-07, "loss": 0.0, "step": 22689 }, { "epoch": 21.817307692307693, "grad_norm": 0.0015993975102901459, "learning_rate": 8.425564521039653e-07, "loss": 0.0, "step": 22690 }, { "epoch": 21.818269230769232, "grad_norm": 0.0010036873864009976, "learning_rate": 8.420560588893412e-07, "loss": 0.0, "step": 22691 }, { "epoch": 21.81923076923077, "grad_norm": 0.005105872172862291, "learning_rate": 8.415558077791297e-07, "loss": 0.0, "step": 22692 }, { "epoch": 21.82019230769231, "grad_norm": 0.001560010015964508, "learning_rate": 8.410556987810937e-07, "loss": 0.0, "step": 22693 }, { "epoch": 21.821153846153845, "grad_norm": 0.0009355649235658348, "learning_rate": 8.405557319029911e-07, "loss": 0.0, "step": 22694 }, { "epoch": 21.822115384615383, "grad_norm": 0.002175253117457032, "learning_rate": 8.400559071525794e-07, "loss": 0.0, "step": 22695 }, { "epoch": 21.823076923076922, "grad_norm": 0.0009970383252948523, "learning_rate": 8.395562245376188e-07, "loss": 0.0, "step": 22696 }, { "epoch": 21.82403846153846, "grad_norm": 0.010366021655499935, "learning_rate": 8.390566840658576e-07, "loss": 0.0, "step": 22697 }, { "epoch": 21.825, "grad_norm": 0.001903121592476964, "learning_rate": 8.385572857450519e-07, "loss": 0.0, "step": 22698 }, { "epoch": 21.825961538461538, "grad_norm": 0.0015737268840894103, "learning_rate": 8.380580295829466e-07, "loss": 0.0, "step": 22699 }, { "epoch": 21.826923076923077, "grad_norm": 0.0006686584674753249, "learning_rate": 8.37558915587291e-07, "loss": 0.0, "step": 22700 }, { "epoch": 21.827884615384615, "grad_norm": 0.0012247292324900627, "learning_rate": 8.370599437658311e-07, "loss": 0.0, "step": 22701 }, { "epoch": 21.828846153846154, "grad_norm": 0.0008401689119637012, "learning_rate": 8.365611141263063e-07, "loss": 0.0, "step": 22702 }, { "epoch": 21.829807692307693, "grad_norm": 0.002281338907778263, "learning_rate": 8.360624266764583e-07, "loss": 0.0, "step": 22703 }, { "epoch": 21.83076923076923, "grad_norm": 0.000532432459294796, "learning_rate": 8.355638814240286e-07, "loss": 0.0, "step": 22704 }, { "epoch": 21.83173076923077, "grad_norm": 0.001185436500236392, "learning_rate": 8.350654783767475e-07, "loss": 0.0, "step": 22705 }, { "epoch": 21.83269230769231, "grad_norm": 0.002912041964009404, "learning_rate": 8.345672175423514e-07, "loss": 0.0, "step": 22706 }, { "epoch": 21.833653846153847, "grad_norm": 0.0061563001945614815, "learning_rate": 8.340690989285727e-07, "loss": 0.0, "step": 22707 }, { "epoch": 21.834615384615386, "grad_norm": 0.003708244301378727, "learning_rate": 8.335711225431398e-07, "loss": 0.0, "step": 22708 }, { "epoch": 21.835576923076925, "grad_norm": 0.001069732359610498, "learning_rate": 8.330732883937808e-07, "loss": 0.0, "step": 22709 }, { "epoch": 21.83653846153846, "grad_norm": 0.0006127202068455517, "learning_rate": 8.325755964882176e-07, "loss": 0.0, "step": 22710 }, { "epoch": 21.8375, "grad_norm": 0.0024774393532425165, "learning_rate": 8.320780468341761e-07, "loss": 0.0, "step": 22711 }, { "epoch": 21.838461538461537, "grad_norm": 0.00035986420698463917, "learning_rate": 8.315806394393766e-07, "loss": 0.0, "step": 22712 }, { "epoch": 21.839423076923076, "grad_norm": 0.0016697356477379799, "learning_rate": 8.310833743115354e-07, "loss": 0.0, "step": 22713 }, { "epoch": 21.840384615384615, "grad_norm": 0.002030529547482729, "learning_rate": 8.305862514583696e-07, "loss": 0.0, "step": 22714 }, { "epoch": 21.841346153846153, "grad_norm": 0.0037765069864690304, "learning_rate": 8.300892708875952e-07, "loss": 0.0, "step": 22715 }, { "epoch": 21.842307692307692, "grad_norm": 0.002360397484153509, "learning_rate": 8.295924326069204e-07, "loss": 0.0, "step": 22716 }, { "epoch": 21.84326923076923, "grad_norm": 0.0005327691324055195, "learning_rate": 8.29095736624056e-07, "loss": 0.0, "step": 22717 }, { "epoch": 21.84423076923077, "grad_norm": 0.0014928480377420783, "learning_rate": 8.28599182946711e-07, "loss": 0.0, "step": 22718 }, { "epoch": 21.845192307692308, "grad_norm": 0.0006445898325182498, "learning_rate": 8.281027715825873e-07, "loss": 0.0, "step": 22719 }, { "epoch": 21.846153846153847, "grad_norm": 0.0007673639920540154, "learning_rate": 8.276065025393909e-07, "loss": 0.0, "step": 22720 }, { "epoch": 21.847115384615385, "grad_norm": 0.0015814413782209158, "learning_rate": 8.271103758248189e-07, "loss": 0.0, "step": 22721 }, { "epoch": 21.848076923076924, "grad_norm": 0.0008018341031856835, "learning_rate": 8.26614391446573e-07, "loss": 0.0, "step": 22722 }, { "epoch": 21.849038461538463, "grad_norm": 0.002450178377330303, "learning_rate": 8.261185494123492e-07, "loss": 0.0, "step": 22723 }, { "epoch": 21.85, "grad_norm": 0.0007946949335746467, "learning_rate": 8.256228497298391e-07, "loss": 0.0, "step": 22724 }, { "epoch": 21.85096153846154, "grad_norm": 0.001032438245601952, "learning_rate": 8.251272924067355e-07, "loss": 0.0, "step": 22725 }, { "epoch": 21.851923076923075, "grad_norm": 0.0014299077447503805, "learning_rate": 8.246318774507301e-07, "loss": 0.0, "step": 22726 }, { "epoch": 21.852884615384614, "grad_norm": 0.0003940328606404364, "learning_rate": 8.241366048695065e-07, "loss": 0.0, "step": 22727 }, { "epoch": 21.853846153846153, "grad_norm": 0.002462614793330431, "learning_rate": 8.236414746707522e-07, "loss": 0.0, "step": 22728 }, { "epoch": 21.85480769230769, "grad_norm": 0.0010795618873089552, "learning_rate": 8.231464868621519e-07, "loss": 0.0, "step": 22729 }, { "epoch": 21.85576923076923, "grad_norm": 0.006293424870818853, "learning_rate": 8.22651641451383e-07, "loss": 0.0, "step": 22730 }, { "epoch": 21.85673076923077, "grad_norm": 0.0004279361746739596, "learning_rate": 8.221569384461247e-07, "loss": 0.0, "step": 22731 }, { "epoch": 21.857692307692307, "grad_norm": 0.003421203698962927, "learning_rate": 8.216623778540567e-07, "loss": 0.0, "step": 22732 }, { "epoch": 21.858653846153846, "grad_norm": 0.001856547431088984, "learning_rate": 8.211679596828481e-07, "loss": 0.0, "step": 22733 }, { "epoch": 21.859615384615385, "grad_norm": 0.004849536344408989, "learning_rate": 8.206736839401752e-07, "loss": 0.0, "step": 22734 }, { "epoch": 21.860576923076923, "grad_norm": 0.0014593602390959859, "learning_rate": 8.201795506337029e-07, "loss": 0.0, "step": 22735 }, { "epoch": 21.861538461538462, "grad_norm": 0.0019287359900772572, "learning_rate": 8.196855597711029e-07, "loss": 0.0, "step": 22736 }, { "epoch": 21.8625, "grad_norm": 0.0006362307467497885, "learning_rate": 8.191917113600401e-07, "loss": 0.0, "step": 22737 }, { "epoch": 21.86346153846154, "grad_norm": 0.0022534735035151243, "learning_rate": 8.186980054081761e-07, "loss": 0.0, "step": 22738 }, { "epoch": 21.864423076923078, "grad_norm": 0.0004879562766291201, "learning_rate": 8.182044419231705e-07, "loss": 0.0, "step": 22739 }, { "epoch": 21.865384615384617, "grad_norm": 0.0007188590825535357, "learning_rate": 8.177110209126871e-07, "loss": 0.0, "step": 22740 }, { "epoch": 21.866346153846155, "grad_norm": 0.004261379595845938, "learning_rate": 8.172177423843763e-07, "loss": 0.0, "step": 22741 }, { "epoch": 21.86730769230769, "grad_norm": 0.006245134864002466, "learning_rate": 8.167246063458944e-07, "loss": 0.0, "step": 22742 }, { "epoch": 21.86826923076923, "grad_norm": 0.0011456473730504513, "learning_rate": 8.162316128048964e-07, "loss": 0.0, "step": 22743 }, { "epoch": 21.869230769230768, "grad_norm": 0.0016669774195179343, "learning_rate": 8.157387617690282e-07, "loss": 0.0, "step": 22744 }, { "epoch": 21.870192307692307, "grad_norm": 0.0015208512777462602, "learning_rate": 8.152460532459405e-07, "loss": 0.0, "step": 22745 }, { "epoch": 21.871153846153845, "grad_norm": 0.0009270035079680383, "learning_rate": 8.147534872432761e-07, "loss": 0.0, "step": 22746 }, { "epoch": 21.872115384615384, "grad_norm": 0.000934855779632926, "learning_rate": 8.142610637686776e-07, "loss": 0.0, "step": 22747 }, { "epoch": 21.873076923076923, "grad_norm": 0.0010471985442563891, "learning_rate": 8.137687828297902e-07, "loss": 0.0, "step": 22748 }, { "epoch": 21.87403846153846, "grad_norm": 0.0007562811952084303, "learning_rate": 8.132766444342488e-07, "loss": 0.0, "step": 22749 }, { "epoch": 21.875, "grad_norm": 0.0012297783978283405, "learning_rate": 8.127846485896907e-07, "loss": 0.0, "step": 22750 }, { "epoch": 21.87596153846154, "grad_norm": 0.001681696972809732, "learning_rate": 8.12292795303753e-07, "loss": 0.0, "step": 22751 }, { "epoch": 21.876923076923077, "grad_norm": 0.002154540503397584, "learning_rate": 8.118010845840629e-07, "loss": 0.0, "step": 22752 }, { "epoch": 21.877884615384616, "grad_norm": 0.0014415273908525705, "learning_rate": 8.113095164382534e-07, "loss": 0.0, "step": 22753 }, { "epoch": 21.878846153846155, "grad_norm": 0.0011245104251429439, "learning_rate": 8.108180908739527e-07, "loss": 0.0, "step": 22754 }, { "epoch": 21.879807692307693, "grad_norm": 0.002032496267929673, "learning_rate": 8.103268078987847e-07, "loss": 0.0, "step": 22755 }, { "epoch": 21.880769230769232, "grad_norm": 0.00037184302345849574, "learning_rate": 8.098356675203744e-07, "loss": 0.0, "step": 22756 }, { "epoch": 21.88173076923077, "grad_norm": 0.0006182382931001484, "learning_rate": 8.093446697463392e-07, "loss": 0.0, "step": 22757 }, { "epoch": 21.88269230769231, "grad_norm": 0.002665483858436346, "learning_rate": 8.088538145843017e-07, "loss": 0.0, "step": 22758 }, { "epoch": 21.883653846153845, "grad_norm": 0.0008039537933655083, "learning_rate": 8.083631020418792e-07, "loss": 0.0, "step": 22759 }, { "epoch": 21.884615384615383, "grad_norm": 0.0032017186749726534, "learning_rate": 8.078725321266811e-07, "loss": 0.0, "step": 22760 }, { "epoch": 21.885576923076922, "grad_norm": 0.003489845898002386, "learning_rate": 8.073821048463237e-07, "loss": 0.0, "step": 22761 }, { "epoch": 21.88653846153846, "grad_norm": 0.000693507376126945, "learning_rate": 8.068918202084175e-07, "loss": 0.0, "step": 22762 }, { "epoch": 21.8875, "grad_norm": 0.002219904912635684, "learning_rate": 8.064016782205675e-07, "loss": 0.0, "step": 22763 }, { "epoch": 21.888461538461538, "grad_norm": 0.0013115755282342434, "learning_rate": 8.059116788903809e-07, "loss": 0.0, "step": 22764 }, { "epoch": 21.889423076923077, "grad_norm": 0.002147459425032139, "learning_rate": 8.054218222254618e-07, "loss": 0.0, "step": 22765 }, { "epoch": 21.890384615384615, "grad_norm": 0.0009770578471943736, "learning_rate": 8.049321082334083e-07, "loss": 0.0, "step": 22766 }, { "epoch": 21.891346153846154, "grad_norm": 0.0010246310848742723, "learning_rate": 8.044425369218234e-07, "loss": 0.0, "step": 22767 }, { "epoch": 21.892307692307693, "grad_norm": 0.0015548192895948887, "learning_rate": 8.039531082982999e-07, "loss": 0.0, "step": 22768 }, { "epoch": 21.89326923076923, "grad_norm": 0.003486302448436618, "learning_rate": 8.034638223704338e-07, "loss": 0.0, "step": 22769 }, { "epoch": 21.89423076923077, "grad_norm": 0.0009197985637001693, "learning_rate": 8.029746791458193e-07, "loss": 0.0, "step": 22770 }, { "epoch": 21.89519230769231, "grad_norm": 0.0002602191234473139, "learning_rate": 8.024856786320434e-07, "loss": 0.0, "step": 22771 }, { "epoch": 21.896153846153847, "grad_norm": 0.002133341506123543, "learning_rate": 8.019968208366958e-07, "loss": 0.0, "step": 22772 }, { "epoch": 21.897115384615386, "grad_norm": 0.010305851697921753, "learning_rate": 8.015081057673623e-07, "loss": 0.0, "step": 22773 }, { "epoch": 21.898076923076925, "grad_norm": 0.0008119409321807325, "learning_rate": 8.010195334316251e-07, "loss": 0.0, "step": 22774 }, { "epoch": 21.89903846153846, "grad_norm": 0.0007741133449599147, "learning_rate": 8.005311038370656e-07, "loss": 0.0, "step": 22775 }, { "epoch": 21.9, "grad_norm": 0.004493905697017908, "learning_rate": 8.000428169912656e-07, "loss": 0.0, "step": 22776 }, { "epoch": 21.900961538461537, "grad_norm": 0.0015315633499994874, "learning_rate": 7.995546729017978e-07, "loss": 0.0, "step": 22777 }, { "epoch": 21.901923076923076, "grad_norm": 0.0025677208323031664, "learning_rate": 7.990666715762396e-07, "loss": 0.0, "step": 22778 }, { "epoch": 21.902884615384615, "grad_norm": 0.0013321127044036984, "learning_rate": 7.985788130221606e-07, "loss": 0.0, "step": 22779 }, { "epoch": 21.903846153846153, "grad_norm": 0.0008228863589465618, "learning_rate": 7.980910972471323e-07, "loss": 0.0, "step": 22780 }, { "epoch": 21.904807692307692, "grad_norm": 0.0012830720515921712, "learning_rate": 7.976035242587254e-07, "loss": 0.0, "step": 22781 }, { "epoch": 21.90576923076923, "grad_norm": 0.0009500577580183744, "learning_rate": 7.971160940645006e-07, "loss": 0.0, "step": 22782 }, { "epoch": 21.90673076923077, "grad_norm": 0.001510151196271181, "learning_rate": 7.96628806672024e-07, "loss": 0.0, "step": 22783 }, { "epoch": 21.907692307692308, "grad_norm": 0.0010018337052315474, "learning_rate": 7.961416620888584e-07, "loss": 0.0, "step": 22784 }, { "epoch": 21.908653846153847, "grad_norm": 0.0016407823422923684, "learning_rate": 7.956546603225601e-07, "loss": 0.0, "step": 22785 }, { "epoch": 21.909615384615385, "grad_norm": 0.0006265120464377105, "learning_rate": 7.951678013806863e-07, "loss": 0.0, "step": 22786 }, { "epoch": 21.910576923076924, "grad_norm": 0.0012274269247427583, "learning_rate": 7.946810852707942e-07, "loss": 0.0, "step": 22787 }, { "epoch": 21.911538461538463, "grad_norm": 0.0011036142241209745, "learning_rate": 7.941945120004335e-07, "loss": 0.0, "step": 22788 }, { "epoch": 21.9125, "grad_norm": 0.0012391095515340567, "learning_rate": 7.937080815771569e-07, "loss": 0.0, "step": 22789 }, { "epoch": 21.91346153846154, "grad_norm": 0.0010050421115010977, "learning_rate": 7.932217940085085e-07, "loss": 0.0, "step": 22790 }, { "epoch": 21.914423076923075, "grad_norm": 0.0020591390784829855, "learning_rate": 7.927356493020355e-07, "loss": 0.0, "step": 22791 }, { "epoch": 21.915384615384614, "grad_norm": 0.0012043002061545849, "learning_rate": 7.922496474652852e-07, "loss": 0.0, "step": 22792 }, { "epoch": 21.916346153846153, "grad_norm": 0.001297307782806456, "learning_rate": 7.917637885057938e-07, "loss": 0.0, "step": 22793 }, { "epoch": 21.91730769230769, "grad_norm": 0.05203338712453842, "learning_rate": 7.91278072431102e-07, "loss": 0.0001, "step": 22794 }, { "epoch": 21.91826923076923, "grad_norm": 0.001602615462616086, "learning_rate": 7.907924992487503e-07, "loss": 0.0, "step": 22795 }, { "epoch": 21.91923076923077, "grad_norm": 0.0016263495199382305, "learning_rate": 7.903070689662673e-07, "loss": 0.0, "step": 22796 }, { "epoch": 21.920192307692307, "grad_norm": 0.0005663951742462814, "learning_rate": 7.89821781591189e-07, "loss": 0.0, "step": 22797 }, { "epoch": 21.921153846153846, "grad_norm": 0.0006996509036980569, "learning_rate": 7.893366371310463e-07, "loss": 0.0, "step": 22798 }, { "epoch": 21.922115384615385, "grad_norm": 0.0023112520575523376, "learning_rate": 7.888516355933651e-07, "loss": 0.0, "step": 22799 }, { "epoch": 21.923076923076923, "grad_norm": 0.0018815059447661042, "learning_rate": 7.883667769856739e-07, "loss": 0.0, "step": 22800 }, { "epoch": 21.924038461538462, "grad_norm": 0.0007664008880965412, "learning_rate": 7.878820613154925e-07, "loss": 0.0, "step": 22801 }, { "epoch": 21.925, "grad_norm": 0.00060380989452824, "learning_rate": 7.873974885903435e-07, "loss": 0.0, "step": 22802 }, { "epoch": 21.92596153846154, "grad_norm": 0.0013688679318875074, "learning_rate": 7.869130588177498e-07, "loss": 0.0, "step": 22803 }, { "epoch": 21.926923076923078, "grad_norm": 0.0011629228247329593, "learning_rate": 7.864287720052221e-07, "loss": 0.0, "step": 22804 }, { "epoch": 21.927884615384617, "grad_norm": 0.0015197091270238161, "learning_rate": 7.859446281602801e-07, "loss": 0.0, "step": 22805 }, { "epoch": 21.928846153846155, "grad_norm": 0.0011455218773335218, "learning_rate": 7.854606272904353e-07, "loss": 0.0, "step": 22806 }, { "epoch": 21.92980769230769, "grad_norm": 0.0018817199161276221, "learning_rate": 7.849767694031951e-07, "loss": 0.0, "step": 22807 }, { "epoch": 21.93076923076923, "grad_norm": 0.0005317513132467866, "learning_rate": 7.844930545060703e-07, "loss": 0.0, "step": 22808 }, { "epoch": 21.931730769230768, "grad_norm": 0.0005676971049979329, "learning_rate": 7.84009482606568e-07, "loss": 0.0, "step": 22809 }, { "epoch": 21.932692307692307, "grad_norm": 0.002246553311124444, "learning_rate": 7.83526053712188e-07, "loss": 0.0, "step": 22810 }, { "epoch": 21.933653846153845, "grad_norm": 0.0009798590326681733, "learning_rate": 7.830427678304353e-07, "loss": 0.0, "step": 22811 }, { "epoch": 21.934615384615384, "grad_norm": 0.0005181085434742272, "learning_rate": 7.82559624968805e-07, "loss": 0.0, "step": 22812 }, { "epoch": 21.935576923076923, "grad_norm": 0.0038170558400452137, "learning_rate": 7.820766251347978e-07, "loss": 0.0, "step": 22813 }, { "epoch": 21.93653846153846, "grad_norm": 0.0011595586547628045, "learning_rate": 7.815937683359077e-07, "loss": 0.0, "step": 22814 }, { "epoch": 21.9375, "grad_norm": 0.001537456875666976, "learning_rate": 7.811110545796252e-07, "loss": 0.0, "step": 22815 }, { "epoch": 21.93846153846154, "grad_norm": 0.00040800217539072037, "learning_rate": 7.806284838734423e-07, "loss": 0.0, "step": 22816 }, { "epoch": 21.939423076923077, "grad_norm": 0.0005490205367095768, "learning_rate": 7.801460562248486e-07, "loss": 0.0, "step": 22817 }, { "epoch": 21.940384615384616, "grad_norm": 0.0019873734563589096, "learning_rate": 7.796637716413258e-07, "loss": 0.0, "step": 22818 }, { "epoch": 21.941346153846155, "grad_norm": 0.0016721595311537385, "learning_rate": 7.791816301303601e-07, "loss": 0.0, "step": 22819 }, { "epoch": 21.942307692307693, "grad_norm": 0.006299496628344059, "learning_rate": 7.786996316994344e-07, "loss": 0.0, "step": 22820 }, { "epoch": 21.943269230769232, "grad_norm": 0.00043198736966587603, "learning_rate": 7.78217776356025e-07, "loss": 0.0, "step": 22821 }, { "epoch": 21.94423076923077, "grad_norm": 0.0011046479921787977, "learning_rate": 7.777360641076104e-07, "loss": 0.0, "step": 22822 }, { "epoch": 21.94519230769231, "grad_norm": 0.0048061846755445, "learning_rate": 7.772544949616656e-07, "loss": 0.0, "step": 22823 }, { "epoch": 21.946153846153845, "grad_norm": 0.0015611420385539532, "learning_rate": 7.767730689256614e-07, "loss": 0.0, "step": 22824 }, { "epoch": 21.947115384615383, "grad_norm": 0.000681262812577188, "learning_rate": 7.762917860070718e-07, "loss": 0.0, "step": 22825 }, { "epoch": 21.948076923076922, "grad_norm": 0.0072413175366818905, "learning_rate": 7.758106462133597e-07, "loss": 0.0, "step": 22826 }, { "epoch": 21.94903846153846, "grad_norm": 0.006758501753211021, "learning_rate": 7.753296495519946e-07, "loss": 0.0, "step": 22827 }, { "epoch": 21.95, "grad_norm": 0.0013151798630133271, "learning_rate": 7.748487960304396e-07, "loss": 0.0, "step": 22828 }, { "epoch": 21.950961538461538, "grad_norm": 0.009229343384504318, "learning_rate": 7.743680856561552e-07, "loss": 0.0, "step": 22829 }, { "epoch": 21.951923076923077, "grad_norm": 0.000687626248691231, "learning_rate": 7.738875184366013e-07, "loss": 0.0, "step": 22830 }, { "epoch": 21.952884615384615, "grad_norm": 0.0014811104629188776, "learning_rate": 7.73407094379236e-07, "loss": 0.0, "step": 22831 }, { "epoch": 21.953846153846154, "grad_norm": 0.000488222052808851, "learning_rate": 7.729268134915113e-07, "loss": 0.0, "step": 22832 }, { "epoch": 21.954807692307693, "grad_norm": 0.000972938141785562, "learning_rate": 7.724466757808813e-07, "loss": 0.0, "step": 22833 }, { "epoch": 21.95576923076923, "grad_norm": 0.002693318761885166, "learning_rate": 7.719666812547977e-07, "loss": 0.0, "step": 22834 }, { "epoch": 21.95673076923077, "grad_norm": 0.018748395144939423, "learning_rate": 7.714868299207057e-07, "loss": 0.0, "step": 22835 }, { "epoch": 21.95769230769231, "grad_norm": 0.001270801993086934, "learning_rate": 7.710071217860548e-07, "loss": 0.0, "step": 22836 }, { "epoch": 21.958653846153847, "grad_norm": 0.024343563243746758, "learning_rate": 7.705275568582848e-07, "loss": 0.0, "step": 22837 }, { "epoch": 21.959615384615386, "grad_norm": 0.0016581960953772068, "learning_rate": 7.700481351448386e-07, "loss": 0.0, "step": 22838 }, { "epoch": 21.960576923076925, "grad_norm": 0.004254212137311697, "learning_rate": 7.695688566531578e-07, "loss": 0.0, "step": 22839 }, { "epoch": 21.96153846153846, "grad_norm": 0.0006804619915783405, "learning_rate": 7.690897213906756e-07, "loss": 0.0, "step": 22840 }, { "epoch": 21.9625, "grad_norm": 0.001617939444258809, "learning_rate": 7.686107293648282e-07, "loss": 0.0, "step": 22841 }, { "epoch": 21.963461538461537, "grad_norm": 0.0005122621660120785, "learning_rate": 7.681318805830507e-07, "loss": 0.0, "step": 22842 }, { "epoch": 21.964423076923076, "grad_norm": 0.0011514119105413556, "learning_rate": 7.676531750527694e-07, "loss": 0.0, "step": 22843 }, { "epoch": 21.965384615384615, "grad_norm": 0.0007921015494503081, "learning_rate": 7.671746127814139e-07, "loss": 0.0, "step": 22844 }, { "epoch": 21.966346153846153, "grad_norm": 0.0010059692431241274, "learning_rate": 7.666961937764128e-07, "loss": 0.0, "step": 22845 }, { "epoch": 21.967307692307692, "grad_norm": 0.0013475302839651704, "learning_rate": 7.662179180451845e-07, "loss": 0.0, "step": 22846 }, { "epoch": 21.96826923076923, "grad_norm": 0.01069691963493824, "learning_rate": 7.657397855951565e-07, "loss": 0.0, "step": 22847 }, { "epoch": 21.96923076923077, "grad_norm": 0.001424183719791472, "learning_rate": 7.652617964337417e-07, "loss": 0.0, "step": 22848 }, { "epoch": 21.970192307692308, "grad_norm": 0.0005656542489305139, "learning_rate": 7.647839505683608e-07, "loss": 0.0, "step": 22849 }, { "epoch": 21.971153846153847, "grad_norm": 0.003582498524338007, "learning_rate": 7.643062480064301e-07, "loss": 0.0, "step": 22850 }, { "epoch": 21.972115384615385, "grad_norm": 0.0009989321697503328, "learning_rate": 7.63828688755357e-07, "loss": 0.0, "step": 22851 }, { "epoch": 21.973076923076924, "grad_norm": 0.00089341338025406, "learning_rate": 7.633512728225556e-07, "loss": 0.0, "step": 22852 }, { "epoch": 21.974038461538463, "grad_norm": 0.0024299572687596083, "learning_rate": 7.628740002154356e-07, "loss": 0.0, "step": 22853 }, { "epoch": 21.975, "grad_norm": 0.0017026143614202738, "learning_rate": 7.623968709413976e-07, "loss": 0.0, "step": 22854 }, { "epoch": 21.97596153846154, "grad_norm": 0.0015941880410537124, "learning_rate": 7.619198850078491e-07, "loss": 0.0, "step": 22855 }, { "epoch": 21.976923076923075, "grad_norm": 0.0008147135959006846, "learning_rate": 7.614430424221919e-07, "loss": 0.0, "step": 22856 }, { "epoch": 21.977884615384614, "grad_norm": 0.0012394950026646256, "learning_rate": 7.609663431918213e-07, "loss": 0.0, "step": 22857 }, { "epoch": 21.978846153846153, "grad_norm": 0.002451189560815692, "learning_rate": 7.60489787324139e-07, "loss": 0.0, "step": 22858 }, { "epoch": 21.97980769230769, "grad_norm": 0.0015266708796843886, "learning_rate": 7.60013374826536e-07, "loss": 0.0, "step": 22859 }, { "epoch": 21.98076923076923, "grad_norm": 0.00098258291836828, "learning_rate": 7.595371057064072e-07, "loss": 0.0, "step": 22860 }, { "epoch": 21.98173076923077, "grad_norm": 0.0010894621955230832, "learning_rate": 7.590609799711424e-07, "loss": 0.0, "step": 22861 }, { "epoch": 21.982692307692307, "grad_norm": 0.0010300304275006056, "learning_rate": 7.58584997628129e-07, "loss": 0.0, "step": 22862 }, { "epoch": 21.983653846153846, "grad_norm": 0.0008831159793771803, "learning_rate": 7.581091586847522e-07, "loss": 0.0, "step": 22863 }, { "epoch": 21.984615384615385, "grad_norm": 0.0008902039844542742, "learning_rate": 7.576334631483984e-07, "loss": 0.0, "step": 22864 }, { "epoch": 21.985576923076923, "grad_norm": 0.01348442304879427, "learning_rate": 7.57157911026446e-07, "loss": 0.0, "step": 22865 }, { "epoch": 21.986538461538462, "grad_norm": 0.0004123652179259807, "learning_rate": 7.566825023262747e-07, "loss": 0.0, "step": 22866 }, { "epoch": 21.9875, "grad_norm": 0.0009123127092607319, "learning_rate": 7.562072370552642e-07, "loss": 0.0, "step": 22867 }, { "epoch": 21.98846153846154, "grad_norm": 0.004763169679790735, "learning_rate": 7.557321152207864e-07, "loss": 0.0, "step": 22868 }, { "epoch": 21.989423076923078, "grad_norm": 0.0006063545006327331, "learning_rate": 7.552571368302153e-07, "loss": 0.0, "step": 22869 }, { "epoch": 21.990384615384617, "grad_norm": 0.0029496673960238695, "learning_rate": 7.547823018909184e-07, "loss": 0.0, "step": 22870 }, { "epoch": 21.991346153846155, "grad_norm": 0.0006543806521221995, "learning_rate": 7.543076104102664e-07, "loss": 0.0, "step": 22871 }, { "epoch": 21.99230769230769, "grad_norm": 0.0006167900282889605, "learning_rate": 7.538330623956259e-07, "loss": 0.0, "step": 22872 }, { "epoch": 21.99326923076923, "grad_norm": 0.0021904846653342247, "learning_rate": 7.533586578543583e-07, "loss": 0.0, "step": 22873 }, { "epoch": 21.994230769230768, "grad_norm": 0.0009755001519806683, "learning_rate": 7.528843967938248e-07, "loss": 0.0, "step": 22874 }, { "epoch": 21.995192307692307, "grad_norm": 0.004135259427130222, "learning_rate": 7.524102792213872e-07, "loss": 0.0, "step": 22875 }, { "epoch": 21.996153846153845, "grad_norm": 0.0018308801809325814, "learning_rate": 7.519363051443996e-07, "loss": 0.0, "step": 22876 }, { "epoch": 21.997115384615384, "grad_norm": 0.002283502370119095, "learning_rate": 7.51462474570217e-07, "loss": 0.0, "step": 22877 }, { "epoch": 21.998076923076923, "grad_norm": 0.00030724797397851944, "learning_rate": 7.50988787506195e-07, "loss": 0.0, "step": 22878 }, { "epoch": 21.99903846153846, "grad_norm": 0.0008573743398301303, "learning_rate": 7.505152439596796e-07, "loss": 0.0, "step": 22879 }, { "epoch": 22.0, "grad_norm": 0.0005841657985001802, "learning_rate": 7.50041843938023e-07, "loss": 0.0, "step": 22880 }, { "epoch": 22.00096153846154, "grad_norm": 0.0008994181407615542, "learning_rate": 7.495685874485658e-07, "loss": 0.0, "step": 22881 }, { "epoch": 22.001923076923077, "grad_norm": 0.00072590442141518, "learning_rate": 7.490954744986557e-07, "loss": 0.0, "step": 22882 }, { "epoch": 22.002884615384616, "grad_norm": 0.001031887368299067, "learning_rate": 7.48622505095633e-07, "loss": 0.0, "step": 22883 }, { "epoch": 22.003846153846155, "grad_norm": 0.0014428857248276472, "learning_rate": 7.481496792468357e-07, "loss": 0.0, "step": 22884 }, { "epoch": 22.004807692307693, "grad_norm": 0.0009681730880402029, "learning_rate": 7.476769969596021e-07, "loss": 0.0, "step": 22885 }, { "epoch": 22.005769230769232, "grad_norm": 0.0009752607438713312, "learning_rate": 7.472044582412674e-07, "loss": 0.0, "step": 22886 }, { "epoch": 22.00673076923077, "grad_norm": 0.0013566312845796347, "learning_rate": 7.467320630991604e-07, "loss": 0.0, "step": 22887 }, { "epoch": 22.00769230769231, "grad_norm": 0.000454791821539402, "learning_rate": 7.46259811540615e-07, "loss": 0.0, "step": 22888 }, { "epoch": 22.008653846153845, "grad_norm": 0.0010905879316851497, "learning_rate": 7.457877035729588e-07, "loss": 0.0, "step": 22889 }, { "epoch": 22.009615384615383, "grad_norm": 0.0006641292129643261, "learning_rate": 7.45315739203516e-07, "loss": 0.0, "step": 22890 }, { "epoch": 22.010576923076922, "grad_norm": 0.0010225762380287051, "learning_rate": 7.448439184396117e-07, "loss": 0.0, "step": 22891 }, { "epoch": 22.01153846153846, "grad_norm": 0.0006938553997315466, "learning_rate": 7.443722412885646e-07, "loss": 0.0, "step": 22892 }, { "epoch": 22.0125, "grad_norm": 0.0012990068644285202, "learning_rate": 7.439007077576943e-07, "loss": 0.0, "step": 22893 }, { "epoch": 22.013461538461538, "grad_norm": 0.00023877678904682398, "learning_rate": 7.434293178543217e-07, "loss": 0.0, "step": 22894 }, { "epoch": 22.014423076923077, "grad_norm": 0.008784537203609943, "learning_rate": 7.429580715857565e-07, "loss": 0.0001, "step": 22895 }, { "epoch": 22.015384615384615, "grad_norm": 0.0013573135947808623, "learning_rate": 7.424869689593128e-07, "loss": 0.0, "step": 22896 }, { "epoch": 22.016346153846154, "grad_norm": 0.0021278266794979572, "learning_rate": 7.420160099823026e-07, "loss": 0.0, "step": 22897 }, { "epoch": 22.017307692307693, "grad_norm": 0.0007588310400024056, "learning_rate": 7.4154519466203e-07, "loss": 0.0, "step": 22898 }, { "epoch": 22.01826923076923, "grad_norm": 0.002890459494665265, "learning_rate": 7.410745230058037e-07, "loss": 0.0, "step": 22899 }, { "epoch": 22.01923076923077, "grad_norm": 0.001228503999300301, "learning_rate": 7.406039950209276e-07, "loss": 0.0, "step": 22900 }, { "epoch": 22.02019230769231, "grad_norm": 0.0011420754017308354, "learning_rate": 7.401336107146995e-07, "loss": 0.0, "step": 22901 }, { "epoch": 22.021153846153847, "grad_norm": 0.0007976028136909008, "learning_rate": 7.3966337009442e-07, "loss": 0.0, "step": 22902 }, { "epoch": 22.022115384615386, "grad_norm": 0.0009110061218962073, "learning_rate": 7.391932731673879e-07, "loss": 0.0, "step": 22903 }, { "epoch": 22.023076923076925, "grad_norm": 0.0012954098638147116, "learning_rate": 7.38723319940895e-07, "loss": 0.0, "step": 22904 }, { "epoch": 22.02403846153846, "grad_norm": 0.001276109367609024, "learning_rate": 7.382535104222366e-07, "loss": 0.0, "step": 22905 }, { "epoch": 22.025, "grad_norm": 0.0012450935319066048, "learning_rate": 7.37783844618698e-07, "loss": 0.0, "step": 22906 }, { "epoch": 22.025961538461537, "grad_norm": 0.0005350466235540807, "learning_rate": 7.373143225375711e-07, "loss": 0.0, "step": 22907 }, { "epoch": 22.026923076923076, "grad_norm": 0.0014733378775417805, "learning_rate": 7.368449441861414e-07, "loss": 0.0, "step": 22908 }, { "epoch": 22.027884615384615, "grad_norm": 0.0009995566215366125, "learning_rate": 7.363757095716884e-07, "loss": 0.0, "step": 22909 }, { "epoch": 22.028846153846153, "grad_norm": 0.0011618721764534712, "learning_rate": 7.359066187014973e-07, "loss": 0.0, "step": 22910 }, { "epoch": 22.029807692307692, "grad_norm": 0.0007262779981829226, "learning_rate": 7.35437671582847e-07, "loss": 0.0, "step": 22911 }, { "epoch": 22.03076923076923, "grad_norm": 0.0009981687180697918, "learning_rate": 7.349688682230116e-07, "loss": 0.0, "step": 22912 }, { "epoch": 22.03173076923077, "grad_norm": 0.0006801158306188881, "learning_rate": 7.345002086292663e-07, "loss": 0.0, "step": 22913 }, { "epoch": 22.032692307692308, "grad_norm": 0.001330479164607823, "learning_rate": 7.340316928088853e-07, "loss": 0.0, "step": 22914 }, { "epoch": 22.033653846153847, "grad_norm": 0.0009040741133503616, "learning_rate": 7.335633207691362e-07, "loss": 0.0, "step": 22915 }, { "epoch": 22.034615384615385, "grad_norm": 0.0025942649226635695, "learning_rate": 7.330950925172886e-07, "loss": 0.0, "step": 22916 }, { "epoch": 22.035576923076924, "grad_norm": 0.0007414801511913538, "learning_rate": 7.326270080606058e-07, "loss": 0.0, "step": 22917 }, { "epoch": 22.036538461538463, "grad_norm": 0.000992321758531034, "learning_rate": 7.321590674063527e-07, "loss": 0.0, "step": 22918 }, { "epoch": 22.0375, "grad_norm": 0.0016969688003882766, "learning_rate": 7.316912705617918e-07, "loss": 0.0, "step": 22919 }, { "epoch": 22.03846153846154, "grad_norm": 0.0010447422973811626, "learning_rate": 7.31223617534178e-07, "loss": 0.0, "step": 22920 }, { "epoch": 22.039423076923075, "grad_norm": 0.00196305220015347, "learning_rate": 7.307561083307702e-07, "loss": 0.0, "step": 22921 }, { "epoch": 22.040384615384614, "grad_norm": 0.0007138984510675073, "learning_rate": 7.302887429588246e-07, "loss": 0.0, "step": 22922 }, { "epoch": 22.041346153846153, "grad_norm": 0.000736774003598839, "learning_rate": 7.298215214255899e-07, "loss": 0.0, "step": 22923 }, { "epoch": 22.04230769230769, "grad_norm": 0.0010536551708355546, "learning_rate": 7.293544437383171e-07, "loss": 0.0, "step": 22924 }, { "epoch": 22.04326923076923, "grad_norm": 0.0004490796127356589, "learning_rate": 7.288875099042569e-07, "loss": 0.0, "step": 22925 }, { "epoch": 22.04423076923077, "grad_norm": 0.0016786897322162986, "learning_rate": 7.284207199306493e-07, "loss": 0.0, "step": 22926 }, { "epoch": 22.045192307692307, "grad_norm": 0.18960432708263397, "learning_rate": 7.279540738247425e-07, "loss": 0.0011, "step": 22927 }, { "epoch": 22.046153846153846, "grad_norm": 0.001368907862342894, "learning_rate": 7.274875715937746e-07, "loss": 0.0, "step": 22928 }, { "epoch": 22.047115384615385, "grad_norm": 0.0029661962762475014, "learning_rate": 7.270212132449839e-07, "loss": 0.0, "step": 22929 }, { "epoch": 22.048076923076923, "grad_norm": 0.0010892268037423491, "learning_rate": 7.265549987856102e-07, "loss": 0.0, "step": 22930 }, { "epoch": 22.049038461538462, "grad_norm": 0.0004848281969316304, "learning_rate": 7.260889282228834e-07, "loss": 0.0, "step": 22931 }, { "epoch": 22.05, "grad_norm": 0.0012259023496881127, "learning_rate": 7.256230015640386e-07, "loss": 0.0, "step": 22932 }, { "epoch": 22.05096153846154, "grad_norm": 0.0009688291465863585, "learning_rate": 7.251572188163059e-07, "loss": 0.0, "step": 22933 }, { "epoch": 22.051923076923078, "grad_norm": 0.0012182153295725584, "learning_rate": 7.246915799869114e-07, "loss": 0.0, "step": 22934 }, { "epoch": 22.052884615384617, "grad_norm": 0.00225563021376729, "learning_rate": 7.242260850830796e-07, "loss": 0.0, "step": 22935 }, { "epoch": 22.053846153846155, "grad_norm": 0.0007453238358721137, "learning_rate": 7.237607341120368e-07, "loss": 0.0, "step": 22936 }, { "epoch": 22.05480769230769, "grad_norm": 0.004232619423419237, "learning_rate": 7.232955270810005e-07, "loss": 0.0, "step": 22937 }, { "epoch": 22.05576923076923, "grad_norm": 0.0011400266084820032, "learning_rate": 7.228304639971928e-07, "loss": 0.0, "step": 22938 }, { "epoch": 22.056730769230768, "grad_norm": 0.001431385986506939, "learning_rate": 7.223655448678257e-07, "loss": 0.0, "step": 22939 }, { "epoch": 22.057692307692307, "grad_norm": 0.002087064553052187, "learning_rate": 7.219007697001168e-07, "loss": 0.0, "step": 22940 }, { "epoch": 22.058653846153845, "grad_norm": 0.002546430565416813, "learning_rate": 7.21436138501278e-07, "loss": 0.0, "step": 22941 }, { "epoch": 22.059615384615384, "grad_norm": 0.001018955372273922, "learning_rate": 7.209716512785159e-07, "loss": 0.0, "step": 22942 }, { "epoch": 22.060576923076923, "grad_norm": 0.0007759285508655012, "learning_rate": 7.205073080390412e-07, "loss": 0.0, "step": 22943 }, { "epoch": 22.06153846153846, "grad_norm": 0.0003258507640566677, "learning_rate": 7.200431087900595e-07, "loss": 0.0, "step": 22944 }, { "epoch": 22.0625, "grad_norm": 0.0015570452669635415, "learning_rate": 7.195790535387703e-07, "loss": 0.0, "step": 22945 }, { "epoch": 22.06346153846154, "grad_norm": 0.0005454311612993479, "learning_rate": 7.19115142292377e-07, "loss": 0.0, "step": 22946 }, { "epoch": 22.064423076923077, "grad_norm": 0.00026510239695198834, "learning_rate": 7.186513750580793e-07, "loss": 0.0, "step": 22947 }, { "epoch": 22.065384615384616, "grad_norm": 0.0014902049442753196, "learning_rate": 7.181877518430702e-07, "loss": 0.0, "step": 22948 }, { "epoch": 22.066346153846155, "grad_norm": 0.0005912640481255949, "learning_rate": 7.177242726545464e-07, "loss": 0.0, "step": 22949 }, { "epoch": 22.067307692307693, "grad_norm": 0.0027306973934173584, "learning_rate": 7.172609374996975e-07, "loss": 0.0, "step": 22950 }, { "epoch": 22.068269230769232, "grad_norm": 0.002914122538641095, "learning_rate": 7.167977463857145e-07, "loss": 0.0, "step": 22951 }, { "epoch": 22.06923076923077, "grad_norm": 0.0012545757927000523, "learning_rate": 7.163346993197862e-07, "loss": 0.0, "step": 22952 }, { "epoch": 22.07019230769231, "grad_norm": 0.00100943841971457, "learning_rate": 7.158717963090945e-07, "loss": 0.0, "step": 22953 }, { "epoch": 22.071153846153845, "grad_norm": 0.0009338748641312122, "learning_rate": 7.154090373608236e-07, "loss": 0.0, "step": 22954 }, { "epoch": 22.072115384615383, "grad_norm": 0.0013162463437765837, "learning_rate": 7.149464224821556e-07, "loss": 0.0, "step": 22955 }, { "epoch": 22.073076923076922, "grad_norm": 0.0019185858545824885, "learning_rate": 7.144839516802671e-07, "loss": 0.0, "step": 22956 }, { "epoch": 22.07403846153846, "grad_norm": 0.0012913531390950084, "learning_rate": 7.140216249623355e-07, "loss": 0.0, "step": 22957 }, { "epoch": 22.075, "grad_norm": 0.0008607819909229875, "learning_rate": 7.135594423355352e-07, "loss": 0.0, "step": 22958 }, { "epoch": 22.075961538461538, "grad_norm": 0.0007366110803559422, "learning_rate": 7.130974038070349e-07, "loss": 0.0, "step": 22959 }, { "epoch": 22.076923076923077, "grad_norm": 0.0010197455994784832, "learning_rate": 7.126355093840088e-07, "loss": 0.0, "step": 22960 }, { "epoch": 22.077884615384615, "grad_norm": 0.0007410508114844561, "learning_rate": 7.121737590736188e-07, "loss": 0.0, "step": 22961 }, { "epoch": 22.078846153846154, "grad_norm": 0.0019075218588113785, "learning_rate": 7.117121528830328e-07, "loss": 0.0, "step": 22962 }, { "epoch": 22.079807692307693, "grad_norm": 0.0005998312262818217, "learning_rate": 7.112506908194161e-07, "loss": 0.0, "step": 22963 }, { "epoch": 22.08076923076923, "grad_norm": 0.0007740067085251212, "learning_rate": 7.10789372889924e-07, "loss": 0.0, "step": 22964 }, { "epoch": 22.08173076923077, "grad_norm": 0.0012882783776149154, "learning_rate": 7.103281991017174e-07, "loss": 0.0, "step": 22965 }, { "epoch": 22.08269230769231, "grad_norm": 0.00205987342633307, "learning_rate": 7.098671694619541e-07, "loss": 0.0, "step": 22966 }, { "epoch": 22.083653846153847, "grad_norm": 0.0015774571802467108, "learning_rate": 7.094062839777838e-07, "loss": 0.0, "step": 22967 }, { "epoch": 22.084615384615386, "grad_norm": 0.0008038093219511211, "learning_rate": 7.089455426563608e-07, "loss": 0.0, "step": 22968 }, { "epoch": 22.085576923076925, "grad_norm": 0.0007372877444140613, "learning_rate": 7.08484945504836e-07, "loss": 0.0, "step": 22969 }, { "epoch": 22.08653846153846, "grad_norm": 0.0020004427060484886, "learning_rate": 7.080244925303536e-07, "loss": 0.0, "step": 22970 }, { "epoch": 22.0875, "grad_norm": 0.0028645985294133425, "learning_rate": 7.075641837400604e-07, "loss": 0.0, "step": 22971 }, { "epoch": 22.088461538461537, "grad_norm": 0.0017025322886183858, "learning_rate": 7.07104019141096e-07, "loss": 0.0, "step": 22972 }, { "epoch": 22.089423076923076, "grad_norm": 0.0008430400630459189, "learning_rate": 7.066439987406026e-07, "loss": 0.0, "step": 22973 }, { "epoch": 22.090384615384615, "grad_norm": 0.0012096037389710546, "learning_rate": 7.061841225457211e-07, "loss": 0.0, "step": 22974 }, { "epoch": 22.091346153846153, "grad_norm": 0.001120773027651012, "learning_rate": 7.057243905635836e-07, "loss": 0.0, "step": 22975 }, { "epoch": 22.092307692307692, "grad_norm": 0.0006948456284590065, "learning_rate": 7.052648028013254e-07, "loss": 0.0, "step": 22976 }, { "epoch": 22.09326923076923, "grad_norm": 0.001177815138362348, "learning_rate": 7.048053592660786e-07, "loss": 0.0, "step": 22977 }, { "epoch": 22.09423076923077, "grad_norm": 0.0006763224373571575, "learning_rate": 7.043460599649699e-07, "loss": 0.0, "step": 22978 }, { "epoch": 22.095192307692308, "grad_norm": 0.0019455354195088148, "learning_rate": 7.038869049051277e-07, "loss": 0.0, "step": 22979 }, { "epoch": 22.096153846153847, "grad_norm": 0.0013846816727891564, "learning_rate": 7.03427894093679e-07, "loss": 0.0, "step": 22980 }, { "epoch": 22.097115384615385, "grad_norm": 0.0009805524023249745, "learning_rate": 7.029690275377432e-07, "loss": 0.0, "step": 22981 }, { "epoch": 22.098076923076924, "grad_norm": 0.000811350648291409, "learning_rate": 7.025103052444426e-07, "loss": 0.0, "step": 22982 }, { "epoch": 22.099038461538463, "grad_norm": 0.0005521198036149144, "learning_rate": 7.020517272208927e-07, "loss": 0.0, "step": 22983 }, { "epoch": 22.1, "grad_norm": 0.000667350017465651, "learning_rate": 7.015932934742109e-07, "loss": 0.0, "step": 22984 }, { "epoch": 22.10096153846154, "grad_norm": 0.0013886463129892945, "learning_rate": 7.011350040115128e-07, "loss": 0.0, "step": 22985 }, { "epoch": 22.101923076923075, "grad_norm": 0.0003560422337614, "learning_rate": 7.006768588399049e-07, "loss": 0.0, "step": 22986 }, { "epoch": 22.102884615384614, "grad_norm": 0.0010269413469359279, "learning_rate": 7.002188579665004e-07, "loss": 0.0, "step": 22987 }, { "epoch": 22.103846153846153, "grad_norm": 0.0014241766184568405, "learning_rate": 6.997610013984046e-07, "loss": 0.0, "step": 22988 }, { "epoch": 22.10480769230769, "grad_norm": 0.0009439229033887386, "learning_rate": 6.993032891427221e-07, "loss": 0.0, "step": 22989 }, { "epoch": 22.10576923076923, "grad_norm": 0.0012715671909973025, "learning_rate": 6.988457212065547e-07, "loss": 0.0, "step": 22990 }, { "epoch": 22.10673076923077, "grad_norm": 0.001613675500266254, "learning_rate": 6.983882975970047e-07, "loss": 0.0, "step": 22991 }, { "epoch": 22.107692307692307, "grad_norm": 0.0014625238254666328, "learning_rate": 6.979310183211663e-07, "loss": 0.0, "step": 22992 }, { "epoch": 22.108653846153846, "grad_norm": 0.0008820239454507828, "learning_rate": 6.974738833861383e-07, "loss": 0.0, "step": 22993 }, { "epoch": 22.109615384615385, "grad_norm": 0.0019440087489783764, "learning_rate": 6.970168927990139e-07, "loss": 0.0, "step": 22994 }, { "epoch": 22.110576923076923, "grad_norm": 0.0009729478042572737, "learning_rate": 6.96560046566882e-07, "loss": 0.0, "step": 22995 }, { "epoch": 22.111538461538462, "grad_norm": 0.0014173912350088358, "learning_rate": 6.961033446968346e-07, "loss": 0.0, "step": 22996 }, { "epoch": 22.1125, "grad_norm": 0.0005375336040742695, "learning_rate": 6.956467871959549e-07, "loss": 0.0, "step": 22997 }, { "epoch": 22.11346153846154, "grad_norm": 0.0014663732144981623, "learning_rate": 6.951903740713295e-07, "loss": 0.0, "step": 22998 }, { "epoch": 22.114423076923078, "grad_norm": 0.0005192321259528399, "learning_rate": 6.947341053300416e-07, "loss": 0.0, "step": 22999 }, { "epoch": 22.115384615384617, "grad_norm": 0.0007592548499815166, "learning_rate": 6.942779809791678e-07, "loss": 0.0, "step": 23000 }, { "epoch": 22.116346153846155, "grad_norm": 0.0009872879600152373, "learning_rate": 6.93822001025789e-07, "loss": 0.0, "step": 23001 }, { "epoch": 22.11730769230769, "grad_norm": 0.004569733049720526, "learning_rate": 6.933661654769797e-07, "loss": 0.0, "step": 23002 }, { "epoch": 22.11826923076923, "grad_norm": 0.001012851600535214, "learning_rate": 6.929104743398119e-07, "loss": 0.0, "step": 23003 }, { "epoch": 22.119230769230768, "grad_norm": 0.0010134989861398935, "learning_rate": 6.924549276213577e-07, "loss": 0.0, "step": 23004 }, { "epoch": 22.120192307692307, "grad_norm": 0.0028353389352560043, "learning_rate": 6.919995253286859e-07, "loss": 0.0, "step": 23005 }, { "epoch": 22.121153846153845, "grad_norm": 0.0007357285358011723, "learning_rate": 6.915442674688633e-07, "loss": 0.0, "step": 23006 }, { "epoch": 22.122115384615384, "grad_norm": 0.0020185066387057304, "learning_rate": 6.910891540489539e-07, "loss": 0.0, "step": 23007 }, { "epoch": 22.123076923076923, "grad_norm": 0.00038085179403424263, "learning_rate": 6.906341850760178e-07, "loss": 0.0, "step": 23008 }, { "epoch": 22.12403846153846, "grad_norm": 0.0016229244647547603, "learning_rate": 6.901793605571172e-07, "loss": 0.0, "step": 23009 }, { "epoch": 22.125, "grad_norm": 0.0008592692320235074, "learning_rate": 6.897246804993108e-07, "loss": 0.0, "step": 23010 }, { "epoch": 22.12596153846154, "grad_norm": 0.0020300683099776506, "learning_rate": 6.892701449096495e-07, "loss": 0.0, "step": 23011 }, { "epoch": 22.126923076923077, "grad_norm": 0.0014009946025907993, "learning_rate": 6.88815753795189e-07, "loss": 0.0, "step": 23012 }, { "epoch": 22.127884615384616, "grad_norm": 0.0008798199705779552, "learning_rate": 6.883615071629823e-07, "loss": 0.0, "step": 23013 }, { "epoch": 22.128846153846155, "grad_norm": 0.0011555292876437306, "learning_rate": 6.87907405020074e-07, "loss": 0.0, "step": 23014 }, { "epoch": 22.129807692307693, "grad_norm": 0.0014461851678788662, "learning_rate": 6.874534473735118e-07, "loss": 0.0, "step": 23015 }, { "epoch": 22.130769230769232, "grad_norm": 0.003850724548101425, "learning_rate": 6.869996342303409e-07, "loss": 0.0, "step": 23016 }, { "epoch": 22.13173076923077, "grad_norm": 0.0006124938372522593, "learning_rate": 6.865459655976014e-07, "loss": 0.0, "step": 23017 }, { "epoch": 22.13269230769231, "grad_norm": 0.0025417832657694817, "learning_rate": 6.860924414823356e-07, "loss": 0.0, "step": 23018 }, { "epoch": 22.133653846153845, "grad_norm": 0.0007172267651185393, "learning_rate": 6.856390618915775e-07, "loss": 0.0, "step": 23019 }, { "epoch": 22.134615384615383, "grad_norm": 0.0019996624905616045, "learning_rate": 6.851858268323641e-07, "loss": 0.0, "step": 23020 }, { "epoch": 22.135576923076922, "grad_norm": 0.0009200599743053317, "learning_rate": 6.847327363117296e-07, "loss": 0.0, "step": 23021 }, { "epoch": 22.13653846153846, "grad_norm": 0.0017321017803624272, "learning_rate": 6.842797903367015e-07, "loss": 0.0, "step": 23022 }, { "epoch": 22.1375, "grad_norm": 0.0015595764853060246, "learning_rate": 6.838269889143089e-07, "loss": 0.0, "step": 23023 }, { "epoch": 22.138461538461538, "grad_norm": 0.0022434615530073643, "learning_rate": 6.833743320515807e-07, "loss": 0.0, "step": 23024 }, { "epoch": 22.139423076923077, "grad_norm": 0.0005705999792553484, "learning_rate": 6.829218197555376e-07, "loss": 0.0, "step": 23025 }, { "epoch": 22.140384615384615, "grad_norm": 0.002591858385130763, "learning_rate": 6.824694520332031e-07, "loss": 0.0, "step": 23026 }, { "epoch": 22.141346153846154, "grad_norm": 0.0002804211399052292, "learning_rate": 6.820172288915972e-07, "loss": 0.0, "step": 23027 }, { "epoch": 22.142307692307693, "grad_norm": 0.000922448409255594, "learning_rate": 6.81565150337734e-07, "loss": 0.0, "step": 23028 }, { "epoch": 22.14326923076923, "grad_norm": 0.0014114374062046409, "learning_rate": 6.811132163786316e-07, "loss": 0.0, "step": 23029 }, { "epoch": 22.14423076923077, "grad_norm": 0.0018155191792175174, "learning_rate": 6.806614270213008e-07, "loss": 0.0, "step": 23030 }, { "epoch": 22.14519230769231, "grad_norm": 0.0009549203095957637, "learning_rate": 6.802097822727527e-07, "loss": 0.0, "step": 23031 }, { "epoch": 22.146153846153847, "grad_norm": 0.0005082943825982511, "learning_rate": 6.797582821399973e-07, "loss": 0.0, "step": 23032 }, { "epoch": 22.147115384615386, "grad_norm": 0.0018720448715612292, "learning_rate": 6.793069266300367e-07, "loss": 0.0, "step": 23033 }, { "epoch": 22.148076923076925, "grad_norm": 0.001049357932060957, "learning_rate": 6.788557157498765e-07, "loss": 0.0, "step": 23034 }, { "epoch": 22.14903846153846, "grad_norm": 0.0006164730293676257, "learning_rate": 6.7840464950652e-07, "loss": 0.0, "step": 23035 }, { "epoch": 22.15, "grad_norm": 0.0014728466048836708, "learning_rate": 6.779537279069637e-07, "loss": 0.0, "step": 23036 }, { "epoch": 22.150961538461537, "grad_norm": 0.0006209221901372075, "learning_rate": 6.775029509582054e-07, "loss": 0.0, "step": 23037 }, { "epoch": 22.151923076923076, "grad_norm": 0.0014957418898120522, "learning_rate": 6.770523186672407e-07, "loss": 0.0, "step": 23038 }, { "epoch": 22.152884615384615, "grad_norm": 0.00034314158256165683, "learning_rate": 6.766018310410616e-07, "loss": 0.0, "step": 23039 }, { "epoch": 22.153846153846153, "grad_norm": 0.0012307486031204462, "learning_rate": 6.761514880866582e-07, "loss": 0.0, "step": 23040 }, { "epoch": 22.154807692307692, "grad_norm": 0.0009487210772931576, "learning_rate": 6.757012898110182e-07, "loss": 0.0, "step": 23041 }, { "epoch": 22.15576923076923, "grad_norm": 0.0009810057235881686, "learning_rate": 6.752512362211272e-07, "loss": 0.0, "step": 23042 }, { "epoch": 22.15673076923077, "grad_norm": 0.0022660440299659967, "learning_rate": 6.748013273239707e-07, "loss": 0.0, "step": 23043 }, { "epoch": 22.157692307692308, "grad_norm": 0.0017446900019422174, "learning_rate": 6.743515631265274e-07, "loss": 0.0, "step": 23044 }, { "epoch": 22.158653846153847, "grad_norm": 0.0003078764711972326, "learning_rate": 6.739019436357774e-07, "loss": 0.0, "step": 23045 }, { "epoch": 22.159615384615385, "grad_norm": 0.0015600223559886217, "learning_rate": 6.734524688586985e-07, "loss": 0.0, "step": 23046 }, { "epoch": 22.160576923076924, "grad_norm": 0.0010436410084366798, "learning_rate": 6.730031388022628e-07, "loss": 0.0, "step": 23047 }, { "epoch": 22.161538461538463, "grad_norm": 0.0010712937219068408, "learning_rate": 6.725539534734437e-07, "loss": 0.0, "step": 23048 }, { "epoch": 22.1625, "grad_norm": 0.0009686712292023003, "learning_rate": 6.721049128792134e-07, "loss": 0.0, "step": 23049 }, { "epoch": 22.16346153846154, "grad_norm": 0.0012050258228555322, "learning_rate": 6.716560170265362e-07, "loss": 0.0, "step": 23050 }, { "epoch": 22.164423076923075, "grad_norm": 0.005763803608715534, "learning_rate": 6.712072659223812e-07, "loss": 0.0, "step": 23051 }, { "epoch": 22.165384615384614, "grad_norm": 0.0020748686511069536, "learning_rate": 6.707586595737081e-07, "loss": 0.0, "step": 23052 }, { "epoch": 22.166346153846153, "grad_norm": 0.0015904215397313237, "learning_rate": 6.703101979874793e-07, "loss": 0.0, "step": 23053 }, { "epoch": 22.16730769230769, "grad_norm": 0.0005490935291163623, "learning_rate": 6.698618811706547e-07, "loss": 0.0, "step": 23054 }, { "epoch": 22.16826923076923, "grad_norm": 0.0011293675052002072, "learning_rate": 6.6941370913019e-07, "loss": 0.0, "step": 23055 }, { "epoch": 22.16923076923077, "grad_norm": 0.0009825401939451694, "learning_rate": 6.689656818730383e-07, "loss": 0.0, "step": 23056 }, { "epoch": 22.170192307692307, "grad_norm": 0.000597040168941021, "learning_rate": 6.685177994061554e-07, "loss": 0.0, "step": 23057 }, { "epoch": 22.171153846153846, "grad_norm": 0.0006059268489480019, "learning_rate": 6.680700617364877e-07, "loss": 0.0, "step": 23058 }, { "epoch": 22.172115384615385, "grad_norm": 0.0009816616075113416, "learning_rate": 6.67622468870982e-07, "loss": 0.0, "step": 23059 }, { "epoch": 22.173076923076923, "grad_norm": 0.001297908602282405, "learning_rate": 6.671750208165883e-07, "loss": 0.0, "step": 23060 }, { "epoch": 22.174038461538462, "grad_norm": 0.0015816619852557778, "learning_rate": 6.667277175802444e-07, "loss": 0.0, "step": 23061 }, { "epoch": 22.175, "grad_norm": 0.0015230340650305152, "learning_rate": 6.662805591688959e-07, "loss": 0.0, "step": 23062 }, { "epoch": 22.17596153846154, "grad_norm": 0.0011941032717004418, "learning_rate": 6.658335455894771e-07, "loss": 0.0, "step": 23063 }, { "epoch": 22.176923076923078, "grad_norm": 0.0008992276270873845, "learning_rate": 6.653866768489259e-07, "loss": 0.0, "step": 23064 }, { "epoch": 22.177884615384617, "grad_norm": 0.0013546005357056856, "learning_rate": 6.649399529541789e-07, "loss": 0.0, "step": 23065 }, { "epoch": 22.178846153846155, "grad_norm": 0.002260790439322591, "learning_rate": 6.644933739121628e-07, "loss": 0.0, "step": 23066 }, { "epoch": 22.17980769230769, "grad_norm": 0.0006000194698572159, "learning_rate": 6.64046939729811e-07, "loss": 0.0, "step": 23067 }, { "epoch": 22.18076923076923, "grad_norm": 0.0014485277933999896, "learning_rate": 6.636006504140524e-07, "loss": 0.0, "step": 23068 }, { "epoch": 22.181730769230768, "grad_norm": 0.001458137878216803, "learning_rate": 6.631545059718069e-07, "loss": 0.0, "step": 23069 }, { "epoch": 22.182692307692307, "grad_norm": 0.0019384414190426469, "learning_rate": 6.627085064100014e-07, "loss": 0.0, "step": 23070 }, { "epoch": 22.183653846153845, "grad_norm": 0.0025559025816619396, "learning_rate": 6.622626517355557e-07, "loss": 0.0, "step": 23071 }, { "epoch": 22.184615384615384, "grad_norm": 0.0012277516070753336, "learning_rate": 6.618169419553866e-07, "loss": 0.0, "step": 23072 }, { "epoch": 22.185576923076923, "grad_norm": 0.0018273977329954505, "learning_rate": 6.613713770764129e-07, "loss": 0.0, "step": 23073 }, { "epoch": 22.18653846153846, "grad_norm": 0.0012899375287815928, "learning_rate": 6.609259571055449e-07, "loss": 0.0, "step": 23074 }, { "epoch": 22.1875, "grad_norm": 0.0020060234237462282, "learning_rate": 6.604806820496968e-07, "loss": 0.0, "step": 23075 }, { "epoch": 22.18846153846154, "grad_norm": 0.0011854220647364855, "learning_rate": 6.600355519157786e-07, "loss": 0.0, "step": 23076 }, { "epoch": 22.189423076923077, "grad_norm": 0.001208388595841825, "learning_rate": 6.59590566710695e-07, "loss": 0.0, "step": 23077 }, { "epoch": 22.190384615384616, "grad_norm": 0.0006843497976660728, "learning_rate": 6.591457264413514e-07, "loss": 0.0, "step": 23078 }, { "epoch": 22.191346153846155, "grad_norm": 0.0004974585026502609, "learning_rate": 6.587010311146524e-07, "loss": 0.0, "step": 23079 }, { "epoch": 22.192307692307693, "grad_norm": 0.0019390553934499621, "learning_rate": 6.582564807374958e-07, "loss": 0.0, "step": 23080 }, { "epoch": 22.193269230769232, "grad_norm": 0.0016484729712828994, "learning_rate": 6.578120753167816e-07, "loss": 0.0, "step": 23081 }, { "epoch": 22.19423076923077, "grad_norm": 0.001392055070027709, "learning_rate": 6.573678148594065e-07, "loss": 0.0, "step": 23082 }, { "epoch": 22.19519230769231, "grad_norm": 0.0016806997591629624, "learning_rate": 6.569236993722605e-07, "loss": 0.0, "step": 23083 }, { "epoch": 22.196153846153845, "grad_norm": 0.001310319872573018, "learning_rate": 6.564797288622371e-07, "loss": 0.0, "step": 23084 }, { "epoch": 22.197115384615383, "grad_norm": 0.00115045509301126, "learning_rate": 6.560359033362263e-07, "loss": 0.0, "step": 23085 }, { "epoch": 22.198076923076922, "grad_norm": 0.0005394210456870496, "learning_rate": 6.555922228011136e-07, "loss": 0.0, "step": 23086 }, { "epoch": 22.19903846153846, "grad_norm": 0.001647686818614602, "learning_rate": 6.551486872637857e-07, "loss": 0.0, "step": 23087 }, { "epoch": 22.2, "grad_norm": 0.002280519576743245, "learning_rate": 6.547052967311207e-07, "loss": 0.0, "step": 23088 }, { "epoch": 22.200961538461538, "grad_norm": 0.0008034990169107914, "learning_rate": 6.542620512100029e-07, "loss": 0.0, "step": 23089 }, { "epoch": 22.201923076923077, "grad_norm": 0.0009686677367426455, "learning_rate": 6.53818950707309e-07, "loss": 0.0, "step": 23090 }, { "epoch": 22.202884615384615, "grad_norm": 0.0009783864952623844, "learning_rate": 6.533759952299135e-07, "loss": 0.0, "step": 23091 }, { "epoch": 22.203846153846154, "grad_norm": 0.001430056756362319, "learning_rate": 6.529331847846899e-07, "loss": 0.0, "step": 23092 }, { "epoch": 22.204807692307693, "grad_norm": 0.0010507969418540597, "learning_rate": 6.524905193785114e-07, "loss": 0.0, "step": 23093 }, { "epoch": 22.20576923076923, "grad_norm": 0.0031522593926638365, "learning_rate": 6.520479990182449e-07, "loss": 0.0, "step": 23094 }, { "epoch": 22.20673076923077, "grad_norm": 0.0013612949987873435, "learning_rate": 6.516056237107571e-07, "loss": 0.0, "step": 23095 }, { "epoch": 22.20769230769231, "grad_norm": 0.00045320799108594656, "learning_rate": 6.511633934629136e-07, "loss": 0.0, "step": 23096 }, { "epoch": 22.208653846153847, "grad_norm": 0.0005639524897560477, "learning_rate": 6.507213082815745e-07, "loss": 0.0, "step": 23097 }, { "epoch": 22.209615384615386, "grad_norm": 0.0008637914434075356, "learning_rate": 6.502793681736031e-07, "loss": 0.0, "step": 23098 }, { "epoch": 22.210576923076925, "grad_norm": 0.0007413592538796365, "learning_rate": 6.498375731458529e-07, "loss": 0.0, "step": 23099 }, { "epoch": 22.21153846153846, "grad_norm": 0.00305181834846735, "learning_rate": 6.493959232051805e-07, "loss": 0.0, "step": 23100 }, { "epoch": 22.2125, "grad_norm": 0.0017511489568278193, "learning_rate": 6.489544183584418e-07, "loss": 0.0, "step": 23101 }, { "epoch": 22.213461538461537, "grad_norm": 0.015222236514091492, "learning_rate": 6.485130586124844e-07, "loss": 0.0, "step": 23102 }, { "epoch": 22.214423076923076, "grad_norm": 0.001184245222248137, "learning_rate": 6.480718439741574e-07, "loss": 0.0, "step": 23103 }, { "epoch": 22.215384615384615, "grad_norm": 0.0008569922065362334, "learning_rate": 6.476307744503096e-07, "loss": 0.0, "step": 23104 }, { "epoch": 22.216346153846153, "grad_norm": 0.002757259411737323, "learning_rate": 6.471898500477825e-07, "loss": 0.0, "step": 23105 }, { "epoch": 22.217307692307692, "grad_norm": 0.0006426351610571146, "learning_rate": 6.467490707734181e-07, "loss": 0.0, "step": 23106 }, { "epoch": 22.21826923076923, "grad_norm": 0.003171438118442893, "learning_rate": 6.463084366340577e-07, "loss": 0.0, "step": 23107 }, { "epoch": 22.21923076923077, "grad_norm": 0.0008564902236685157, "learning_rate": 6.45867947636536e-07, "loss": 0.0, "step": 23108 }, { "epoch": 22.220192307692308, "grad_norm": 0.0017281072214245796, "learning_rate": 6.454276037876927e-07, "loss": 0.0, "step": 23109 }, { "epoch": 22.221153846153847, "grad_norm": 0.001504269428551197, "learning_rate": 6.449874050943549e-07, "loss": 0.0, "step": 23110 }, { "epoch": 22.222115384615385, "grad_norm": 0.0008593773818574846, "learning_rate": 6.445473515633561e-07, "loss": 0.0, "step": 23111 }, { "epoch": 22.223076923076924, "grad_norm": 0.0006905666086822748, "learning_rate": 6.44107443201526e-07, "loss": 0.0, "step": 23112 }, { "epoch": 22.224038461538463, "grad_norm": 0.0005944959702901542, "learning_rate": 6.436676800156883e-07, "loss": 0.0, "step": 23113 }, { "epoch": 22.225, "grad_norm": 0.001011387095786631, "learning_rate": 6.432280620126663e-07, "loss": 0.0, "step": 23114 }, { "epoch": 22.22596153846154, "grad_norm": 0.0007774841506034136, "learning_rate": 6.427885891992858e-07, "loss": 0.0, "step": 23115 }, { "epoch": 22.226923076923075, "grad_norm": 0.0014696487924084067, "learning_rate": 6.423492615823612e-07, "loss": 0.0, "step": 23116 }, { "epoch": 22.227884615384614, "grad_norm": 0.0009582851198501885, "learning_rate": 6.419100791687116e-07, "loss": 0.0, "step": 23117 }, { "epoch": 22.228846153846153, "grad_norm": 0.001093243365176022, "learning_rate": 6.414710419651537e-07, "loss": 0.0, "step": 23118 }, { "epoch": 22.22980769230769, "grad_norm": 0.0007348765502683818, "learning_rate": 6.410321499784966e-07, "loss": 0.0, "step": 23119 }, { "epoch": 22.23076923076923, "grad_norm": 0.000891638919711113, "learning_rate": 6.405934032155536e-07, "loss": 0.0, "step": 23120 }, { "epoch": 22.23173076923077, "grad_norm": 0.0011675956193357706, "learning_rate": 6.401548016831305e-07, "loss": 0.0, "step": 23121 }, { "epoch": 22.232692307692307, "grad_norm": 0.0009631828288547695, "learning_rate": 6.397163453880339e-07, "loss": 0.0, "step": 23122 }, { "epoch": 22.233653846153846, "grad_norm": 0.0015366851584985852, "learning_rate": 6.392780343370686e-07, "loss": 0.0, "step": 23123 }, { "epoch": 22.234615384615385, "grad_norm": 0.002515240339562297, "learning_rate": 6.388398685370345e-07, "loss": 0.0, "step": 23124 }, { "epoch": 22.235576923076923, "grad_norm": 0.001088087446987629, "learning_rate": 6.384018479947296e-07, "loss": 0.0, "step": 23125 }, { "epoch": 22.236538461538462, "grad_norm": 0.0007516361074522138, "learning_rate": 6.37963972716954e-07, "loss": 0.0, "step": 23126 }, { "epoch": 22.2375, "grad_norm": 0.002579265274107456, "learning_rate": 6.375262427104989e-07, "loss": 0.0, "step": 23127 }, { "epoch": 22.23846153846154, "grad_norm": 0.0013021607883274555, "learning_rate": 6.370886579821578e-07, "loss": 0.0, "step": 23128 }, { "epoch": 22.239423076923078, "grad_norm": 0.0025209353771060705, "learning_rate": 6.366512185387231e-07, "loss": 0.0, "step": 23129 }, { "epoch": 22.240384615384617, "grad_norm": 0.0012476699193939567, "learning_rate": 6.362139243869781e-07, "loss": 0.0, "step": 23130 }, { "epoch": 22.241346153846155, "grad_norm": 0.0031187438871711493, "learning_rate": 6.35776775533713e-07, "loss": 0.0, "step": 23131 }, { "epoch": 22.24230769230769, "grad_norm": 0.0010389738017693162, "learning_rate": 6.353397719857057e-07, "loss": 0.0, "step": 23132 }, { "epoch": 22.24326923076923, "grad_norm": 0.0013392798136919737, "learning_rate": 6.349029137497409e-07, "loss": 0.0, "step": 23133 }, { "epoch": 22.244230769230768, "grad_norm": 0.0012221215292811394, "learning_rate": 6.344662008325986e-07, "loss": 0.0, "step": 23134 }, { "epoch": 22.245192307692307, "grad_norm": 0.001771388459019363, "learning_rate": 6.340296332410512e-07, "loss": 0.0, "step": 23135 }, { "epoch": 22.246153846153845, "grad_norm": 0.0017742322525009513, "learning_rate": 6.335932109818754e-07, "loss": 0.0, "step": 23136 }, { "epoch": 22.247115384615384, "grad_norm": 0.001136614941060543, "learning_rate": 6.331569340618448e-07, "loss": 0.0, "step": 23137 }, { "epoch": 22.248076923076923, "grad_norm": 0.0009431481012143195, "learning_rate": 6.327208024877251e-07, "loss": 0.0, "step": 23138 }, { "epoch": 22.24903846153846, "grad_norm": 0.0009514808771200478, "learning_rate": 6.322848162662865e-07, "loss": 0.0, "step": 23139 }, { "epoch": 22.25, "grad_norm": 0.0012024290626868606, "learning_rate": 6.318489754042945e-07, "loss": 0.0, "step": 23140 }, { "epoch": 22.25096153846154, "grad_norm": 0.0013668019091710448, "learning_rate": 6.314132799085104e-07, "loss": 0.0, "step": 23141 }, { "epoch": 22.251923076923077, "grad_norm": 0.0018710102885961533, "learning_rate": 6.309777297856967e-07, "loss": 0.0, "step": 23142 }, { "epoch": 22.252884615384616, "grad_norm": 0.0004800509777851403, "learning_rate": 6.305423250426091e-07, "loss": 0.0, "step": 23143 }, { "epoch": 22.253846153846155, "grad_norm": 0.004104669205844402, "learning_rate": 6.301070656860064e-07, "loss": 0.0, "step": 23144 }, { "epoch": 22.254807692307693, "grad_norm": 0.0024241686332970858, "learning_rate": 6.296719517226424e-07, "loss": 0.0, "step": 23145 }, { "epoch": 22.255769230769232, "grad_norm": 0.0005700906622223556, "learning_rate": 6.29236983159267e-07, "loss": 0.0, "step": 23146 }, { "epoch": 22.25673076923077, "grad_norm": 0.0015253924066200852, "learning_rate": 6.288021600026318e-07, "loss": 0.0, "step": 23147 }, { "epoch": 22.25769230769231, "grad_norm": 0.021625926718115807, "learning_rate": 6.283674822594832e-07, "loss": 0.0001, "step": 23148 }, { "epoch": 22.258653846153845, "grad_norm": 0.0019978792406618595, "learning_rate": 6.279329499365649e-07, "loss": 0.0, "step": 23149 }, { "epoch": 22.259615384615383, "grad_norm": 0.0006689127185381949, "learning_rate": 6.274985630406194e-07, "loss": 0.0, "step": 23150 }, { "epoch": 22.260576923076922, "grad_norm": 0.0005307896644808352, "learning_rate": 6.270643215783911e-07, "loss": 0.0, "step": 23151 }, { "epoch": 22.26153846153846, "grad_norm": 0.0019994345493614674, "learning_rate": 6.266302255566136e-07, "loss": 0.0, "step": 23152 }, { "epoch": 22.2625, "grad_norm": 0.0007079184288159013, "learning_rate": 6.26196274982025e-07, "loss": 0.0, "step": 23153 }, { "epoch": 22.263461538461538, "grad_norm": 0.0008232594700530171, "learning_rate": 6.257624698613574e-07, "loss": 0.0, "step": 23154 }, { "epoch": 22.264423076923077, "grad_norm": 0.0005205090856179595, "learning_rate": 6.253288102013444e-07, "loss": 0.0, "step": 23155 }, { "epoch": 22.265384615384615, "grad_norm": 0.0004056632751598954, "learning_rate": 6.24895296008714e-07, "loss": 0.0, "step": 23156 }, { "epoch": 22.266346153846154, "grad_norm": 0.002918530022725463, "learning_rate": 6.24461927290192e-07, "loss": 0.0, "step": 23157 }, { "epoch": 22.267307692307693, "grad_norm": 0.0006492831744253635, "learning_rate": 6.240287040525039e-07, "loss": 0.0, "step": 23158 }, { "epoch": 22.26826923076923, "grad_norm": 0.001554621383547783, "learning_rate": 6.235956263023735e-07, "loss": 0.0, "step": 23159 }, { "epoch": 22.26923076923077, "grad_norm": 0.0006034539546817541, "learning_rate": 6.231626940465185e-07, "loss": 0.0, "step": 23160 }, { "epoch": 22.27019230769231, "grad_norm": 0.00113364914432168, "learning_rate": 6.227299072916571e-07, "loss": 0.0, "step": 23161 }, { "epoch": 22.271153846153847, "grad_norm": 0.001034354791045189, "learning_rate": 6.222972660445082e-07, "loss": 0.0, "step": 23162 }, { "epoch": 22.272115384615386, "grad_norm": 0.0013422103365883231, "learning_rate": 6.218647703117808e-07, "loss": 0.0, "step": 23163 }, { "epoch": 22.273076923076925, "grad_norm": 0.0004957300843670964, "learning_rate": 6.214324201001887e-07, "loss": 0.0, "step": 23164 }, { "epoch": 22.27403846153846, "grad_norm": 0.0015694234753027558, "learning_rate": 6.210002154164374e-07, "loss": 0.0, "step": 23165 }, { "epoch": 22.275, "grad_norm": 0.0013729699421674013, "learning_rate": 6.205681562672372e-07, "loss": 0.0, "step": 23166 }, { "epoch": 22.275961538461537, "grad_norm": 0.001773708499968052, "learning_rate": 6.201362426592917e-07, "loss": 0.0, "step": 23167 }, { "epoch": 22.276923076923076, "grad_norm": 0.0007447809912264347, "learning_rate": 6.197044745993008e-07, "loss": 0.0, "step": 23168 }, { "epoch": 22.277884615384615, "grad_norm": 0.0015167909441515803, "learning_rate": 6.19272852093965e-07, "loss": 0.0, "step": 23169 }, { "epoch": 22.278846153846153, "grad_norm": 0.0017619323916733265, "learning_rate": 6.188413751499844e-07, "loss": 0.0, "step": 23170 }, { "epoch": 22.279807692307692, "grad_norm": 0.0009532026015222073, "learning_rate": 6.184100437740514e-07, "loss": 0.0, "step": 23171 }, { "epoch": 22.28076923076923, "grad_norm": 0.001232901937328279, "learning_rate": 6.179788579728585e-07, "loss": 0.0, "step": 23172 }, { "epoch": 22.28173076923077, "grad_norm": 0.0009860250866040587, "learning_rate": 6.175478177531002e-07, "loss": 0.0, "step": 23173 }, { "epoch": 22.282692307692308, "grad_norm": 0.0013279542326927185, "learning_rate": 6.171169231214613e-07, "loss": 0.0, "step": 23174 }, { "epoch": 22.283653846153847, "grad_norm": 0.0008940888219512999, "learning_rate": 6.166861740846297e-07, "loss": 0.0, "step": 23175 }, { "epoch": 22.284615384615385, "grad_norm": 0.0010871213162317872, "learning_rate": 6.16255570649289e-07, "loss": 0.0, "step": 23176 }, { "epoch": 22.285576923076924, "grad_norm": 0.0018872953951358795, "learning_rate": 6.158251128221205e-07, "loss": 0.0, "step": 23177 }, { "epoch": 22.286538461538463, "grad_norm": 0.0032973315101116896, "learning_rate": 6.153948006098054e-07, "loss": 0.0, "step": 23178 }, { "epoch": 22.2875, "grad_norm": 0.0005663608899340034, "learning_rate": 6.149646340190174e-07, "loss": 0.0, "step": 23179 }, { "epoch": 22.28846153846154, "grad_norm": 0.000525647250469774, "learning_rate": 6.145346130564345e-07, "loss": 0.0, "step": 23180 }, { "epoch": 22.289423076923075, "grad_norm": 0.000953954819124192, "learning_rate": 6.14104737728729e-07, "loss": 0.0, "step": 23181 }, { "epoch": 22.290384615384614, "grad_norm": 0.0008648246293887496, "learning_rate": 6.136750080425702e-07, "loss": 0.0, "step": 23182 }, { "epoch": 22.291346153846153, "grad_norm": 0.0008475126815028489, "learning_rate": 6.132454240046259e-07, "loss": 0.0, "step": 23183 }, { "epoch": 22.29230769230769, "grad_norm": 0.0017563548171892762, "learning_rate": 6.128159856215643e-07, "loss": 0.0, "step": 23184 }, { "epoch": 22.29326923076923, "grad_norm": 0.0010460236808285117, "learning_rate": 6.123866929000466e-07, "loss": 0.0, "step": 23185 }, { "epoch": 22.29423076923077, "grad_norm": 0.0010212041670456529, "learning_rate": 6.119575458467353e-07, "loss": 0.0, "step": 23186 }, { "epoch": 22.295192307692307, "grad_norm": 0.0012779203243553638, "learning_rate": 6.115285444682895e-07, "loss": 0.0, "step": 23187 }, { "epoch": 22.296153846153846, "grad_norm": 0.002089256653562188, "learning_rate": 6.11099688771366e-07, "loss": 0.0, "step": 23188 }, { "epoch": 22.297115384615385, "grad_norm": 0.0011348003754392266, "learning_rate": 6.106709787626198e-07, "loss": 0.0, "step": 23189 }, { "epoch": 22.298076923076923, "grad_norm": 0.0014681493630632758, "learning_rate": 6.102424144487007e-07, "loss": 0.0, "step": 23190 }, { "epoch": 22.299038461538462, "grad_norm": 0.00046591987484134734, "learning_rate": 6.098139958362615e-07, "loss": 0.0, "step": 23191 }, { "epoch": 22.3, "grad_norm": 0.001190638984553516, "learning_rate": 6.093857229319511e-07, "loss": 0.0, "step": 23192 }, { "epoch": 22.30096153846154, "grad_norm": 0.0013891509734094143, "learning_rate": 6.08957595742411e-07, "loss": 0.0, "step": 23193 }, { "epoch": 22.301923076923078, "grad_norm": 0.0016095958417281508, "learning_rate": 6.085296142742858e-07, "loss": 0.0, "step": 23194 }, { "epoch": 22.302884615384617, "grad_norm": 0.0010428030509501696, "learning_rate": 6.081017785342202e-07, "loss": 0.0, "step": 23195 }, { "epoch": 22.303846153846155, "grad_norm": 0.004545089788734913, "learning_rate": 6.076740885288479e-07, "loss": 0.0, "step": 23196 }, { "epoch": 22.30480769230769, "grad_norm": 0.0015321524115279317, "learning_rate": 6.072465442648079e-07, "loss": 0.0, "step": 23197 }, { "epoch": 22.30576923076923, "grad_norm": 0.0008759824559092522, "learning_rate": 6.06819145748736e-07, "loss": 0.0, "step": 23198 }, { "epoch": 22.306730769230768, "grad_norm": 0.001969442469999194, "learning_rate": 6.063918929872604e-07, "loss": 0.0, "step": 23199 }, { "epoch": 22.307692307692307, "grad_norm": 0.0015500851441174746, "learning_rate": 6.059647859870144e-07, "loss": 0.0, "step": 23200 }, { "epoch": 22.308653846153845, "grad_norm": 0.0010928373085334897, "learning_rate": 6.055378247546217e-07, "loss": 0.0, "step": 23201 }, { "epoch": 22.309615384615384, "grad_norm": 0.0005096221575513482, "learning_rate": 6.051110092967105e-07, "loss": 0.0, "step": 23202 }, { "epoch": 22.310576923076923, "grad_norm": 0.00151798443403095, "learning_rate": 6.046843396199031e-07, "loss": 0.0, "step": 23203 }, { "epoch": 22.31153846153846, "grad_norm": 0.003727470524609089, "learning_rate": 6.042578157308188e-07, "loss": 0.0, "step": 23204 }, { "epoch": 22.3125, "grad_norm": 0.001066574826836586, "learning_rate": 6.038314376360776e-07, "loss": 0.0, "step": 23205 }, { "epoch": 22.31346153846154, "grad_norm": 0.001781768281944096, "learning_rate": 6.034052053422956e-07, "loss": 0.0, "step": 23206 }, { "epoch": 22.314423076923077, "grad_norm": 0.0005422115791589022, "learning_rate": 6.029791188560851e-07, "loss": 0.0, "step": 23207 }, { "epoch": 22.315384615384616, "grad_norm": 0.0013159881345927715, "learning_rate": 6.025531781840588e-07, "loss": 0.0, "step": 23208 }, { "epoch": 22.316346153846155, "grad_norm": 0.001574033172801137, "learning_rate": 6.021273833328279e-07, "loss": 0.0, "step": 23209 }, { "epoch": 22.317307692307693, "grad_norm": 0.0006133878487162292, "learning_rate": 6.01701734308996e-07, "loss": 0.0, "step": 23210 }, { "epoch": 22.318269230769232, "grad_norm": 0.0010174857452511787, "learning_rate": 6.012762311191699e-07, "loss": 0.0, "step": 23211 }, { "epoch": 22.31923076923077, "grad_norm": 0.0018774229101836681, "learning_rate": 6.008508737699514e-07, "loss": 0.0, "step": 23212 }, { "epoch": 22.32019230769231, "grad_norm": 0.0007156349602155387, "learning_rate": 6.004256622679405e-07, "loss": 0.0, "step": 23213 }, { "epoch": 22.321153846153845, "grad_norm": 0.0009679189533926547, "learning_rate": 6.000005966197387e-07, "loss": 0.0, "step": 23214 }, { "epoch": 22.322115384615383, "grad_norm": 0.000592307944316417, "learning_rate": 5.995756768319361e-07, "loss": 0.0, "step": 23215 }, { "epoch": 22.323076923076922, "grad_norm": 0.0011908399173989892, "learning_rate": 5.991509029111297e-07, "loss": 0.0, "step": 23216 }, { "epoch": 22.32403846153846, "grad_norm": 0.0014912233455106616, "learning_rate": 5.987262748639111e-07, "loss": 0.0, "step": 23217 }, { "epoch": 22.325, "grad_norm": 0.0013538631610572338, "learning_rate": 5.98301792696867e-07, "loss": 0.0, "step": 23218 }, { "epoch": 22.325961538461538, "grad_norm": 0.0004747433995362371, "learning_rate": 5.978774564165857e-07, "loss": 0.0, "step": 23219 }, { "epoch": 22.326923076923077, "grad_norm": 0.0003429831995163113, "learning_rate": 5.974532660296528e-07, "loss": 0.0, "step": 23220 }, { "epoch": 22.327884615384615, "grad_norm": 0.001208246685564518, "learning_rate": 5.970292215426476e-07, "loss": 0.0, "step": 23221 }, { "epoch": 22.328846153846154, "grad_norm": 0.0008901856490410864, "learning_rate": 5.966053229621526e-07, "loss": 0.0, "step": 23222 }, { "epoch": 22.329807692307693, "grad_norm": 0.0016841336619108915, "learning_rate": 5.961815702947438e-07, "loss": 0.0, "step": 23223 }, { "epoch": 22.33076923076923, "grad_norm": 0.0015649936394765973, "learning_rate": 5.957579635469956e-07, "loss": 0.0, "step": 23224 }, { "epoch": 22.33173076923077, "grad_norm": 0.00150819041300565, "learning_rate": 5.953345027254854e-07, "loss": 0.0, "step": 23225 }, { "epoch": 22.33269230769231, "grad_norm": 0.0017510433681309223, "learning_rate": 5.949111878367797e-07, "loss": 0.0, "step": 23226 }, { "epoch": 22.333653846153847, "grad_norm": 0.0011829250724986196, "learning_rate": 5.94488018887448e-07, "loss": 0.0, "step": 23227 }, { "epoch": 22.334615384615386, "grad_norm": 0.001508415094576776, "learning_rate": 5.940649958840605e-07, "loss": 0.0, "step": 23228 }, { "epoch": 22.335576923076925, "grad_norm": 0.0010317222913727164, "learning_rate": 5.936421188331754e-07, "loss": 0.0, "step": 23229 }, { "epoch": 22.33653846153846, "grad_norm": 0.0011937114177271724, "learning_rate": 5.932193877413572e-07, "loss": 0.0, "step": 23230 }, { "epoch": 22.3375, "grad_norm": 0.0010940423235297203, "learning_rate": 5.927968026151676e-07, "loss": 0.0, "step": 23231 }, { "epoch": 22.338461538461537, "grad_norm": 0.0005751150310970843, "learning_rate": 5.923743634611601e-07, "loss": 0.0, "step": 23232 }, { "epoch": 22.339423076923076, "grad_norm": 0.0008062143460847437, "learning_rate": 5.919520702858928e-07, "loss": 0.0, "step": 23233 }, { "epoch": 22.340384615384615, "grad_norm": 0.0033235913142561913, "learning_rate": 5.91529923095916e-07, "loss": 0.0, "step": 23234 }, { "epoch": 22.341346153846153, "grad_norm": 0.0015358938835561275, "learning_rate": 5.911079218977811e-07, "loss": 0.0, "step": 23235 }, { "epoch": 22.342307692307692, "grad_norm": 0.0020353542640805244, "learning_rate": 5.906860666980374e-07, "loss": 0.0, "step": 23236 }, { "epoch": 22.34326923076923, "grad_norm": 0.00046173998271115124, "learning_rate": 5.902643575032285e-07, "loss": 0.0, "step": 23237 }, { "epoch": 22.34423076923077, "grad_norm": 0.00134539813734591, "learning_rate": 5.898427943199003e-07, "loss": 0.0, "step": 23238 }, { "epoch": 22.345192307692308, "grad_norm": 0.0016113118035718799, "learning_rate": 5.894213771545943e-07, "loss": 0.0, "step": 23239 }, { "epoch": 22.346153846153847, "grad_norm": 0.0007292084628716111, "learning_rate": 5.890001060138484e-07, "loss": 0.0, "step": 23240 }, { "epoch": 22.347115384615385, "grad_norm": 0.0012078718282282352, "learning_rate": 5.885789809041986e-07, "loss": 0.0, "step": 23241 }, { "epoch": 22.348076923076924, "grad_norm": 0.000692366564180702, "learning_rate": 5.881580018321831e-07, "loss": 0.0, "step": 23242 }, { "epoch": 22.349038461538463, "grad_norm": 0.0016798479482531548, "learning_rate": 5.877371688043298e-07, "loss": 0.0, "step": 23243 }, { "epoch": 22.35, "grad_norm": 0.0017841293010860682, "learning_rate": 5.873164818271726e-07, "loss": 0.0, "step": 23244 }, { "epoch": 22.35096153846154, "grad_norm": 0.0011984951561316848, "learning_rate": 5.868959409072372e-07, "loss": 0.0, "step": 23245 }, { "epoch": 22.351923076923075, "grad_norm": 0.0008457116200588644, "learning_rate": 5.864755460510485e-07, "loss": 0.0, "step": 23246 }, { "epoch": 22.352884615384614, "grad_norm": 0.0009570298134349287, "learning_rate": 5.860552972651334e-07, "loss": 0.0, "step": 23247 }, { "epoch": 22.353846153846153, "grad_norm": 0.002039390616118908, "learning_rate": 5.856351945560079e-07, "loss": 0.0, "step": 23248 }, { "epoch": 22.35480769230769, "grad_norm": 0.0002930668997578323, "learning_rate": 5.852152379301946e-07, "loss": 0.0, "step": 23249 }, { "epoch": 22.35576923076923, "grad_norm": 0.00020894531917292625, "learning_rate": 5.847954273942092e-07, "loss": 0.0, "step": 23250 }, { "epoch": 22.35673076923077, "grad_norm": 0.001712164143100381, "learning_rate": 5.843757629545632e-07, "loss": 0.0, "step": 23251 }, { "epoch": 22.357692307692307, "grad_norm": 0.0017597016412764788, "learning_rate": 5.839562446177738e-07, "loss": 0.0, "step": 23252 }, { "epoch": 22.358653846153846, "grad_norm": 0.0021869640331715345, "learning_rate": 5.835368723903456e-07, "loss": 0.0, "step": 23253 }, { "epoch": 22.359615384615385, "grad_norm": 0.0010701314313337207, "learning_rate": 5.83117646278788e-07, "loss": 0.0, "step": 23254 }, { "epoch": 22.360576923076923, "grad_norm": 0.0012664064997807145, "learning_rate": 5.82698566289609e-07, "loss": 0.0, "step": 23255 }, { "epoch": 22.361538461538462, "grad_norm": 0.0006735356873832643, "learning_rate": 5.822796324293067e-07, "loss": 0.0, "step": 23256 }, { "epoch": 22.3625, "grad_norm": 0.0007384770433418453, "learning_rate": 5.818608447043828e-07, "loss": 0.0, "step": 23257 }, { "epoch": 22.36346153846154, "grad_norm": 0.0012468204367905855, "learning_rate": 5.814422031213396e-07, "loss": 0.0, "step": 23258 }, { "epoch": 22.364423076923078, "grad_norm": 0.0007762921159155667, "learning_rate": 5.810237076866687e-07, "loss": 0.0, "step": 23259 }, { "epoch": 22.365384615384617, "grad_norm": 0.001538206241093576, "learning_rate": 5.806053584068661e-07, "loss": 0.0, "step": 23260 }, { "epoch": 22.366346153846155, "grad_norm": 0.001197503530420363, "learning_rate": 5.80187155288422e-07, "loss": 0.0, "step": 23261 }, { "epoch": 22.36730769230769, "grad_norm": 0.001636082655750215, "learning_rate": 5.797690983378257e-07, "loss": 0.0, "step": 23262 }, { "epoch": 22.36826923076923, "grad_norm": 0.0006180271739140153, "learning_rate": 5.793511875615676e-07, "loss": 0.0, "step": 23263 }, { "epoch": 22.369230769230768, "grad_norm": 0.0009992481209337711, "learning_rate": 5.789334229661269e-07, "loss": 0.0, "step": 23264 }, { "epoch": 22.370192307692307, "grad_norm": 0.0005678717279806733, "learning_rate": 5.785158045579897e-07, "loss": 0.0, "step": 23265 }, { "epoch": 22.371153846153845, "grad_norm": 0.0013956642942503095, "learning_rate": 5.780983323436374e-07, "loss": 0.0, "step": 23266 }, { "epoch": 22.372115384615384, "grad_norm": 0.0032481024973094463, "learning_rate": 5.776810063295435e-07, "loss": 0.0, "step": 23267 }, { "epoch": 22.373076923076923, "grad_norm": 0.0011221544118598104, "learning_rate": 5.772638265221875e-07, "loss": 0.0, "step": 23268 }, { "epoch": 22.37403846153846, "grad_norm": 0.0012677706545218825, "learning_rate": 5.76846792928042e-07, "loss": 0.0, "step": 23269 }, { "epoch": 22.375, "grad_norm": 0.0027742243837565184, "learning_rate": 5.76429905553576e-07, "loss": 0.0, "step": 23270 }, { "epoch": 22.37596153846154, "grad_norm": 0.0021208906546235085, "learning_rate": 5.760131644052625e-07, "loss": 0.0, "step": 23271 }, { "epoch": 22.376923076923077, "grad_norm": 0.005687607452273369, "learning_rate": 5.755965694895637e-07, "loss": 0.0, "step": 23272 }, { "epoch": 22.377884615384616, "grad_norm": 0.0014424592955037951, "learning_rate": 5.751801208129459e-07, "loss": 0.0, "step": 23273 }, { "epoch": 22.378846153846155, "grad_norm": 0.0009640015196055174, "learning_rate": 5.747638183818716e-07, "loss": 0.0, "step": 23274 }, { "epoch": 22.379807692307693, "grad_norm": 0.0009479389991611242, "learning_rate": 5.743476622028e-07, "loss": 0.0, "step": 23275 }, { "epoch": 22.380769230769232, "grad_norm": 0.0007847637170925736, "learning_rate": 5.73931652282188e-07, "loss": 0.0, "step": 23276 }, { "epoch": 22.38173076923077, "grad_norm": 0.0012456626864150167, "learning_rate": 5.735157886264931e-07, "loss": 0.0, "step": 23277 }, { "epoch": 22.38269230769231, "grad_norm": 0.0595981664955616, "learning_rate": 5.731000712421652e-07, "loss": 0.0001, "step": 23278 }, { "epoch": 22.383653846153845, "grad_norm": 0.0008581545553170145, "learning_rate": 5.726845001356573e-07, "loss": 0.0, "step": 23279 }, { "epoch": 22.384615384615383, "grad_norm": 0.0007675585220567882, "learning_rate": 5.722690753134186e-07, "loss": 0.0, "step": 23280 }, { "epoch": 22.385576923076922, "grad_norm": 0.0027639411855489016, "learning_rate": 5.718537967818915e-07, "loss": 0.0, "step": 23281 }, { "epoch": 22.38653846153846, "grad_norm": 0.0009682400850579143, "learning_rate": 5.714386645475234e-07, "loss": 0.0, "step": 23282 }, { "epoch": 22.3875, "grad_norm": 0.0008152031805366278, "learning_rate": 5.710236786167545e-07, "loss": 0.0, "step": 23283 }, { "epoch": 22.388461538461538, "grad_norm": 0.0003025927289854735, "learning_rate": 5.70608838996024e-07, "loss": 0.0, "step": 23284 }, { "epoch": 22.389423076923077, "grad_norm": 0.000527961878105998, "learning_rate": 5.701941456917703e-07, "loss": 0.0, "step": 23285 }, { "epoch": 22.390384615384615, "grad_norm": 0.0012558745220303535, "learning_rate": 5.697795987104259e-07, "loss": 0.0, "step": 23286 }, { "epoch": 22.391346153846154, "grad_norm": 0.001737771206535399, "learning_rate": 5.693651980584247e-07, "loss": 0.0, "step": 23287 }, { "epoch": 22.392307692307693, "grad_norm": 0.0008910720935091376, "learning_rate": 5.689509437421992e-07, "loss": 0.0, "step": 23288 }, { "epoch": 22.39326923076923, "grad_norm": 0.0008594177197664976, "learning_rate": 5.685368357681731e-07, "loss": 0.0, "step": 23289 }, { "epoch": 22.39423076923077, "grad_norm": 0.0005071143386885524, "learning_rate": 5.681228741427747e-07, "loss": 0.0, "step": 23290 }, { "epoch": 22.39519230769231, "grad_norm": 0.001657325541600585, "learning_rate": 5.677090588724288e-07, "loss": 0.0, "step": 23291 }, { "epoch": 22.396153846153847, "grad_norm": 0.0009522208129055798, "learning_rate": 5.672953899635524e-07, "loss": 0.0, "step": 23292 }, { "epoch": 22.397115384615386, "grad_norm": 0.0015480152796953917, "learning_rate": 5.668818674225684e-07, "loss": 0.0, "step": 23293 }, { "epoch": 22.398076923076925, "grad_norm": 0.0007239074911922216, "learning_rate": 5.664684912558926e-07, "loss": 0.0, "step": 23294 }, { "epoch": 22.39903846153846, "grad_norm": 0.0006798722897656262, "learning_rate": 5.660552614699366e-07, "loss": 0.0, "step": 23295 }, { "epoch": 22.4, "grad_norm": 0.0010146613931283355, "learning_rate": 5.656421780711175e-07, "loss": 0.0, "step": 23296 }, { "epoch": 22.400961538461537, "grad_norm": 0.0007335461559705436, "learning_rate": 5.6522924106584e-07, "loss": 0.0, "step": 23297 }, { "epoch": 22.401923076923076, "grad_norm": 0.0025855943094938993, "learning_rate": 5.648164504605136e-07, "loss": 0.0, "step": 23298 }, { "epoch": 22.402884615384615, "grad_norm": 0.0022558909840881824, "learning_rate": 5.644038062615453e-07, "loss": 0.0, "step": 23299 }, { "epoch": 22.403846153846153, "grad_norm": 0.0013265246525406837, "learning_rate": 5.639913084753368e-07, "loss": 0.0, "step": 23300 }, { "epoch": 22.404807692307692, "grad_norm": 0.002074344316497445, "learning_rate": 5.635789571082872e-07, "loss": 0.0, "step": 23301 }, { "epoch": 22.40576923076923, "grad_norm": 0.0003040776355192065, "learning_rate": 5.631667521667994e-07, "loss": 0.0, "step": 23302 }, { "epoch": 22.40673076923077, "grad_norm": 0.0011008350411430001, "learning_rate": 5.627546936572648e-07, "loss": 0.0, "step": 23303 }, { "epoch": 22.407692307692308, "grad_norm": 0.0006759330281056464, "learning_rate": 5.623427815860793e-07, "loss": 0.0, "step": 23304 }, { "epoch": 22.408653846153847, "grad_norm": 0.00035214709350839257, "learning_rate": 5.619310159596358e-07, "loss": 0.0, "step": 23305 }, { "epoch": 22.409615384615385, "grad_norm": 0.0021866867318749428, "learning_rate": 5.615193967843214e-07, "loss": 0.0, "step": 23306 }, { "epoch": 22.410576923076924, "grad_norm": 0.005720930173993111, "learning_rate": 5.611079240665252e-07, "loss": 0.0, "step": 23307 }, { "epoch": 22.411538461538463, "grad_norm": 0.0011317676398903131, "learning_rate": 5.60696597812631e-07, "loss": 0.0, "step": 23308 }, { "epoch": 22.4125, "grad_norm": 0.0006909508956596255, "learning_rate": 5.602854180290207e-07, "loss": 0.0, "step": 23309 }, { "epoch": 22.41346153846154, "grad_norm": 0.001236260635778308, "learning_rate": 5.598743847220767e-07, "loss": 0.0, "step": 23310 }, { "epoch": 22.414423076923075, "grad_norm": 0.00048295120359398425, "learning_rate": 5.59463497898175e-07, "loss": 0.0, "step": 23311 }, { "epoch": 22.415384615384614, "grad_norm": 0.0007570685120299459, "learning_rate": 5.590527575636917e-07, "loss": 0.0, "step": 23312 }, { "epoch": 22.416346153846153, "grad_norm": 0.003763016313314438, "learning_rate": 5.586421637250028e-07, "loss": 0.0, "step": 23313 }, { "epoch": 22.41730769230769, "grad_norm": 0.0009616933530196548, "learning_rate": 5.582317163884754e-07, "loss": 0.0, "step": 23314 }, { "epoch": 22.41826923076923, "grad_norm": 0.001614434877410531, "learning_rate": 5.578214155604811e-07, "loss": 0.0, "step": 23315 }, { "epoch": 22.41923076923077, "grad_norm": 0.001643574214540422, "learning_rate": 5.57411261247387e-07, "loss": 0.0, "step": 23316 }, { "epoch": 22.420192307692307, "grad_norm": 0.0018522909376770258, "learning_rate": 5.570012534555558e-07, "loss": 0.0, "step": 23317 }, { "epoch": 22.421153846153846, "grad_norm": 0.001745618530549109, "learning_rate": 5.565913921913513e-07, "loss": 0.0, "step": 23318 }, { "epoch": 22.422115384615385, "grad_norm": 0.00022493122378364205, "learning_rate": 5.561816774611307e-07, "loss": 0.0, "step": 23319 }, { "epoch": 22.423076923076923, "grad_norm": 0.0008655731799080968, "learning_rate": 5.55772109271252e-07, "loss": 0.0, "step": 23320 }, { "epoch": 22.424038461538462, "grad_norm": 0.00033754450851120055, "learning_rate": 5.553626876280749e-07, "loss": 0.0, "step": 23321 }, { "epoch": 22.425, "grad_norm": 0.009554652497172356, "learning_rate": 5.549534125379474e-07, "loss": 0.0001, "step": 23322 }, { "epoch": 22.42596153846154, "grad_norm": 0.00123834318947047, "learning_rate": 5.545442840072212e-07, "loss": 0.0, "step": 23323 }, { "epoch": 22.426923076923078, "grad_norm": 0.003262903308495879, "learning_rate": 5.54135302042248e-07, "loss": 0.0, "step": 23324 }, { "epoch": 22.427884615384617, "grad_norm": 0.0019377999706193805, "learning_rate": 5.537264666493703e-07, "loss": 0.0, "step": 23325 }, { "epoch": 22.428846153846155, "grad_norm": 0.001368870260193944, "learning_rate": 5.533177778349319e-07, "loss": 0.0, "step": 23326 }, { "epoch": 22.42980769230769, "grad_norm": 0.0007639386458322406, "learning_rate": 5.52909235605279e-07, "loss": 0.0, "step": 23327 }, { "epoch": 22.43076923076923, "grad_norm": 0.0004288962809368968, "learning_rate": 5.525008399667453e-07, "loss": 0.0, "step": 23328 }, { "epoch": 22.431730769230768, "grad_norm": 0.0008226541103795171, "learning_rate": 5.520925909256724e-07, "loss": 0.0, "step": 23329 }, { "epoch": 22.432692307692307, "grad_norm": 0.0025007042568176985, "learning_rate": 5.516844884883921e-07, "loss": 0.0, "step": 23330 }, { "epoch": 22.433653846153845, "grad_norm": 0.0009395058732479811, "learning_rate": 5.51276532661238e-07, "loss": 0.0, "step": 23331 }, { "epoch": 22.434615384615384, "grad_norm": 0.0006671335431747139, "learning_rate": 5.508687234505416e-07, "loss": 0.0, "step": 23332 }, { "epoch": 22.435576923076923, "grad_norm": 0.0023583590518683195, "learning_rate": 5.504610608626281e-07, "loss": 0.0, "step": 23333 }, { "epoch": 22.43653846153846, "grad_norm": 0.0009115863940678537, "learning_rate": 5.500535449038247e-07, "loss": 0.0, "step": 23334 }, { "epoch": 22.4375, "grad_norm": 0.0016101173823699355, "learning_rate": 5.496461755804572e-07, "loss": 0.0, "step": 23335 }, { "epoch": 22.43846153846154, "grad_norm": 0.0008477918454445899, "learning_rate": 5.492389528988429e-07, "loss": 0.0, "step": 23336 }, { "epoch": 22.439423076923077, "grad_norm": 0.45234838128089905, "learning_rate": 5.488318768653022e-07, "loss": 0.0054, "step": 23337 }, { "epoch": 22.440384615384616, "grad_norm": 0.0008519801194779575, "learning_rate": 5.484249474861536e-07, "loss": 0.0, "step": 23338 }, { "epoch": 22.441346153846155, "grad_norm": 0.0013606906868517399, "learning_rate": 5.480181647677085e-07, "loss": 0.0, "step": 23339 }, { "epoch": 22.442307692307693, "grad_norm": 0.0006232117884792387, "learning_rate": 5.476115287162809e-07, "loss": 0.0, "step": 23340 }, { "epoch": 22.443269230769232, "grad_norm": 0.0010401186300441623, "learning_rate": 5.47205039338179e-07, "loss": 0.0, "step": 23341 }, { "epoch": 22.44423076923077, "grad_norm": 0.0004909406416118145, "learning_rate": 5.46798696639712e-07, "loss": 0.0, "step": 23342 }, { "epoch": 22.44519230769231, "grad_norm": 0.001096078078262508, "learning_rate": 5.463925006271853e-07, "loss": 0.0, "step": 23343 }, { "epoch": 22.446153846153845, "grad_norm": 0.001188343740068376, "learning_rate": 5.459864513068991e-07, "loss": 0.0, "step": 23344 }, { "epoch": 22.447115384615383, "grad_norm": 0.0004503616946749389, "learning_rate": 5.455805486851562e-07, "loss": 0.0, "step": 23345 }, { "epoch": 22.448076923076922, "grad_norm": 0.0011134555097669363, "learning_rate": 5.45174792768256e-07, "loss": 0.0, "step": 23346 }, { "epoch": 22.44903846153846, "grad_norm": 0.0014153820229694247, "learning_rate": 5.447691835624924e-07, "loss": 0.0, "step": 23347 }, { "epoch": 22.45, "grad_norm": 0.0012421393766999245, "learning_rate": 5.443637210741603e-07, "loss": 0.0, "step": 23348 }, { "epoch": 22.450961538461538, "grad_norm": 0.001058995141647756, "learning_rate": 5.439584053095525e-07, "loss": 0.0, "step": 23349 }, { "epoch": 22.451923076923077, "grad_norm": 0.0009373955545015633, "learning_rate": 5.435532362749563e-07, "loss": 0.0, "step": 23350 }, { "epoch": 22.452884615384615, "grad_norm": 0.0017330936389043927, "learning_rate": 5.431482139766609e-07, "loss": 0.0, "step": 23351 }, { "epoch": 22.453846153846154, "grad_norm": 0.006476891227066517, "learning_rate": 5.427433384209469e-07, "loss": 0.0, "step": 23352 }, { "epoch": 22.454807692307693, "grad_norm": 0.001550542190670967, "learning_rate": 5.423386096141003e-07, "loss": 0.0, "step": 23353 }, { "epoch": 22.45576923076923, "grad_norm": 0.0008846722776070237, "learning_rate": 5.41934027562403e-07, "loss": 0.0, "step": 23354 }, { "epoch": 22.45673076923077, "grad_norm": 0.0009042552555911243, "learning_rate": 5.415295922721286e-07, "loss": 0.0, "step": 23355 }, { "epoch": 22.45769230769231, "grad_norm": 0.001239227713085711, "learning_rate": 5.411253037495534e-07, "loss": 0.0, "step": 23356 }, { "epoch": 22.458653846153847, "grad_norm": 0.001053369720466435, "learning_rate": 5.407211620009545e-07, "loss": 0.0, "step": 23357 }, { "epoch": 22.459615384615386, "grad_norm": 0.0011825751280412078, "learning_rate": 5.403171670325991e-07, "loss": 0.0, "step": 23358 }, { "epoch": 22.460576923076925, "grad_norm": 0.0029983643908053637, "learning_rate": 5.399133188507577e-07, "loss": 0.0, "step": 23359 }, { "epoch": 22.46153846153846, "grad_norm": 0.0006316252984106541, "learning_rate": 5.395096174616976e-07, "loss": 0.0, "step": 23360 }, { "epoch": 22.4625, "grad_norm": 0.0011279633035883307, "learning_rate": 5.391060628716805e-07, "loss": 0.0, "step": 23361 }, { "epoch": 22.463461538461537, "grad_norm": 0.0011633664835244417, "learning_rate": 5.387026550869711e-07, "loss": 0.0, "step": 23362 }, { "epoch": 22.464423076923076, "grad_norm": 0.0007147323922254145, "learning_rate": 5.382993941138271e-07, "loss": 0.0, "step": 23363 }, { "epoch": 22.465384615384615, "grad_norm": 0.0013920878991484642, "learning_rate": 5.378962799585063e-07, "loss": 0.0, "step": 23364 }, { "epoch": 22.466346153846153, "grad_norm": 0.0006285590352490544, "learning_rate": 5.374933126272664e-07, "loss": 0.0, "step": 23365 }, { "epoch": 22.467307692307692, "grad_norm": 0.0014941416447982192, "learning_rate": 5.370904921263565e-07, "loss": 0.0, "step": 23366 }, { "epoch": 22.46826923076923, "grad_norm": 0.0011742369970306754, "learning_rate": 5.366878184620284e-07, "loss": 0.0, "step": 23367 }, { "epoch": 22.46923076923077, "grad_norm": 0.0009120703907683492, "learning_rate": 5.362852916405325e-07, "loss": 0.0, "step": 23368 }, { "epoch": 22.470192307692308, "grad_norm": 0.002876145998016, "learning_rate": 5.358829116681119e-07, "loss": 0.0, "step": 23369 }, { "epoch": 22.471153846153847, "grad_norm": 0.0016684246947988868, "learning_rate": 5.354806785510113e-07, "loss": 0.0, "step": 23370 }, { "epoch": 22.472115384615385, "grad_norm": 0.0008426739950664341, "learning_rate": 5.350785922954738e-07, "loss": 0.0, "step": 23371 }, { "epoch": 22.473076923076924, "grad_norm": 0.004167803097516298, "learning_rate": 5.346766529077363e-07, "loss": 0.0, "step": 23372 }, { "epoch": 22.474038461538463, "grad_norm": 0.0016341239679604769, "learning_rate": 5.342748603940384e-07, "loss": 0.0, "step": 23373 }, { "epoch": 22.475, "grad_norm": 0.00040243269177153707, "learning_rate": 5.338732147606118e-07, "loss": 0.0, "step": 23374 }, { "epoch": 22.47596153846154, "grad_norm": 0.0018445991445332766, "learning_rate": 5.334717160136893e-07, "loss": 0.0, "step": 23375 }, { "epoch": 22.476923076923075, "grad_norm": 0.0022091949358582497, "learning_rate": 5.330703641595036e-07, "loss": 0.0, "step": 23376 }, { "epoch": 22.477884615384614, "grad_norm": 0.0006838878034614027, "learning_rate": 5.326691592042798e-07, "loss": 0.0, "step": 23377 }, { "epoch": 22.478846153846153, "grad_norm": 0.001139169093221426, "learning_rate": 5.322681011542442e-07, "loss": 0.0, "step": 23378 }, { "epoch": 22.47980769230769, "grad_norm": 0.002186106750741601, "learning_rate": 5.318671900156213e-07, "loss": 0.0, "step": 23379 }, { "epoch": 22.48076923076923, "grad_norm": 0.001184332650154829, "learning_rate": 5.314664257946301e-07, "loss": 0.0, "step": 23380 }, { "epoch": 22.48173076923077, "grad_norm": 0.0008675080607645214, "learning_rate": 5.310658084974896e-07, "loss": 0.0, "step": 23381 }, { "epoch": 22.482692307692307, "grad_norm": 0.0006276058848015964, "learning_rate": 5.306653381304183e-07, "loss": 0.0, "step": 23382 }, { "epoch": 22.483653846153846, "grad_norm": 0.0008742368663661182, "learning_rate": 5.30265014699628e-07, "loss": 0.0, "step": 23383 }, { "epoch": 22.484615384615385, "grad_norm": 0.006619008257985115, "learning_rate": 5.298648382113303e-07, "loss": 0.0, "step": 23384 }, { "epoch": 22.485576923076923, "grad_norm": 0.004684028681367636, "learning_rate": 5.29464808671738e-07, "loss": 0.0001, "step": 23385 }, { "epoch": 22.486538461538462, "grad_norm": 0.0006658995989710093, "learning_rate": 5.29064926087054e-07, "loss": 0.0, "step": 23386 }, { "epoch": 22.4875, "grad_norm": 0.0013906011590734124, "learning_rate": 5.286651904634876e-07, "loss": 0.0, "step": 23387 }, { "epoch": 22.48846153846154, "grad_norm": 0.005809127353131771, "learning_rate": 5.282656018072374e-07, "loss": 0.0, "step": 23388 }, { "epoch": 22.489423076923078, "grad_norm": 0.0008563402225263417, "learning_rate": 5.278661601245061e-07, "loss": 0.0, "step": 23389 }, { "epoch": 22.490384615384617, "grad_norm": 0.0010685393353924155, "learning_rate": 5.274668654214931e-07, "loss": 0.0, "step": 23390 }, { "epoch": 22.491346153846155, "grad_norm": 0.0008256086148321629, "learning_rate": 5.270677177043914e-07, "loss": 0.0, "step": 23391 }, { "epoch": 22.49230769230769, "grad_norm": 0.0006081467145122588, "learning_rate": 5.26668716979396e-07, "loss": 0.0, "step": 23392 }, { "epoch": 22.49326923076923, "grad_norm": 0.002067385707050562, "learning_rate": 5.262698632527008e-07, "loss": 0.0, "step": 23393 }, { "epoch": 22.494230769230768, "grad_norm": 0.0008250551181845367, "learning_rate": 5.258711565304897e-07, "loss": 0.0, "step": 23394 }, { "epoch": 22.495192307692307, "grad_norm": 0.00100145919714123, "learning_rate": 5.254725968189512e-07, "loss": 0.0, "step": 23395 }, { "epoch": 22.496153846153845, "grad_norm": 0.000991637585684657, "learning_rate": 5.250741841242735e-07, "loss": 0.0, "step": 23396 }, { "epoch": 22.497115384615384, "grad_norm": 0.0011602012673392892, "learning_rate": 5.246759184526328e-07, "loss": 0.0, "step": 23397 }, { "epoch": 22.498076923076923, "grad_norm": 0.0005347183323465288, "learning_rate": 5.242777998102155e-07, "loss": 0.0, "step": 23398 }, { "epoch": 22.49903846153846, "grad_norm": 0.0008989788475446403, "learning_rate": 5.238798282031932e-07, "loss": 0.0, "step": 23399 }, { "epoch": 22.5, "grad_norm": 0.0020183443557471037, "learning_rate": 5.234820036377441e-07, "loss": 0.0, "step": 23400 }, { "epoch": 22.50096153846154, "grad_norm": 0.002872945973649621, "learning_rate": 5.230843261200425e-07, "loss": 0.0, "step": 23401 }, { "epoch": 22.501923076923077, "grad_norm": 0.0012878266861662269, "learning_rate": 5.226867956562565e-07, "loss": 0.0, "step": 23402 }, { "epoch": 22.502884615384616, "grad_norm": 0.0008344076341018081, "learning_rate": 5.222894122525557e-07, "loss": 0.0, "step": 23403 }, { "epoch": 22.503846153846155, "grad_norm": 0.0036182112526148558, "learning_rate": 5.218921759151074e-07, "loss": 0.0, "step": 23404 }, { "epoch": 22.504807692307693, "grad_norm": 0.0009790414478629827, "learning_rate": 5.214950866500734e-07, "loss": 0.0, "step": 23405 }, { "epoch": 22.505769230769232, "grad_norm": 0.0015282436506822705, "learning_rate": 5.210981444636176e-07, "loss": 0.0, "step": 23406 }, { "epoch": 22.50673076923077, "grad_norm": 0.0018734412733465433, "learning_rate": 5.207013493618984e-07, "loss": 0.0, "step": 23407 }, { "epoch": 22.50769230769231, "grad_norm": 0.0038000913336873055, "learning_rate": 5.20304701351072e-07, "loss": 0.0, "step": 23408 }, { "epoch": 22.508653846153845, "grad_norm": 0.0011017626384273171, "learning_rate": 5.199082004372958e-07, "loss": 0.0, "step": 23409 }, { "epoch": 22.509615384615383, "grad_norm": 0.0007011998095549643, "learning_rate": 5.195118466267191e-07, "loss": 0.0, "step": 23410 }, { "epoch": 22.510576923076922, "grad_norm": 0.00106707657687366, "learning_rate": 5.191156399254937e-07, "loss": 0.0, "step": 23411 }, { "epoch": 22.51153846153846, "grad_norm": 0.0008130078203976154, "learning_rate": 5.187195803397682e-07, "loss": 0.0, "step": 23412 }, { "epoch": 22.5125, "grad_norm": 0.0012268316932022572, "learning_rate": 5.183236678756864e-07, "loss": 0.0, "step": 23413 }, { "epoch": 22.513461538461538, "grad_norm": 0.0009264799882657826, "learning_rate": 5.179279025393935e-07, "loss": 0.0, "step": 23414 }, { "epoch": 22.514423076923077, "grad_norm": 0.002308159600943327, "learning_rate": 5.175322843370312e-07, "loss": 0.0, "step": 23415 }, { "epoch": 22.515384615384615, "grad_norm": 0.000553902005776763, "learning_rate": 5.171368132747357e-07, "loss": 0.0, "step": 23416 }, { "epoch": 22.516346153846154, "grad_norm": 0.0007840635371394455, "learning_rate": 5.167414893586453e-07, "loss": 0.0, "step": 23417 }, { "epoch": 22.517307692307693, "grad_norm": 0.000923332292586565, "learning_rate": 5.163463125948942e-07, "loss": 0.0, "step": 23418 }, { "epoch": 22.51826923076923, "grad_norm": 0.0012952140532433987, "learning_rate": 5.15951282989614e-07, "loss": 0.0, "step": 23419 }, { "epoch": 22.51923076923077, "grad_norm": 0.001628731144592166, "learning_rate": 5.155564005489355e-07, "loss": 0.0, "step": 23420 }, { "epoch": 22.52019230769231, "grad_norm": 0.001341204042546451, "learning_rate": 5.151616652789837e-07, "loss": 0.0, "step": 23421 }, { "epoch": 22.521153846153847, "grad_norm": 0.0010384857887402177, "learning_rate": 5.147670771858848e-07, "loss": 0.0, "step": 23422 }, { "epoch": 22.522115384615386, "grad_norm": 0.007050937972962856, "learning_rate": 5.143726362757639e-07, "loss": 0.0001, "step": 23423 }, { "epoch": 22.523076923076925, "grad_norm": 0.00046078761806711555, "learning_rate": 5.139783425547385e-07, "loss": 0.0, "step": 23424 }, { "epoch": 22.52403846153846, "grad_norm": 0.0005934628425166011, "learning_rate": 5.135841960289267e-07, "loss": 0.0, "step": 23425 }, { "epoch": 22.525, "grad_norm": 0.002761151408776641, "learning_rate": 5.131901967044495e-07, "loss": 0.0, "step": 23426 }, { "epoch": 22.525961538461537, "grad_norm": 0.0004066307737957686, "learning_rate": 5.12796344587414e-07, "loss": 0.0, "step": 23427 }, { "epoch": 22.526923076923076, "grad_norm": 0.0031778744887560606, "learning_rate": 5.124026396839355e-07, "loss": 0.0, "step": 23428 }, { "epoch": 22.527884615384615, "grad_norm": 0.0006720129167661071, "learning_rate": 5.120090820001222e-07, "loss": 0.0, "step": 23429 }, { "epoch": 22.528846153846153, "grad_norm": 0.001547298626974225, "learning_rate": 5.116156715420817e-07, "loss": 0.0, "step": 23430 }, { "epoch": 22.529807692307692, "grad_norm": 0.001025608042255044, "learning_rate": 5.112224083159178e-07, "loss": 0.0, "step": 23431 }, { "epoch": 22.53076923076923, "grad_norm": 0.0010906700044870377, "learning_rate": 5.108292923277313e-07, "loss": 0.0, "step": 23432 }, { "epoch": 22.53173076923077, "grad_norm": 0.0007678552647121251, "learning_rate": 5.104363235836251e-07, "loss": 0.0, "step": 23433 }, { "epoch": 22.532692307692308, "grad_norm": 0.0004864123766310513, "learning_rate": 5.100435020896966e-07, "loss": 0.0, "step": 23434 }, { "epoch": 22.533653846153847, "grad_norm": 0.0020392555743455887, "learning_rate": 5.096508278520385e-07, "loss": 0.0, "step": 23435 }, { "epoch": 22.534615384615385, "grad_norm": 0.0008959976839832962, "learning_rate": 5.09258300876746e-07, "loss": 0.0, "step": 23436 }, { "epoch": 22.535576923076924, "grad_norm": 0.00031527745886705816, "learning_rate": 5.08865921169911e-07, "loss": 0.0, "step": 23437 }, { "epoch": 22.536538461538463, "grad_norm": 0.0010845264187082648, "learning_rate": 5.084736887376185e-07, "loss": 0.0, "step": 23438 }, { "epoch": 22.5375, "grad_norm": 0.001017634873278439, "learning_rate": 5.080816035859581e-07, "loss": 0.0, "step": 23439 }, { "epoch": 22.53846153846154, "grad_norm": 0.0010695329401642084, "learning_rate": 5.076896657210139e-07, "loss": 0.0, "step": 23440 }, { "epoch": 22.539423076923075, "grad_norm": 0.0009809640469029546, "learning_rate": 5.072978751488655e-07, "loss": 0.0, "step": 23441 }, { "epoch": 22.540384615384614, "grad_norm": 0.0014465892454609275, "learning_rate": 5.069062318755946e-07, "loss": 0.0, "step": 23442 }, { "epoch": 22.541346153846153, "grad_norm": 0.0010180494282394648, "learning_rate": 5.065147359072753e-07, "loss": 0.0, "step": 23443 }, { "epoch": 22.54230769230769, "grad_norm": 0.0013246771413832903, "learning_rate": 5.061233872499838e-07, "loss": 0.0, "step": 23444 }, { "epoch": 22.54326923076923, "grad_norm": 0.0014077209634706378, "learning_rate": 5.057321859097952e-07, "loss": 0.0, "step": 23445 }, { "epoch": 22.54423076923077, "grad_norm": 0.0007131003658287227, "learning_rate": 5.05341131892777e-07, "loss": 0.0, "step": 23446 }, { "epoch": 22.545192307692307, "grad_norm": 0.00147913652472198, "learning_rate": 5.049502252049965e-07, "loss": 0.0, "step": 23447 }, { "epoch": 22.546153846153846, "grad_norm": 0.0006512867985293269, "learning_rate": 5.045594658525232e-07, "loss": 0.0, "step": 23448 }, { "epoch": 22.547115384615385, "grad_norm": 0.0014023841358721256, "learning_rate": 5.041688538414169e-07, "loss": 0.0, "step": 23449 }, { "epoch": 22.548076923076923, "grad_norm": 0.0010140002705156803, "learning_rate": 5.037783891777393e-07, "loss": 0.0, "step": 23450 }, { "epoch": 22.549038461538462, "grad_norm": 0.0014180873986333609, "learning_rate": 5.033880718675522e-07, "loss": 0.0, "step": 23451 }, { "epoch": 22.55, "grad_norm": 0.0006220980430953205, "learning_rate": 5.029979019169084e-07, "loss": 0.0, "step": 23452 }, { "epoch": 22.55096153846154, "grad_norm": 0.0034244765993207693, "learning_rate": 5.026078793318645e-07, "loss": 0.0, "step": 23453 }, { "epoch": 22.551923076923078, "grad_norm": 0.0006906903581693769, "learning_rate": 5.02218004118471e-07, "loss": 0.0, "step": 23454 }, { "epoch": 22.552884615384617, "grad_norm": 0.0014575859531760216, "learning_rate": 5.018282762827786e-07, "loss": 0.0, "step": 23455 }, { "epoch": 22.553846153846155, "grad_norm": 0.0019377939170226455, "learning_rate": 5.014386958308359e-07, "loss": 0.0, "step": 23456 }, { "epoch": 22.55480769230769, "grad_norm": 0.00037712123594246805, "learning_rate": 5.010492627686848e-07, "loss": 0.0, "step": 23457 }, { "epoch": 22.55576923076923, "grad_norm": 0.0018571809632703662, "learning_rate": 5.006599771023701e-07, "loss": 0.0, "step": 23458 }, { "epoch": 22.556730769230768, "grad_norm": 0.0018799018580466509, "learning_rate": 5.00270838837934e-07, "loss": 0.0, "step": 23459 }, { "epoch": 22.557692307692307, "grad_norm": 0.001083974726498127, "learning_rate": 4.998818479814116e-07, "loss": 0.0, "step": 23460 }, { "epoch": 22.558653846153845, "grad_norm": 0.0010025291703641415, "learning_rate": 4.994930045388414e-07, "loss": 0.0, "step": 23461 }, { "epoch": 22.559615384615384, "grad_norm": 0.002607205184176564, "learning_rate": 4.991043085162562e-07, "loss": 0.0, "step": 23462 }, { "epoch": 22.560576923076923, "grad_norm": 0.0006235848995856941, "learning_rate": 4.987157599196868e-07, "loss": 0.0, "step": 23463 }, { "epoch": 22.56153846153846, "grad_norm": 0.0039903405122458935, "learning_rate": 4.983273587551629e-07, "loss": 0.0, "step": 23464 }, { "epoch": 22.5625, "grad_norm": 0.0007428581593558192, "learning_rate": 4.979391050287108e-07, "loss": 0.0, "step": 23465 }, { "epoch": 22.56346153846154, "grad_norm": 0.0021098144352436066, "learning_rate": 4.975509987463556e-07, "loss": 0.0, "step": 23466 }, { "epoch": 22.564423076923077, "grad_norm": 0.0031302457209676504, "learning_rate": 4.971630399141214e-07, "loss": 0.0, "step": 23467 }, { "epoch": 22.565384615384616, "grad_norm": 0.0007060920470394194, "learning_rate": 4.967752285380245e-07, "loss": 0.0, "step": 23468 }, { "epoch": 22.566346153846155, "grad_norm": 0.0007904372178018093, "learning_rate": 4.963875646240846e-07, "loss": 0.0, "step": 23469 }, { "epoch": 22.567307692307693, "grad_norm": 0.0009599158656783402, "learning_rate": 4.960000481783179e-07, "loss": 0.0, "step": 23470 }, { "epoch": 22.568269230769232, "grad_norm": 0.00035207756445743144, "learning_rate": 4.95612679206735e-07, "loss": 0.0, "step": 23471 }, { "epoch": 22.56923076923077, "grad_norm": 0.002157679758965969, "learning_rate": 4.952254577153492e-07, "loss": 0.0, "step": 23472 }, { "epoch": 22.57019230769231, "grad_norm": 0.0008530861814506352, "learning_rate": 4.94838383710169e-07, "loss": 0.0, "step": 23473 }, { "epoch": 22.571153846153845, "grad_norm": 0.0016345744952559471, "learning_rate": 4.944514571971981e-07, "loss": 0.0, "step": 23474 }, { "epoch": 22.572115384615383, "grad_norm": 0.0005204210756346583, "learning_rate": 4.940646781824432e-07, "loss": 0.0, "step": 23475 }, { "epoch": 22.573076923076922, "grad_norm": 0.0006155368755571544, "learning_rate": 4.936780466719049e-07, "loss": 0.0, "step": 23476 }, { "epoch": 22.57403846153846, "grad_norm": 0.0008845619158819318, "learning_rate": 4.932915626715817e-07, "loss": 0.0, "step": 23477 }, { "epoch": 22.575, "grad_norm": 0.0005259483586996794, "learning_rate": 4.929052261874734e-07, "loss": 0.0, "step": 23478 }, { "epoch": 22.575961538461538, "grad_norm": 0.0023349933326244354, "learning_rate": 4.925190372255717e-07, "loss": 0.0, "step": 23479 }, { "epoch": 22.576923076923077, "grad_norm": 0.0017944308929145336, "learning_rate": 4.921329957918697e-07, "loss": 0.0, "step": 23480 }, { "epoch": 22.577884615384615, "grad_norm": 0.0015057793352752924, "learning_rate": 4.917471018923603e-07, "loss": 0.0, "step": 23481 }, { "epoch": 22.578846153846154, "grad_norm": 0.000659435463603586, "learning_rate": 4.913613555330276e-07, "loss": 0.0, "step": 23482 }, { "epoch": 22.579807692307693, "grad_norm": 0.0007395062712021172, "learning_rate": 4.909757567198592e-07, "loss": 0.0, "step": 23483 }, { "epoch": 22.58076923076923, "grad_norm": 0.0006861098227091134, "learning_rate": 4.905903054588401e-07, "loss": 0.0, "step": 23484 }, { "epoch": 22.58173076923077, "grad_norm": 0.0015607899986207485, "learning_rate": 4.902050017559479e-07, "loss": 0.0, "step": 23485 }, { "epoch": 22.58269230769231, "grad_norm": 0.0006542007904499769, "learning_rate": 4.898198456171632e-07, "loss": 0.0, "step": 23486 }, { "epoch": 22.583653846153847, "grad_norm": 0.0008274783031083643, "learning_rate": 4.894348370484648e-07, "loss": 0.0, "step": 23487 }, { "epoch": 22.584615384615386, "grad_norm": 0.000757419562432915, "learning_rate": 4.89049976055822e-07, "loss": 0.0, "step": 23488 }, { "epoch": 22.585576923076925, "grad_norm": 0.0007404052885249257, "learning_rate": 4.886652626452115e-07, "loss": 0.0, "step": 23489 }, { "epoch": 22.58653846153846, "grad_norm": 0.0005075042136013508, "learning_rate": 4.882806968225984e-07, "loss": 0.0, "step": 23490 }, { "epoch": 22.5875, "grad_norm": 0.002836179221048951, "learning_rate": 4.878962785939533e-07, "loss": 0.0, "step": 23491 }, { "epoch": 22.588461538461537, "grad_norm": 0.0012256187619641423, "learning_rate": 4.875120079652407e-07, "loss": 0.0, "step": 23492 }, { "epoch": 22.589423076923076, "grad_norm": 0.0008816884947009385, "learning_rate": 4.871278849424221e-07, "loss": 0.0, "step": 23493 }, { "epoch": 22.590384615384615, "grad_norm": 0.0007204251596704125, "learning_rate": 4.867439095314597e-07, "loss": 0.0, "step": 23494 }, { "epoch": 22.591346153846153, "grad_norm": 0.0007982488605193794, "learning_rate": 4.863600817383107e-07, "loss": 0.0, "step": 23495 }, { "epoch": 22.592307692307692, "grad_norm": 0.0015918288845568895, "learning_rate": 4.859764015689294e-07, "loss": 0.0, "step": 23496 }, { "epoch": 22.59326923076923, "grad_norm": 0.007667687255889177, "learning_rate": 4.855928690292722e-07, "loss": 0.0, "step": 23497 }, { "epoch": 22.59423076923077, "grad_norm": 0.0006972519331611693, "learning_rate": 4.852094841252896e-07, "loss": 0.0, "step": 23498 }, { "epoch": 22.595192307692308, "grad_norm": 0.0016377483261749148, "learning_rate": 4.848262468629294e-07, "loss": 0.0, "step": 23499 }, { "epoch": 22.596153846153847, "grad_norm": 0.0006138073513284326, "learning_rate": 4.844431572481412e-07, "loss": 0.0, "step": 23500 }, { "epoch": 22.597115384615385, "grad_norm": 0.0015121233882382512, "learning_rate": 4.840602152868656e-07, "loss": 0.0, "step": 23501 }, { "epoch": 22.598076923076924, "grad_norm": 0.0037087248638272285, "learning_rate": 4.836774209850459e-07, "loss": 0.0, "step": 23502 }, { "epoch": 22.599038461538463, "grad_norm": 0.000772673636674881, "learning_rate": 4.832947743486238e-07, "loss": 0.0, "step": 23503 }, { "epoch": 22.6, "grad_norm": 0.0013118931092321873, "learning_rate": 4.829122753835347e-07, "loss": 0.0, "step": 23504 }, { "epoch": 22.60096153846154, "grad_norm": 0.001335382228717208, "learning_rate": 4.825299240957149e-07, "loss": 0.0, "step": 23505 }, { "epoch": 22.601923076923075, "grad_norm": 0.0008935659425333142, "learning_rate": 4.821477204910985e-07, "loss": 0.0, "step": 23506 }, { "epoch": 22.602884615384614, "grad_norm": 0.0010143413674086332, "learning_rate": 4.81765664575613e-07, "loss": 0.0, "step": 23507 }, { "epoch": 22.603846153846153, "grad_norm": 0.000639704114291817, "learning_rate": 4.813837563551893e-07, "loss": 0.0, "step": 23508 }, { "epoch": 22.60480769230769, "grad_norm": 0.0008114127558656037, "learning_rate": 4.810019958357548e-07, "loss": 0.0, "step": 23509 }, { "epoch": 22.60576923076923, "grad_norm": 0.0010512315202504396, "learning_rate": 4.806203830232292e-07, "loss": 0.0, "step": 23510 }, { "epoch": 22.60673076923077, "grad_norm": 0.002114553702995181, "learning_rate": 4.802389179235378e-07, "loss": 0.0, "step": 23511 }, { "epoch": 22.607692307692307, "grad_norm": 0.0005129780038259923, "learning_rate": 4.79857600542597e-07, "loss": 0.0, "step": 23512 }, { "epoch": 22.608653846153846, "grad_norm": 0.0007510491996072233, "learning_rate": 4.794764308863242e-07, "loss": 0.0, "step": 23513 }, { "epoch": 22.609615384615385, "grad_norm": 0.001595667446963489, "learning_rate": 4.79095408960637e-07, "loss": 0.0, "step": 23514 }, { "epoch": 22.610576923076923, "grad_norm": 0.0013012582203373313, "learning_rate": 4.787145347714439e-07, "loss": 0.0, "step": 23515 }, { "epoch": 22.611538461538462, "grad_norm": 0.0010246153688058257, "learning_rate": 4.783338083246569e-07, "loss": 0.0, "step": 23516 }, { "epoch": 22.6125, "grad_norm": 0.000864948146045208, "learning_rate": 4.779532296261846e-07, "loss": 0.0, "step": 23517 }, { "epoch": 22.61346153846154, "grad_norm": 0.0020374536979943514, "learning_rate": 4.775727986819301e-07, "loss": 0.0, "step": 23518 }, { "epoch": 22.614423076923078, "grad_norm": 0.0007310886867344379, "learning_rate": 4.771925154977975e-07, "loss": 0.0, "step": 23519 }, { "epoch": 22.615384615384617, "grad_norm": 0.000321382365655154, "learning_rate": 4.7681238007968864e-07, "loss": 0.0, "step": 23520 }, { "epoch": 22.616346153846155, "grad_norm": 0.00047119331429712474, "learning_rate": 4.764323924335013e-07, "loss": 0.0, "step": 23521 }, { "epoch": 22.61730769230769, "grad_norm": 0.0010983190732076764, "learning_rate": 4.7605255256513273e-07, "loss": 0.0, "step": 23522 }, { "epoch": 22.61826923076923, "grad_norm": 0.002146692480891943, "learning_rate": 4.756728604804739e-07, "loss": 0.0, "step": 23523 }, { "epoch": 22.619230769230768, "grad_norm": 0.0016033152351155877, "learning_rate": 4.752933161854201e-07, "loss": 0.0, "step": 23524 }, { "epoch": 22.620192307692307, "grad_norm": 0.002027960726991296, "learning_rate": 4.7491391968585987e-07, "loss": 0.0, "step": 23525 }, { "epoch": 22.621153846153845, "grad_norm": 0.00048039964167401195, "learning_rate": 4.745346709876786e-07, "loss": 0.0, "step": 23526 }, { "epoch": 22.622115384615384, "grad_norm": 0.0005123498267494142, "learning_rate": 4.741555700967626e-07, "loss": 0.0, "step": 23527 }, { "epoch": 22.623076923076923, "grad_norm": 0.00040180605719797313, "learning_rate": 4.7377661701899504e-07, "loss": 0.0, "step": 23528 }, { "epoch": 22.62403846153846, "grad_norm": 0.001393533544614911, "learning_rate": 4.7339781176025447e-07, "loss": 0.0, "step": 23529 }, { "epoch": 22.625, "grad_norm": 0.0004104429972358048, "learning_rate": 4.7301915432641954e-07, "loss": 0.0, "step": 23530 }, { "epoch": 22.62596153846154, "grad_norm": 0.001747053349390626, "learning_rate": 4.726406447233667e-07, "loss": 0.0, "step": 23531 }, { "epoch": 22.626923076923077, "grad_norm": 0.00039914692752063274, "learning_rate": 4.7226228295696786e-07, "loss": 0.0, "step": 23532 }, { "epoch": 22.627884615384616, "grad_norm": 0.000818042375613004, "learning_rate": 4.718840690330961e-07, "loss": 0.0, "step": 23533 }, { "epoch": 22.628846153846155, "grad_norm": 0.0008732500136829913, "learning_rate": 4.7150600295761794e-07, "loss": 0.0, "step": 23534 }, { "epoch": 22.629807692307693, "grad_norm": 0.0005264724022708833, "learning_rate": 4.711280847363997e-07, "loss": 0.0, "step": 23535 }, { "epoch": 22.630769230769232, "grad_norm": 0.0016816348070278764, "learning_rate": 4.7075031437530895e-07, "loss": 0.0, "step": 23536 }, { "epoch": 22.63173076923077, "grad_norm": 0.001610461506061256, "learning_rate": 4.703726918802032e-07, "loss": 0.0, "step": 23537 }, { "epoch": 22.63269230769231, "grad_norm": 0.0005988205084577203, "learning_rate": 4.699952172569455e-07, "loss": 0.0, "step": 23538 }, { "epoch": 22.633653846153845, "grad_norm": 0.0012721004895865917, "learning_rate": 4.696178905113913e-07, "loss": 0.0, "step": 23539 }, { "epoch": 22.634615384615383, "grad_norm": 0.0019668303430080414, "learning_rate": 4.692407116493958e-07, "loss": 0.0, "step": 23540 }, { "epoch": 22.635576923076922, "grad_norm": 0.0018687249394133687, "learning_rate": 4.688636806768121e-07, "loss": 0.0, "step": 23541 }, { "epoch": 22.63653846153846, "grad_norm": 0.0017056509386748075, "learning_rate": 4.6848679759949224e-07, "loss": 0.0, "step": 23542 }, { "epoch": 22.6375, "grad_norm": 0.000917975849006325, "learning_rate": 4.681100624232804e-07, "loss": 0.0, "step": 23543 }, { "epoch": 22.638461538461538, "grad_norm": 0.0006583642098121345, "learning_rate": 4.6773347515402635e-07, "loss": 0.0, "step": 23544 }, { "epoch": 22.639423076923077, "grad_norm": 0.0011310320114716887, "learning_rate": 4.6735703579757095e-07, "loss": 0.0, "step": 23545 }, { "epoch": 22.640384615384615, "grad_norm": 0.0008862162358127534, "learning_rate": 4.6698074435975513e-07, "loss": 0.0, "step": 23546 }, { "epoch": 22.641346153846154, "grad_norm": 0.0007403462077490985, "learning_rate": 4.66604600846422e-07, "loss": 0.0, "step": 23547 }, { "epoch": 22.642307692307693, "grad_norm": 0.0006439534481614828, "learning_rate": 4.662286052634024e-07, "loss": 0.0, "step": 23548 }, { "epoch": 22.64326923076923, "grad_norm": 0.0015023240121081471, "learning_rate": 4.65852757616535e-07, "loss": 0.0, "step": 23549 }, { "epoch": 22.64423076923077, "grad_norm": 0.000772239756770432, "learning_rate": 4.654770579116508e-07, "loss": 0.0, "step": 23550 }, { "epoch": 22.64519230769231, "grad_norm": 0.0011408383725211024, "learning_rate": 4.651015061545783e-07, "loss": 0.0, "step": 23551 }, { "epoch": 22.646153846153847, "grad_norm": 0.0009988993406295776, "learning_rate": 4.6472610235114513e-07, "loss": 0.0, "step": 23552 }, { "epoch": 22.647115384615386, "grad_norm": 0.001226282911375165, "learning_rate": 4.6435084650718e-07, "loss": 0.0, "step": 23553 }, { "epoch": 22.648076923076925, "grad_norm": 0.001292071770876646, "learning_rate": 4.639757386285004e-07, "loss": 0.0, "step": 23554 }, { "epoch": 22.64903846153846, "grad_norm": 0.0004408025124575943, "learning_rate": 4.6360077872093066e-07, "loss": 0.0, "step": 23555 }, { "epoch": 22.65, "grad_norm": 0.0006751972250640392, "learning_rate": 4.6322596679028717e-07, "loss": 0.0, "step": 23556 }, { "epoch": 22.650961538461537, "grad_norm": 0.0008587285992689431, "learning_rate": 4.6285130284238645e-07, "loss": 0.0, "step": 23557 }, { "epoch": 22.651923076923076, "grad_norm": 0.0006790771149098873, "learning_rate": 4.6247678688304263e-07, "loss": 0.0, "step": 23558 }, { "epoch": 22.652884615384615, "grad_norm": 0.002622209955006838, "learning_rate": 4.6210241891806674e-07, "loss": 0.0, "step": 23559 }, { "epoch": 22.653846153846153, "grad_norm": 0.004534500651061535, "learning_rate": 4.6172819895326736e-07, "loss": 0.0, "step": 23560 }, { "epoch": 22.654807692307692, "grad_norm": 0.0021353894844651222, "learning_rate": 4.6135412699445323e-07, "loss": 0.0, "step": 23561 }, { "epoch": 22.65576923076923, "grad_norm": 0.002227097051218152, "learning_rate": 4.609802030474264e-07, "loss": 0.0, "step": 23562 }, { "epoch": 22.65673076923077, "grad_norm": 0.0016483718063682318, "learning_rate": 4.6060642711799e-07, "loss": 0.0, "step": 23563 }, { "epoch": 22.657692307692308, "grad_norm": 0.0007345471531152725, "learning_rate": 4.602327992119449e-07, "loss": 0.0, "step": 23564 }, { "epoch": 22.658653846153847, "grad_norm": 0.001296319765970111, "learning_rate": 4.5985931933508757e-07, "loss": 0.0, "step": 23565 }, { "epoch": 22.659615384615385, "grad_norm": 0.006207574158906937, "learning_rate": 4.5948598749321225e-07, "loss": 0.0001, "step": 23566 }, { "epoch": 22.660576923076924, "grad_norm": 0.001599596464075148, "learning_rate": 4.5911280369211553e-07, "loss": 0.0, "step": 23567 }, { "epoch": 22.661538461538463, "grad_norm": 0.0010647219605743885, "learning_rate": 4.587397679375849e-07, "loss": 0.0, "step": 23568 }, { "epoch": 22.6625, "grad_norm": 0.0013198823435232043, "learning_rate": 4.583668802354102e-07, "loss": 0.0, "step": 23569 }, { "epoch": 22.66346153846154, "grad_norm": 0.0004914287128485739, "learning_rate": 4.5799414059137683e-07, "loss": 0.0, "step": 23570 }, { "epoch": 22.664423076923075, "grad_norm": 0.0005951283383183181, "learning_rate": 4.5762154901126787e-07, "loss": 0.0, "step": 23571 }, { "epoch": 22.665384615384614, "grad_norm": 0.0011198021238669753, "learning_rate": 4.5724910550086766e-07, "loss": 0.0, "step": 23572 }, { "epoch": 22.666346153846153, "grad_norm": 0.0006814583321101964, "learning_rate": 4.5687681006595265e-07, "loss": 0.0, "step": 23573 }, { "epoch": 22.66730769230769, "grad_norm": 0.0011774487793445587, "learning_rate": 4.565046627123015e-07, "loss": 0.0, "step": 23574 }, { "epoch": 22.66826923076923, "grad_norm": 0.0018522192258387804, "learning_rate": 4.5613266344568864e-07, "loss": 0.0, "step": 23575 }, { "epoch": 22.66923076923077, "grad_norm": 0.0013741774018853903, "learning_rate": 4.5576081227188485e-07, "loss": 0.0, "step": 23576 }, { "epoch": 22.670192307692307, "grad_norm": 0.0019605644047260284, "learning_rate": 4.5538910919666115e-07, "loss": 0.0, "step": 23577 }, { "epoch": 22.671153846153846, "grad_norm": 0.0008118209079839289, "learning_rate": 4.550175542257862e-07, "loss": 0.0, "step": 23578 }, { "epoch": 22.672115384615385, "grad_norm": 0.0009517884464003146, "learning_rate": 4.546461473650232e-07, "loss": 0.0, "step": 23579 }, { "epoch": 22.673076923076923, "grad_norm": 0.0008580186404287815, "learning_rate": 4.542748886201387e-07, "loss": 0.0, "step": 23580 }, { "epoch": 22.674038461538462, "grad_norm": 0.000982598285190761, "learning_rate": 4.539037779968902e-07, "loss": 0.0, "step": 23581 }, { "epoch": 22.675, "grad_norm": 0.0007259391713887453, "learning_rate": 4.535328155010377e-07, "loss": 0.0, "step": 23582 }, { "epoch": 22.67596153846154, "grad_norm": 0.0011026563588529825, "learning_rate": 4.5316200113833866e-07, "loss": 0.0, "step": 23583 }, { "epoch": 22.676923076923078, "grad_norm": 0.0018540030578151345, "learning_rate": 4.5279133491454406e-07, "loss": 0.0, "step": 23584 }, { "epoch": 22.677884615384617, "grad_norm": 0.0007551831076852977, "learning_rate": 4.524208168354072e-07, "loss": 0.0, "step": 23585 }, { "epoch": 22.678846153846155, "grad_norm": 0.0017974205547943711, "learning_rate": 4.520504469066789e-07, "loss": 0.0, "step": 23586 }, { "epoch": 22.67980769230769, "grad_norm": 0.0009873919188976288, "learning_rate": 4.516802251341035e-07, "loss": 0.0, "step": 23587 }, { "epoch": 22.68076923076923, "grad_norm": 0.0006856837426312268, "learning_rate": 4.513101515234275e-07, "loss": 0.0, "step": 23588 }, { "epoch": 22.681730769230768, "grad_norm": 0.00129980081692338, "learning_rate": 4.50940226080393e-07, "loss": 0.0, "step": 23589 }, { "epoch": 22.682692307692307, "grad_norm": 0.0006263374234549701, "learning_rate": 4.5057044881073984e-07, "loss": 0.0, "step": 23590 }, { "epoch": 22.683653846153845, "grad_norm": 0.0005989489145576954, "learning_rate": 4.502008197202068e-07, "loss": 0.0, "step": 23591 }, { "epoch": 22.684615384615384, "grad_norm": 0.0005391027079895139, "learning_rate": 4.498313388145281e-07, "loss": 0.0, "step": 23592 }, { "epoch": 22.685576923076923, "grad_norm": 0.0003280490345787257, "learning_rate": 4.4946200609943704e-07, "loss": 0.0, "step": 23593 }, { "epoch": 22.68653846153846, "grad_norm": 0.02479013055562973, "learning_rate": 4.490928215806667e-07, "loss": 0.0001, "step": 23594 }, { "epoch": 22.6875, "grad_norm": 0.0005199349252507091, "learning_rate": 4.4872378526394254e-07, "loss": 0.0, "step": 23595 }, { "epoch": 22.68846153846154, "grad_norm": 0.0011527914321050048, "learning_rate": 4.4835489715499225e-07, "loss": 0.0, "step": 23596 }, { "epoch": 22.689423076923077, "grad_norm": 0.00043350449413992465, "learning_rate": 4.479861572595423e-07, "loss": 0.0, "step": 23597 }, { "epoch": 22.690384615384616, "grad_norm": 0.001963895047083497, "learning_rate": 4.476175655833115e-07, "loss": 0.0, "step": 23598 }, { "epoch": 22.691346153846155, "grad_norm": 0.0010252312058582902, "learning_rate": 4.4724912213201853e-07, "loss": 0.0, "step": 23599 }, { "epoch": 22.692307692307693, "grad_norm": 0.001273083034902811, "learning_rate": 4.4688082691138444e-07, "loss": 0.0, "step": 23600 }, { "epoch": 22.693269230769232, "grad_norm": 0.0017290456453338265, "learning_rate": 4.465126799271213e-07, "loss": 0.0, "step": 23601 }, { "epoch": 22.69423076923077, "grad_norm": 0.0011578339617699385, "learning_rate": 4.461446811849424e-07, "loss": 0.0, "step": 23602 }, { "epoch": 22.69519230769231, "grad_norm": 0.0007857325836084783, "learning_rate": 4.457768306905563e-07, "loss": 0.0, "step": 23603 }, { "epoch": 22.696153846153845, "grad_norm": 0.000725158432032913, "learning_rate": 4.454091284496731e-07, "loss": 0.0, "step": 23604 }, { "epoch": 22.697115384615383, "grad_norm": 0.0004332934331614524, "learning_rate": 4.450415744679992e-07, "loss": 0.0, "step": 23605 }, { "epoch": 22.698076923076922, "grad_norm": 0.0012452092487365007, "learning_rate": 4.446741687512357e-07, "loss": 0.0, "step": 23606 }, { "epoch": 22.69903846153846, "grad_norm": 0.0010769404470920563, "learning_rate": 4.4430691130508354e-07, "loss": 0.0, "step": 23607 }, { "epoch": 22.7, "grad_norm": 0.0011455308413133025, "learning_rate": 4.439398021352448e-07, "loss": 0.0, "step": 23608 }, { "epoch": 22.700961538461538, "grad_norm": 0.00033012789208441973, "learning_rate": 4.4357284124741163e-07, "loss": 0.0, "step": 23609 }, { "epoch": 22.701923076923077, "grad_norm": 0.0011989515041932464, "learning_rate": 4.4320602864728056e-07, "loss": 0.0, "step": 23610 }, { "epoch": 22.702884615384615, "grad_norm": 0.0006065805209800601, "learning_rate": 4.4283936434054376e-07, "loss": 0.0, "step": 23611 }, { "epoch": 22.703846153846154, "grad_norm": 0.0004706622858066112, "learning_rate": 4.4247284833288996e-07, "loss": 0.0, "step": 23612 }, { "epoch": 22.704807692307693, "grad_norm": 0.0011923381825909019, "learning_rate": 4.421064806300068e-07, "loss": 0.0, "step": 23613 }, { "epoch": 22.70576923076923, "grad_norm": 0.0009467106428928673, "learning_rate": 4.4174026123757763e-07, "loss": 0.0, "step": 23614 }, { "epoch": 22.70673076923077, "grad_norm": 0.0015741168754175305, "learning_rate": 4.4137419016128666e-07, "loss": 0.0, "step": 23615 }, { "epoch": 22.70769230769231, "grad_norm": 0.0006137100863270462, "learning_rate": 4.410082674068161e-07, "loss": 0.0, "step": 23616 }, { "epoch": 22.708653846153847, "grad_norm": 0.0009788903407752514, "learning_rate": 4.406424929798403e-07, "loss": 0.0, "step": 23617 }, { "epoch": 22.709615384615386, "grad_norm": 0.0003994640428572893, "learning_rate": 4.402768668860358e-07, "loss": 0.0, "step": 23618 }, { "epoch": 22.710576923076925, "grad_norm": 0.0009566923836246133, "learning_rate": 4.399113891310791e-07, "loss": 0.0, "step": 23619 }, { "epoch": 22.71153846153846, "grad_norm": 0.0018802994163706899, "learning_rate": 4.3954605972063693e-07, "loss": 0.0, "step": 23620 }, { "epoch": 22.7125, "grad_norm": 0.0012835828820243478, "learning_rate": 4.391808786603813e-07, "loss": 0.0, "step": 23621 }, { "epoch": 22.713461538461537, "grad_norm": 0.0013240397674962878, "learning_rate": 4.388158459559788e-07, "loss": 0.0, "step": 23622 }, { "epoch": 22.714423076923076, "grad_norm": 0.000541444169357419, "learning_rate": 4.3845096161309055e-07, "loss": 0.0, "step": 23623 }, { "epoch": 22.715384615384615, "grad_norm": 0.0003107810625806451, "learning_rate": 4.38086225637383e-07, "loss": 0.0, "step": 23624 }, { "epoch": 22.716346153846153, "grad_norm": 0.0015448223566636443, "learning_rate": 4.377216380345106e-07, "loss": 0.0, "step": 23625 }, { "epoch": 22.717307692307692, "grad_norm": 0.0005919627728872001, "learning_rate": 4.373571988101344e-07, "loss": 0.0, "step": 23626 }, { "epoch": 22.71826923076923, "grad_norm": 0.0016235668445006013, "learning_rate": 4.369929079699098e-07, "loss": 0.0, "step": 23627 }, { "epoch": 22.71923076923077, "grad_norm": 0.0019061578204855323, "learning_rate": 4.366287655194867e-07, "loss": 0.0, "step": 23628 }, { "epoch": 22.720192307692308, "grad_norm": 0.0010338122956454754, "learning_rate": 4.362647714645174e-07, "loss": 0.0, "step": 23629 }, { "epoch": 22.721153846153847, "grad_norm": 0.0006522561307065189, "learning_rate": 4.3590092581065055e-07, "loss": 0.0, "step": 23630 }, { "epoch": 22.722115384615385, "grad_norm": 0.0009037738782353699, "learning_rate": 4.3553722856352954e-07, "loss": 0.0, "step": 23631 }, { "epoch": 22.723076923076924, "grad_norm": 0.0007618356612510979, "learning_rate": 4.3517367972879975e-07, "loss": 0.0, "step": 23632 }, { "epoch": 22.724038461538463, "grad_norm": 0.0014871340245008469, "learning_rate": 4.348102793121034e-07, "loss": 0.0, "step": 23633 }, { "epoch": 22.725, "grad_norm": 0.0008595014805905521, "learning_rate": 4.34447027319076e-07, "loss": 0.0, "step": 23634 }, { "epoch": 22.72596153846154, "grad_norm": 0.0004482941294554621, "learning_rate": 4.3408392375535844e-07, "loss": 0.0, "step": 23635 }, { "epoch": 22.726923076923075, "grad_norm": 0.001615356421098113, "learning_rate": 4.337209686265809e-07, "loss": 0.0, "step": 23636 }, { "epoch": 22.727884615384614, "grad_norm": 0.0024767310824245214, "learning_rate": 4.333581619383775e-07, "loss": 0.0, "step": 23637 }, { "epoch": 22.728846153846153, "grad_norm": 0.0028546249959617853, "learning_rate": 4.329955036963784e-07, "loss": 0.0, "step": 23638 }, { "epoch": 22.72980769230769, "grad_norm": 0.0011771401623263955, "learning_rate": 4.3263299390620903e-07, "loss": 0.0, "step": 23639 }, { "epoch": 22.73076923076923, "grad_norm": 0.0012977637816220522, "learning_rate": 4.32270632573496e-07, "loss": 0.0, "step": 23640 }, { "epoch": 22.73173076923077, "grad_norm": 0.0005599388969130814, "learning_rate": 4.3190841970386367e-07, "loss": 0.0, "step": 23641 }, { "epoch": 22.732692307692307, "grad_norm": 0.0012539997696876526, "learning_rate": 4.3154635530292866e-07, "loss": 0.0, "step": 23642 }, { "epoch": 22.733653846153846, "grad_norm": 0.0011412905296310782, "learning_rate": 4.3118443937631094e-07, "loss": 0.0, "step": 23643 }, { "epoch": 22.734615384615385, "grad_norm": 0.0015825097216293216, "learning_rate": 4.3082267192962823e-07, "loss": 0.0, "step": 23644 }, { "epoch": 22.735576923076923, "grad_norm": 0.0009876956464722753, "learning_rate": 4.304610529684916e-07, "loss": 0.0, "step": 23645 }, { "epoch": 22.736538461538462, "grad_norm": 0.0010780635057017207, "learning_rate": 4.3009958249851323e-07, "loss": 0.0, "step": 23646 }, { "epoch": 22.7375, "grad_norm": 0.0009062890312634408, "learning_rate": 4.29738260525302e-07, "loss": 0.0, "step": 23647 }, { "epoch": 22.73846153846154, "grad_norm": 0.0014597363770008087, "learning_rate": 4.2937708705446445e-07, "loss": 0.0, "step": 23648 }, { "epoch": 22.739423076923078, "grad_norm": 0.0007082123192958534, "learning_rate": 4.2901606209160616e-07, "loss": 0.0, "step": 23649 }, { "epoch": 22.740384615384617, "grad_norm": 0.0010030055418610573, "learning_rate": 4.28655185642326e-07, "loss": 0.0, "step": 23650 }, { "epoch": 22.741346153846155, "grad_norm": 0.0005902579869143665, "learning_rate": 4.2829445771222723e-07, "loss": 0.0, "step": 23651 }, { "epoch": 22.74230769230769, "grad_norm": 0.0007737927371636033, "learning_rate": 4.279338783069065e-07, "loss": 0.0, "step": 23652 }, { "epoch": 22.74326923076923, "grad_norm": 0.001939200796186924, "learning_rate": 4.2757344743195596e-07, "loss": 0.0, "step": 23653 }, { "epoch": 22.744230769230768, "grad_norm": 0.0007842230843380094, "learning_rate": 4.2721316509297226e-07, "loss": 0.0, "step": 23654 }, { "epoch": 22.745192307692307, "grad_norm": 0.0010312381200492382, "learning_rate": 4.2685303129554544e-07, "loss": 0.0, "step": 23655 }, { "epoch": 22.746153846153845, "grad_norm": 0.001982833258807659, "learning_rate": 4.26493046045261e-07, "loss": 0.0, "step": 23656 }, { "epoch": 22.747115384615384, "grad_norm": 0.0011061455588787794, "learning_rate": 4.261332093477066e-07, "loss": 0.0, "step": 23657 }, { "epoch": 22.748076923076923, "grad_norm": 0.004552626051008701, "learning_rate": 4.257735212084668e-07, "loss": 0.0, "step": 23658 }, { "epoch": 22.74903846153846, "grad_norm": 0.0005481344414874911, "learning_rate": 4.2541398163312154e-07, "loss": 0.0, "step": 23659 }, { "epoch": 22.75, "grad_norm": 0.0006880852160975337, "learning_rate": 4.2505459062725075e-07, "loss": 0.0, "step": 23660 }, { "epoch": 22.75096153846154, "grad_norm": 0.0018878239206969738, "learning_rate": 4.2469534819643e-07, "loss": 0.0, "step": 23661 }, { "epoch": 22.751923076923077, "grad_norm": 0.001024999306537211, "learning_rate": 4.2433625434623373e-07, "loss": 0.0, "step": 23662 }, { "epoch": 22.752884615384616, "grad_norm": 0.0013002627529203892, "learning_rate": 4.2397730908223636e-07, "loss": 0.0, "step": 23663 }, { "epoch": 22.753846153846155, "grad_norm": 0.0006861358997412026, "learning_rate": 4.236185124100034e-07, "loss": 0.0, "step": 23664 }, { "epoch": 22.754807692307693, "grad_norm": 0.0009839385747909546, "learning_rate": 4.23259864335106e-07, "loss": 0.0, "step": 23665 }, { "epoch": 22.755769230769232, "grad_norm": 0.0005545193562284112, "learning_rate": 4.229013648631086e-07, "loss": 0.0, "step": 23666 }, { "epoch": 22.75673076923077, "grad_norm": 0.00041255319956690073, "learning_rate": 4.225430139995734e-07, "loss": 0.0, "step": 23667 }, { "epoch": 22.75769230769231, "grad_norm": 0.0010562825482338667, "learning_rate": 4.2218481175006045e-07, "loss": 0.0, "step": 23668 }, { "epoch": 22.758653846153845, "grad_norm": 0.0008584429160691798, "learning_rate": 4.218267581201296e-07, "loss": 0.0, "step": 23669 }, { "epoch": 22.759615384615383, "grad_norm": 0.0002937013632617891, "learning_rate": 4.214688531153344e-07, "loss": 0.0, "step": 23670 }, { "epoch": 22.760576923076922, "grad_norm": 0.0004825059149879962, "learning_rate": 4.211110967412324e-07, "loss": 0.0, "step": 23671 }, { "epoch": 22.76153846153846, "grad_norm": 0.0008898095111362636, "learning_rate": 4.207534890033704e-07, "loss": 0.0, "step": 23672 }, { "epoch": 22.7625, "grad_norm": 0.0009901017183437943, "learning_rate": 4.2039602990729953e-07, "loss": 0.0, "step": 23673 }, { "epoch": 22.763461538461538, "grad_norm": 0.0005316170863807201, "learning_rate": 4.2003871945856755e-07, "loss": 0.0, "step": 23674 }, { "epoch": 22.764423076923077, "grad_norm": 0.0023766502272337675, "learning_rate": 4.1968155766271667e-07, "loss": 0.0, "step": 23675 }, { "epoch": 22.765384615384615, "grad_norm": 0.00046223995741456747, "learning_rate": 4.193245445252903e-07, "loss": 0.0, "step": 23676 }, { "epoch": 22.766346153846154, "grad_norm": 0.00029201223514974117, "learning_rate": 4.189676800518294e-07, "loss": 0.0, "step": 23677 }, { "epoch": 22.767307692307693, "grad_norm": 0.0005705334478989244, "learning_rate": 4.1861096424786864e-07, "loss": 0.0, "step": 23678 }, { "epoch": 22.76826923076923, "grad_norm": 0.001381153822876513, "learning_rate": 4.1825439711894454e-07, "loss": 0.0, "step": 23679 }, { "epoch": 22.76923076923077, "grad_norm": 0.0013575174380093813, "learning_rate": 4.1789797867059167e-07, "loss": 0.0, "step": 23680 }, { "epoch": 22.77019230769231, "grad_norm": 0.0010565801057964563, "learning_rate": 4.1754170890833777e-07, "loss": 0.0, "step": 23681 }, { "epoch": 22.771153846153847, "grad_norm": 0.00286847073584795, "learning_rate": 4.17185587837714e-07, "loss": 0.0, "step": 23682 }, { "epoch": 22.772115384615386, "grad_norm": 0.0011262818006798625, "learning_rate": 4.168296154642426e-07, "loss": 0.0, "step": 23683 }, { "epoch": 22.773076923076925, "grad_norm": 0.0018361379625275731, "learning_rate": 4.164737917934503e-07, "loss": 0.0, "step": 23684 }, { "epoch": 22.77403846153846, "grad_norm": 0.0010164682753384113, "learning_rate": 4.161181168308581e-07, "loss": 0.0, "step": 23685 }, { "epoch": 22.775, "grad_norm": 0.0008481238037347794, "learning_rate": 4.157625905819829e-07, "loss": 0.0, "step": 23686 }, { "epoch": 22.775961538461537, "grad_norm": 0.0012062545865774155, "learning_rate": 4.1540721305234234e-07, "loss": 0.0, "step": 23687 }, { "epoch": 22.776923076923076, "grad_norm": 0.0012242724187672138, "learning_rate": 4.150519842474543e-07, "loss": 0.0, "step": 23688 }, { "epoch": 22.777884615384615, "grad_norm": 0.00014084034773986787, "learning_rate": 4.146969041728255e-07, "loss": 0.0, "step": 23689 }, { "epoch": 22.778846153846153, "grad_norm": 0.0012778087984770536, "learning_rate": 4.1434197283396815e-07, "loss": 0.0, "step": 23690 }, { "epoch": 22.779807692307692, "grad_norm": 0.005095372907817364, "learning_rate": 4.139871902363912e-07, "loss": 0.0, "step": 23691 }, { "epoch": 22.78076923076923, "grad_norm": 0.0012156149605289102, "learning_rate": 4.136325563855981e-07, "loss": 0.0, "step": 23692 }, { "epoch": 22.78173076923077, "grad_norm": 0.0005079637630842626, "learning_rate": 4.1327807128709207e-07, "loss": 0.0, "step": 23693 }, { "epoch": 22.782692307692308, "grad_norm": 0.006778252311050892, "learning_rate": 4.129237349463733e-07, "loss": 0.0001, "step": 23694 }, { "epoch": 22.783653846153847, "grad_norm": 0.0014653169782832265, "learning_rate": 4.125695473689406e-07, "loss": 0.0, "step": 23695 }, { "epoch": 22.784615384615385, "grad_norm": 0.0004270209465175867, "learning_rate": 4.122155085602908e-07, "loss": 0.0, "step": 23696 }, { "epoch": 22.785576923076924, "grad_norm": 0.0014405845431610942, "learning_rate": 4.11861618525915e-07, "loss": 0.0, "step": 23697 }, { "epoch": 22.786538461538463, "grad_norm": 0.0009798312094062567, "learning_rate": 4.115078772713055e-07, "loss": 0.0, "step": 23698 }, { "epoch": 22.7875, "grad_norm": 0.0009654098539613187, "learning_rate": 4.1115428480195343e-07, "loss": 0.0, "step": 23699 }, { "epoch": 22.78846153846154, "grad_norm": 0.000988065148703754, "learning_rate": 4.108008411233433e-07, "loss": 0.0, "step": 23700 }, { "epoch": 22.789423076923075, "grad_norm": 0.0023113256320357323, "learning_rate": 4.104475462409596e-07, "loss": 0.0, "step": 23701 }, { "epoch": 22.790384615384614, "grad_norm": 0.0019502483773976564, "learning_rate": 4.100944001602869e-07, "loss": 0.0, "step": 23702 }, { "epoch": 22.791346153846153, "grad_norm": 0.0012008283520117402, "learning_rate": 4.0974140288680074e-07, "loss": 0.0, "step": 23703 }, { "epoch": 22.79230769230769, "grad_norm": 0.004454844631254673, "learning_rate": 4.0938855442598233e-07, "loss": 0.0, "step": 23704 }, { "epoch": 22.79326923076923, "grad_norm": 0.0019606538116931915, "learning_rate": 4.090358547833051e-07, "loss": 0.0, "step": 23705 }, { "epoch": 22.79423076923077, "grad_norm": 0.003205347340553999, "learning_rate": 4.086833039642424e-07, "loss": 0.0, "step": 23706 }, { "epoch": 22.795192307692307, "grad_norm": 0.002444314770400524, "learning_rate": 4.083309019742643e-07, "loss": 0.0, "step": 23707 }, { "epoch": 22.796153846153846, "grad_norm": 0.0008336821920238435, "learning_rate": 4.0797864881883977e-07, "loss": 0.0, "step": 23708 }, { "epoch": 22.797115384615385, "grad_norm": 0.0006382057908922434, "learning_rate": 4.076265445034333e-07, "loss": 0.0, "step": 23709 }, { "epoch": 22.798076923076923, "grad_norm": 0.001812431961297989, "learning_rate": 4.072745890335117e-07, "loss": 0.0, "step": 23710 }, { "epoch": 22.799038461538462, "grad_norm": 0.0005622825119644403, "learning_rate": 4.0692278241453276e-07, "loss": 0.0, "step": 23711 }, { "epoch": 22.8, "grad_norm": 0.00039645572542212903, "learning_rate": 4.0657112465195657e-07, "loss": 0.0, "step": 23712 }, { "epoch": 22.80096153846154, "grad_norm": 0.0010094065219163895, "learning_rate": 4.06219615751241e-07, "loss": 0.0, "step": 23713 }, { "epoch": 22.801923076923078, "grad_norm": 0.0011442372342571616, "learning_rate": 4.058682557178384e-07, "loss": 0.0, "step": 23714 }, { "epoch": 22.802884615384617, "grad_norm": 0.0006064109038561583, "learning_rate": 4.055170445572043e-07, "loss": 0.0, "step": 23715 }, { "epoch": 22.803846153846155, "grad_norm": 0.0019609625451266766, "learning_rate": 4.0516598227478443e-07, "loss": 0.0, "step": 23716 }, { "epoch": 22.80480769230769, "grad_norm": 0.00036380832898430526, "learning_rate": 4.0481506887602773e-07, "loss": 0.0, "step": 23717 }, { "epoch": 22.80576923076923, "grad_norm": 0.004034692887216806, "learning_rate": 4.0446430436638093e-07, "loss": 0.0, "step": 23718 }, { "epoch": 22.806730769230768, "grad_norm": 0.0009624559315852821, "learning_rate": 4.041136887512842e-07, "loss": 0.0, "step": 23719 }, { "epoch": 22.807692307692307, "grad_norm": 0.0016529130516573787, "learning_rate": 4.037632220361798e-07, "loss": 0.0, "step": 23720 }, { "epoch": 22.808653846153845, "grad_norm": 0.0035132821649312973, "learning_rate": 4.034129042265067e-07, "loss": 0.0, "step": 23721 }, { "epoch": 22.809615384615384, "grad_norm": 0.001028627622872591, "learning_rate": 4.0306273532769834e-07, "loss": 0.0, "step": 23722 }, { "epoch": 22.810576923076923, "grad_norm": 0.007338121999055147, "learning_rate": 4.027127153451893e-07, "loss": 0.0, "step": 23723 }, { "epoch": 22.81153846153846, "grad_norm": 0.002819367917254567, "learning_rate": 4.0236284428441297e-07, "loss": 0.0, "step": 23724 }, { "epoch": 22.8125, "grad_norm": 0.0008028286974877119, "learning_rate": 4.0201312215079614e-07, "loss": 0.0, "step": 23725 }, { "epoch": 22.81346153846154, "grad_norm": 0.00038630227209068835, "learning_rate": 4.0166354894976557e-07, "loss": 0.0, "step": 23726 }, { "epoch": 22.814423076923077, "grad_norm": 0.0013142396928742528, "learning_rate": 4.013141246867458e-07, "loss": 0.0, "step": 23727 }, { "epoch": 22.815384615384616, "grad_norm": 0.0016435649013146758, "learning_rate": 4.0096484936715805e-07, "loss": 0.0, "step": 23728 }, { "epoch": 22.816346153846155, "grad_norm": 0.0016578295035287738, "learning_rate": 4.006157229964247e-07, "loss": 0.0, "step": 23729 }, { "epoch": 22.817307692307693, "grad_norm": 0.002798588015139103, "learning_rate": 4.0026674557996025e-07, "loss": 0.0, "step": 23730 }, { "epoch": 22.818269230769232, "grad_norm": 0.0006391014903783798, "learning_rate": 3.999179171231815e-07, "loss": 0.0, "step": 23731 }, { "epoch": 22.81923076923077, "grad_norm": 0.0007362348842434585, "learning_rate": 3.9956923763150193e-07, "loss": 0.0, "step": 23732 }, { "epoch": 22.82019230769231, "grad_norm": 0.0015122295590117574, "learning_rate": 3.9922070711032934e-07, "loss": 0.0, "step": 23733 }, { "epoch": 22.821153846153845, "grad_norm": 0.0012256826739758253, "learning_rate": 3.988723255650728e-07, "loss": 0.0, "step": 23734 }, { "epoch": 22.822115384615383, "grad_norm": 0.00041614132351242006, "learning_rate": 3.9852409300114135e-07, "loss": 0.0, "step": 23735 }, { "epoch": 22.823076923076922, "grad_norm": 0.0016347747296094894, "learning_rate": 3.9817600942393397e-07, "loss": 0.0, "step": 23736 }, { "epoch": 22.82403846153846, "grad_norm": 0.0011061986442655325, "learning_rate": 3.97828074838853e-07, "loss": 0.0, "step": 23737 }, { "epoch": 22.825, "grad_norm": 0.00017605655011720955, "learning_rate": 3.974802892513008e-07, "loss": 0.0, "step": 23738 }, { "epoch": 22.825961538461538, "grad_norm": 0.0008598472340963781, "learning_rate": 3.9713265266666966e-07, "loss": 0.0, "step": 23739 }, { "epoch": 22.826923076923077, "grad_norm": 0.0010403397027403116, "learning_rate": 3.967851650903576e-07, "loss": 0.0, "step": 23740 }, { "epoch": 22.827884615384615, "grad_norm": 0.0007590187015011907, "learning_rate": 3.964378265277524e-07, "loss": 0.0, "step": 23741 }, { "epoch": 22.828846153846154, "grad_norm": 0.0007988273282535374, "learning_rate": 3.9609063698424765e-07, "loss": 0.0, "step": 23742 }, { "epoch": 22.829807692307693, "grad_norm": 0.0012633985606953502, "learning_rate": 3.9574359646522897e-07, "loss": 0.0, "step": 23743 }, { "epoch": 22.83076923076923, "grad_norm": 0.000736822432372719, "learning_rate": 3.9539670497607986e-07, "loss": 0.0, "step": 23744 }, { "epoch": 22.83173076923077, "grad_norm": 0.0008728295215405524, "learning_rate": 3.9504996252218597e-07, "loss": 0.0, "step": 23745 }, { "epoch": 22.83269230769231, "grad_norm": 0.0011158352717757225, "learning_rate": 3.947033691089275e-07, "loss": 0.0, "step": 23746 }, { "epoch": 22.833653846153847, "grad_norm": 0.0017609550850465894, "learning_rate": 3.943569247416801e-07, "loss": 0.0, "step": 23747 }, { "epoch": 22.834615384615386, "grad_norm": 0.0015126630896702409, "learning_rate": 3.940106294258206e-07, "loss": 0.0, "step": 23748 }, { "epoch": 22.835576923076925, "grad_norm": 0.0006240682560019195, "learning_rate": 3.9366448316672356e-07, "loss": 0.0, "step": 23749 }, { "epoch": 22.83653846153846, "grad_norm": 0.0005537208053283393, "learning_rate": 3.9331848596975917e-07, "loss": 0.0, "step": 23750 }, { "epoch": 22.8375, "grad_norm": 0.0008889074670150876, "learning_rate": 3.929726378402987e-07, "loss": 0.0, "step": 23751 }, { "epoch": 22.838461538461537, "grad_norm": 0.001171707408502698, "learning_rate": 3.926269387837034e-07, "loss": 0.0, "step": 23752 }, { "epoch": 22.839423076923076, "grad_norm": 0.0010177582735195756, "learning_rate": 3.9228138880534227e-07, "loss": 0.0, "step": 23753 }, { "epoch": 22.840384615384615, "grad_norm": 0.0009431391954421997, "learning_rate": 3.919359879105766e-07, "loss": 0.0, "step": 23754 }, { "epoch": 22.841346153846153, "grad_norm": 0.0009586751111783087, "learning_rate": 3.9159073610476326e-07, "loss": 0.0, "step": 23755 }, { "epoch": 22.842307692307692, "grad_norm": 0.00048626706120558083, "learning_rate": 3.9124563339326125e-07, "loss": 0.0, "step": 23756 }, { "epoch": 22.84326923076923, "grad_norm": 0.0013699880801141262, "learning_rate": 3.9090067978142745e-07, "loss": 0.0, "step": 23757 }, { "epoch": 22.84423076923077, "grad_norm": 0.0010733327362686396, "learning_rate": 3.9055587527461194e-07, "loss": 0.0, "step": 23758 }, { "epoch": 22.845192307692308, "grad_norm": 0.001978329150006175, "learning_rate": 3.9021121987816493e-07, "loss": 0.0, "step": 23759 }, { "epoch": 22.846153846153847, "grad_norm": 0.006514360662549734, "learning_rate": 3.8986671359743767e-07, "loss": 0.0001, "step": 23760 }, { "epoch": 22.847115384615385, "grad_norm": 0.0015428569167852402, "learning_rate": 3.895223564377715e-07, "loss": 0.0, "step": 23761 }, { "epoch": 22.848076923076924, "grad_norm": 0.00031097454484552145, "learning_rate": 3.891781484045132e-07, "loss": 0.0, "step": 23762 }, { "epoch": 22.849038461538463, "grad_norm": 0.0005989954224787652, "learning_rate": 3.888340895030018e-07, "loss": 0.0, "step": 23763 }, { "epoch": 22.85, "grad_norm": 0.0009953900007531047, "learning_rate": 3.884901797385765e-07, "loss": 0.0, "step": 23764 }, { "epoch": 22.85096153846154, "grad_norm": 0.003612960921600461, "learning_rate": 3.8814641911657516e-07, "loss": 0.0, "step": 23765 }, { "epoch": 22.851923076923075, "grad_norm": 0.0006123696221038699, "learning_rate": 3.8780280764233016e-07, "loss": 0.0, "step": 23766 }, { "epoch": 22.852884615384614, "grad_norm": 0.0011975298402830958, "learning_rate": 3.874593453211739e-07, "loss": 0.0, "step": 23767 }, { "epoch": 22.853846153846153, "grad_norm": 0.001728479634039104, "learning_rate": 3.871160321584377e-07, "loss": 0.0, "step": 23768 }, { "epoch": 22.85480769230769, "grad_norm": 0.0013614487834274769, "learning_rate": 3.8677286815944627e-07, "loss": 0.0, "step": 23769 }, { "epoch": 22.85576923076923, "grad_norm": 0.00122066552285105, "learning_rate": 3.864298533295252e-07, "loss": 0.0, "step": 23770 }, { "epoch": 22.85673076923077, "grad_norm": 0.0013945777900516987, "learning_rate": 3.860869876739981e-07, "loss": 0.0, "step": 23771 }, { "epoch": 22.857692307692307, "grad_norm": 0.0007804307388141751, "learning_rate": 3.8574427119818294e-07, "loss": 0.0, "step": 23772 }, { "epoch": 22.858653846153846, "grad_norm": 0.002795261098071933, "learning_rate": 3.8540170390740097e-07, "loss": 0.0, "step": 23773 }, { "epoch": 22.859615384615385, "grad_norm": 0.0003631502913776785, "learning_rate": 3.850592858069646e-07, "loss": 0.0, "step": 23774 }, { "epoch": 22.860576923076923, "grad_norm": 0.0021271202713251114, "learning_rate": 3.8471701690218966e-07, "loss": 0.0, "step": 23775 }, { "epoch": 22.861538461538462, "grad_norm": 0.0012745448620989919, "learning_rate": 3.8437489719838626e-07, "loss": 0.0, "step": 23776 }, { "epoch": 22.8625, "grad_norm": 0.0008984074229374528, "learning_rate": 3.840329267008613e-07, "loss": 0.0, "step": 23777 }, { "epoch": 22.86346153846154, "grad_norm": 0.0009124143980443478, "learning_rate": 3.8369110541492396e-07, "loss": 0.0, "step": 23778 }, { "epoch": 22.864423076923078, "grad_norm": 0.0007195802172645926, "learning_rate": 3.833494333458776e-07, "loss": 0.0, "step": 23779 }, { "epoch": 22.865384615384617, "grad_norm": 0.004321647342294455, "learning_rate": 3.8300791049902255e-07, "loss": 0.0, "step": 23780 }, { "epoch": 22.866346153846155, "grad_norm": 0.0006015777471475303, "learning_rate": 3.826665368796589e-07, "loss": 0.0, "step": 23781 }, { "epoch": 22.86730769230769, "grad_norm": 0.0014399626525118947, "learning_rate": 3.8232531249308593e-07, "loss": 0.0, "step": 23782 }, { "epoch": 22.86826923076923, "grad_norm": 0.0008066450245678425, "learning_rate": 3.8198423734459476e-07, "loss": 0.0, "step": 23783 }, { "epoch": 22.869230769230768, "grad_norm": 0.0008908016025088727, "learning_rate": 3.816433114394813e-07, "loss": 0.0, "step": 23784 }, { "epoch": 22.870192307692307, "grad_norm": 0.0007098338683135808, "learning_rate": 3.8130253478303234e-07, "loss": 0.0, "step": 23785 }, { "epoch": 22.871153846153845, "grad_norm": 0.0015547899529337883, "learning_rate": 3.8096190738053815e-07, "loss": 0.0, "step": 23786 }, { "epoch": 22.872115384615384, "grad_norm": 0.0006983439670875669, "learning_rate": 3.806214292372845e-07, "loss": 0.0, "step": 23787 }, { "epoch": 22.873076923076923, "grad_norm": 0.002146755112335086, "learning_rate": 3.802811003585527e-07, "loss": 0.0, "step": 23788 }, { "epoch": 22.87403846153846, "grad_norm": 0.0005839256336912513, "learning_rate": 3.7994092074962405e-07, "loss": 0.0, "step": 23789 }, { "epoch": 22.875, "grad_norm": 0.0025572143495082855, "learning_rate": 3.7960089041578e-07, "loss": 0.0, "step": 23790 }, { "epoch": 22.87596153846154, "grad_norm": 0.00032965524587780237, "learning_rate": 3.792610093622928e-07, "loss": 0.0, "step": 23791 }, { "epoch": 22.876923076923077, "grad_norm": 0.0013261322164908051, "learning_rate": 3.7892127759443733e-07, "loss": 0.0, "step": 23792 }, { "epoch": 22.877884615384616, "grad_norm": 0.0005894204950891435, "learning_rate": 3.785816951174881e-07, "loss": 0.0, "step": 23793 }, { "epoch": 22.878846153846155, "grad_norm": 0.001155814272351563, "learning_rate": 3.782422619367121e-07, "loss": 0.0, "step": 23794 }, { "epoch": 22.879807692307693, "grad_norm": 0.0007123303366824985, "learning_rate": 3.779029780573773e-07, "loss": 0.0, "step": 23795 }, { "epoch": 22.880769230769232, "grad_norm": 0.0055343820713460445, "learning_rate": 3.775638434847462e-07, "loss": 0.0, "step": 23796 }, { "epoch": 22.88173076923077, "grad_norm": 0.0003509101225063205, "learning_rate": 3.772248582240823e-07, "loss": 0.0, "step": 23797 }, { "epoch": 22.88269230769231, "grad_norm": 0.0004447965766303241, "learning_rate": 3.7688602228064807e-07, "loss": 0.0, "step": 23798 }, { "epoch": 22.883653846153845, "grad_norm": 0.0007485945825465024, "learning_rate": 3.7654733565969826e-07, "loss": 0.0, "step": 23799 }, { "epoch": 22.884615384615383, "grad_norm": 0.0003511624818202108, "learning_rate": 3.762087983664886e-07, "loss": 0.0, "step": 23800 }, { "epoch": 22.885576923076922, "grad_norm": 0.0005612082895822823, "learning_rate": 3.758704104062749e-07, "loss": 0.0, "step": 23801 }, { "epoch": 22.88653846153846, "grad_norm": 0.001995810540392995, "learning_rate": 3.7553217178430414e-07, "loss": 0.0, "step": 23802 }, { "epoch": 22.8875, "grad_norm": 0.0011307409731671214, "learning_rate": 3.751940825058265e-07, "loss": 0.0, "step": 23803 }, { "epoch": 22.888461538461538, "grad_norm": 0.003919864073395729, "learning_rate": 3.748561425760899e-07, "loss": 0.0, "step": 23804 }, { "epoch": 22.889423076923077, "grad_norm": 0.0026717951986938715, "learning_rate": 3.7451835200033484e-07, "loss": 0.0, "step": 23805 }, { "epoch": 22.890384615384615, "grad_norm": 0.001138572464697063, "learning_rate": 3.7418071078380693e-07, "loss": 0.0, "step": 23806 }, { "epoch": 22.891346153846154, "grad_norm": 0.00021588540403172374, "learning_rate": 3.738432189317409e-07, "loss": 0.0, "step": 23807 }, { "epoch": 22.892307692307693, "grad_norm": 0.0010382243199273944, "learning_rate": 3.7350587644937484e-07, "loss": 0.0, "step": 23808 }, { "epoch": 22.89326923076923, "grad_norm": 0.0020231178496032953, "learning_rate": 3.7316868334194676e-07, "loss": 0.0, "step": 23809 }, { "epoch": 22.89423076923077, "grad_norm": 0.0020936615765094757, "learning_rate": 3.728316396146836e-07, "loss": 0.0, "step": 23810 }, { "epoch": 22.89519230769231, "grad_norm": 0.0023336780723184347, "learning_rate": 3.724947452728189e-07, "loss": 0.0, "step": 23811 }, { "epoch": 22.896153846153847, "grad_norm": 0.0011638449504971504, "learning_rate": 3.721580003215808e-07, "loss": 0.0, "step": 23812 }, { "epoch": 22.897115384615386, "grad_norm": 0.0015911293448880315, "learning_rate": 3.7182140476619166e-07, "loss": 0.0, "step": 23813 }, { "epoch": 22.898076923076925, "grad_norm": 0.0016658218810334802, "learning_rate": 3.714849586118763e-07, "loss": 0.0, "step": 23814 }, { "epoch": 22.89903846153846, "grad_norm": 0.001626836252398789, "learning_rate": 3.71148661863856e-07, "loss": 0.0, "step": 23815 }, { "epoch": 22.9, "grad_norm": 0.0007218847167678177, "learning_rate": 3.7081251452734666e-07, "loss": 0.0, "step": 23816 }, { "epoch": 22.900961538461537, "grad_norm": 0.0021636607125401497, "learning_rate": 3.7047651660756745e-07, "loss": 0.0, "step": 23817 }, { "epoch": 22.901923076923076, "grad_norm": 0.0009787090821191669, "learning_rate": 3.701406681097286e-07, "loss": 0.0, "step": 23818 }, { "epoch": 22.902884615384615, "grad_norm": 0.0011451768223196268, "learning_rate": 3.6980496903904373e-07, "loss": 0.0, "step": 23819 }, { "epoch": 22.903846153846153, "grad_norm": 0.001630432321690023, "learning_rate": 3.69469419400722e-07, "loss": 0.0, "step": 23820 }, { "epoch": 22.904807692307692, "grad_norm": 0.0013242277782410383, "learning_rate": 3.691340191999693e-07, "loss": 0.0, "step": 23821 }, { "epoch": 22.90576923076923, "grad_norm": 0.0006607282557524741, "learning_rate": 3.6879876844198915e-07, "loss": 0.0, "step": 23822 }, { "epoch": 22.90673076923077, "grad_norm": 0.0002766275138128549, "learning_rate": 3.6846366713198745e-07, "loss": 0.0, "step": 23823 }, { "epoch": 22.907692307692308, "grad_norm": 0.0011127152247354388, "learning_rate": 3.6812871527516003e-07, "loss": 0.0, "step": 23824 }, { "epoch": 22.908653846153847, "grad_norm": 0.001711872173473239, "learning_rate": 3.67793912876705e-07, "loss": 0.0, "step": 23825 }, { "epoch": 22.909615384615385, "grad_norm": 0.0010269545018672943, "learning_rate": 3.674592599418192e-07, "loss": 0.0, "step": 23826 }, { "epoch": 22.910576923076924, "grad_norm": 0.0016401162138208747, "learning_rate": 3.6712475647569414e-07, "loss": 0.0, "step": 23827 }, { "epoch": 22.911538461538463, "grad_norm": 0.0008949089678935707, "learning_rate": 3.667904024835212e-07, "loss": 0.0, "step": 23828 }, { "epoch": 22.9125, "grad_norm": 0.0009513510740362108, "learning_rate": 3.6645619797048835e-07, "loss": 0.0, "step": 23829 }, { "epoch": 22.91346153846154, "grad_norm": 0.0003588595718611032, "learning_rate": 3.661221429417805e-07, "loss": 0.0, "step": 23830 }, { "epoch": 22.914423076923075, "grad_norm": 0.0011579705169424415, "learning_rate": 3.6578823740258227e-07, "loss": 0.0, "step": 23831 }, { "epoch": 22.915384615384614, "grad_norm": 0.0003122114285361022, "learning_rate": 3.6545448135807406e-07, "loss": 0.0, "step": 23832 }, { "epoch": 22.916346153846153, "grad_norm": 0.005229311063885689, "learning_rate": 3.6512087481343493e-07, "loss": 0.0, "step": 23833 }, { "epoch": 22.91730769230769, "grad_norm": 0.000924766412936151, "learning_rate": 3.647874177738431e-07, "loss": 0.0, "step": 23834 }, { "epoch": 22.91826923076923, "grad_norm": 0.001328786718659103, "learning_rate": 3.6445411024447096e-07, "loss": 0.0, "step": 23835 }, { "epoch": 22.91923076923077, "grad_norm": 0.00039890126208774745, "learning_rate": 3.6412095223049e-07, "loss": 0.0, "step": 23836 }, { "epoch": 22.920192307692307, "grad_norm": 0.0013786004856228828, "learning_rate": 3.6378794373707283e-07, "loss": 0.0, "step": 23837 }, { "epoch": 22.921153846153846, "grad_norm": 0.00033747890847735107, "learning_rate": 3.6345508476938296e-07, "loss": 0.0, "step": 23838 }, { "epoch": 22.922115384615385, "grad_norm": 0.0014678919687867165, "learning_rate": 3.631223753325874e-07, "loss": 0.0, "step": 23839 }, { "epoch": 22.923076923076923, "grad_norm": 0.0009849505731835961, "learning_rate": 3.62789815431851e-07, "loss": 0.0, "step": 23840 }, { "epoch": 22.924038461538462, "grad_norm": 0.0017000179504975677, "learning_rate": 3.624574050723295e-07, "loss": 0.0, "step": 23841 }, { "epoch": 22.925, "grad_norm": 0.0022581813391298056, "learning_rate": 3.6212514425918445e-07, "loss": 0.0, "step": 23842 }, { "epoch": 22.92596153846154, "grad_norm": 0.0016311721410602331, "learning_rate": 3.617930329975694e-07, "loss": 0.0, "step": 23843 }, { "epoch": 22.926923076923078, "grad_norm": 0.00077247922308743, "learning_rate": 3.6146107129263805e-07, "loss": 0.0, "step": 23844 }, { "epoch": 22.927884615384617, "grad_norm": 0.0005822549574077129, "learning_rate": 3.611292591495441e-07, "loss": 0.0, "step": 23845 }, { "epoch": 22.928846153846155, "grad_norm": 0.0005937484093010426, "learning_rate": 3.607975965734334e-07, "loss": 0.0, "step": 23846 }, { "epoch": 22.92980769230769, "grad_norm": 0.0002750498242676258, "learning_rate": 3.604660835694518e-07, "loss": 0.0, "step": 23847 }, { "epoch": 22.93076923076923, "grad_norm": 0.0007338965078815818, "learning_rate": 3.601347201427474e-07, "loss": 0.0, "step": 23848 }, { "epoch": 22.931730769230768, "grad_norm": 0.00039777508936822414, "learning_rate": 3.598035062984584e-07, "loss": 0.0, "step": 23849 }, { "epoch": 22.932692307692307, "grad_norm": 0.006857094820588827, "learning_rate": 3.5947244204172505e-07, "loss": 0.0001, "step": 23850 }, { "epoch": 22.933653846153845, "grad_norm": 0.0021355540957301855, "learning_rate": 3.591415273776855e-07, "loss": 0.0, "step": 23851 }, { "epoch": 22.934615384615384, "grad_norm": 0.0009830208728089929, "learning_rate": 3.5881076231147226e-07, "loss": 0.0, "step": 23852 }, { "epoch": 22.935576923076923, "grad_norm": 0.0013751467922702432, "learning_rate": 3.584801468482213e-07, "loss": 0.0, "step": 23853 }, { "epoch": 22.93653846153846, "grad_norm": 0.0007316616247408092, "learning_rate": 3.581496809930607e-07, "loss": 0.0, "step": 23854 }, { "epoch": 22.9375, "grad_norm": 0.0023868989665061235, "learning_rate": 3.5781936475111746e-07, "loss": 0.0, "step": 23855 }, { "epoch": 22.93846153846154, "grad_norm": 0.0031542524229735136, "learning_rate": 3.574891981275197e-07, "loss": 0.0, "step": 23856 }, { "epoch": 22.939423076923077, "grad_norm": 0.0026214292738586664, "learning_rate": 3.5715918112738777e-07, "loss": 0.0, "step": 23857 }, { "epoch": 22.940384615384616, "grad_norm": 0.001192388590425253, "learning_rate": 3.568293137558443e-07, "loss": 0.0, "step": 23858 }, { "epoch": 22.941346153846155, "grad_norm": 0.0023622512817382812, "learning_rate": 3.564995960180073e-07, "loss": 0.0, "step": 23859 }, { "epoch": 22.942307692307693, "grad_norm": 0.00062610674649477, "learning_rate": 3.561700279189939e-07, "loss": 0.0, "step": 23860 }, { "epoch": 22.943269230769232, "grad_norm": 0.0020202721934765577, "learning_rate": 3.5584060946391664e-07, "loss": 0.0, "step": 23861 }, { "epoch": 22.94423076923077, "grad_norm": 0.0017925528809428215, "learning_rate": 3.555113406578892e-07, "loss": 0.0, "step": 23862 }, { "epoch": 22.94519230769231, "grad_norm": 0.0008497464586980641, "learning_rate": 3.551822215060174e-07, "loss": 0.0, "step": 23863 }, { "epoch": 22.946153846153845, "grad_norm": 0.0013188602169975638, "learning_rate": 3.548532520134129e-07, "loss": 0.0, "step": 23864 }, { "epoch": 22.947115384615383, "grad_norm": 0.0008232833351939917, "learning_rate": 3.5452443218517597e-07, "loss": 0.0, "step": 23865 }, { "epoch": 22.948076923076922, "grad_norm": 0.0009261891827918589, "learning_rate": 3.5419576202641024e-07, "loss": 0.0, "step": 23866 }, { "epoch": 22.94903846153846, "grad_norm": 0.001293921610340476, "learning_rate": 3.5386724154221843e-07, "loss": 0.0, "step": 23867 }, { "epoch": 22.95, "grad_norm": 0.001031813328154385, "learning_rate": 3.5353887073769413e-07, "loss": 0.0, "step": 23868 }, { "epoch": 22.950961538461538, "grad_norm": 0.001045245211571455, "learning_rate": 3.532106496179344e-07, "loss": 0.0, "step": 23869 }, { "epoch": 22.951923076923077, "grad_norm": 0.0008284251671284437, "learning_rate": 3.5288257818803406e-07, "loss": 0.0, "step": 23870 }, { "epoch": 22.952884615384615, "grad_norm": 0.0009278591605834663, "learning_rate": 3.5255465645308017e-07, "loss": 0.0, "step": 23871 }, { "epoch": 22.953846153846154, "grad_norm": 0.0011671404354274273, "learning_rate": 3.5222688441816423e-07, "loss": 0.0, "step": 23872 }, { "epoch": 22.954807692307693, "grad_norm": 0.0020519671961665154, "learning_rate": 3.51899262088371e-07, "loss": 0.0, "step": 23873 }, { "epoch": 22.95576923076923, "grad_norm": 0.0010536343324929476, "learning_rate": 3.5157178946878424e-07, "loss": 0.0, "step": 23874 }, { "epoch": 22.95673076923077, "grad_norm": 0.0013061827048659325, "learning_rate": 3.5124446656448654e-07, "loss": 0.0, "step": 23875 }, { "epoch": 22.95769230769231, "grad_norm": 0.00022564652317669243, "learning_rate": 3.509172933805549e-07, "loss": 0.0, "step": 23876 }, { "epoch": 22.958653846153847, "grad_norm": 0.0009100190363824368, "learning_rate": 3.5059026992206645e-07, "loss": 0.0, "step": 23877 }, { "epoch": 22.959615384615386, "grad_norm": 0.00207663606852293, "learning_rate": 3.5026339619409823e-07, "loss": 0.0, "step": 23878 }, { "epoch": 22.960576923076925, "grad_norm": 0.0008439907687716186, "learning_rate": 3.4993667220171943e-07, "loss": 0.0, "step": 23879 }, { "epoch": 22.96153846153846, "grad_norm": 0.0011684579076245427, "learning_rate": 3.4961009794999944e-07, "loss": 0.0, "step": 23880 }, { "epoch": 22.9625, "grad_norm": 0.0011983939912170172, "learning_rate": 3.492836734440097e-07, "loss": 0.0, "step": 23881 }, { "epoch": 22.963461538461537, "grad_norm": 0.002990491921082139, "learning_rate": 3.4895739868881175e-07, "loss": 0.0, "step": 23882 }, { "epoch": 22.964423076923076, "grad_norm": 0.0021413052454590797, "learning_rate": 3.4863127368946923e-07, "loss": 0.0, "step": 23883 }, { "epoch": 22.965384615384615, "grad_norm": 0.0025285889860242605, "learning_rate": 3.483052984510438e-07, "loss": 0.0, "step": 23884 }, { "epoch": 22.966346153846153, "grad_norm": 0.0005376716144382954, "learning_rate": 3.4797947297859126e-07, "loss": 0.0, "step": 23885 }, { "epoch": 22.967307692307692, "grad_norm": 0.0006705395062454045, "learning_rate": 3.4765379727717097e-07, "loss": 0.0, "step": 23886 }, { "epoch": 22.96826923076923, "grad_norm": 0.0018669284181669354, "learning_rate": 3.4732827135183335e-07, "loss": 0.0, "step": 23887 }, { "epoch": 22.96923076923077, "grad_norm": 0.0009580704499967396, "learning_rate": 3.4700289520762984e-07, "loss": 0.0, "step": 23888 }, { "epoch": 22.970192307692308, "grad_norm": 0.00201575830578804, "learning_rate": 3.4667766884961205e-07, "loss": 0.0, "step": 23889 }, { "epoch": 22.971153846153847, "grad_norm": 0.00206837709993124, "learning_rate": 3.4635259228282256e-07, "loss": 0.0, "step": 23890 }, { "epoch": 22.972115384615385, "grad_norm": 0.001513361232355237, "learning_rate": 3.460276655123085e-07, "loss": 0.0, "step": 23891 }, { "epoch": 22.973076923076924, "grad_norm": 0.0010641423286870122, "learning_rate": 3.457028885431124e-07, "loss": 0.0, "step": 23892 }, { "epoch": 22.974038461538463, "grad_norm": 0.00151938630733639, "learning_rate": 3.453782613802703e-07, "loss": 0.0, "step": 23893 }, { "epoch": 22.975, "grad_norm": 0.00112510088365525, "learning_rate": 3.450537840288226e-07, "loss": 0.0, "step": 23894 }, { "epoch": 22.97596153846154, "grad_norm": 0.0015976333525031805, "learning_rate": 3.447294564938042e-07, "loss": 0.0, "step": 23895 }, { "epoch": 22.976923076923075, "grad_norm": 0.0011630847584456205, "learning_rate": 3.4440527878024543e-07, "loss": 0.0, "step": 23896 }, { "epoch": 22.977884615384614, "grad_norm": 0.00029715802520513535, "learning_rate": 3.44081250893179e-07, "loss": 0.0, "step": 23897 }, { "epoch": 22.978846153846153, "grad_norm": 0.0007910992135293782, "learning_rate": 3.437573728376309e-07, "loss": 0.0, "step": 23898 }, { "epoch": 22.97980769230769, "grad_norm": 0.0006955467397347093, "learning_rate": 3.4343364461862705e-07, "loss": 0.0, "step": 23899 }, { "epoch": 22.98076923076923, "grad_norm": 0.0021727802231907845, "learning_rate": 3.4311006624119346e-07, "loss": 0.0, "step": 23900 }, { "epoch": 22.98173076923077, "grad_norm": 0.0012905491748824716, "learning_rate": 3.427866377103473e-07, "loss": 0.0, "step": 23901 }, { "epoch": 22.982692307692307, "grad_norm": 0.0016444859793409705, "learning_rate": 3.4246335903110884e-07, "loss": 0.0, "step": 23902 }, { "epoch": 22.983653846153846, "grad_norm": 0.0007848804816603661, "learning_rate": 3.421402302084953e-07, "loss": 0.0, "step": 23903 }, { "epoch": 22.984615384615385, "grad_norm": 0.0003908418002538383, "learning_rate": 3.418172512475193e-07, "loss": 0.0, "step": 23904 }, { "epoch": 22.985576923076923, "grad_norm": 0.002106294734403491, "learning_rate": 3.414944221531935e-07, "loss": 0.0, "step": 23905 }, { "epoch": 22.986538461538462, "grad_norm": 0.0010749788489192724, "learning_rate": 3.4117174293052726e-07, "loss": 0.0, "step": 23906 }, { "epoch": 22.9875, "grad_norm": 0.001387315453030169, "learning_rate": 3.408492135845265e-07, "loss": 0.0, "step": 23907 }, { "epoch": 22.98846153846154, "grad_norm": 0.0010531350271776319, "learning_rate": 3.4052683412019837e-07, "loss": 0.0, "step": 23908 }, { "epoch": 22.989423076923078, "grad_norm": 0.0006868832861073315, "learning_rate": 3.4020460454254113e-07, "loss": 0.0, "step": 23909 }, { "epoch": 22.990384615384617, "grad_norm": 0.0007301248842850327, "learning_rate": 3.3988252485655846e-07, "loss": 0.0, "step": 23910 }, { "epoch": 22.991346153846155, "grad_norm": 0.0004640588886104524, "learning_rate": 3.395605950672476e-07, "loss": 0.0, "step": 23911 }, { "epoch": 22.99230769230769, "grad_norm": 0.001799796475097537, "learning_rate": 3.392388151796022e-07, "loss": 0.0, "step": 23912 }, { "epoch": 22.99326923076923, "grad_norm": 0.0008193756220862269, "learning_rate": 3.389171851986162e-07, "loss": 0.0, "step": 23913 }, { "epoch": 22.994230769230768, "grad_norm": 0.0011425166158005595, "learning_rate": 3.3859570512928096e-07, "loss": 0.0, "step": 23914 }, { "epoch": 22.995192307692307, "grad_norm": 0.0003807322063948959, "learning_rate": 3.382743749765849e-07, "loss": 0.0, "step": 23915 }, { "epoch": 22.996153846153845, "grad_norm": 0.0014482481637969613, "learning_rate": 3.379531947455128e-07, "loss": 0.0, "step": 23916 }, { "epoch": 22.997115384615384, "grad_norm": 0.0008797093760222197, "learning_rate": 3.3763216444105075e-07, "loss": 0.0, "step": 23917 }, { "epoch": 22.998076923076923, "grad_norm": 0.0002726005914155394, "learning_rate": 3.3731128406817693e-07, "loss": 0.0, "step": 23918 }, { "epoch": 22.99903846153846, "grad_norm": 0.0003257238713558763, "learning_rate": 3.369905536318729e-07, "loss": 0.0, "step": 23919 }, { "epoch": 23.0, "grad_norm": 0.003308022627606988, "learning_rate": 3.36669973137117e-07, "loss": 0.0, "step": 23920 }, { "epoch": 23.00096153846154, "grad_norm": 0.0012116070138290524, "learning_rate": 3.3634954258887964e-07, "loss": 0.0, "step": 23921 }, { "epoch": 23.001923076923077, "grad_norm": 0.00045372024760581553, "learning_rate": 3.360292619921357e-07, "loss": 0.0, "step": 23922 }, { "epoch": 23.002884615384616, "grad_norm": 0.0005706097581423819, "learning_rate": 3.3570913135185347e-07, "loss": 0.0, "step": 23923 }, { "epoch": 23.003846153846155, "grad_norm": 0.0007774868281558156, "learning_rate": 3.3538915067300005e-07, "loss": 0.0, "step": 23924 }, { "epoch": 23.004807692307693, "grad_norm": 0.0006632093572989106, "learning_rate": 3.350693199605437e-07, "loss": 0.0, "step": 23925 }, { "epoch": 23.005769230769232, "grad_norm": 0.0010856904555112123, "learning_rate": 3.347496392194449e-07, "loss": 0.0, "step": 23926 }, { "epoch": 23.00673076923077, "grad_norm": 0.0015121634351089597, "learning_rate": 3.344301084546642e-07, "loss": 0.0, "step": 23927 }, { "epoch": 23.00769230769231, "grad_norm": 0.000801229733042419, "learning_rate": 3.341107276711608e-07, "loss": 0.0, "step": 23928 }, { "epoch": 23.008653846153845, "grad_norm": 0.0006781848496757448, "learning_rate": 3.3379149687388866e-07, "loss": 0.0, "step": 23929 }, { "epoch": 23.009615384615383, "grad_norm": 0.0011165355099365115, "learning_rate": 3.3347241606780267e-07, "loss": 0.0, "step": 23930 }, { "epoch": 23.010576923076922, "grad_norm": 0.0007634491194039583, "learning_rate": 3.331534852578544e-07, "loss": 0.0, "step": 23931 }, { "epoch": 23.01153846153846, "grad_norm": 0.000768216559663415, "learning_rate": 3.328347044489921e-07, "loss": 0.0, "step": 23932 }, { "epoch": 23.0125, "grad_norm": 0.0016081704525277019, "learning_rate": 3.32516073646163e-07, "loss": 0.0, "step": 23933 }, { "epoch": 23.013461538461538, "grad_norm": 0.0014966203598305583, "learning_rate": 3.321975928543097e-07, "loss": 0.0, "step": 23934 }, { "epoch": 23.014423076923077, "grad_norm": 0.0008954452350735664, "learning_rate": 3.3187926207837506e-07, "loss": 0.0, "step": 23935 }, { "epoch": 23.015384615384615, "grad_norm": 0.0005290896515361965, "learning_rate": 3.3156108132329944e-07, "loss": 0.0, "step": 23936 }, { "epoch": 23.016346153846154, "grad_norm": 0.0016322542214766145, "learning_rate": 3.3124305059401896e-07, "loss": 0.0, "step": 23937 }, { "epoch": 23.017307692307693, "grad_norm": 0.0012850124621763825, "learning_rate": 3.309251698954685e-07, "loss": 0.0, "step": 23938 }, { "epoch": 23.01826923076923, "grad_norm": 0.0013325265608727932, "learning_rate": 3.30607439232582e-07, "loss": 0.0, "step": 23939 }, { "epoch": 23.01923076923077, "grad_norm": 0.001103463233448565, "learning_rate": 3.302898586102876e-07, "loss": 0.0, "step": 23940 }, { "epoch": 23.02019230769231, "grad_norm": 0.0008009501616470516, "learning_rate": 3.299724280335148e-07, "loss": 0.0, "step": 23941 }, { "epoch": 23.021153846153847, "grad_norm": 0.0009241641964763403, "learning_rate": 3.2965514750718964e-07, "loss": 0.0, "step": 23942 }, { "epoch": 23.022115384615386, "grad_norm": 0.000588199298363179, "learning_rate": 3.293380170362326e-07, "loss": 0.0, "step": 23943 }, { "epoch": 23.023076923076925, "grad_norm": 0.001714452519081533, "learning_rate": 3.2902103662556863e-07, "loss": 0.0, "step": 23944 }, { "epoch": 23.02403846153846, "grad_norm": 0.0007646643789485097, "learning_rate": 3.2870420628011267e-07, "loss": 0.0, "step": 23945 }, { "epoch": 23.025, "grad_norm": 0.0012321654940024018, "learning_rate": 3.2838752600478194e-07, "loss": 0.0, "step": 23946 }, { "epoch": 23.025961538461537, "grad_norm": 0.0004379121237434447, "learning_rate": 3.280709958044925e-07, "loss": 0.0, "step": 23947 }, { "epoch": 23.026923076923076, "grad_norm": 0.0015330302994698286, "learning_rate": 3.2775461568415377e-07, "loss": 0.0, "step": 23948 }, { "epoch": 23.027884615384615, "grad_norm": 0.0010538353817537427, "learning_rate": 3.2743838564867513e-07, "loss": 0.0, "step": 23949 }, { "epoch": 23.028846153846153, "grad_norm": 0.001628383994102478, "learning_rate": 3.27122305702966e-07, "loss": 0.0, "step": 23950 }, { "epoch": 23.029807692307692, "grad_norm": 0.0005021648248657584, "learning_rate": 3.2680637585192687e-07, "loss": 0.0, "step": 23951 }, { "epoch": 23.03076923076923, "grad_norm": 0.0008777822949923575, "learning_rate": 3.2649059610046276e-07, "loss": 0.0, "step": 23952 }, { "epoch": 23.03173076923077, "grad_norm": 0.0018013276858255267, "learning_rate": 3.2617496645347526e-07, "loss": 0.0, "step": 23953 }, { "epoch": 23.032692307692308, "grad_norm": 0.0009076778660528362, "learning_rate": 3.258594869158571e-07, "loss": 0.0, "step": 23954 }, { "epoch": 23.033653846153847, "grad_norm": 0.0015751203754916787, "learning_rate": 3.255441574925089e-07, "loss": 0.0, "step": 23955 }, { "epoch": 23.034615384615385, "grad_norm": 0.0026699525769799948, "learning_rate": 3.2522897818832e-07, "loss": 0.0, "step": 23956 }, { "epoch": 23.035576923076924, "grad_norm": 0.0014776373282074928, "learning_rate": 3.2491394900818207e-07, "loss": 0.0, "step": 23957 }, { "epoch": 23.036538461538463, "grad_norm": 0.0012992004631087184, "learning_rate": 3.2459906995698454e-07, "loss": 0.0, "step": 23958 }, { "epoch": 23.0375, "grad_norm": 0.0005550992791540921, "learning_rate": 3.2428434103961124e-07, "loss": 0.0, "step": 23959 }, { "epoch": 23.03846153846154, "grad_norm": 0.0011806386755779386, "learning_rate": 3.2396976226094723e-07, "loss": 0.0, "step": 23960 }, { "epoch": 23.039423076923075, "grad_norm": 0.001366089447401464, "learning_rate": 3.236553336258741e-07, "loss": 0.0, "step": 23961 }, { "epoch": 23.040384615384614, "grad_norm": 0.00032521638786420226, "learning_rate": 3.2334105513927016e-07, "loss": 0.0, "step": 23962 }, { "epoch": 23.041346153846153, "grad_norm": 0.00036307095433585346, "learning_rate": 3.2302692680601264e-07, "loss": 0.0, "step": 23963 }, { "epoch": 23.04230769230769, "grad_norm": 0.002862997120246291, "learning_rate": 3.2271294863097656e-07, "loss": 0.0, "step": 23964 }, { "epoch": 23.04326923076923, "grad_norm": 0.0025336630642414093, "learning_rate": 3.2239912061903133e-07, "loss": 0.0, "step": 23965 }, { "epoch": 23.04423076923077, "grad_norm": 0.0006072688265703619, "learning_rate": 3.2208544277504974e-07, "loss": 0.0, "step": 23966 }, { "epoch": 23.045192307692307, "grad_norm": 0.0006920272717252374, "learning_rate": 3.217719151038967e-07, "loss": 0.0, "step": 23967 }, { "epoch": 23.046153846153846, "grad_norm": 0.0007012482383288443, "learning_rate": 3.2145853761043844e-07, "loss": 0.0, "step": 23968 }, { "epoch": 23.047115384615385, "grad_norm": 0.0011950633488595486, "learning_rate": 3.2114531029953877e-07, "loss": 0.0, "step": 23969 }, { "epoch": 23.048076923076923, "grad_norm": 0.0016068856930360198, "learning_rate": 3.2083223317605495e-07, "loss": 0.0, "step": 23970 }, { "epoch": 23.049038461538462, "grad_norm": 0.0009015598916448653, "learning_rate": 3.205193062448475e-07, "loss": 0.0, "step": 23971 }, { "epoch": 23.05, "grad_norm": 0.0005815870244987309, "learning_rate": 3.2020652951077256e-07, "loss": 0.0, "step": 23972 }, { "epoch": 23.05096153846154, "grad_norm": 0.0007878773030824959, "learning_rate": 3.1989390297868183e-07, "loss": 0.0, "step": 23973 }, { "epoch": 23.051923076923078, "grad_norm": 0.00329818413592875, "learning_rate": 3.195814266534258e-07, "loss": 0.0, "step": 23974 }, { "epoch": 23.052884615384617, "grad_norm": 0.0005535502568818629, "learning_rate": 3.192691005398563e-07, "loss": 0.0, "step": 23975 }, { "epoch": 23.053846153846155, "grad_norm": 0.0007658868562430143, "learning_rate": 3.189569246428159e-07, "loss": 0.0, "step": 23976 }, { "epoch": 23.05480769230769, "grad_norm": 0.001339251408353448, "learning_rate": 3.18644898967152e-07, "loss": 0.0, "step": 23977 }, { "epoch": 23.05576923076923, "grad_norm": 0.0012066814815625548, "learning_rate": 3.18333023517704e-07, "loss": 0.0, "step": 23978 }, { "epoch": 23.056730769230768, "grad_norm": 0.0013984915567561984, "learning_rate": 3.180212982993114e-07, "loss": 0.0, "step": 23979 }, { "epoch": 23.057692307692307, "grad_norm": 0.001143262255936861, "learning_rate": 3.177097233168136e-07, "loss": 0.0, "step": 23980 }, { "epoch": 23.058653846153845, "grad_norm": 0.0009369269828312099, "learning_rate": 3.1739829857504235e-07, "loss": 0.0, "step": 23981 }, { "epoch": 23.059615384615384, "grad_norm": 0.0003838038828689605, "learning_rate": 3.170870240788315e-07, "loss": 0.0, "step": 23982 }, { "epoch": 23.060576923076923, "grad_norm": 0.0012709208531305194, "learning_rate": 3.167758998330128e-07, "loss": 0.0, "step": 23983 }, { "epoch": 23.06153846153846, "grad_norm": 0.0024086786434054375, "learning_rate": 3.164649258424102e-07, "loss": 0.0, "step": 23984 }, { "epoch": 23.0625, "grad_norm": 0.000959328783210367, "learning_rate": 3.1615410211185196e-07, "loss": 0.0, "step": 23985 }, { "epoch": 23.06346153846154, "grad_norm": 0.0007336550625041127, "learning_rate": 3.1584342864616093e-07, "loss": 0.0, "step": 23986 }, { "epoch": 23.064423076923077, "grad_norm": 0.0012015654938295484, "learning_rate": 3.1553290545015547e-07, "loss": 0.0, "step": 23987 }, { "epoch": 23.065384615384616, "grad_norm": 0.0013553895987570286, "learning_rate": 3.152225325286573e-07, "loss": 0.0, "step": 23988 }, { "epoch": 23.066346153846155, "grad_norm": 0.0008643597830086946, "learning_rate": 3.1491230988648035e-07, "loss": 0.0, "step": 23989 }, { "epoch": 23.067307692307693, "grad_norm": 0.0012942739995196462, "learning_rate": 3.1460223752843854e-07, "loss": 0.0, "step": 23990 }, { "epoch": 23.068269230769232, "grad_norm": 0.0010534081375226378, "learning_rate": 3.1429231545934467e-07, "loss": 0.0, "step": 23991 }, { "epoch": 23.06923076923077, "grad_norm": 0.0024320543743669987, "learning_rate": 3.13982543684006e-07, "loss": 0.0, "step": 23992 }, { "epoch": 23.07019230769231, "grad_norm": 0.001227190368808806, "learning_rate": 3.13672922207231e-07, "loss": 0.0, "step": 23993 }, { "epoch": 23.071153846153845, "grad_norm": 0.0002821441739797592, "learning_rate": 3.133634510338235e-07, "loss": 0.0, "step": 23994 }, { "epoch": 23.072115384615383, "grad_norm": 0.00025701915728859603, "learning_rate": 3.130541301685841e-07, "loss": 0.0, "step": 23995 }, { "epoch": 23.073076923076922, "grad_norm": 0.0006608654512092471, "learning_rate": 3.127449596163135e-07, "loss": 0.0, "step": 23996 }, { "epoch": 23.07403846153846, "grad_norm": 0.0005906790029257536, "learning_rate": 3.1243593938181106e-07, "loss": 0.0, "step": 23997 }, { "epoch": 23.075, "grad_norm": 0.3868582248687744, "learning_rate": 3.121270694698686e-07, "loss": 0.0037, "step": 23998 }, { "epoch": 23.075961538461538, "grad_norm": 0.0018843049183487892, "learning_rate": 3.1181834988528113e-07, "loss": 0.0, "step": 23999 }, { "epoch": 23.076923076923077, "grad_norm": 0.0006170331034809351, "learning_rate": 3.1150978063283823e-07, "loss": 0.0, "step": 24000 }, { "epoch": 23.077884615384615, "grad_norm": 0.0004116980417165905, "learning_rate": 3.1120136171732816e-07, "loss": 0.0, "step": 24001 }, { "epoch": 23.078846153846154, "grad_norm": 0.001518215867690742, "learning_rate": 3.108930931435372e-07, "loss": 0.0, "step": 24002 }, { "epoch": 23.079807692307693, "grad_norm": 0.0007615136564709246, "learning_rate": 3.1058497491624704e-07, "loss": 0.0, "step": 24003 }, { "epoch": 23.08076923076923, "grad_norm": 0.0011160260764881968, "learning_rate": 3.1027700704024057e-07, "loss": 0.0, "step": 24004 }, { "epoch": 23.08173076923077, "grad_norm": 0.0011794542660936713, "learning_rate": 3.0996918952029717e-07, "loss": 0.0, "step": 24005 }, { "epoch": 23.08269230769231, "grad_norm": 0.0005711938138119876, "learning_rate": 3.0966152236118983e-07, "loss": 0.0, "step": 24006 }, { "epoch": 23.083653846153847, "grad_norm": 0.0019056047312915325, "learning_rate": 3.093540055676958e-07, "loss": 0.0, "step": 24007 }, { "epoch": 23.084615384615386, "grad_norm": 0.0014646605122834444, "learning_rate": 3.090466391445868e-07, "loss": 0.0, "step": 24008 }, { "epoch": 23.085576923076925, "grad_norm": 0.0004579324449878186, "learning_rate": 3.087394230966312e-07, "loss": 0.0, "step": 24009 }, { "epoch": 23.08653846153846, "grad_norm": 0.0013262310530990362, "learning_rate": 3.084323574285952e-07, "loss": 0.0, "step": 24010 }, { "epoch": 23.0875, "grad_norm": 0.0004870393604505807, "learning_rate": 3.081254421452451e-07, "loss": 0.0, "step": 24011 }, { "epoch": 23.088461538461537, "grad_norm": 0.0009032902307808399, "learning_rate": 3.078186772513425e-07, "loss": 0.0, "step": 24012 }, { "epoch": 23.089423076923076, "grad_norm": 0.001108181313611567, "learning_rate": 3.0751206275164925e-07, "loss": 0.0, "step": 24013 }, { "epoch": 23.090384615384615, "grad_norm": 0.0005133913946337998, "learning_rate": 3.072055986509204e-07, "loss": 0.0, "step": 24014 }, { "epoch": 23.091346153846153, "grad_norm": 0.0002982072765007615, "learning_rate": 3.0689928495391207e-07, "loss": 0.0, "step": 24015 }, { "epoch": 23.092307692307692, "grad_norm": 0.0036176880821585655, "learning_rate": 3.0659312166537947e-07, "loss": 0.0, "step": 24016 }, { "epoch": 23.09326923076923, "grad_norm": 0.000806482566986233, "learning_rate": 3.0628710879007095e-07, "loss": 0.0, "step": 24017 }, { "epoch": 23.09423076923077, "grad_norm": 0.0014620390720665455, "learning_rate": 3.0598124633273493e-07, "loss": 0.0, "step": 24018 }, { "epoch": 23.095192307692308, "grad_norm": 0.00284905219450593, "learning_rate": 3.0567553429811994e-07, "loss": 0.0, "step": 24019 }, { "epoch": 23.096153846153847, "grad_norm": 0.0013480064226314425, "learning_rate": 3.053699726909676e-07, "loss": 0.0, "step": 24020 }, { "epoch": 23.097115384615385, "grad_norm": 0.004271280486136675, "learning_rate": 3.050645615160197e-07, "loss": 0.0, "step": 24021 }, { "epoch": 23.098076923076924, "grad_norm": 0.0012341247638687491, "learning_rate": 3.0475930077801584e-07, "loss": 0.0, "step": 24022 }, { "epoch": 23.099038461538463, "grad_norm": 0.0009095814311876893, "learning_rate": 3.0445419048169224e-07, "loss": 0.0, "step": 24023 }, { "epoch": 23.1, "grad_norm": 0.0013807155191898346, "learning_rate": 3.041492306317839e-07, "loss": 0.0, "step": 24024 }, { "epoch": 23.10096153846154, "grad_norm": 0.0006987737724557519, "learning_rate": 3.038444212330216e-07, "loss": 0.0, "step": 24025 }, { "epoch": 23.101923076923075, "grad_norm": 0.0014242634642869234, "learning_rate": 3.0353976229013596e-07, "loss": 0.0, "step": 24026 }, { "epoch": 23.102884615384614, "grad_norm": 0.0007564491243101656, "learning_rate": 3.0323525380785645e-07, "loss": 0.0, "step": 24027 }, { "epoch": 23.103846153846153, "grad_norm": 0.0005307120736688375, "learning_rate": 3.029308957909038e-07, "loss": 0.0, "step": 24028 }, { "epoch": 23.10480769230769, "grad_norm": 0.0018359109526500106, "learning_rate": 3.026266882440043e-07, "loss": 0.0, "step": 24029 }, { "epoch": 23.10576923076923, "grad_norm": 0.000678542535752058, "learning_rate": 3.023226311718774e-07, "loss": 0.0, "step": 24030 }, { "epoch": 23.10673076923077, "grad_norm": 0.000713373941835016, "learning_rate": 3.0201872457923944e-07, "loss": 0.0, "step": 24031 }, { "epoch": 23.107692307692307, "grad_norm": 0.0012354919454082847, "learning_rate": 3.0171496847080875e-07, "loss": 0.0, "step": 24032 }, { "epoch": 23.108653846153846, "grad_norm": 0.001852680346928537, "learning_rate": 3.0141136285129825e-07, "loss": 0.0, "step": 24033 }, { "epoch": 23.109615384615385, "grad_norm": 0.0007301852456294, "learning_rate": 3.0110790772541644e-07, "loss": 0.0, "step": 24034 }, { "epoch": 23.110576923076923, "grad_norm": 0.00122775265481323, "learning_rate": 3.0080460309787615e-07, "loss": 0.0, "step": 24035 }, { "epoch": 23.111538461538462, "grad_norm": 0.0011460795067250729, "learning_rate": 3.005014489733804e-07, "loss": 0.0, "step": 24036 }, { "epoch": 23.1125, "grad_norm": 0.0006104448693804443, "learning_rate": 3.0019844535663425e-07, "loss": 0.0, "step": 24037 }, { "epoch": 23.11346153846154, "grad_norm": 0.00071301608113572, "learning_rate": 2.9989559225234057e-07, "loss": 0.0, "step": 24038 }, { "epoch": 23.114423076923078, "grad_norm": 0.0014999427367001772, "learning_rate": 2.995928896651956e-07, "loss": 0.0, "step": 24039 }, { "epoch": 23.115384615384617, "grad_norm": 0.0011195146944373846, "learning_rate": 2.992903375999012e-07, "loss": 0.0, "step": 24040 }, { "epoch": 23.116346153846155, "grad_norm": 0.0009555199649184942, "learning_rate": 2.989879360611481e-07, "loss": 0.0, "step": 24041 }, { "epoch": 23.11730769230769, "grad_norm": 0.0015660235658288002, "learning_rate": 2.9868568505363016e-07, "loss": 0.0, "step": 24042 }, { "epoch": 23.11826923076923, "grad_norm": 0.0011168167693540454, "learning_rate": 2.983835845820393e-07, "loss": 0.0, "step": 24043 }, { "epoch": 23.119230769230768, "grad_norm": 0.0009705721167847514, "learning_rate": 2.9808163465105957e-07, "loss": 0.0, "step": 24044 }, { "epoch": 23.120192307692307, "grad_norm": 0.00022024368809070438, "learning_rate": 2.977798352653782e-07, "loss": 0.0, "step": 24045 }, { "epoch": 23.121153846153845, "grad_norm": 0.0007896953029558063, "learning_rate": 2.974781864296783e-07, "loss": 0.0, "step": 24046 }, { "epoch": 23.122115384615384, "grad_norm": 0.0008007528958842158, "learning_rate": 2.9717668814864043e-07, "loss": 0.0, "step": 24047 }, { "epoch": 23.123076923076923, "grad_norm": 0.0022752885706722736, "learning_rate": 2.96875340426942e-07, "loss": 0.0, "step": 24048 }, { "epoch": 23.12403846153846, "grad_norm": 0.0006279583903960884, "learning_rate": 2.9657414326926146e-07, "loss": 0.0, "step": 24049 }, { "epoch": 23.125, "grad_norm": 0.0012646745890378952, "learning_rate": 2.962730966802707e-07, "loss": 0.0, "step": 24050 }, { "epoch": 23.12596153846154, "grad_norm": 0.00031955799204297364, "learning_rate": 2.959722006646415e-07, "loss": 0.0, "step": 24051 }, { "epoch": 23.126923076923077, "grad_norm": 0.002569369738921523, "learning_rate": 2.9567145522704235e-07, "loss": 0.0, "step": 24052 }, { "epoch": 23.127884615384616, "grad_norm": 0.0006920512532815337, "learning_rate": 2.9537086037213945e-07, "loss": 0.0, "step": 24053 }, { "epoch": 23.128846153846155, "grad_norm": 0.0013168305158615112, "learning_rate": 2.950704161046003e-07, "loss": 0.0, "step": 24054 }, { "epoch": 23.129807692307693, "grad_norm": 0.004299572668969631, "learning_rate": 2.947701224290822e-07, "loss": 0.0, "step": 24055 }, { "epoch": 23.130769230769232, "grad_norm": 0.0018696545157581568, "learning_rate": 2.944699793502481e-07, "loss": 0.0, "step": 24056 }, { "epoch": 23.13173076923077, "grad_norm": 0.0006181081989780068, "learning_rate": 2.9416998687275543e-07, "loss": 0.0, "step": 24057 }, { "epoch": 23.13269230769231, "grad_norm": 0.0018923800671473145, "learning_rate": 2.9387014500125823e-07, "loss": 0.0, "step": 24058 }, { "epoch": 23.133653846153845, "grad_norm": 0.00274416315369308, "learning_rate": 2.935704537404083e-07, "loss": 0.0, "step": 24059 }, { "epoch": 23.134615384615383, "grad_norm": 0.00034716250956989825, "learning_rate": 2.9327091309485743e-07, "loss": 0.0, "step": 24060 }, { "epoch": 23.135576923076922, "grad_norm": 0.0006785966688767076, "learning_rate": 2.929715230692531e-07, "loss": 0.0, "step": 24061 }, { "epoch": 23.13653846153846, "grad_norm": 0.0030578537844121456, "learning_rate": 2.926722836682416e-07, "loss": 0.0, "step": 24062 }, { "epoch": 23.1375, "grad_norm": 0.0005595545517280698, "learning_rate": 2.9237319489646363e-07, "loss": 0.0, "step": 24063 }, { "epoch": 23.138461538461538, "grad_norm": 0.0015968449879437685, "learning_rate": 2.9207425675856324e-07, "loss": 0.0, "step": 24064 }, { "epoch": 23.139423076923077, "grad_norm": 0.0008677790756337345, "learning_rate": 2.9177546925917896e-07, "loss": 0.0, "step": 24065 }, { "epoch": 23.140384615384615, "grad_norm": 0.0008058593957684934, "learning_rate": 2.914768324029449e-07, "loss": 0.0, "step": 24066 }, { "epoch": 23.141346153846154, "grad_norm": 0.0005347355036064982, "learning_rate": 2.9117834619449615e-07, "loss": 0.0, "step": 24067 }, { "epoch": 23.142307692307693, "grad_norm": 0.0008190483204089105, "learning_rate": 2.908800106384646e-07, "loss": 0.0, "step": 24068 }, { "epoch": 23.14326923076923, "grad_norm": 0.0009963788324967027, "learning_rate": 2.905818257394799e-07, "loss": 0.0, "step": 24069 }, { "epoch": 23.14423076923077, "grad_norm": 0.0008364584646187723, "learning_rate": 2.9028379150216726e-07, "loss": 0.0, "step": 24070 }, { "epoch": 23.14519230769231, "grad_norm": 0.0018600814510136843, "learning_rate": 2.89985907931154e-07, "loss": 0.0, "step": 24071 }, { "epoch": 23.146153846153847, "grad_norm": 0.00026595359668135643, "learning_rate": 2.8968817503105984e-07, "loss": 0.0, "step": 24072 }, { "epoch": 23.147115384615386, "grad_norm": 0.000742885924410075, "learning_rate": 2.893905928065066e-07, "loss": 0.0, "step": 24073 }, { "epoch": 23.148076923076925, "grad_norm": 0.0011068001622334123, "learning_rate": 2.890931612621106e-07, "loss": 0.0, "step": 24074 }, { "epoch": 23.14903846153846, "grad_norm": 0.0015426096506416798, "learning_rate": 2.8879588040248706e-07, "loss": 0.0, "step": 24075 }, { "epoch": 23.15, "grad_norm": 0.000292821554467082, "learning_rate": 2.884987502322512e-07, "loss": 0.0, "step": 24076 }, { "epoch": 23.150961538461537, "grad_norm": 0.0011180584551766515, "learning_rate": 2.882017707560092e-07, "loss": 0.0, "step": 24077 }, { "epoch": 23.151923076923076, "grad_norm": 0.0016267100581899285, "learning_rate": 2.8790494197837304e-07, "loss": 0.0, "step": 24078 }, { "epoch": 23.152884615384615, "grad_norm": 0.0017836615443229675, "learning_rate": 2.87608263903949e-07, "loss": 0.0, "step": 24079 }, { "epoch": 23.153846153846153, "grad_norm": 0.0009422721923328936, "learning_rate": 2.873117365373379e-07, "loss": 0.0, "step": 24080 }, { "epoch": 23.154807692307692, "grad_norm": 0.0021567170042544603, "learning_rate": 2.870153598831416e-07, "loss": 0.0, "step": 24081 }, { "epoch": 23.15576923076923, "grad_norm": 0.0007787004578858614, "learning_rate": 2.867191339459607e-07, "loss": 0.0, "step": 24082 }, { "epoch": 23.15673076923077, "grad_norm": 0.0005579745047725737, "learning_rate": 2.8642305873039066e-07, "loss": 0.0, "step": 24083 }, { "epoch": 23.157692307692308, "grad_norm": 0.00043704750714823604, "learning_rate": 2.8612713424102546e-07, "loss": 0.0, "step": 24084 }, { "epoch": 23.158653846153847, "grad_norm": 0.0002588916686363518, "learning_rate": 2.8583136048245697e-07, "loss": 0.0, "step": 24085 }, { "epoch": 23.159615384615385, "grad_norm": 0.001958179287612438, "learning_rate": 2.8553573745927487e-07, "loss": 0.0, "step": 24086 }, { "epoch": 23.160576923076924, "grad_norm": 0.0008550049969926476, "learning_rate": 2.8524026517606775e-07, "loss": 0.0, "step": 24087 }, { "epoch": 23.161538461538463, "grad_norm": 0.0007845315267331898, "learning_rate": 2.8494494363741854e-07, "loss": 0.0, "step": 24088 }, { "epoch": 23.1625, "grad_norm": 0.0008018224034458399, "learning_rate": 2.8464977284790917e-07, "loss": 0.0, "step": 24089 }, { "epoch": 23.16346153846154, "grad_norm": 0.0006929272785782814, "learning_rate": 2.8435475281212376e-07, "loss": 0.0, "step": 24090 }, { "epoch": 23.164423076923075, "grad_norm": 0.0009046782506629825, "learning_rate": 2.840598835346353e-07, "loss": 0.0, "step": 24091 }, { "epoch": 23.165384615384614, "grad_norm": 0.007938532158732414, "learning_rate": 2.8376516502002125e-07, "loss": 0.0, "step": 24092 }, { "epoch": 23.166346153846153, "grad_norm": 0.000485973316244781, "learning_rate": 2.8347059727285687e-07, "loss": 0.0, "step": 24093 }, { "epoch": 23.16730769230769, "grad_norm": 0.0009343623532913625, "learning_rate": 2.831761802977107e-07, "loss": 0.0, "step": 24094 }, { "epoch": 23.16826923076923, "grad_norm": 0.0012541101314127445, "learning_rate": 2.828819140991512e-07, "loss": 0.0, "step": 24095 }, { "epoch": 23.16923076923077, "grad_norm": 0.00019411827088333666, "learning_rate": 2.825877986817449e-07, "loss": 0.0, "step": 24096 }, { "epoch": 23.170192307692307, "grad_norm": 0.00170827005058527, "learning_rate": 2.8229383405005584e-07, "loss": 0.0, "step": 24097 }, { "epoch": 23.171153846153846, "grad_norm": 0.001867880579084158, "learning_rate": 2.820000202086459e-07, "loss": 0.0, "step": 24098 }, { "epoch": 23.172115384615385, "grad_norm": 0.001716090482659638, "learning_rate": 2.8170635716207263e-07, "loss": 0.0, "step": 24099 }, { "epoch": 23.173076923076923, "grad_norm": 0.0008143368177115917, "learning_rate": 2.814128449148945e-07, "loss": 0.0, "step": 24100 }, { "epoch": 23.174038461538462, "grad_norm": 0.0019417935982346535, "learning_rate": 2.8111948347166574e-07, "loss": 0.0, "step": 24101 }, { "epoch": 23.175, "grad_norm": 0.0009868917986750603, "learning_rate": 2.808262728369371e-07, "loss": 0.0, "step": 24102 }, { "epoch": 23.17596153846154, "grad_norm": 0.0009152318816632032, "learning_rate": 2.805332130152594e-07, "loss": 0.0, "step": 24103 }, { "epoch": 23.176923076923078, "grad_norm": 0.0005760618951171637, "learning_rate": 2.8024030401118006e-07, "loss": 0.0, "step": 24104 }, { "epoch": 23.177884615384617, "grad_norm": 0.0009619363700039685, "learning_rate": 2.799475458292444e-07, "loss": 0.0, "step": 24105 }, { "epoch": 23.178846153846155, "grad_norm": 0.0015928492648527026, "learning_rate": 2.796549384739944e-07, "loss": 0.0, "step": 24106 }, { "epoch": 23.17980769230769, "grad_norm": 0.0017229558434337378, "learning_rate": 2.793624819499707e-07, "loss": 0.0, "step": 24107 }, { "epoch": 23.18076923076923, "grad_norm": 0.0033313927706331015, "learning_rate": 2.790701762617121e-07, "loss": 0.0, "step": 24108 }, { "epoch": 23.181730769230768, "grad_norm": 0.0005796834593638778, "learning_rate": 2.7877802141375366e-07, "loss": 0.0, "step": 24109 }, { "epoch": 23.182692307692307, "grad_norm": 0.0007301759324036539, "learning_rate": 2.7848601741062855e-07, "loss": 0.0, "step": 24110 }, { "epoch": 23.183653846153845, "grad_norm": 0.00034696291550062597, "learning_rate": 2.781941642568686e-07, "loss": 0.0, "step": 24111 }, { "epoch": 23.184615384615384, "grad_norm": 0.000543822068721056, "learning_rate": 2.7790246195700255e-07, "loss": 0.0, "step": 24112 }, { "epoch": 23.185576923076923, "grad_norm": 0.0028399787843227386, "learning_rate": 2.776109105155556e-07, "loss": 0.0, "step": 24113 }, { "epoch": 23.18653846153846, "grad_norm": 0.0011328047839924693, "learning_rate": 2.773195099370518e-07, "loss": 0.0, "step": 24114 }, { "epoch": 23.1875, "grad_norm": 0.0008326403913088143, "learning_rate": 2.7702826022601436e-07, "loss": 0.0, "step": 24115 }, { "epoch": 23.18846153846154, "grad_norm": 0.0009135613800026476, "learning_rate": 2.767371613869607e-07, "loss": 0.0, "step": 24116 }, { "epoch": 23.189423076923077, "grad_norm": 0.000946692714933306, "learning_rate": 2.7644621342441056e-07, "loss": 0.0, "step": 24117 }, { "epoch": 23.190384615384616, "grad_norm": 0.000849189586006105, "learning_rate": 2.761554163428759e-07, "loss": 0.0, "step": 24118 }, { "epoch": 23.191346153846155, "grad_norm": 0.0006264140247367322, "learning_rate": 2.758647701468697e-07, "loss": 0.0, "step": 24119 }, { "epoch": 23.192307692307693, "grad_norm": 0.0006327999872155488, "learning_rate": 2.7557427484090405e-07, "loss": 0.0, "step": 24120 }, { "epoch": 23.193269230769232, "grad_norm": 0.0003455744590610266, "learning_rate": 2.75283930429483e-07, "loss": 0.0, "step": 24121 }, { "epoch": 23.19423076923077, "grad_norm": 0.000327794550685212, "learning_rate": 2.7499373691711296e-07, "loss": 0.0, "step": 24122 }, { "epoch": 23.19519230769231, "grad_norm": 0.0007959595532156527, "learning_rate": 2.7470369430829924e-07, "loss": 0.0, "step": 24123 }, { "epoch": 23.196153846153845, "grad_norm": 0.0009388612816110253, "learning_rate": 2.744138026075405e-07, "loss": 0.0, "step": 24124 }, { "epoch": 23.197115384615383, "grad_norm": 0.001047105179168284, "learning_rate": 2.7412406181933416e-07, "loss": 0.0, "step": 24125 }, { "epoch": 23.198076923076922, "grad_norm": 0.000824422633741051, "learning_rate": 2.7383447194817893e-07, "loss": 0.0, "step": 24126 }, { "epoch": 23.19903846153846, "grad_norm": 0.0004962945822626352, "learning_rate": 2.735450329985656e-07, "loss": 0.0, "step": 24127 }, { "epoch": 23.2, "grad_norm": 0.0006354587385430932, "learning_rate": 2.732557449749873e-07, "loss": 0.0, "step": 24128 }, { "epoch": 23.200961538461538, "grad_norm": 0.0017398971831426024, "learning_rate": 2.729666078819326e-07, "loss": 0.0, "step": 24129 }, { "epoch": 23.201923076923077, "grad_norm": 0.0018462800653651357, "learning_rate": 2.7267762172388577e-07, "loss": 0.0, "step": 24130 }, { "epoch": 23.202884615384615, "grad_norm": 0.0053089940920472145, "learning_rate": 2.723887865053354e-07, "loss": 0.0, "step": 24131 }, { "epoch": 23.203846153846154, "grad_norm": 0.0010544254910200834, "learning_rate": 2.72100102230759e-07, "loss": 0.0, "step": 24132 }, { "epoch": 23.204807692307693, "grad_norm": 0.001257518888451159, "learning_rate": 2.7181156890463745e-07, "loss": 0.0, "step": 24133 }, { "epoch": 23.20576923076923, "grad_norm": 0.0005043172859586775, "learning_rate": 2.715231865314505e-07, "loss": 0.0, "step": 24134 }, { "epoch": 23.20673076923077, "grad_norm": 0.0006554457359015942, "learning_rate": 2.71234955115669e-07, "loss": 0.0, "step": 24135 }, { "epoch": 23.20769230769231, "grad_norm": 0.0012661319924518466, "learning_rate": 2.709468746617683e-07, "loss": 0.0, "step": 24136 }, { "epoch": 23.208653846153847, "grad_norm": 0.001058272086083889, "learning_rate": 2.706589451742181e-07, "loss": 0.0, "step": 24137 }, { "epoch": 23.209615384615386, "grad_norm": 0.0005483674467541277, "learning_rate": 2.70371166657486e-07, "loss": 0.0, "step": 24138 }, { "epoch": 23.210576923076925, "grad_norm": 0.0006189614068716764, "learning_rate": 2.700835391160361e-07, "loss": 0.0, "step": 24139 }, { "epoch": 23.21153846153846, "grad_norm": 0.0009262990788556635, "learning_rate": 2.697960625543339e-07, "loss": 0.0, "step": 24140 }, { "epoch": 23.2125, "grad_norm": 0.0012322354596108198, "learning_rate": 2.695087369768379e-07, "loss": 0.0, "step": 24141 }, { "epoch": 23.213461538461537, "grad_norm": 0.0015189595287665725, "learning_rate": 2.69221562388009e-07, "loss": 0.0, "step": 24142 }, { "epoch": 23.214423076923076, "grad_norm": 0.0008754116715863347, "learning_rate": 2.6893453879230034e-07, "loss": 0.0, "step": 24143 }, { "epoch": 23.215384615384615, "grad_norm": 0.0008433845359832048, "learning_rate": 2.686476661941673e-07, "loss": 0.0, "step": 24144 }, { "epoch": 23.216346153846153, "grad_norm": 0.0014702440239489079, "learning_rate": 2.6836094459806284e-07, "loss": 0.0, "step": 24145 }, { "epoch": 23.217307692307692, "grad_norm": 0.0011662321630865335, "learning_rate": 2.680743740084335e-07, "loss": 0.0, "step": 24146 }, { "epoch": 23.21826923076923, "grad_norm": 0.0017505153082311153, "learning_rate": 2.6778795442972685e-07, "loss": 0.0, "step": 24147 }, { "epoch": 23.21923076923077, "grad_norm": 0.0008659971063025296, "learning_rate": 2.6750168586638924e-07, "loss": 0.0, "step": 24148 }, { "epoch": 23.220192307692308, "grad_norm": 0.0019341637380421162, "learning_rate": 2.6721556832285836e-07, "loss": 0.0, "step": 24149 }, { "epoch": 23.221153846153847, "grad_norm": 0.0007180313696153462, "learning_rate": 2.669296018035772e-07, "loss": 0.0, "step": 24150 }, { "epoch": 23.222115384615385, "grad_norm": 0.0011272495612502098, "learning_rate": 2.6664378631298337e-07, "loss": 0.0, "step": 24151 }, { "epoch": 23.223076923076924, "grad_norm": 0.0003846766194328666, "learning_rate": 2.6635812185551e-07, "loss": 0.0, "step": 24152 }, { "epoch": 23.224038461538463, "grad_norm": 0.0012603024952113628, "learning_rate": 2.6607260843559245e-07, "loss": 0.0, "step": 24153 }, { "epoch": 23.225, "grad_norm": 0.0008760099299252033, "learning_rate": 2.657872460576572e-07, "loss": 0.0, "step": 24154 }, { "epoch": 23.22596153846154, "grad_norm": 0.0004902661894448102, "learning_rate": 2.65502034726135e-07, "loss": 0.0, "step": 24155 }, { "epoch": 23.226923076923075, "grad_norm": 0.0011591317597776651, "learning_rate": 2.6521697444545137e-07, "loss": 0.0, "step": 24156 }, { "epoch": 23.227884615384614, "grad_norm": 0.0007945182733237743, "learning_rate": 2.6493206522002825e-07, "loss": 0.0, "step": 24157 }, { "epoch": 23.228846153846153, "grad_norm": 0.0006760083488188684, "learning_rate": 2.6464730705428767e-07, "loss": 0.0, "step": 24158 }, { "epoch": 23.22980769230769, "grad_norm": 0.0008588444325141609, "learning_rate": 2.643626999526483e-07, "loss": 0.0, "step": 24159 }, { "epoch": 23.23076923076923, "grad_norm": 0.0005039377138018608, "learning_rate": 2.640782439195255e-07, "loss": 0.0, "step": 24160 }, { "epoch": 23.23173076923077, "grad_norm": 0.000888978480361402, "learning_rate": 2.637939389593336e-07, "loss": 0.0, "step": 24161 }, { "epoch": 23.232692307692307, "grad_norm": 0.00034046461223624647, "learning_rate": 2.635097850764856e-07, "loss": 0.0, "step": 24162 }, { "epoch": 23.233653846153846, "grad_norm": 0.0016348980134353042, "learning_rate": 2.632257822753881e-07, "loss": 0.0, "step": 24163 }, { "epoch": 23.234615384615385, "grad_norm": 0.000995609792880714, "learning_rate": 2.629419305604508e-07, "loss": 0.0, "step": 24164 }, { "epoch": 23.235576923076923, "grad_norm": 0.0015971394022926688, "learning_rate": 2.626582299360747e-07, "loss": 0.0, "step": 24165 }, { "epoch": 23.236538461538462, "grad_norm": 0.0006481429445557296, "learning_rate": 2.6237468040666515e-07, "loss": 0.0, "step": 24166 }, { "epoch": 23.2375, "grad_norm": 0.0006872416124679148, "learning_rate": 2.620912819766219e-07, "loss": 0.0, "step": 24167 }, { "epoch": 23.23846153846154, "grad_norm": 0.0012253652093932033, "learning_rate": 2.618080346503404e-07, "loss": 0.0, "step": 24168 }, { "epoch": 23.239423076923078, "grad_norm": 0.0009835143573582172, "learning_rate": 2.6152493843221714e-07, "loss": 0.0, "step": 24169 }, { "epoch": 23.240384615384617, "grad_norm": 0.0007664377917535603, "learning_rate": 2.6124199332664524e-07, "loss": 0.0, "step": 24170 }, { "epoch": 23.241346153846155, "grad_norm": 0.00038679721183143556, "learning_rate": 2.609591993380145e-07, "loss": 0.0, "step": 24171 }, { "epoch": 23.24230769230769, "grad_norm": 0.00170592637732625, "learning_rate": 2.6067655647071254e-07, "loss": 0.0, "step": 24172 }, { "epoch": 23.24326923076923, "grad_norm": 0.0006403992883861065, "learning_rate": 2.60394064729127e-07, "loss": 0.0, "step": 24173 }, { "epoch": 23.244230769230768, "grad_norm": 0.0010349965887144208, "learning_rate": 2.601117241176399e-07, "loss": 0.0, "step": 24174 }, { "epoch": 23.245192307692307, "grad_norm": 0.0010107175912708044, "learning_rate": 2.598295346406321e-07, "loss": 0.0, "step": 24175 }, { "epoch": 23.246153846153845, "grad_norm": 0.001043147873133421, "learning_rate": 2.5954749630248355e-07, "loss": 0.0, "step": 24176 }, { "epoch": 23.247115384615384, "grad_norm": 0.0008885767310857773, "learning_rate": 2.592656091075685e-07, "loss": 0.0, "step": 24177 }, { "epoch": 23.248076923076923, "grad_norm": 0.0005784230306744576, "learning_rate": 2.589838730602645e-07, "loss": 0.0, "step": 24178 }, { "epoch": 23.24903846153846, "grad_norm": 0.000757284346036613, "learning_rate": 2.5870228816494034e-07, "loss": 0.0, "step": 24179 }, { "epoch": 23.25, "grad_norm": 0.000677325006108731, "learning_rate": 2.5842085442596586e-07, "loss": 0.0, "step": 24180 }, { "epoch": 23.25096153846154, "grad_norm": 0.0018442285945639014, "learning_rate": 2.5813957184770977e-07, "loss": 0.0, "step": 24181 }, { "epoch": 23.251923076923077, "grad_norm": 0.0013880053302273154, "learning_rate": 2.578584404345341e-07, "loss": 0.0, "step": 24182 }, { "epoch": 23.252884615384616, "grad_norm": 0.0013252251083031297, "learning_rate": 2.575774601908021e-07, "loss": 0.0, "step": 24183 }, { "epoch": 23.253846153846155, "grad_norm": 0.0008210339583456516, "learning_rate": 2.5729663112087575e-07, "loss": 0.0, "step": 24184 }, { "epoch": 23.254807692307693, "grad_norm": 0.0007639299146831036, "learning_rate": 2.5701595322911055e-07, "loss": 0.0, "step": 24185 }, { "epoch": 23.255769230769232, "grad_norm": 0.0011265090433880687, "learning_rate": 2.567354265198641e-07, "loss": 0.0, "step": 24186 }, { "epoch": 23.25673076923077, "grad_norm": 0.0017840485088527203, "learning_rate": 2.56455050997485e-07, "loss": 0.0, "step": 24187 }, { "epoch": 23.25769230769231, "grad_norm": 0.0011195562547072768, "learning_rate": 2.5617482666632777e-07, "loss": 0.0, "step": 24188 }, { "epoch": 23.258653846153845, "grad_norm": 0.0016713324002921581, "learning_rate": 2.5589475353073987e-07, "loss": 0.0, "step": 24189 }, { "epoch": 23.259615384615383, "grad_norm": 0.0010089878924190998, "learning_rate": 2.5561483159506574e-07, "loss": 0.0, "step": 24190 }, { "epoch": 23.260576923076922, "grad_norm": 0.0007921322248876095, "learning_rate": 2.5533506086365066e-07, "loss": 0.0, "step": 24191 }, { "epoch": 23.26153846153846, "grad_norm": 0.0020339954644441605, "learning_rate": 2.550554413408357e-07, "loss": 0.0, "step": 24192 }, { "epoch": 23.2625, "grad_norm": 0.000957283831667155, "learning_rate": 2.547759730309585e-07, "loss": 0.0, "step": 24193 }, { "epoch": 23.263461538461538, "grad_norm": 0.0015703027602285147, "learning_rate": 2.544966559383555e-07, "loss": 0.0, "step": 24194 }, { "epoch": 23.264423076923077, "grad_norm": 0.0006461059092544019, "learning_rate": 2.5421749006736327e-07, "loss": 0.0, "step": 24195 }, { "epoch": 23.265384615384615, "grad_norm": 0.0005948626785539091, "learning_rate": 2.5393847542231176e-07, "loss": 0.0, "step": 24196 }, { "epoch": 23.266346153846154, "grad_norm": 0.0006876901024952531, "learning_rate": 2.5365961200753074e-07, "loss": 0.0, "step": 24197 }, { "epoch": 23.267307692307693, "grad_norm": 0.003024233505129814, "learning_rate": 2.5338089982734683e-07, "loss": 0.0, "step": 24198 }, { "epoch": 23.26826923076923, "grad_norm": 0.0009439461282454431, "learning_rate": 2.5310233888608537e-07, "loss": 0.0, "step": 24199 }, { "epoch": 23.26923076923077, "grad_norm": 0.0006774001521989703, "learning_rate": 2.5282392918806966e-07, "loss": 0.0, "step": 24200 }, { "epoch": 23.27019230769231, "grad_norm": 0.002045182278379798, "learning_rate": 2.5254567073761835e-07, "loss": 0.0, "step": 24201 }, { "epoch": 23.271153846153847, "grad_norm": 0.0008953449432738125, "learning_rate": 2.5226756353904925e-07, "loss": 0.0, "step": 24202 }, { "epoch": 23.272115384615386, "grad_norm": 0.00042584925540722907, "learning_rate": 2.519896075966799e-07, "loss": 0.0, "step": 24203 }, { "epoch": 23.273076923076925, "grad_norm": 0.001663133967667818, "learning_rate": 2.5171180291482025e-07, "loss": 0.0, "step": 24204 }, { "epoch": 23.27403846153846, "grad_norm": 0.0005468461313284934, "learning_rate": 2.5143414949778235e-07, "loss": 0.0, "step": 24205 }, { "epoch": 23.275, "grad_norm": 0.0008764268131926656, "learning_rate": 2.511566473498761e-07, "loss": 0.0, "step": 24206 }, { "epoch": 23.275961538461537, "grad_norm": 0.0012724915286526084, "learning_rate": 2.508792964754048e-07, "loss": 0.0, "step": 24207 }, { "epoch": 23.276923076923076, "grad_norm": 0.0004204586730338633, "learning_rate": 2.506020968786749e-07, "loss": 0.0, "step": 24208 }, { "epoch": 23.277884615384615, "grad_norm": 0.0007436990272253752, "learning_rate": 2.503250485639852e-07, "loss": 0.0, "step": 24209 }, { "epoch": 23.278846153846153, "grad_norm": 0.0014124164590612054, "learning_rate": 2.5004815153563564e-07, "loss": 0.0, "step": 24210 }, { "epoch": 23.279807692307692, "grad_norm": 0.0002668145461939275, "learning_rate": 2.49771405797925e-07, "loss": 0.0, "step": 24211 }, { "epoch": 23.28076923076923, "grad_norm": 0.0005761711508966982, "learning_rate": 2.494948113551432e-07, "loss": 0.0, "step": 24212 }, { "epoch": 23.28173076923077, "grad_norm": 0.0004215740191284567, "learning_rate": 2.492183682115856e-07, "loss": 0.0, "step": 24213 }, { "epoch": 23.282692307692308, "grad_norm": 0.0010829281527549028, "learning_rate": 2.489420763715411e-07, "loss": 0.0, "step": 24214 }, { "epoch": 23.283653846153847, "grad_norm": 0.002956422744318843, "learning_rate": 2.486659358392951e-07, "loss": 0.0, "step": 24215 }, { "epoch": 23.284615384615385, "grad_norm": 0.0003578761825338006, "learning_rate": 2.4838994661913527e-07, "loss": 0.0, "step": 24216 }, { "epoch": 23.285576923076924, "grad_norm": 0.0012393599608913064, "learning_rate": 2.481141087153427e-07, "loss": 0.0, "step": 24217 }, { "epoch": 23.286538461538463, "grad_norm": 0.0020252845715731382, "learning_rate": 2.4783842213219725e-07, "loss": 0.0, "step": 24218 }, { "epoch": 23.2875, "grad_norm": 0.0017549969488754869, "learning_rate": 2.4756288687397655e-07, "loss": 0.0, "step": 24219 }, { "epoch": 23.28846153846154, "grad_norm": 0.0007187669980339706, "learning_rate": 2.472875029449584e-07, "loss": 0.0, "step": 24220 }, { "epoch": 23.289423076923075, "grad_norm": 0.000514804560225457, "learning_rate": 2.4701227034941376e-07, "loss": 0.0, "step": 24221 }, { "epoch": 23.290384615384614, "grad_norm": 0.0006889817886985838, "learning_rate": 2.4673718909161484e-07, "loss": 0.0, "step": 24222 }, { "epoch": 23.291346153846153, "grad_norm": 0.0008715104195289314, "learning_rate": 2.464622591758281e-07, "loss": 0.0, "step": 24223 }, { "epoch": 23.29230769230769, "grad_norm": 0.00031756918178871274, "learning_rate": 2.4618748060632024e-07, "loss": 0.0, "step": 24224 }, { "epoch": 23.29326923076923, "grad_norm": 0.001271153916604817, "learning_rate": 2.459128533873567e-07, "loss": 0.0, "step": 24225 }, { "epoch": 23.29423076923077, "grad_norm": 0.0009625498787499964, "learning_rate": 2.4563837752319744e-07, "loss": 0.0, "step": 24226 }, { "epoch": 23.295192307692307, "grad_norm": 0.0033166527282446623, "learning_rate": 2.4536405301810115e-07, "loss": 0.0, "step": 24227 }, { "epoch": 23.296153846153846, "grad_norm": 0.000844921451061964, "learning_rate": 2.450898798763268e-07, "loss": 0.0, "step": 24228 }, { "epoch": 23.297115384615385, "grad_norm": 0.0008224021294154227, "learning_rate": 2.448158581021265e-07, "loss": 0.0, "step": 24229 }, { "epoch": 23.298076923076923, "grad_norm": 0.0006803682772442698, "learning_rate": 2.4454198769975234e-07, "loss": 0.0, "step": 24230 }, { "epoch": 23.299038461538462, "grad_norm": 0.0006955821882002056, "learning_rate": 2.4426826867345543e-07, "loss": 0.0, "step": 24231 }, { "epoch": 23.3, "grad_norm": 0.0008423352846875787, "learning_rate": 2.4399470102748125e-07, "loss": 0.0, "step": 24232 }, { "epoch": 23.30096153846154, "grad_norm": 0.0013835433637723327, "learning_rate": 2.4372128476607747e-07, "loss": 0.0, "step": 24233 }, { "epoch": 23.301923076923078, "grad_norm": 0.0007815982680767775, "learning_rate": 2.434480198934841e-07, "loss": 0.0, "step": 24234 }, { "epoch": 23.302884615384617, "grad_norm": 0.0008038922096602619, "learning_rate": 2.43174906413941e-07, "loss": 0.0, "step": 24235 }, { "epoch": 23.303846153846155, "grad_norm": 0.0007698939298279583, "learning_rate": 2.4290194433168936e-07, "loss": 0.0, "step": 24236 }, { "epoch": 23.30480769230769, "grad_norm": 0.0006084442138671875, "learning_rate": 2.426291336509623e-07, "loss": 0.0, "step": 24237 }, { "epoch": 23.30576923076923, "grad_norm": 0.0011134374653920531, "learning_rate": 2.423564743759921e-07, "loss": 0.0, "step": 24238 }, { "epoch": 23.306730769230768, "grad_norm": 0.0005610737716779113, "learning_rate": 2.4208396651101307e-07, "loss": 0.0, "step": 24239 }, { "epoch": 23.307692307692307, "grad_norm": 0.0005090474733151495, "learning_rate": 2.4181161006025076e-07, "loss": 0.0, "step": 24240 }, { "epoch": 23.308653846153845, "grad_norm": 0.0008463371195830405, "learning_rate": 2.4153940502793185e-07, "loss": 0.0, "step": 24241 }, { "epoch": 23.309615384615384, "grad_norm": 0.001837550662457943, "learning_rate": 2.4126735141828283e-07, "loss": 0.0, "step": 24242 }, { "epoch": 23.310576923076923, "grad_norm": 0.0007523089880123734, "learning_rate": 2.4099544923552043e-07, "loss": 0.0, "step": 24243 }, { "epoch": 23.31153846153846, "grad_norm": 0.001025591162033379, "learning_rate": 2.407236984838679e-07, "loss": 0.0, "step": 24244 }, { "epoch": 23.3125, "grad_norm": 0.0006574064609594643, "learning_rate": 2.404520991675396e-07, "loss": 0.0, "step": 24245 }, { "epoch": 23.31346153846154, "grad_norm": 0.0005154322716407478, "learning_rate": 2.401806512907512e-07, "loss": 0.0, "step": 24246 }, { "epoch": 23.314423076923077, "grad_norm": 0.0010198133531957865, "learning_rate": 2.3990935485771474e-07, "loss": 0.0, "step": 24247 }, { "epoch": 23.315384615384616, "grad_norm": 0.0006224979879334569, "learning_rate": 2.396382098726391e-07, "loss": 0.0, "step": 24248 }, { "epoch": 23.316346153846155, "grad_norm": 0.001948561635799706, "learning_rate": 2.393672163397309e-07, "loss": 0.0, "step": 24249 }, { "epoch": 23.317307692307693, "grad_norm": 0.0004231120983604342, "learning_rate": 2.3909637426319907e-07, "loss": 0.0, "step": 24250 }, { "epoch": 23.318269230769232, "grad_norm": 0.0006108239758759737, "learning_rate": 2.3882568364724137e-07, "loss": 0.0, "step": 24251 }, { "epoch": 23.31923076923077, "grad_norm": 0.0021184016950428486, "learning_rate": 2.38555144496061e-07, "loss": 0.0, "step": 24252 }, { "epoch": 23.32019230769231, "grad_norm": 0.0006715432391501963, "learning_rate": 2.3828475681385576e-07, "loss": 0.0, "step": 24253 }, { "epoch": 23.321153846153845, "grad_norm": 0.0002417311625322327, "learning_rate": 2.380145206048201e-07, "loss": 0.0, "step": 24254 }, { "epoch": 23.322115384615383, "grad_norm": 0.0006470819353125989, "learning_rate": 2.3774443587314843e-07, "loss": 0.0, "step": 24255 }, { "epoch": 23.323076923076922, "grad_norm": 0.0010229969630017877, "learning_rate": 2.3747450262303074e-07, "loss": 0.0, "step": 24256 }, { "epoch": 23.32403846153846, "grad_norm": 0.0004901239881291986, "learning_rate": 2.3720472085865697e-07, "loss": 0.0, "step": 24257 }, { "epoch": 23.325, "grad_norm": 0.0007788711227476597, "learning_rate": 2.369350905842127e-07, "loss": 0.0, "step": 24258 }, { "epoch": 23.325961538461538, "grad_norm": 0.0019714816007763147, "learning_rate": 2.3666561180388014e-07, "loss": 0.0, "step": 24259 }, { "epoch": 23.326923076923077, "grad_norm": 0.0002007342263823375, "learning_rate": 2.363962845218426e-07, "loss": 0.0, "step": 24260 }, { "epoch": 23.327884615384615, "grad_norm": 0.0014520591357722878, "learning_rate": 2.3612710874228007e-07, "loss": 0.0, "step": 24261 }, { "epoch": 23.328846153846154, "grad_norm": 0.0005799675127491355, "learning_rate": 2.3585808446936698e-07, "loss": 0.0, "step": 24262 }, { "epoch": 23.329807692307693, "grad_norm": 0.0004725141916424036, "learning_rate": 2.355892117072789e-07, "loss": 0.0, "step": 24263 }, { "epoch": 23.33076923076923, "grad_norm": 0.0011497150408104062, "learning_rate": 2.3532049046018913e-07, "loss": 0.0, "step": 24264 }, { "epoch": 23.33173076923077, "grad_norm": 0.0005427108844742179, "learning_rate": 2.3505192073226547e-07, "loss": 0.0, "step": 24265 }, { "epoch": 23.33269230769231, "grad_norm": 0.0018785331631079316, "learning_rate": 2.347835025276768e-07, "loss": 0.0, "step": 24266 }, { "epoch": 23.333653846153847, "grad_norm": 0.0009368344908580184, "learning_rate": 2.3451523585058756e-07, "loss": 0.0, "step": 24267 }, { "epoch": 23.334615384615386, "grad_norm": 0.000985757214948535, "learning_rate": 2.3424712070516e-07, "loss": 0.0, "step": 24268 }, { "epoch": 23.335576923076925, "grad_norm": 0.0002521103888284415, "learning_rate": 2.3397915709555518e-07, "loss": 0.0, "step": 24269 }, { "epoch": 23.33653846153846, "grad_norm": 0.0011298982426524162, "learning_rate": 2.3371134502593096e-07, "loss": 0.0, "step": 24270 }, { "epoch": 23.3375, "grad_norm": 0.003357341280207038, "learning_rate": 2.3344368450044287e-07, "loss": 0.0, "step": 24271 }, { "epoch": 23.338461538461537, "grad_norm": 0.001045699929818511, "learning_rate": 2.3317617552324533e-07, "loss": 0.0, "step": 24272 }, { "epoch": 23.339423076923076, "grad_norm": 0.0006734271300956607, "learning_rate": 2.329088180984862e-07, "loss": 0.0, "step": 24273 }, { "epoch": 23.340384615384615, "grad_norm": 0.0008622105815447867, "learning_rate": 2.3264161223031768e-07, "loss": 0.0, "step": 24274 }, { "epoch": 23.341346153846153, "grad_norm": 0.00041872781002894044, "learning_rate": 2.3237455792288421e-07, "loss": 0.0, "step": 24275 }, { "epoch": 23.342307692307692, "grad_norm": 0.0007265324238687754, "learning_rate": 2.321076551803303e-07, "loss": 0.0, "step": 24276 }, { "epoch": 23.34326923076923, "grad_norm": 0.0009215913596563041, "learning_rate": 2.3184090400679703e-07, "loss": 0.0, "step": 24277 }, { "epoch": 23.34423076923077, "grad_norm": 0.0019491740968078375, "learning_rate": 2.3157430440642337e-07, "loss": 0.0, "step": 24278 }, { "epoch": 23.345192307692308, "grad_norm": 0.0006840698188170791, "learning_rate": 2.3130785638334597e-07, "loss": 0.0, "step": 24279 }, { "epoch": 23.346153846153847, "grad_norm": 0.0008112859213724732, "learning_rate": 2.3104155994170042e-07, "loss": 0.0, "step": 24280 }, { "epoch": 23.347115384615385, "grad_norm": 0.0010349219664931297, "learning_rate": 2.3077541508561896e-07, "loss": 0.0, "step": 24281 }, { "epoch": 23.348076923076924, "grad_norm": 0.0008965907036326826, "learning_rate": 2.3050942181922942e-07, "loss": 0.0, "step": 24282 }, { "epoch": 23.349038461538463, "grad_norm": 0.0009742702823132277, "learning_rate": 2.3024358014666181e-07, "loss": 0.0, "step": 24283 }, { "epoch": 23.35, "grad_norm": 0.0018620550399646163, "learning_rate": 2.299778900720395e-07, "loss": 0.0, "step": 24284 }, { "epoch": 23.35096153846154, "grad_norm": 0.0006528326775878668, "learning_rate": 2.297123515994848e-07, "loss": 0.0, "step": 24285 }, { "epoch": 23.351923076923075, "grad_norm": 0.0006781116244383156, "learning_rate": 2.2944696473311989e-07, "loss": 0.0, "step": 24286 }, { "epoch": 23.352884615384614, "grad_norm": 0.0013474348234012723, "learning_rate": 2.2918172947706152e-07, "loss": 0.0, "step": 24287 }, { "epoch": 23.353846153846153, "grad_norm": 0.00032921761157922447, "learning_rate": 2.289166458354264e-07, "loss": 0.0, "step": 24288 }, { "epoch": 23.35480769230769, "grad_norm": 0.0015202502254396677, "learning_rate": 2.2865171381232677e-07, "loss": 0.0, "step": 24289 }, { "epoch": 23.35576923076923, "grad_norm": 0.0015356503427028656, "learning_rate": 2.2838693341187378e-07, "loss": 0.0, "step": 24290 }, { "epoch": 23.35673076923077, "grad_norm": 0.0008247864316217601, "learning_rate": 2.281223046381764e-07, "loss": 0.0, "step": 24291 }, { "epoch": 23.357692307692307, "grad_norm": 0.0008259384194388986, "learning_rate": 2.2785782749534024e-07, "loss": 0.0, "step": 24292 }, { "epoch": 23.358653846153846, "grad_norm": 0.00025818657013587654, "learning_rate": 2.2759350198746978e-07, "loss": 0.0, "step": 24293 }, { "epoch": 23.359615384615385, "grad_norm": 0.0011209524236619473, "learning_rate": 2.2732932811866726e-07, "loss": 0.0, "step": 24294 }, { "epoch": 23.360576923076923, "grad_norm": 0.0008255037828348577, "learning_rate": 2.2706530589303057e-07, "loss": 0.0, "step": 24295 }, { "epoch": 23.361538461538462, "grad_norm": 0.0006933626136742532, "learning_rate": 2.2680143531465637e-07, "loss": 0.0, "step": 24296 }, { "epoch": 23.3625, "grad_norm": 0.00022438535233959556, "learning_rate": 2.2653771638764144e-07, "loss": 0.0, "step": 24297 }, { "epoch": 23.36346153846154, "grad_norm": 0.0010315870167687535, "learning_rate": 2.2627414911607582e-07, "loss": 0.0, "step": 24298 }, { "epoch": 23.364423076923078, "grad_norm": 0.000957455369643867, "learning_rate": 2.2601073350404957e-07, "loss": 0.0, "step": 24299 }, { "epoch": 23.365384615384617, "grad_norm": 0.3417826294898987, "learning_rate": 2.2574746955565051e-07, "loss": 0.0021, "step": 24300 }, { "epoch": 23.366346153846155, "grad_norm": 0.0028050336986780167, "learning_rate": 2.254843572749632e-07, "loss": 0.0, "step": 24301 }, { "epoch": 23.36730769230769, "grad_norm": 0.0005872486508451402, "learning_rate": 2.2522139666607212e-07, "loss": 0.0, "step": 24302 }, { "epoch": 23.36826923076923, "grad_norm": 0.0007824386702850461, "learning_rate": 2.2495858773305512e-07, "loss": 0.0, "step": 24303 }, { "epoch": 23.369230769230768, "grad_norm": 0.0009569725370965898, "learning_rate": 2.2469593047999226e-07, "loss": 0.0, "step": 24304 }, { "epoch": 23.370192307692307, "grad_norm": 0.0010163254337385297, "learning_rate": 2.2443342491095809e-07, "loss": 0.0, "step": 24305 }, { "epoch": 23.371153846153845, "grad_norm": 0.001010018982924521, "learning_rate": 2.24171071030026e-07, "loss": 0.0, "step": 24306 }, { "epoch": 23.372115384615384, "grad_norm": 0.0010984165128320456, "learning_rate": 2.2390886884126716e-07, "loss": 0.0, "step": 24307 }, { "epoch": 23.373076923076923, "grad_norm": 0.00042071827920153737, "learning_rate": 2.2364681834875056e-07, "loss": 0.0, "step": 24308 }, { "epoch": 23.37403846153846, "grad_norm": 0.00042626794311217964, "learning_rate": 2.2338491955654184e-07, "loss": 0.0, "step": 24309 }, { "epoch": 23.375, "grad_norm": 0.0012871711514890194, "learning_rate": 2.231231724687044e-07, "loss": 0.0, "step": 24310 }, { "epoch": 23.37596153846154, "grad_norm": 0.000806822907179594, "learning_rate": 2.2286157708930168e-07, "loss": 0.0, "step": 24311 }, { "epoch": 23.376923076923077, "grad_norm": 0.0007098430069163442, "learning_rate": 2.226001334223915e-07, "loss": 0.0, "step": 24312 }, { "epoch": 23.377884615384616, "grad_norm": 0.0015201059868559241, "learning_rate": 2.2233884147203065e-07, "loss": 0.0, "step": 24313 }, { "epoch": 23.378846153846155, "grad_norm": 0.00035867124097421765, "learning_rate": 2.2207770124227367e-07, "loss": 0.0, "step": 24314 }, { "epoch": 23.379807692307693, "grad_norm": 0.0009848866611719131, "learning_rate": 2.2181671273717287e-07, "loss": 0.0, "step": 24315 }, { "epoch": 23.380769230769232, "grad_norm": 0.00027254450833424926, "learning_rate": 2.2155587596077944e-07, "loss": 0.0, "step": 24316 }, { "epoch": 23.38173076923077, "grad_norm": 0.0008199981530196965, "learning_rate": 2.2129519091713792e-07, "loss": 0.0, "step": 24317 }, { "epoch": 23.38269230769231, "grad_norm": 0.0013899881159886718, "learning_rate": 2.2103465761029507e-07, "loss": 0.0, "step": 24318 }, { "epoch": 23.383653846153845, "grad_norm": 0.0008346462273038924, "learning_rate": 2.2077427604429435e-07, "loss": 0.0, "step": 24319 }, { "epoch": 23.384615384615383, "grad_norm": 0.0012706989655271173, "learning_rate": 2.2051404622317364e-07, "loss": 0.0, "step": 24320 }, { "epoch": 23.385576923076922, "grad_norm": 0.000742679345421493, "learning_rate": 2.20253968150973e-07, "loss": 0.0, "step": 24321 }, { "epoch": 23.38653846153846, "grad_norm": 0.0005150400102138519, "learning_rate": 2.1999404183172923e-07, "loss": 0.0, "step": 24322 }, { "epoch": 23.3875, "grad_norm": 0.0005043314886279404, "learning_rate": 2.1973426726947246e-07, "loss": 0.0, "step": 24323 }, { "epoch": 23.388461538461538, "grad_norm": 0.0009942426113411784, "learning_rate": 2.1947464446823497e-07, "loss": 0.0, "step": 24324 }, { "epoch": 23.389423076923077, "grad_norm": 0.0009386461460962892, "learning_rate": 2.192151734320458e-07, "loss": 0.0, "step": 24325 }, { "epoch": 23.390384615384615, "grad_norm": 0.000778654997702688, "learning_rate": 2.189558541649306e-07, "loss": 0.0, "step": 24326 }, { "epoch": 23.391346153846154, "grad_norm": 0.00037834123941138387, "learning_rate": 2.1869668667091504e-07, "loss": 0.0, "step": 24327 }, { "epoch": 23.392307692307693, "grad_norm": 0.00046744864084757864, "learning_rate": 2.1843767095401703e-07, "loss": 0.0, "step": 24328 }, { "epoch": 23.39326923076923, "grad_norm": 0.0010905402014032006, "learning_rate": 2.1817880701825888e-07, "loss": 0.0, "step": 24329 }, { "epoch": 23.39423076923077, "grad_norm": 0.0017338104080408812, "learning_rate": 2.179200948676563e-07, "loss": 0.0, "step": 24330 }, { "epoch": 23.39519230769231, "grad_norm": 0.000608739152085036, "learning_rate": 2.1766153450622385e-07, "loss": 0.0, "step": 24331 }, { "epoch": 23.396153846153847, "grad_norm": 0.0014462186954915524, "learning_rate": 2.1740312593797274e-07, "loss": 0.0, "step": 24332 }, { "epoch": 23.397115384615386, "grad_norm": 0.0005089488695375621, "learning_rate": 2.1714486916691534e-07, "loss": 0.0, "step": 24333 }, { "epoch": 23.398076923076925, "grad_norm": 0.0006912101525813341, "learning_rate": 2.1688676419705511e-07, "loss": 0.0, "step": 24334 }, { "epoch": 23.39903846153846, "grad_norm": 0.0012980697210878134, "learning_rate": 2.1662881103240108e-07, "loss": 0.0, "step": 24335 }, { "epoch": 23.4, "grad_norm": 0.0005532114882953465, "learning_rate": 2.1637100967695334e-07, "loss": 0.0, "step": 24336 }, { "epoch": 23.400961538461537, "grad_norm": 0.0006733539630658925, "learning_rate": 2.1611336013471207e-07, "loss": 0.0, "step": 24337 }, { "epoch": 23.401923076923076, "grad_norm": 0.0010409587994217873, "learning_rate": 2.1585586240967738e-07, "loss": 0.0, "step": 24338 }, { "epoch": 23.402884615384615, "grad_norm": 0.0009177768370136619, "learning_rate": 2.1559851650584274e-07, "loss": 0.0, "step": 24339 }, { "epoch": 23.403846153846153, "grad_norm": 0.0005350718856789172, "learning_rate": 2.1534132242720162e-07, "loss": 0.0, "step": 24340 }, { "epoch": 23.404807692307692, "grad_norm": 0.0008781927754171193, "learning_rate": 2.1508428017774751e-07, "loss": 0.0, "step": 24341 }, { "epoch": 23.40576923076923, "grad_norm": 0.0010045437375083566, "learning_rate": 2.14827389761465e-07, "loss": 0.0, "step": 24342 }, { "epoch": 23.40673076923077, "grad_norm": 0.0008052904740907252, "learning_rate": 2.145706511823431e-07, "loss": 0.0, "step": 24343 }, { "epoch": 23.407692307692308, "grad_norm": 0.0008600318687967956, "learning_rate": 2.143140644443653e-07, "loss": 0.0, "step": 24344 }, { "epoch": 23.408653846153847, "grad_norm": 0.0007077503832988441, "learning_rate": 2.1405762955151178e-07, "loss": 0.0, "step": 24345 }, { "epoch": 23.409615384615385, "grad_norm": 0.0009364555007778108, "learning_rate": 2.1380134650776373e-07, "loss": 0.0, "step": 24346 }, { "epoch": 23.410576923076924, "grad_norm": 0.000962279736995697, "learning_rate": 2.1354521531709582e-07, "loss": 0.0, "step": 24347 }, { "epoch": 23.411538461538463, "grad_norm": 0.003817302407696843, "learning_rate": 2.132892359834826e-07, "loss": 0.0, "step": 24348 }, { "epoch": 23.4125, "grad_norm": 0.0008647207287140191, "learning_rate": 2.130334085108976e-07, "loss": 0.0, "step": 24349 }, { "epoch": 23.41346153846154, "grad_norm": 0.00105815171264112, "learning_rate": 2.127777329033087e-07, "loss": 0.0, "step": 24350 }, { "epoch": 23.414423076923075, "grad_norm": 0.002203782321885228, "learning_rate": 2.1252220916468504e-07, "loss": 0.0, "step": 24351 }, { "epoch": 23.415384615384614, "grad_norm": 0.0007139671361073852, "learning_rate": 2.122668372989911e-07, "loss": 0.0, "step": 24352 }, { "epoch": 23.416346153846153, "grad_norm": 0.000586735550314188, "learning_rate": 2.1201161731018828e-07, "loss": 0.0, "step": 24353 }, { "epoch": 23.41730769230769, "grad_norm": 0.0005624999175779521, "learning_rate": 2.1175654920223887e-07, "loss": 0.0, "step": 24354 }, { "epoch": 23.41826923076923, "grad_norm": 0.0010356588754802942, "learning_rate": 2.1150163297909976e-07, "loss": 0.0, "step": 24355 }, { "epoch": 23.41923076923077, "grad_norm": 0.0008874667691998184, "learning_rate": 2.1124686864472443e-07, "loss": 0.0, "step": 24356 }, { "epoch": 23.420192307692307, "grad_norm": 0.005731065291911364, "learning_rate": 2.1099225620307085e-07, "loss": 0.0001, "step": 24357 }, { "epoch": 23.421153846153846, "grad_norm": 0.0009994006250053644, "learning_rate": 2.1073779565808471e-07, "loss": 0.0, "step": 24358 }, { "epoch": 23.422115384615385, "grad_norm": 0.0009294800693169236, "learning_rate": 2.1048348701371734e-07, "loss": 0.0, "step": 24359 }, { "epoch": 23.423076923076923, "grad_norm": 0.0008564292802475393, "learning_rate": 2.1022933027391555e-07, "loss": 0.0, "step": 24360 }, { "epoch": 23.424038461538462, "grad_norm": 0.0017189084319397807, "learning_rate": 2.0997532544262068e-07, "loss": 0.0, "step": 24361 }, { "epoch": 23.425, "grad_norm": 0.0007435865118168294, "learning_rate": 2.0972147252377507e-07, "loss": 0.0, "step": 24362 }, { "epoch": 23.42596153846154, "grad_norm": 0.0013315773103386164, "learning_rate": 2.0946777152132003e-07, "loss": 0.0, "step": 24363 }, { "epoch": 23.426923076923078, "grad_norm": 0.0010511255823075771, "learning_rate": 2.092142224391891e-07, "loss": 0.0, "step": 24364 }, { "epoch": 23.427884615384617, "grad_norm": 0.0007709902711212635, "learning_rate": 2.089608252813169e-07, "loss": 0.0, "step": 24365 }, { "epoch": 23.428846153846155, "grad_norm": 0.0012269350700080395, "learning_rate": 2.0870758005163804e-07, "loss": 0.0, "step": 24366 }, { "epoch": 23.42980769230769, "grad_norm": 0.0013051735004410148, "learning_rate": 2.0845448675407943e-07, "loss": 0.0, "step": 24367 }, { "epoch": 23.43076923076923, "grad_norm": 0.0011420517694205046, "learning_rate": 2.082015453925701e-07, "loss": 0.0, "step": 24368 }, { "epoch": 23.431730769230768, "grad_norm": 0.002630780916661024, "learning_rate": 2.0794875597103358e-07, "loss": 0.0, "step": 24369 }, { "epoch": 23.432692307692307, "grad_norm": 0.001227630884386599, "learning_rate": 2.0769611849339234e-07, "loss": 0.0, "step": 24370 }, { "epoch": 23.433653846153845, "grad_norm": 0.0006972201517783105, "learning_rate": 2.0744363296356872e-07, "loss": 0.0, "step": 24371 }, { "epoch": 23.434615384615384, "grad_norm": 0.0014211857924237847, "learning_rate": 2.0719129938547854e-07, "loss": 0.0, "step": 24372 }, { "epoch": 23.435576923076923, "grad_norm": 0.0010007956298068166, "learning_rate": 2.0693911776303753e-07, "loss": 0.0, "step": 24373 }, { "epoch": 23.43653846153846, "grad_norm": 0.0008891833131201565, "learning_rate": 2.0668708810015924e-07, "loss": 0.0, "step": 24374 }, { "epoch": 23.4375, "grad_norm": 0.0004009459225926548, "learning_rate": 2.0643521040075387e-07, "loss": 0.0, "step": 24375 }, { "epoch": 23.43846153846154, "grad_norm": 0.0012357545783743262, "learning_rate": 2.061834846687305e-07, "loss": 0.0, "step": 24376 }, { "epoch": 23.439423076923077, "grad_norm": 0.0007957452908158302, "learning_rate": 2.05931910907996e-07, "loss": 0.0, "step": 24377 }, { "epoch": 23.440384615384616, "grad_norm": 0.0025742638390511274, "learning_rate": 2.0568048912245174e-07, "loss": 0.0, "step": 24378 }, { "epoch": 23.441346153846155, "grad_norm": 0.0010428271489217877, "learning_rate": 2.0542921931600235e-07, "loss": 0.0, "step": 24379 }, { "epoch": 23.442307692307693, "grad_norm": 0.001098631415516138, "learning_rate": 2.0517810149254247e-07, "loss": 0.0, "step": 24380 }, { "epoch": 23.443269230769232, "grad_norm": 0.0004437321040313691, "learning_rate": 2.0492713565597123e-07, "loss": 0.0, "step": 24381 }, { "epoch": 23.44423076923077, "grad_norm": 0.0007874153088778257, "learning_rate": 2.0467632181018437e-07, "loss": 0.0, "step": 24382 }, { "epoch": 23.44519230769231, "grad_norm": 0.001767630921676755, "learning_rate": 2.0442565995906993e-07, "loss": 0.0, "step": 24383 }, { "epoch": 23.446153846153845, "grad_norm": 0.0009707815479487181, "learning_rate": 2.0417515010652032e-07, "loss": 0.0, "step": 24384 }, { "epoch": 23.447115384615383, "grad_norm": 0.0016965351533144712, "learning_rate": 2.0392479225642248e-07, "loss": 0.0, "step": 24385 }, { "epoch": 23.448076923076922, "grad_norm": 0.0007678709225729108, "learning_rate": 2.0367458641266101e-07, "loss": 0.0, "step": 24386 }, { "epoch": 23.44903846153846, "grad_norm": 0.0005557969561778009, "learning_rate": 2.034245325791162e-07, "loss": 0.0, "step": 24387 }, { "epoch": 23.45, "grad_norm": 0.0005859232041984797, "learning_rate": 2.031746307596727e-07, "loss": 0.0, "step": 24388 }, { "epoch": 23.450961538461538, "grad_norm": 0.0016145663103088737, "learning_rate": 2.0292488095820405e-07, "loss": 0.0, "step": 24389 }, { "epoch": 23.451923076923077, "grad_norm": 0.0011966327438130975, "learning_rate": 2.026752831785861e-07, "loss": 0.0, "step": 24390 }, { "epoch": 23.452884615384615, "grad_norm": 0.0007532507879659534, "learning_rate": 2.0242583742469568e-07, "loss": 0.0, "step": 24391 }, { "epoch": 23.453846153846154, "grad_norm": 0.0006226159748621285, "learning_rate": 2.0217654370039863e-07, "loss": 0.0, "step": 24392 }, { "epoch": 23.454807692307693, "grad_norm": 0.0014712491538375616, "learning_rate": 2.019274020095663e-07, "loss": 0.0, "step": 24393 }, { "epoch": 23.45576923076923, "grad_norm": 0.0011889728484675288, "learning_rate": 2.0167841235606332e-07, "loss": 0.0, "step": 24394 }, { "epoch": 23.45673076923077, "grad_norm": 0.0009419253328815103, "learning_rate": 2.0142957474375336e-07, "loss": 0.0, "step": 24395 }, { "epoch": 23.45769230769231, "grad_norm": 0.0009177878964692354, "learning_rate": 2.011808891764988e-07, "loss": 0.0, "step": 24396 }, { "epoch": 23.458653846153847, "grad_norm": 0.0008737074676901102, "learning_rate": 2.009323556581566e-07, "loss": 0.0, "step": 24397 }, { "epoch": 23.459615384615386, "grad_norm": 0.0012588715180754662, "learning_rate": 2.0068397419258478e-07, "loss": 0.0, "step": 24398 }, { "epoch": 23.460576923076925, "grad_norm": 0.0018094207625836134, "learning_rate": 2.0043574478363804e-07, "loss": 0.0, "step": 24399 }, { "epoch": 23.46153846153846, "grad_norm": 0.000672406458761543, "learning_rate": 2.0018766743516548e-07, "loss": 0.0, "step": 24400 }, { "epoch": 23.4625, "grad_norm": 0.00044518933282233775, "learning_rate": 1.9993974215101853e-07, "loss": 0.0, "step": 24401 }, { "epoch": 23.463461538461537, "grad_norm": 0.0013105238322168589, "learning_rate": 1.9969196893504518e-07, "loss": 0.0, "step": 24402 }, { "epoch": 23.464423076923076, "grad_norm": 0.0006337293307296932, "learning_rate": 1.994443477910879e-07, "loss": 0.0, "step": 24403 }, { "epoch": 23.465384615384615, "grad_norm": 0.0009573969873599708, "learning_rate": 1.9919687872299142e-07, "loss": 0.0, "step": 24404 }, { "epoch": 23.466346153846153, "grad_norm": 0.00024823041167110205, "learning_rate": 1.9894956173459267e-07, "loss": 0.0, "step": 24405 }, { "epoch": 23.467307692307692, "grad_norm": 0.0006375826196745038, "learning_rate": 1.987023968297308e-07, "loss": 0.0, "step": 24406 }, { "epoch": 23.46826923076923, "grad_norm": 0.0006023645983077586, "learning_rate": 1.9845538401224273e-07, "loss": 0.0, "step": 24407 }, { "epoch": 23.46923076923077, "grad_norm": 0.0009734743507578969, "learning_rate": 1.9820852328595874e-07, "loss": 0.0, "step": 24408 }, { "epoch": 23.470192307692308, "grad_norm": 0.0006266107084229589, "learning_rate": 1.979618146547113e-07, "loss": 0.0, "step": 24409 }, { "epoch": 23.471153846153847, "grad_norm": 0.0009362690034322441, "learning_rate": 1.977152581223274e-07, "loss": 0.0, "step": 24410 }, { "epoch": 23.472115384615385, "grad_norm": 0.002359552774578333, "learning_rate": 1.974688536926339e-07, "loss": 0.0, "step": 24411 }, { "epoch": 23.473076923076924, "grad_norm": 0.0006978884339332581, "learning_rate": 1.9722260136945227e-07, "loss": 0.0, "step": 24412 }, { "epoch": 23.474038461538463, "grad_norm": 0.000719842326361686, "learning_rate": 1.9697650115660715e-07, "loss": 0.0, "step": 24413 }, { "epoch": 23.475, "grad_norm": 0.0015931816305965185, "learning_rate": 1.9673055305791332e-07, "loss": 0.0, "step": 24414 }, { "epoch": 23.47596153846154, "grad_norm": 0.0005432715988717973, "learning_rate": 1.9648475707719106e-07, "loss": 0.0, "step": 24415 }, { "epoch": 23.476923076923075, "grad_norm": 0.0006462751771323383, "learning_rate": 1.962391132182506e-07, "loss": 0.0, "step": 24416 }, { "epoch": 23.477884615384614, "grad_norm": 0.0010632341727614403, "learning_rate": 1.9599362148490564e-07, "loss": 0.0, "step": 24417 }, { "epoch": 23.478846153846153, "grad_norm": 0.0007038359763100743, "learning_rate": 1.9574828188096638e-07, "loss": 0.0, "step": 24418 }, { "epoch": 23.47980769230769, "grad_norm": 0.0005964802112430334, "learning_rate": 1.9550309441023761e-07, "loss": 0.0, "step": 24419 }, { "epoch": 23.48076923076923, "grad_norm": 0.0006166442180983722, "learning_rate": 1.9525805907652518e-07, "loss": 0.0, "step": 24420 }, { "epoch": 23.48173076923077, "grad_norm": 0.0013253771467134356, "learning_rate": 1.9501317588363155e-07, "loss": 0.0, "step": 24421 }, { "epoch": 23.482692307692307, "grad_norm": 0.0006742958794347942, "learning_rate": 1.9476844483535595e-07, "loss": 0.0, "step": 24422 }, { "epoch": 23.483653846153846, "grad_norm": 0.0018930367659777403, "learning_rate": 1.9452386593549534e-07, "loss": 0.0, "step": 24423 }, { "epoch": 23.484615384615385, "grad_norm": 0.0010407485533505678, "learning_rate": 1.9427943918784665e-07, "loss": 0.0, "step": 24424 }, { "epoch": 23.485576923076923, "grad_norm": 0.0012091764947399497, "learning_rate": 1.940351645962013e-07, "loss": 0.0, "step": 24425 }, { "epoch": 23.486538461538462, "grad_norm": 0.0014293629210442305, "learning_rate": 1.9379104216435074e-07, "loss": 0.0, "step": 24426 }, { "epoch": 23.4875, "grad_norm": 0.0008489610627293587, "learning_rate": 1.9354707189608079e-07, "loss": 0.0, "step": 24427 }, { "epoch": 23.48846153846154, "grad_norm": 0.0005850906600244343, "learning_rate": 1.9330325379517845e-07, "loss": 0.0, "step": 24428 }, { "epoch": 23.489423076923078, "grad_norm": 0.0016076600877568126, "learning_rate": 1.9305958786542954e-07, "loss": 0.0, "step": 24429 }, { "epoch": 23.490384615384617, "grad_norm": 0.0013896062737330794, "learning_rate": 1.928160741106111e-07, "loss": 0.0, "step": 24430 }, { "epoch": 23.491346153846155, "grad_norm": 0.0010634817881509662, "learning_rate": 1.925727125345034e-07, "loss": 0.0, "step": 24431 }, { "epoch": 23.49230769230769, "grad_norm": 0.000647387991193682, "learning_rate": 1.923295031408834e-07, "loss": 0.0, "step": 24432 }, { "epoch": 23.49326923076923, "grad_norm": 0.0009728510049171746, "learning_rate": 1.9208644593352366e-07, "loss": 0.0, "step": 24433 }, { "epoch": 23.494230769230768, "grad_norm": 0.0007346953498199582, "learning_rate": 1.9184354091619671e-07, "loss": 0.0, "step": 24434 }, { "epoch": 23.495192307692307, "grad_norm": 0.0009941753232851624, "learning_rate": 1.9160078809267292e-07, "loss": 0.0, "step": 24435 }, { "epoch": 23.496153846153845, "grad_norm": 0.0010994520271196961, "learning_rate": 1.9135818746671587e-07, "loss": 0.0, "step": 24436 }, { "epoch": 23.497115384615384, "grad_norm": 0.0009948356309905648, "learning_rate": 1.9111573904209257e-07, "loss": 0.0, "step": 24437 }, { "epoch": 23.498076923076923, "grad_norm": 0.0013058874756097794, "learning_rate": 1.9087344282256336e-07, "loss": 0.0, "step": 24438 }, { "epoch": 23.49903846153846, "grad_norm": 0.00021529379591811448, "learning_rate": 1.9063129881188857e-07, "loss": 0.0, "step": 24439 }, { "epoch": 23.5, "grad_norm": 0.0010606200667098165, "learning_rate": 1.9038930701382742e-07, "loss": 0.0, "step": 24440 }, { "epoch": 23.50096153846154, "grad_norm": 0.0005883325356990099, "learning_rate": 1.9014746743213243e-07, "loss": 0.0, "step": 24441 }, { "epoch": 23.501923076923077, "grad_norm": 0.0007540449150837958, "learning_rate": 1.8990578007055726e-07, "loss": 0.0, "step": 24442 }, { "epoch": 23.502884615384616, "grad_norm": 0.0008595233084633946, "learning_rate": 1.896642449328523e-07, "loss": 0.0, "step": 24443 }, { "epoch": 23.503846153846155, "grad_norm": 0.00121604825835675, "learning_rate": 1.894228620227645e-07, "loss": 0.0, "step": 24444 }, { "epoch": 23.504807692307693, "grad_norm": 0.0015544927446171641, "learning_rate": 1.8918163134404087e-07, "loss": 0.0, "step": 24445 }, { "epoch": 23.505769230769232, "grad_norm": 0.008687018416821957, "learning_rate": 1.8894055290042402e-07, "loss": 0.0, "step": 24446 }, { "epoch": 23.50673076923077, "grad_norm": 0.0008441825048066676, "learning_rate": 1.8869962669565422e-07, "loss": 0.0, "step": 24447 }, { "epoch": 23.50769230769231, "grad_norm": 0.0004426236846484244, "learning_rate": 1.8845885273347187e-07, "loss": 0.0, "step": 24448 }, { "epoch": 23.508653846153845, "grad_norm": 0.0004200054390821606, "learning_rate": 1.8821823101760949e-07, "loss": 0.0, "step": 24449 }, { "epoch": 23.509615384615383, "grad_norm": 0.0008371265139430761, "learning_rate": 1.8797776155180414e-07, "loss": 0.0, "step": 24450 }, { "epoch": 23.510576923076922, "grad_norm": 0.0013364087790250778, "learning_rate": 1.8773744433978613e-07, "loss": 0.0, "step": 24451 }, { "epoch": 23.51153846153846, "grad_norm": 0.000639376463368535, "learning_rate": 1.874972793852836e-07, "loss": 0.0, "step": 24452 }, { "epoch": 23.5125, "grad_norm": 0.0014975655358284712, "learning_rate": 1.872572666920236e-07, "loss": 0.0, "step": 24453 }, { "epoch": 23.513461538461538, "grad_norm": 0.0013267132453620434, "learning_rate": 1.87017406263732e-07, "loss": 0.0, "step": 24454 }, { "epoch": 23.514423076923077, "grad_norm": 0.0005412772879935801, "learning_rate": 1.8677769810412914e-07, "loss": 0.0, "step": 24455 }, { "epoch": 23.515384615384615, "grad_norm": 0.0011189725482836366, "learning_rate": 1.8653814221693433e-07, "loss": 0.0, "step": 24456 }, { "epoch": 23.516346153846154, "grad_norm": 0.0016891201958060265, "learning_rate": 1.8629873860586567e-07, "loss": 0.0, "step": 24457 }, { "epoch": 23.517307692307693, "grad_norm": 0.000980846118181944, "learning_rate": 1.8605948727463797e-07, "loss": 0.0, "step": 24458 }, { "epoch": 23.51826923076923, "grad_norm": 0.0013103579403832555, "learning_rate": 1.858203882269638e-07, "loss": 0.0, "step": 24459 }, { "epoch": 23.51923076923077, "grad_norm": 0.0007935931789688766, "learning_rate": 1.8558144146655243e-07, "loss": 0.0, "step": 24460 }, { "epoch": 23.52019230769231, "grad_norm": 0.0008847347344271839, "learning_rate": 1.8534264699711091e-07, "loss": 0.0, "step": 24461 }, { "epoch": 23.521153846153847, "grad_norm": 0.00180497404653579, "learning_rate": 1.8510400482234848e-07, "loss": 0.0, "step": 24462 }, { "epoch": 23.522115384615386, "grad_norm": 0.0011346546234562993, "learning_rate": 1.8486551494596327e-07, "loss": 0.0, "step": 24463 }, { "epoch": 23.523076923076925, "grad_norm": 0.0006605334929190576, "learning_rate": 1.8462717737165904e-07, "loss": 0.0, "step": 24464 }, { "epoch": 23.52403846153846, "grad_norm": 0.000596382946241647, "learning_rate": 1.8438899210313388e-07, "loss": 0.0, "step": 24465 }, { "epoch": 23.525, "grad_norm": 0.0014554281951859593, "learning_rate": 1.8415095914408155e-07, "loss": 0.0, "step": 24466 }, { "epoch": 23.525961538461537, "grad_norm": 0.0006716529605910182, "learning_rate": 1.8391307849819794e-07, "loss": 0.0, "step": 24467 }, { "epoch": 23.526923076923076, "grad_norm": 0.0005832473980262876, "learning_rate": 1.836753501691746e-07, "loss": 0.0, "step": 24468 }, { "epoch": 23.527884615384615, "grad_norm": 0.00034568479168228805, "learning_rate": 1.834377741606974e-07, "loss": 0.0, "step": 24469 }, { "epoch": 23.528846153846153, "grad_norm": 0.0005477992235682905, "learning_rate": 1.832003504764568e-07, "loss": 0.0, "step": 24470 }, { "epoch": 23.529807692307692, "grad_norm": 0.0006153593421913683, "learning_rate": 1.829630791201331e-07, "loss": 0.0, "step": 24471 }, { "epoch": 23.53076923076923, "grad_norm": 0.0006942167528904974, "learning_rate": 1.82725960095409e-07, "loss": 0.0, "step": 24472 }, { "epoch": 23.53173076923077, "grad_norm": 0.0010892223799601197, "learning_rate": 1.8248899340596705e-07, "loss": 0.0, "step": 24473 }, { "epoch": 23.532692307692308, "grad_norm": 0.001477712532505393, "learning_rate": 1.8225217905547986e-07, "loss": 0.0, "step": 24474 }, { "epoch": 23.533653846153847, "grad_norm": 0.0007546971901319921, "learning_rate": 1.8201551704762453e-07, "loss": 0.0, "step": 24475 }, { "epoch": 23.534615384615385, "grad_norm": 0.0009642992517910898, "learning_rate": 1.8177900738607367e-07, "loss": 0.0, "step": 24476 }, { "epoch": 23.535576923076924, "grad_norm": 0.0005887593142688274, "learning_rate": 1.8154265007449657e-07, "loss": 0.0, "step": 24477 }, { "epoch": 23.536538461538463, "grad_norm": 0.001751379226334393, "learning_rate": 1.8130644511655914e-07, "loss": 0.0, "step": 24478 }, { "epoch": 23.5375, "grad_norm": 0.0009380218689329922, "learning_rate": 1.8107039251593072e-07, "loss": 0.0, "step": 24479 }, { "epoch": 23.53846153846154, "grad_norm": 0.0007994916522875428, "learning_rate": 1.8083449227626948e-07, "loss": 0.0, "step": 24480 }, { "epoch": 23.539423076923075, "grad_norm": 0.0017093332717195153, "learning_rate": 1.8059874440123804e-07, "loss": 0.0, "step": 24481 }, { "epoch": 23.540384615384614, "grad_norm": 0.00014714847202412784, "learning_rate": 1.803631488944957e-07, "loss": 0.0, "step": 24482 }, { "epoch": 23.541346153846153, "grad_norm": 0.0005483713466674089, "learning_rate": 1.8012770575969617e-07, "loss": 0.0, "step": 24483 }, { "epoch": 23.54230769230769, "grad_norm": 0.0012882620794698596, "learning_rate": 1.7989241500049438e-07, "loss": 0.0, "step": 24484 }, { "epoch": 23.54326923076923, "grad_norm": 0.0005597062408924103, "learning_rate": 1.7965727662054068e-07, "loss": 0.0, "step": 24485 }, { "epoch": 23.54423076923077, "grad_norm": 0.0008281558402813971, "learning_rate": 1.7942229062348216e-07, "loss": 0.0, "step": 24486 }, { "epoch": 23.545192307692307, "grad_norm": 0.0006447818013839424, "learning_rate": 1.7918745701296815e-07, "loss": 0.0, "step": 24487 }, { "epoch": 23.546153846153846, "grad_norm": 0.0009347227169200778, "learning_rate": 1.7895277579264015e-07, "loss": 0.0, "step": 24488 }, { "epoch": 23.547115384615385, "grad_norm": 0.0009978273883461952, "learning_rate": 1.787182469661408e-07, "loss": 0.0, "step": 24489 }, { "epoch": 23.548076923076923, "grad_norm": 0.0012269566068425775, "learning_rate": 1.7848387053711058e-07, "loss": 0.0, "step": 24490 }, { "epoch": 23.549038461538462, "grad_norm": 0.0006730422610417008, "learning_rate": 1.782496465091832e-07, "loss": 0.0, "step": 24491 }, { "epoch": 23.55, "grad_norm": 0.0006330435862764716, "learning_rate": 1.7801557488599462e-07, "loss": 0.0, "step": 24492 }, { "epoch": 23.55096153846154, "grad_norm": 0.0010925522074103355, "learning_rate": 1.7778165567117865e-07, "loss": 0.0, "step": 24493 }, { "epoch": 23.551923076923078, "grad_norm": 0.0012642315123230219, "learning_rate": 1.775478888683624e-07, "loss": 0.0, "step": 24494 }, { "epoch": 23.552884615384617, "grad_norm": 0.0018637756584212184, "learning_rate": 1.773142744811751e-07, "loss": 0.0, "step": 24495 }, { "epoch": 23.553846153846155, "grad_norm": 0.0015615297015756369, "learning_rate": 1.770808125132406e-07, "loss": 0.0, "step": 24496 }, { "epoch": 23.55480769230769, "grad_norm": 0.00272213201969862, "learning_rate": 1.7684750296818154e-07, "loss": 0.0, "step": 24497 }, { "epoch": 23.55576923076923, "grad_norm": 0.0009885207982733846, "learning_rate": 1.7661434584961944e-07, "loss": 0.0, "step": 24498 }, { "epoch": 23.556730769230768, "grad_norm": 0.0008269940735772252, "learning_rate": 1.7638134116117146e-07, "loss": 0.0, "step": 24499 }, { "epoch": 23.557692307692307, "grad_norm": 0.0009273397154174745, "learning_rate": 1.761484889064524e-07, "loss": 0.0, "step": 24500 }, { "epoch": 23.558653846153845, "grad_norm": 0.00028035679133608937, "learning_rate": 1.7591578908907724e-07, "loss": 0.0, "step": 24501 }, { "epoch": 23.559615384615384, "grad_norm": 0.0016556022455915809, "learning_rate": 1.7568324171265416e-07, "loss": 0.0, "step": 24502 }, { "epoch": 23.560576923076923, "grad_norm": 0.0013076121686026454, "learning_rate": 1.754508467807936e-07, "loss": 0.0, "step": 24503 }, { "epoch": 23.56153846153846, "grad_norm": 0.002484299475327134, "learning_rate": 1.752186042971016e-07, "loss": 0.0, "step": 24504 }, { "epoch": 23.5625, "grad_norm": 0.002141763921827078, "learning_rate": 1.7498651426518075e-07, "loss": 0.0, "step": 24505 }, { "epoch": 23.56346153846154, "grad_norm": 0.001162486500106752, "learning_rate": 1.7475457668863493e-07, "loss": 0.0, "step": 24506 }, { "epoch": 23.564423076923077, "grad_norm": 0.005819288548082113, "learning_rate": 1.7452279157105902e-07, "loss": 0.0, "step": 24507 }, { "epoch": 23.565384615384616, "grad_norm": 0.0019292301731184125, "learning_rate": 1.7429115891605343e-07, "loss": 0.0, "step": 24508 }, { "epoch": 23.566346153846155, "grad_norm": 0.0016549021238461137, "learning_rate": 1.7405967872721085e-07, "loss": 0.0, "step": 24509 }, { "epoch": 23.567307692307693, "grad_norm": 0.001227199682034552, "learning_rate": 1.738283510081229e-07, "loss": 0.0, "step": 24510 }, { "epoch": 23.568269230769232, "grad_norm": 0.0012170959962531924, "learning_rate": 1.735971757623789e-07, "loss": 0.0, "step": 24511 }, { "epoch": 23.56923076923077, "grad_norm": 0.0010297965491190553, "learning_rate": 1.7336615299356818e-07, "loss": 0.0, "step": 24512 }, { "epoch": 23.57019230769231, "grad_norm": 0.001141535583883524, "learning_rate": 1.7313528270527236e-07, "loss": 0.0, "step": 24513 }, { "epoch": 23.571153846153845, "grad_norm": 0.0011309145484119654, "learning_rate": 1.7290456490107522e-07, "loss": 0.0, "step": 24514 }, { "epoch": 23.572115384615383, "grad_norm": 0.0006592390709556639, "learning_rate": 1.7267399958455943e-07, "loss": 0.0, "step": 24515 }, { "epoch": 23.573076923076922, "grad_norm": 0.0004852705169469118, "learning_rate": 1.7244358675929774e-07, "loss": 0.0, "step": 24516 }, { "epoch": 23.57403846153846, "grad_norm": 0.0006985030486248434, "learning_rate": 1.7221332642886946e-07, "loss": 0.0, "step": 24517 }, { "epoch": 23.575, "grad_norm": 0.0013724431628361344, "learning_rate": 1.7198321859684618e-07, "loss": 0.0, "step": 24518 }, { "epoch": 23.575961538461538, "grad_norm": 0.001246943254955113, "learning_rate": 1.7175326326679732e-07, "loss": 0.0, "step": 24519 }, { "epoch": 23.576923076923077, "grad_norm": 0.0010626920266076922, "learning_rate": 1.7152346044229328e-07, "loss": 0.0, "step": 24520 }, { "epoch": 23.577884615384615, "grad_norm": 0.0006809629267081618, "learning_rate": 1.7129381012689906e-07, "loss": 0.0, "step": 24521 }, { "epoch": 23.578846153846154, "grad_norm": 0.0011087632738053799, "learning_rate": 1.7106431232417732e-07, "loss": 0.0, "step": 24522 }, { "epoch": 23.579807692307693, "grad_norm": 0.0007086285040713847, "learning_rate": 1.7083496703768965e-07, "loss": 0.0, "step": 24523 }, { "epoch": 23.58076923076923, "grad_norm": 0.0007946778205223382, "learning_rate": 1.7060577427099545e-07, "loss": 0.0, "step": 24524 }, { "epoch": 23.58173076923077, "grad_norm": 0.001197699923068285, "learning_rate": 1.7037673402765076e-07, "loss": 0.0, "step": 24525 }, { "epoch": 23.58269230769231, "grad_norm": 0.0005708649405278265, "learning_rate": 1.701478463112105e-07, "loss": 0.0, "step": 24526 }, { "epoch": 23.583653846153847, "grad_norm": 0.003045199206098914, "learning_rate": 1.699191111252241e-07, "loss": 0.0, "step": 24527 }, { "epoch": 23.584615384615386, "grad_norm": 0.00041901596705429256, "learning_rate": 1.6969052847324307e-07, "loss": 0.0, "step": 24528 }, { "epoch": 23.585576923076925, "grad_norm": 0.0017411003354936838, "learning_rate": 1.6946209835881244e-07, "loss": 0.0, "step": 24529 }, { "epoch": 23.58653846153846, "grad_norm": 0.0018479889258742332, "learning_rate": 1.6923382078547713e-07, "loss": 0.0, "step": 24530 }, { "epoch": 23.5875, "grad_norm": 0.0006156240124255419, "learning_rate": 1.6900569575678204e-07, "loss": 0.0, "step": 24531 }, { "epoch": 23.588461538461537, "grad_norm": 0.0009124547941610217, "learning_rate": 1.687777232762633e-07, "loss": 0.0, "step": 24532 }, { "epoch": 23.589423076923076, "grad_norm": 0.0012266951380297542, "learning_rate": 1.6854990334746025e-07, "loss": 0.0, "step": 24533 }, { "epoch": 23.590384615384615, "grad_norm": 0.0009757557418197393, "learning_rate": 1.6832223597390895e-07, "loss": 0.0, "step": 24534 }, { "epoch": 23.591346153846153, "grad_norm": 0.00034051109105348587, "learning_rate": 1.6809472115913993e-07, "loss": 0.0, "step": 24535 }, { "epoch": 23.592307692307692, "grad_norm": 0.0006001616129651666, "learning_rate": 1.678673589066837e-07, "loss": 0.0, "step": 24536 }, { "epoch": 23.59326923076923, "grad_norm": 0.0011523697758093476, "learning_rate": 1.6764014922007078e-07, "loss": 0.0, "step": 24537 }, { "epoch": 23.59423076923077, "grad_norm": 0.0008606869378127158, "learning_rate": 1.6741309210282496e-07, "loss": 0.0, "step": 24538 }, { "epoch": 23.595192307692308, "grad_norm": 0.002346364315599203, "learning_rate": 1.6718618755846906e-07, "loss": 0.0, "step": 24539 }, { "epoch": 23.596153846153847, "grad_norm": 0.001041893963702023, "learning_rate": 1.6695943559052463e-07, "loss": 0.0, "step": 24540 }, { "epoch": 23.597115384615385, "grad_norm": 0.0013555580517277122, "learning_rate": 1.6673283620251002e-07, "loss": 0.0, "step": 24541 }, { "epoch": 23.598076923076924, "grad_norm": 0.0007948283455334604, "learning_rate": 1.6650638939794127e-07, "loss": 0.0, "step": 24542 }, { "epoch": 23.599038461538463, "grad_norm": 0.0013303799787536263, "learning_rate": 1.6628009518033229e-07, "loss": 0.0, "step": 24543 }, { "epoch": 23.6, "grad_norm": 0.0006710623856633902, "learning_rate": 1.6605395355319576e-07, "loss": 0.0, "step": 24544 }, { "epoch": 23.60096153846154, "grad_norm": 0.0016755216056481004, "learning_rate": 1.6582796452003892e-07, "loss": 0.0, "step": 24545 }, { "epoch": 23.601923076923075, "grad_norm": 0.0008972405339591205, "learning_rate": 1.6560212808436892e-07, "loss": 0.0, "step": 24546 }, { "epoch": 23.602884615384614, "grad_norm": 0.0004838956519961357, "learning_rate": 1.6537644424968968e-07, "loss": 0.0, "step": 24547 }, { "epoch": 23.603846153846153, "grad_norm": 0.00045304515515454113, "learning_rate": 1.6515091301950502e-07, "loss": 0.0, "step": 24548 }, { "epoch": 23.60480769230769, "grad_norm": 0.0015950752422213554, "learning_rate": 1.649255343973122e-07, "loss": 0.0, "step": 24549 }, { "epoch": 23.60576923076923, "grad_norm": 0.0027508495841175318, "learning_rate": 1.6470030838661055e-07, "loss": 0.0, "step": 24550 }, { "epoch": 23.60673076923077, "grad_norm": 0.0010185169521719217, "learning_rate": 1.6447523499089403e-07, "loss": 0.0, "step": 24551 }, { "epoch": 23.607692307692307, "grad_norm": 0.0004912382573820651, "learning_rate": 1.642503142136531e-07, "loss": 0.0, "step": 24552 }, { "epoch": 23.608653846153846, "grad_norm": 0.000423657736973837, "learning_rate": 1.6402554605838173e-07, "loss": 0.0, "step": 24553 }, { "epoch": 23.609615384615385, "grad_norm": 0.0008053199853748083, "learning_rate": 1.6380093052856482e-07, "loss": 0.0, "step": 24554 }, { "epoch": 23.610576923076923, "grad_norm": 0.0009758995729498565, "learning_rate": 1.6357646762768742e-07, "loss": 0.0, "step": 24555 }, { "epoch": 23.611538461538462, "grad_norm": 0.0010318330023437738, "learning_rate": 1.633521573592356e-07, "loss": 0.0, "step": 24556 }, { "epoch": 23.6125, "grad_norm": 0.0013291846262291074, "learning_rate": 1.6312799972668659e-07, "loss": 0.0, "step": 24557 }, { "epoch": 23.61346153846154, "grad_norm": 0.0014817035989835858, "learning_rate": 1.6290399473352092e-07, "loss": 0.0, "step": 24558 }, { "epoch": 23.614423076923078, "grad_norm": 0.00033333926694467664, "learning_rate": 1.6268014238321474e-07, "loss": 0.0, "step": 24559 }, { "epoch": 23.615384615384617, "grad_norm": 0.0015113921836018562, "learning_rate": 1.6245644267923855e-07, "loss": 0.0, "step": 24560 }, { "epoch": 23.616346153846155, "grad_norm": 0.0011608533095568419, "learning_rate": 1.622328956250674e-07, "loss": 0.0, "step": 24561 }, { "epoch": 23.61730769230769, "grad_norm": 0.0012953259283676744, "learning_rate": 1.6200950122416626e-07, "loss": 0.0, "step": 24562 }, { "epoch": 23.61826923076923, "grad_norm": 0.000905363354831934, "learning_rate": 1.617862594800046e-07, "loss": 0.0, "step": 24563 }, { "epoch": 23.619230769230768, "grad_norm": 0.0008187139173969626, "learning_rate": 1.615631703960463e-07, "loss": 0.0, "step": 24564 }, { "epoch": 23.620192307692307, "grad_norm": 0.0008175014518201351, "learning_rate": 1.6134023397575084e-07, "loss": 0.0, "step": 24565 }, { "epoch": 23.621153846153845, "grad_norm": 0.0007683509029448032, "learning_rate": 1.6111745022257873e-07, "loss": 0.0, "step": 24566 }, { "epoch": 23.622115384615384, "grad_norm": 0.0014734955038875341, "learning_rate": 1.6089481913998727e-07, "loss": 0.0, "step": 24567 }, { "epoch": 23.623076923076923, "grad_norm": 0.00029751850524917245, "learning_rate": 1.6067234073143035e-07, "loss": 0.0, "step": 24568 }, { "epoch": 23.62403846153846, "grad_norm": 0.000846671115141362, "learning_rate": 1.6045001500036072e-07, "loss": 0.0, "step": 24569 }, { "epoch": 23.625, "grad_norm": 0.0009228429989889264, "learning_rate": 1.6022784195023012e-07, "loss": 0.0, "step": 24570 }, { "epoch": 23.62596153846154, "grad_norm": 0.0014918340602889657, "learning_rate": 1.600058215844813e-07, "loss": 0.0, "step": 24571 }, { "epoch": 23.626923076923077, "grad_norm": 0.0006709630833938718, "learning_rate": 1.5978395390656376e-07, "loss": 0.0, "step": 24572 }, { "epoch": 23.627884615384616, "grad_norm": 0.0006220786017365754, "learning_rate": 1.5956223891991807e-07, "loss": 0.0, "step": 24573 }, { "epoch": 23.628846153846155, "grad_norm": 0.0006875735707581043, "learning_rate": 1.5934067662798481e-07, "loss": 0.0, "step": 24574 }, { "epoch": 23.629807692307693, "grad_norm": 0.0012837698450312018, "learning_rate": 1.5911926703420343e-07, "loss": 0.0, "step": 24575 }, { "epoch": 23.630769230769232, "grad_norm": 0.0010152059840038419, "learning_rate": 1.5889801014200678e-07, "loss": 0.0, "step": 24576 }, { "epoch": 23.63173076923077, "grad_norm": 0.0005698170280084014, "learning_rate": 1.5867690595482987e-07, "loss": 0.0, "step": 24577 }, { "epoch": 23.63269230769231, "grad_norm": 0.0024816798977553844, "learning_rate": 1.584559544761055e-07, "loss": 0.0, "step": 24578 }, { "epoch": 23.633653846153845, "grad_norm": 0.001832805573940277, "learning_rate": 1.5823515570925763e-07, "loss": 0.0, "step": 24579 }, { "epoch": 23.634615384615383, "grad_norm": 0.0011669370578601956, "learning_rate": 1.580145096577157e-07, "loss": 0.0, "step": 24580 }, { "epoch": 23.635576923076922, "grad_norm": 0.0013423340860754251, "learning_rate": 1.5779401632490365e-07, "loss": 0.0, "step": 24581 }, { "epoch": 23.63653846153846, "grad_norm": 0.001497936900705099, "learning_rate": 1.57573675714241e-07, "loss": 0.0, "step": 24582 }, { "epoch": 23.6375, "grad_norm": 0.0007442407077178359, "learning_rate": 1.5735348782914717e-07, "loss": 0.0, "step": 24583 }, { "epoch": 23.638461538461538, "grad_norm": 0.0006401787977665663, "learning_rate": 1.5713345267304058e-07, "loss": 0.0, "step": 24584 }, { "epoch": 23.639423076923077, "grad_norm": 0.002523222006857395, "learning_rate": 1.5691357024933407e-07, "loss": 0.0, "step": 24585 }, { "epoch": 23.640384615384615, "grad_norm": 0.0037358494009822607, "learning_rate": 1.5669384056144043e-07, "loss": 0.0, "step": 24586 }, { "epoch": 23.641346153846154, "grad_norm": 0.0007757263374514878, "learning_rate": 1.5647426361276696e-07, "loss": 0.0, "step": 24587 }, { "epoch": 23.642307692307693, "grad_norm": 0.005309184081852436, "learning_rate": 1.5625483940672314e-07, "loss": 0.0001, "step": 24588 }, { "epoch": 23.64326923076923, "grad_norm": 0.000467432924779132, "learning_rate": 1.5603556794671403e-07, "loss": 0.0, "step": 24589 }, { "epoch": 23.64423076923077, "grad_norm": 0.0006727034924551845, "learning_rate": 1.5581644923614136e-07, "loss": 0.0, "step": 24590 }, { "epoch": 23.64519230769231, "grad_norm": 0.0015057494165375829, "learning_rate": 1.5559748327840352e-07, "loss": 0.0, "step": 24591 }, { "epoch": 23.646153846153847, "grad_norm": 0.001288359286263585, "learning_rate": 1.5537867007690111e-07, "loss": 0.0, "step": 24592 }, { "epoch": 23.647115384615386, "grad_norm": 0.0016300330171361566, "learning_rate": 1.5516000963502698e-07, "loss": 0.0, "step": 24593 }, { "epoch": 23.648076923076925, "grad_norm": 0.00046605258830823004, "learning_rate": 1.549415019561762e-07, "loss": 0.0, "step": 24594 }, { "epoch": 23.64903846153846, "grad_norm": 0.003474899334833026, "learning_rate": 1.5472314704373825e-07, "loss": 0.0, "step": 24595 }, { "epoch": 23.65, "grad_norm": 0.00025658769300207496, "learning_rate": 1.5450494490110158e-07, "loss": 0.0, "step": 24596 }, { "epoch": 23.650961538461537, "grad_norm": 0.0005531220813281834, "learning_rate": 1.5428689553165233e-07, "loss": 0.0, "step": 24597 }, { "epoch": 23.651923076923076, "grad_norm": 0.0007460371707566082, "learning_rate": 1.5406899893877225e-07, "loss": 0.0, "step": 24598 }, { "epoch": 23.652884615384615, "grad_norm": 0.0016887594247236848, "learning_rate": 1.538512551258442e-07, "loss": 0.0, "step": 24599 }, { "epoch": 23.653846153846153, "grad_norm": 0.0007060746429488063, "learning_rate": 1.536336640962477e-07, "loss": 0.0, "step": 24600 }, { "epoch": 23.654807692307692, "grad_norm": 0.00035396727616898715, "learning_rate": 1.5341622585335668e-07, "loss": 0.0, "step": 24601 }, { "epoch": 23.65576923076923, "grad_norm": 0.0011085611768066883, "learning_rate": 1.531989404005474e-07, "loss": 0.0, "step": 24602 }, { "epoch": 23.65673076923077, "grad_norm": 0.0020190877839922905, "learning_rate": 1.5298180774119044e-07, "loss": 0.0, "step": 24603 }, { "epoch": 23.657692307692308, "grad_norm": 0.0010079647181555629, "learning_rate": 1.5276482787865532e-07, "loss": 0.0, "step": 24604 }, { "epoch": 23.658653846153847, "grad_norm": 0.0007025427184998989, "learning_rate": 1.5254800081630828e-07, "loss": 0.0, "step": 24605 }, { "epoch": 23.659615384615385, "grad_norm": 0.00034566904651001096, "learning_rate": 1.5233132655751437e-07, "loss": 0.0, "step": 24606 }, { "epoch": 23.660576923076924, "grad_norm": 0.0010692290961742401, "learning_rate": 1.5211480510563537e-07, "loss": 0.0, "step": 24607 }, { "epoch": 23.661538461538463, "grad_norm": 0.0013184602139517665, "learning_rate": 1.5189843646403302e-07, "loss": 0.0, "step": 24608 }, { "epoch": 23.6625, "grad_norm": 0.0005063265562057495, "learning_rate": 1.516822206360613e-07, "loss": 0.0, "step": 24609 }, { "epoch": 23.66346153846154, "grad_norm": 0.0006374517688527703, "learning_rate": 1.5146615762507755e-07, "loss": 0.0, "step": 24610 }, { "epoch": 23.664423076923075, "grad_norm": 0.0005902425618842244, "learning_rate": 1.5125024743443463e-07, "loss": 0.0, "step": 24611 }, { "epoch": 23.665384615384614, "grad_norm": 0.0006875777035020292, "learning_rate": 1.5103449006748095e-07, "loss": 0.0, "step": 24612 }, { "epoch": 23.666346153846153, "grad_norm": 0.00046890939120203257, "learning_rate": 1.5081888552756608e-07, "loss": 0.0, "step": 24613 }, { "epoch": 23.66730769230769, "grad_norm": 0.003935075830668211, "learning_rate": 1.5060343381803623e-07, "loss": 0.0, "step": 24614 }, { "epoch": 23.66826923076923, "grad_norm": 0.0011236949358135462, "learning_rate": 1.5038813494223203e-07, "loss": 0.0, "step": 24615 }, { "epoch": 23.66923076923077, "grad_norm": 0.0020498258527368307, "learning_rate": 1.5017298890349642e-07, "loss": 0.0, "step": 24616 }, { "epoch": 23.670192307692307, "grad_norm": 0.0009580551413819194, "learning_rate": 1.4995799570516666e-07, "loss": 0.0, "step": 24617 }, { "epoch": 23.671153846153846, "grad_norm": 0.00025632971664890647, "learning_rate": 1.4974315535058016e-07, "loss": 0.0, "step": 24618 }, { "epoch": 23.672115384615385, "grad_norm": 0.0006384178996086121, "learning_rate": 1.4952846784306864e-07, "loss": 0.0, "step": 24619 }, { "epoch": 23.673076923076923, "grad_norm": 0.0015739371301606297, "learning_rate": 1.4931393318596498e-07, "loss": 0.0, "step": 24620 }, { "epoch": 23.674038461538462, "grad_norm": 0.0007038211333565414, "learning_rate": 1.4909955138259768e-07, "loss": 0.0, "step": 24621 }, { "epoch": 23.675, "grad_norm": 0.0016445553628727794, "learning_rate": 1.4888532243629405e-07, "loss": 0.0, "step": 24622 }, { "epoch": 23.67596153846154, "grad_norm": 0.0009719811496324837, "learning_rate": 1.4867124635037588e-07, "loss": 0.0, "step": 24623 }, { "epoch": 23.676923076923078, "grad_norm": 0.0011328438995406032, "learning_rate": 1.4845732312816717e-07, "loss": 0.0, "step": 24624 }, { "epoch": 23.677884615384617, "grad_norm": 0.0018879565177485347, "learning_rate": 1.4824355277298753e-07, "loss": 0.0, "step": 24625 }, { "epoch": 23.678846153846155, "grad_norm": 0.0020648292265832424, "learning_rate": 1.4802993528815313e-07, "loss": 0.0, "step": 24626 }, { "epoch": 23.67980769230769, "grad_norm": 0.0026570982299745083, "learning_rate": 1.4781647067697913e-07, "loss": 0.0, "step": 24627 }, { "epoch": 23.68076923076923, "grad_norm": 0.0008248594822362065, "learning_rate": 1.4760315894277734e-07, "loss": 0.0, "step": 24628 }, { "epoch": 23.681730769230768, "grad_norm": 0.001551046734675765, "learning_rate": 1.4739000008885839e-07, "loss": 0.0, "step": 24629 }, { "epoch": 23.682692307692307, "grad_norm": 0.0017375440802425146, "learning_rate": 1.4717699411852971e-07, "loss": 0.0, "step": 24630 }, { "epoch": 23.683653846153845, "grad_norm": 0.0013542319647967815, "learning_rate": 1.469641410350964e-07, "loss": 0.0, "step": 24631 }, { "epoch": 23.684615384615384, "grad_norm": 0.005084097385406494, "learning_rate": 1.4675144084186022e-07, "loss": 0.0, "step": 24632 }, { "epoch": 23.685576923076923, "grad_norm": 0.00041854067239910364, "learning_rate": 1.4653889354212414e-07, "loss": 0.0, "step": 24633 }, { "epoch": 23.68653846153846, "grad_norm": 0.0019077653996646404, "learning_rate": 1.463264991391844e-07, "loss": 0.0, "step": 24634 }, { "epoch": 23.6875, "grad_norm": 0.004362364299595356, "learning_rate": 1.4611425763633614e-07, "loss": 0.0, "step": 24635 }, { "epoch": 23.68846153846154, "grad_norm": 0.001323956181295216, "learning_rate": 1.459021690368756e-07, "loss": 0.0, "step": 24636 }, { "epoch": 23.689423076923077, "grad_norm": 0.0020607903134077787, "learning_rate": 1.4569023334409128e-07, "loss": 0.0, "step": 24637 }, { "epoch": 23.690384615384616, "grad_norm": 0.0009298833319917321, "learning_rate": 1.4547845056127163e-07, "loss": 0.0, "step": 24638 }, { "epoch": 23.691346153846155, "grad_norm": 0.00019523120136000216, "learning_rate": 1.4526682069170517e-07, "loss": 0.0, "step": 24639 }, { "epoch": 23.692307692307693, "grad_norm": 0.0011527040041983128, "learning_rate": 1.4505534373867258e-07, "loss": 0.0, "step": 24640 }, { "epoch": 23.693269230769232, "grad_norm": 0.004768941551446915, "learning_rate": 1.4484401970545903e-07, "loss": 0.0, "step": 24641 }, { "epoch": 23.69423076923077, "grad_norm": 0.0007964380783960223, "learning_rate": 1.4463284859534076e-07, "loss": 0.0, "step": 24642 }, { "epoch": 23.69519230769231, "grad_norm": 0.0016038785688579082, "learning_rate": 1.4442183041159519e-07, "loss": 0.0, "step": 24643 }, { "epoch": 23.696153846153845, "grad_norm": 0.0006693075411021709, "learning_rate": 1.4421096515749855e-07, "loss": 0.0, "step": 24644 }, { "epoch": 23.697115384615383, "grad_norm": 0.0012032114900648594, "learning_rate": 1.4400025283631936e-07, "loss": 0.0, "step": 24645 }, { "epoch": 23.698076923076922, "grad_norm": 0.0003132821584586054, "learning_rate": 1.4378969345133054e-07, "loss": 0.0, "step": 24646 }, { "epoch": 23.69903846153846, "grad_norm": 0.0012706228299066424, "learning_rate": 1.4357928700579725e-07, "loss": 0.0, "step": 24647 }, { "epoch": 23.7, "grad_norm": 0.001468145870603621, "learning_rate": 1.433690335029858e-07, "loss": 0.0, "step": 24648 }, { "epoch": 23.700961538461538, "grad_norm": 0.0010184188140556216, "learning_rate": 1.4315893294615802e-07, "loss": 0.0, "step": 24649 }, { "epoch": 23.701923076923077, "grad_norm": 0.0014368364354595542, "learning_rate": 1.429489853385746e-07, "loss": 0.0, "step": 24650 }, { "epoch": 23.702884615384615, "grad_norm": 0.0009088683291338384, "learning_rate": 1.4273919068349184e-07, "loss": 0.0, "step": 24651 }, { "epoch": 23.703846153846154, "grad_norm": 0.0018711546435952187, "learning_rate": 1.4252954898416604e-07, "loss": 0.0, "step": 24652 }, { "epoch": 23.704807692307693, "grad_norm": 0.0010127425193786621, "learning_rate": 1.4232006024385126e-07, "loss": 0.0, "step": 24653 }, { "epoch": 23.70576923076923, "grad_norm": 0.0004365371714811772, "learning_rate": 1.42110724465796e-07, "loss": 0.0, "step": 24654 }, { "epoch": 23.70673076923077, "grad_norm": 0.0016112430021166801, "learning_rate": 1.41901541653251e-07, "loss": 0.0, "step": 24655 }, { "epoch": 23.70769230769231, "grad_norm": 0.0021011102944612503, "learning_rate": 1.416925118094603e-07, "loss": 0.0, "step": 24656 }, { "epoch": 23.708653846153847, "grad_norm": 0.0008411557064391673, "learning_rate": 1.4148363493766803e-07, "loss": 0.0, "step": 24657 }, { "epoch": 23.709615384615386, "grad_norm": 0.00152454839553684, "learning_rate": 1.41274911041116e-07, "loss": 0.0, "step": 24658 }, { "epoch": 23.710576923076925, "grad_norm": 0.000653148046694696, "learning_rate": 1.4106634012304166e-07, "loss": 0.0, "step": 24659 }, { "epoch": 23.71153846153846, "grad_norm": 0.0010173940099775791, "learning_rate": 1.4085792218668126e-07, "loss": 0.0, "step": 24660 }, { "epoch": 23.7125, "grad_norm": 0.0022495046723634005, "learning_rate": 1.4064965723527114e-07, "loss": 0.0, "step": 24661 }, { "epoch": 23.713461538461537, "grad_norm": 0.0004703104496002197, "learning_rate": 1.404415452720409e-07, "loss": 0.0, "step": 24662 }, { "epoch": 23.714423076923076, "grad_norm": 0.0005350032006390393, "learning_rate": 1.402335863002202e-07, "loss": 0.0, "step": 24663 }, { "epoch": 23.715384615384615, "grad_norm": 0.00029091566102579236, "learning_rate": 1.4002578032303648e-07, "loss": 0.0, "step": 24664 }, { "epoch": 23.716346153846153, "grad_norm": 0.0005365776596590877, "learning_rate": 1.398181273437138e-07, "loss": 0.0, "step": 24665 }, { "epoch": 23.717307692307692, "grad_norm": 0.0012820608681067824, "learning_rate": 1.3961062736547514e-07, "loss": 0.0, "step": 24666 }, { "epoch": 23.71826923076923, "grad_norm": 0.0009963741758838296, "learning_rate": 1.3940328039153905e-07, "loss": 0.0, "step": 24667 }, { "epoch": 23.71923076923077, "grad_norm": 0.0011249419767409563, "learning_rate": 1.3919608642512294e-07, "loss": 0.0, "step": 24668 }, { "epoch": 23.720192307692308, "grad_norm": 0.0003581350902095437, "learning_rate": 1.3898904546944425e-07, "loss": 0.0, "step": 24669 }, { "epoch": 23.721153846153847, "grad_norm": 0.0011767827672883868, "learning_rate": 1.3878215752771264e-07, "loss": 0.0, "step": 24670 }, { "epoch": 23.722115384615385, "grad_norm": 0.0014800874050706625, "learning_rate": 1.3857542260313884e-07, "loss": 0.0, "step": 24671 }, { "epoch": 23.723076923076924, "grad_norm": 0.0015053640818223357, "learning_rate": 1.3836884069893365e-07, "loss": 0.0, "step": 24672 }, { "epoch": 23.724038461538463, "grad_norm": 0.0004660409758798778, "learning_rate": 1.3816241181829891e-07, "loss": 0.0, "step": 24673 }, { "epoch": 23.725, "grad_norm": 0.0018551986431702971, "learning_rate": 1.379561359644388e-07, "loss": 0.0, "step": 24674 }, { "epoch": 23.72596153846154, "grad_norm": 0.0004467795370146632, "learning_rate": 1.3775001314055624e-07, "loss": 0.0, "step": 24675 }, { "epoch": 23.726923076923075, "grad_norm": 0.00041060143848881125, "learning_rate": 1.375440433498476e-07, "loss": 0.0, "step": 24676 }, { "epoch": 23.727884615384614, "grad_norm": 0.0006096767028793693, "learning_rate": 1.3733822659551033e-07, "loss": 0.0, "step": 24677 }, { "epoch": 23.728846153846153, "grad_norm": 0.0007396088913083076, "learning_rate": 1.3713256288073518e-07, "loss": 0.0, "step": 24678 }, { "epoch": 23.72980769230769, "grad_norm": 0.0009344237041659653, "learning_rate": 1.3692705220871627e-07, "loss": 0.0, "step": 24679 }, { "epoch": 23.73076923076923, "grad_norm": 0.0005781294894404709, "learning_rate": 1.3672169458264329e-07, "loss": 0.0, "step": 24680 }, { "epoch": 23.73173076923077, "grad_norm": 0.00045872756163589656, "learning_rate": 1.3651649000569922e-07, "loss": 0.0, "step": 24681 }, { "epoch": 23.732692307692307, "grad_norm": 0.0011787954717874527, "learning_rate": 1.3631143848107152e-07, "loss": 0.0, "step": 24682 }, { "epoch": 23.733653846153846, "grad_norm": 0.0015364737482741475, "learning_rate": 1.361065400119399e-07, "loss": 0.0, "step": 24683 }, { "epoch": 23.734615384615385, "grad_norm": 0.000948425498791039, "learning_rate": 1.3590179460148513e-07, "loss": 0.0, "step": 24684 }, { "epoch": 23.735576923076923, "grad_norm": 0.0007193643832579255, "learning_rate": 1.3569720225288353e-07, "loss": 0.0, "step": 24685 }, { "epoch": 23.736538461538462, "grad_norm": 0.0015757004730403423, "learning_rate": 1.3549276296931035e-07, "loss": 0.0, "step": 24686 }, { "epoch": 23.7375, "grad_norm": 0.0032609037589281797, "learning_rate": 1.3528847675393642e-07, "loss": 0.0, "step": 24687 }, { "epoch": 23.73846153846154, "grad_norm": 0.006135883741080761, "learning_rate": 1.350843436099347e-07, "loss": 0.0001, "step": 24688 }, { "epoch": 23.739423076923078, "grad_norm": 0.0009599241893738508, "learning_rate": 1.348803635404683e-07, "loss": 0.0, "step": 24689 }, { "epoch": 23.740384615384617, "grad_norm": 0.00022814288968220353, "learning_rate": 1.3467653654870682e-07, "loss": 0.0, "step": 24690 }, { "epoch": 23.741346153846155, "grad_norm": 0.0006802757852710783, "learning_rate": 1.3447286263781e-07, "loss": 0.0, "step": 24691 }, { "epoch": 23.74230769230769, "grad_norm": 0.0004620281979441643, "learning_rate": 1.3426934181093975e-07, "loss": 0.0, "step": 24692 }, { "epoch": 23.74326923076923, "grad_norm": 0.001077083870768547, "learning_rate": 1.3406597407125354e-07, "loss": 0.0, "step": 24693 }, { "epoch": 23.744230769230768, "grad_norm": 0.003110922407358885, "learning_rate": 1.3386275942190773e-07, "loss": 0.0, "step": 24694 }, { "epoch": 23.745192307692307, "grad_norm": 0.0009057081770151854, "learning_rate": 1.3365969786605538e-07, "loss": 0.0, "step": 24695 }, { "epoch": 23.746153846153845, "grad_norm": 0.0012170313857495785, "learning_rate": 1.3345678940684615e-07, "loss": 0.0, "step": 24696 }, { "epoch": 23.747115384615384, "grad_norm": 0.0007501098443754017, "learning_rate": 1.3325403404743088e-07, "loss": 0.0, "step": 24697 }, { "epoch": 23.748076923076923, "grad_norm": 0.0009192729485221207, "learning_rate": 1.3305143179095258e-07, "loss": 0.0, "step": 24698 }, { "epoch": 23.74903846153846, "grad_norm": 0.000813625636510551, "learning_rate": 1.3284898264055878e-07, "loss": 0.0, "step": 24699 }, { "epoch": 23.75, "grad_norm": 0.0007483198423869908, "learning_rate": 1.3264668659938805e-07, "loss": 0.0, "step": 24700 }, { "epoch": 23.75096153846154, "grad_norm": 0.0007060467614792287, "learning_rate": 1.3244454367058013e-07, "loss": 0.0, "step": 24701 }, { "epoch": 23.751923076923077, "grad_norm": 0.0007533683674409986, "learning_rate": 1.3224255385727357e-07, "loss": 0.0, "step": 24702 }, { "epoch": 23.752884615384616, "grad_norm": 0.0001637437380850315, "learning_rate": 1.3204071716259924e-07, "loss": 0.0, "step": 24703 }, { "epoch": 23.753846153846155, "grad_norm": 0.0006881874869577587, "learning_rate": 1.318390335896913e-07, "loss": 0.0, "step": 24704 }, { "epoch": 23.754807692307693, "grad_norm": 0.0004436143208295107, "learning_rate": 1.3163750314167944e-07, "loss": 0.0, "step": 24705 }, { "epoch": 23.755769230769232, "grad_norm": 0.0009863654850050807, "learning_rate": 1.3143612582169008e-07, "loss": 0.0, "step": 24706 }, { "epoch": 23.75673076923077, "grad_norm": 0.0008826054981909692, "learning_rate": 1.3123490163284736e-07, "loss": 0.0, "step": 24707 }, { "epoch": 23.75769230769231, "grad_norm": 0.0008779022609815001, "learning_rate": 1.3103383057827545e-07, "loss": 0.0, "step": 24708 }, { "epoch": 23.758653846153845, "grad_norm": 0.0006231105653569102, "learning_rate": 1.30832912661093e-07, "loss": 0.0, "step": 24709 }, { "epoch": 23.759615384615383, "grad_norm": 0.0011709339451044798, "learning_rate": 1.3063214788441858e-07, "loss": 0.0, "step": 24710 }, { "epoch": 23.760576923076922, "grad_norm": 0.0005697167944163084, "learning_rate": 1.304315362513664e-07, "loss": 0.0, "step": 24711 }, { "epoch": 23.76153846153846, "grad_norm": 0.0009233115124516189, "learning_rate": 1.3023107776505062e-07, "loss": 0.0, "step": 24712 }, { "epoch": 23.7625, "grad_norm": 0.0004412096459418535, "learning_rate": 1.3003077242858096e-07, "loss": 0.0, "step": 24713 }, { "epoch": 23.763461538461538, "grad_norm": 0.0007650231709703803, "learning_rate": 1.29830620245065e-07, "loss": 0.0, "step": 24714 }, { "epoch": 23.764423076923077, "grad_norm": 0.0005520391277968884, "learning_rate": 1.2963062121760904e-07, "loss": 0.0, "step": 24715 }, { "epoch": 23.765384615384615, "grad_norm": 0.003287507686764002, "learning_rate": 1.2943077534931736e-07, "loss": 0.0, "step": 24716 }, { "epoch": 23.766346153846154, "grad_norm": 0.000973402988165617, "learning_rate": 1.2923108264328966e-07, "loss": 0.0, "step": 24717 }, { "epoch": 23.767307692307693, "grad_norm": 0.0007560790982097387, "learning_rate": 1.2903154310262566e-07, "loss": 0.0, "step": 24718 }, { "epoch": 23.76826923076923, "grad_norm": 0.0007998995715752244, "learning_rate": 1.2883215673042182e-07, "loss": 0.0, "step": 24719 }, { "epoch": 23.76923076923077, "grad_norm": 0.000570919131860137, "learning_rate": 1.2863292352977008e-07, "loss": 0.0, "step": 24720 }, { "epoch": 23.77019230769231, "grad_norm": 0.0020922922994941473, "learning_rate": 1.2843384350376465e-07, "loss": 0.0, "step": 24721 }, { "epoch": 23.771153846153847, "grad_norm": 0.0006600109627470374, "learning_rate": 1.2823491665549193e-07, "loss": 0.0, "step": 24722 }, { "epoch": 23.772115384615386, "grad_norm": 0.0013253774959594011, "learning_rate": 1.2803614298804057e-07, "loss": 0.0, "step": 24723 }, { "epoch": 23.773076923076925, "grad_norm": 0.0023540942929685116, "learning_rate": 1.2783752250449478e-07, "loss": 0.0, "step": 24724 }, { "epoch": 23.77403846153846, "grad_norm": 0.0018563958583399653, "learning_rate": 1.276390552079354e-07, "loss": 0.0, "step": 24725 }, { "epoch": 23.775, "grad_norm": 0.0012094642734155059, "learning_rate": 1.2744074110144334e-07, "loss": 0.0, "step": 24726 }, { "epoch": 23.775961538461537, "grad_norm": 0.0006086014909669757, "learning_rate": 1.2724258018809498e-07, "loss": 0.0, "step": 24727 }, { "epoch": 23.776923076923076, "grad_norm": 0.0017062483821064234, "learning_rate": 1.270445724709657e-07, "loss": 0.0, "step": 24728 }, { "epoch": 23.777884615384615, "grad_norm": 0.0011256622383370996, "learning_rate": 1.2684671795312854e-07, "loss": 0.0, "step": 24729 }, { "epoch": 23.778846153846153, "grad_norm": 0.0064164274372160435, "learning_rate": 1.2664901663765216e-07, "loss": 0.0001, "step": 24730 }, { "epoch": 23.779807692307692, "grad_norm": 0.000971352041233331, "learning_rate": 1.2645146852760526e-07, "loss": 0.0, "step": 24731 }, { "epoch": 23.78076923076923, "grad_norm": 0.00032994133653119206, "learning_rate": 1.2625407362605424e-07, "loss": 0.0, "step": 24732 }, { "epoch": 23.78173076923077, "grad_norm": 0.0018405717564746737, "learning_rate": 1.260568319360589e-07, "loss": 0.0, "step": 24733 }, { "epoch": 23.782692307692308, "grad_norm": 0.0006503153708763421, "learning_rate": 1.2585974346068342e-07, "loss": 0.0, "step": 24734 }, { "epoch": 23.783653846153847, "grad_norm": 0.00029504121630452573, "learning_rate": 1.2566280820298427e-07, "loss": 0.0, "step": 24735 }, { "epoch": 23.784615384615385, "grad_norm": 0.0019032855052500963, "learning_rate": 1.2546602616601677e-07, "loss": 0.0, "step": 24736 }, { "epoch": 23.785576923076924, "grad_norm": 0.0005913228960707784, "learning_rate": 1.2526939735283517e-07, "loss": 0.0, "step": 24737 }, { "epoch": 23.786538461538463, "grad_norm": 0.0010092726442962885, "learning_rate": 1.2507292176649145e-07, "loss": 0.0, "step": 24738 }, { "epoch": 23.7875, "grad_norm": 0.0005622532917186618, "learning_rate": 1.2487659941003204e-07, "loss": 0.0, "step": 24739 }, { "epoch": 23.78846153846154, "grad_norm": 0.0005312498542480171, "learning_rate": 1.2468043028650567e-07, "loss": 0.0, "step": 24740 }, { "epoch": 23.789423076923075, "grad_norm": 0.0008267787052318454, "learning_rate": 1.2448441439895541e-07, "loss": 0.0, "step": 24741 }, { "epoch": 23.790384615384614, "grad_norm": 0.0015298272483050823, "learning_rate": 1.2428855175042222e-07, "loss": 0.0, "step": 24742 }, { "epoch": 23.791346153846153, "grad_norm": 0.0012996934819966555, "learning_rate": 1.240928423439458e-07, "loss": 0.0, "step": 24743 }, { "epoch": 23.79230769230769, "grad_norm": 0.001539865741506219, "learning_rate": 1.238972861825638e-07, "loss": 0.0, "step": 24744 }, { "epoch": 23.79326923076923, "grad_norm": 0.00040525608346797526, "learning_rate": 1.237018832693093e-07, "loss": 0.0, "step": 24745 }, { "epoch": 23.79423076923077, "grad_norm": 0.0008592319209128618, "learning_rate": 1.235066336072155e-07, "loss": 0.0, "step": 24746 }, { "epoch": 23.795192307692307, "grad_norm": 0.00012527185026556253, "learning_rate": 1.2331153719930987e-07, "loss": 0.0, "step": 24747 }, { "epoch": 23.796153846153846, "grad_norm": 0.000912133720703423, "learning_rate": 1.231165940486234e-07, "loss": 0.0, "step": 24748 }, { "epoch": 23.797115384615385, "grad_norm": 0.0005895909271202981, "learning_rate": 1.229218041581781e-07, "loss": 0.0, "step": 24749 }, { "epoch": 23.798076923076923, "grad_norm": 0.00038313213735818863, "learning_rate": 1.227271675309982e-07, "loss": 0.0, "step": 24750 }, { "epoch": 23.799038461538462, "grad_norm": 0.00045039260294288397, "learning_rate": 1.2253268417010245e-07, "loss": 0.0, "step": 24751 }, { "epoch": 23.8, "grad_norm": 0.00025843610637821257, "learning_rate": 1.2233835407850948e-07, "loss": 0.0, "step": 24752 }, { "epoch": 23.80096153846154, "grad_norm": 0.0023508770391345024, "learning_rate": 1.2214417725923578e-07, "loss": 0.0, "step": 24753 }, { "epoch": 23.801923076923078, "grad_norm": 0.0006940989987924695, "learning_rate": 1.2195015371529227e-07, "loss": 0.0, "step": 24754 }, { "epoch": 23.802884615384617, "grad_norm": 0.0004670351918321103, "learning_rate": 1.2175628344969103e-07, "loss": 0.0, "step": 24755 }, { "epoch": 23.803846153846155, "grad_norm": 0.000471464212751016, "learning_rate": 1.2156256646543963e-07, "loss": 0.0, "step": 24756 }, { "epoch": 23.80480769230769, "grad_norm": 0.0005901833646930754, "learning_rate": 1.2136900276554453e-07, "loss": 0.0, "step": 24757 }, { "epoch": 23.80576923076923, "grad_norm": 0.00041806267108768225, "learning_rate": 1.211755923530089e-07, "loss": 0.0, "step": 24758 }, { "epoch": 23.806730769230768, "grad_norm": 0.0005268027307465672, "learning_rate": 1.209823352308337e-07, "loss": 0.0, "step": 24759 }, { "epoch": 23.807692307692307, "grad_norm": 0.0014375203754752874, "learning_rate": 1.207892314020198e-07, "loss": 0.0, "step": 24760 }, { "epoch": 23.808653846153845, "grad_norm": 0.0013405054342001677, "learning_rate": 1.2059628086956044e-07, "loss": 0.0, "step": 24761 }, { "epoch": 23.809615384615384, "grad_norm": 0.0010071212891489267, "learning_rate": 1.2040348363645092e-07, "loss": 0.0, "step": 24762 }, { "epoch": 23.810576923076923, "grad_norm": 0.0022779363207519054, "learning_rate": 1.2021083970568336e-07, "loss": 0.0, "step": 24763 }, { "epoch": 23.81153846153846, "grad_norm": 0.0014315277803689241, "learning_rate": 1.2001834908024756e-07, "loss": 0.0, "step": 24764 }, { "epoch": 23.8125, "grad_norm": 0.003690022509545088, "learning_rate": 1.198260117631278e-07, "loss": 0.0, "step": 24765 }, { "epoch": 23.81346153846154, "grad_norm": 0.00038068139110691845, "learning_rate": 1.1963382775731281e-07, "loss": 0.0, "step": 24766 }, { "epoch": 23.814423076923077, "grad_norm": 0.0006026249611750245, "learning_rate": 1.1944179706578018e-07, "loss": 0.0, "step": 24767 }, { "epoch": 23.815384615384616, "grad_norm": 0.000889353163074702, "learning_rate": 1.1924991969151313e-07, "loss": 0.0, "step": 24768 }, { "epoch": 23.816346153846155, "grad_norm": 0.0003668616118375212, "learning_rate": 1.19058195637487e-07, "loss": 0.0, "step": 24769 }, { "epoch": 23.817307692307693, "grad_norm": 0.0010074253659695387, "learning_rate": 1.1886662490667722e-07, "loss": 0.0, "step": 24770 }, { "epoch": 23.818269230769232, "grad_norm": 0.0007322253659367561, "learning_rate": 1.1867520750205696e-07, "loss": 0.0, "step": 24771 }, { "epoch": 23.81923076923077, "grad_norm": 0.0029048328287899494, "learning_rate": 1.1848394342659608e-07, "loss": 0.0, "step": 24772 }, { "epoch": 23.82019230769231, "grad_norm": 0.0006079364684410393, "learning_rate": 1.182928326832622e-07, "loss": 0.0, "step": 24773 }, { "epoch": 23.821153846153845, "grad_norm": 0.0005443321424536407, "learning_rate": 1.1810187527502182e-07, "loss": 0.0, "step": 24774 }, { "epoch": 23.822115384615383, "grad_norm": 0.0013479929184541106, "learning_rate": 1.1791107120483702e-07, "loss": 0.0, "step": 24775 }, { "epoch": 23.823076923076922, "grad_norm": 0.0010397143196314573, "learning_rate": 1.1772042047566878e-07, "loss": 0.0, "step": 24776 }, { "epoch": 23.82403846153846, "grad_norm": 0.0015201488276943564, "learning_rate": 1.1752992309047584e-07, "loss": 0.0, "step": 24777 }, { "epoch": 23.825, "grad_norm": 0.0016803317703306675, "learning_rate": 1.173395790522125e-07, "loss": 0.0, "step": 24778 }, { "epoch": 23.825961538461538, "grad_norm": 0.0007223912398330867, "learning_rate": 1.1714938836383526e-07, "loss": 0.0, "step": 24779 }, { "epoch": 23.826923076923077, "grad_norm": 0.0011474150232970715, "learning_rate": 1.1695935102829292e-07, "loss": 0.0, "step": 24780 }, { "epoch": 23.827884615384615, "grad_norm": 0.0014098252868279815, "learning_rate": 1.1676946704853531e-07, "loss": 0.0, "step": 24781 }, { "epoch": 23.828846153846154, "grad_norm": 0.00022787679336033762, "learning_rate": 1.1657973642750897e-07, "loss": 0.0, "step": 24782 }, { "epoch": 23.829807692307693, "grad_norm": 0.0011304946383461356, "learning_rate": 1.1639015916815599e-07, "loss": 0.0, "step": 24783 }, { "epoch": 23.83076923076923, "grad_norm": 0.0007513862219639122, "learning_rate": 1.1620073527342069e-07, "loss": 0.0, "step": 24784 }, { "epoch": 23.83173076923077, "grad_norm": 0.001014456618577242, "learning_rate": 1.160114647462418e-07, "loss": 0.0, "step": 24785 }, { "epoch": 23.83269230769231, "grad_norm": 0.001280668075196445, "learning_rate": 1.158223475895559e-07, "loss": 0.0, "step": 24786 }, { "epoch": 23.833653846153847, "grad_norm": 0.0010770043591037393, "learning_rate": 1.1563338380629618e-07, "loss": 0.0, "step": 24787 }, { "epoch": 23.834615384615386, "grad_norm": 0.001513647846877575, "learning_rate": 1.1544457339939807e-07, "loss": 0.0, "step": 24788 }, { "epoch": 23.835576923076925, "grad_norm": 0.0018390740733593702, "learning_rate": 1.1525591637178813e-07, "loss": 0.0, "step": 24789 }, { "epoch": 23.83653846153846, "grad_norm": 0.0008776789181865752, "learning_rate": 1.150674127263951e-07, "loss": 0.0, "step": 24790 }, { "epoch": 23.8375, "grad_norm": 0.0008819918148219585, "learning_rate": 1.1487906246614333e-07, "loss": 0.0, "step": 24791 }, { "epoch": 23.838461538461537, "grad_norm": 0.0036017638631165028, "learning_rate": 1.1469086559395715e-07, "loss": 0.0, "step": 24792 }, { "epoch": 23.839423076923076, "grad_norm": 0.000977926654741168, "learning_rate": 1.1450282211275532e-07, "loss": 0.0, "step": 24793 }, { "epoch": 23.840384615384615, "grad_norm": 0.0005640448071062565, "learning_rate": 1.1431493202545552e-07, "loss": 0.0, "step": 24794 }, { "epoch": 23.841346153846153, "grad_norm": 0.0019403916085138917, "learning_rate": 1.141271953349743e-07, "loss": 0.0, "step": 24795 }, { "epoch": 23.842307692307692, "grad_norm": 0.0010159745579585433, "learning_rate": 1.1393961204422488e-07, "loss": 0.0, "step": 24796 }, { "epoch": 23.84326923076923, "grad_norm": 0.0006224157987162471, "learning_rate": 1.1375218215611606e-07, "loss": 0.0, "step": 24797 }, { "epoch": 23.84423076923077, "grad_norm": 0.0005949487094767392, "learning_rate": 1.1356490567355771e-07, "loss": 0.0, "step": 24798 }, { "epoch": 23.845192307692308, "grad_norm": 0.00041853287257254124, "learning_rate": 1.1337778259945753e-07, "loss": 0.0, "step": 24799 }, { "epoch": 23.846153846153847, "grad_norm": 0.000590478943195194, "learning_rate": 1.1319081293671541e-07, "loss": 0.0, "step": 24800 }, { "epoch": 23.847115384615385, "grad_norm": 0.0011558571131899953, "learning_rate": 1.1300399668823569e-07, "loss": 0.0, "step": 24801 }, { "epoch": 23.848076923076924, "grad_norm": 0.0009010312496684492, "learning_rate": 1.1281733385691495e-07, "loss": 0.0, "step": 24802 }, { "epoch": 23.849038461538463, "grad_norm": 0.0009830878116190434, "learning_rate": 1.1263082444565088e-07, "loss": 0.0, "step": 24803 }, { "epoch": 23.85, "grad_norm": 0.0005412204191088676, "learning_rate": 1.1244446845733781e-07, "loss": 0.0, "step": 24804 }, { "epoch": 23.85096153846154, "grad_norm": 0.0007082188967615366, "learning_rate": 1.1225826589486677e-07, "loss": 0.0, "step": 24805 }, { "epoch": 23.851923076923075, "grad_norm": 0.0009566547814756632, "learning_rate": 1.1207221676112656e-07, "loss": 0.0, "step": 24806 }, { "epoch": 23.852884615384614, "grad_norm": 0.0013138896320015192, "learning_rate": 1.1188632105900598e-07, "loss": 0.0, "step": 24807 }, { "epoch": 23.853846153846153, "grad_norm": 0.0018172672716900706, "learning_rate": 1.1170057879138829e-07, "loss": 0.0, "step": 24808 }, { "epoch": 23.85480769230769, "grad_norm": 0.0025733187794685364, "learning_rate": 1.115149899611545e-07, "loss": 0.0, "step": 24809 }, { "epoch": 23.85576923076923, "grad_norm": 0.0014759866753593087, "learning_rate": 1.1132955457118788e-07, "loss": 0.0, "step": 24810 }, { "epoch": 23.85673076923077, "grad_norm": 0.0006674052565358579, "learning_rate": 1.1114427262436167e-07, "loss": 0.0, "step": 24811 }, { "epoch": 23.857692307692307, "grad_norm": 0.0006446157931350172, "learning_rate": 1.1095914412355357e-07, "loss": 0.0, "step": 24812 }, { "epoch": 23.858653846153846, "grad_norm": 0.000770618615206331, "learning_rate": 1.1077416907163573e-07, "loss": 0.0, "step": 24813 }, { "epoch": 23.859615384615385, "grad_norm": 0.0005756551399827003, "learning_rate": 1.1058934747147809e-07, "loss": 0.0, "step": 24814 }, { "epoch": 23.860576923076923, "grad_norm": 0.0005467890878207982, "learning_rate": 1.1040467932594945e-07, "loss": 0.0, "step": 24815 }, { "epoch": 23.861538461538462, "grad_norm": 0.0013618646189570427, "learning_rate": 1.1022016463791308e-07, "loss": 0.0, "step": 24816 }, { "epoch": 23.8625, "grad_norm": 0.0008863734547048807, "learning_rate": 1.1003580341023445e-07, "loss": 0.0, "step": 24817 }, { "epoch": 23.86346153846154, "grad_norm": 0.0010656036902219057, "learning_rate": 1.0985159564577352e-07, "loss": 0.0, "step": 24818 }, { "epoch": 23.864423076923078, "grad_norm": 0.0011071679182350636, "learning_rate": 1.0966754134738911e-07, "loss": 0.0, "step": 24819 }, { "epoch": 23.865384615384617, "grad_norm": 0.0012855074601247907, "learning_rate": 1.0948364051793559e-07, "loss": 0.0, "step": 24820 }, { "epoch": 23.866346153846155, "grad_norm": 0.00037631861050613225, "learning_rate": 1.0929989316026846e-07, "loss": 0.0, "step": 24821 }, { "epoch": 23.86730769230769, "grad_norm": 0.0006813738727942109, "learning_rate": 1.0911629927723766e-07, "loss": 0.0, "step": 24822 }, { "epoch": 23.86826923076923, "grad_norm": 0.00036146154161542654, "learning_rate": 1.0893285887169313e-07, "loss": 0.0, "step": 24823 }, { "epoch": 23.869230769230768, "grad_norm": 0.0006743276608176529, "learning_rate": 1.087495719464804e-07, "loss": 0.0, "step": 24824 }, { "epoch": 23.870192307692307, "grad_norm": 0.001992995385080576, "learning_rate": 1.0856643850444271e-07, "loss": 0.0, "step": 24825 }, { "epoch": 23.871153846153845, "grad_norm": 0.0009831517236307263, "learning_rate": 1.0838345854842447e-07, "loss": 0.0, "step": 24826 }, { "epoch": 23.872115384615384, "grad_norm": 0.0012256235349923372, "learning_rate": 1.0820063208126231e-07, "loss": 0.0, "step": 24827 }, { "epoch": 23.873076923076923, "grad_norm": 0.0010144996922463179, "learning_rate": 1.0801795910579504e-07, "loss": 0.0, "step": 24828 }, { "epoch": 23.87403846153846, "grad_norm": 0.00037815814721398056, "learning_rate": 1.0783543962485598e-07, "loss": 0.0, "step": 24829 }, { "epoch": 23.875, "grad_norm": 0.0006267628632485867, "learning_rate": 1.0765307364127731e-07, "loss": 0.0, "step": 24830 }, { "epoch": 23.87596153846154, "grad_norm": 0.0011797512415796518, "learning_rate": 1.0747086115789007e-07, "loss": 0.0, "step": 24831 }, { "epoch": 23.876923076923077, "grad_norm": 0.001770530827343464, "learning_rate": 1.0728880217752091e-07, "loss": 0.0, "step": 24832 }, { "epoch": 23.877884615384616, "grad_norm": 0.00029532541520893574, "learning_rate": 1.0710689670299423e-07, "loss": 0.0, "step": 24833 }, { "epoch": 23.878846153846155, "grad_norm": 0.000919933314435184, "learning_rate": 1.0692514473713334e-07, "loss": 0.0, "step": 24834 }, { "epoch": 23.879807692307693, "grad_norm": 0.0010585805866867304, "learning_rate": 1.0674354628275818e-07, "loss": 0.0, "step": 24835 }, { "epoch": 23.880769230769232, "grad_norm": 0.0007711605867370963, "learning_rate": 1.0656210134268652e-07, "loss": 0.0, "step": 24836 }, { "epoch": 23.88173076923077, "grad_norm": 0.0006968833040446043, "learning_rate": 1.0638080991973609e-07, "loss": 0.0, "step": 24837 }, { "epoch": 23.88269230769231, "grad_norm": 0.0024790086317807436, "learning_rate": 1.0619967201671577e-07, "loss": 0.0, "step": 24838 }, { "epoch": 23.883653846153845, "grad_norm": 0.0016978613566607237, "learning_rate": 1.0601868763643997e-07, "loss": 0.0, "step": 24839 }, { "epoch": 23.884615384615383, "grad_norm": 0.0006493639666587114, "learning_rate": 1.0583785678171532e-07, "loss": 0.0, "step": 24840 }, { "epoch": 23.885576923076922, "grad_norm": 0.0014332549180835485, "learning_rate": 1.0565717945534737e-07, "loss": 0.0, "step": 24841 }, { "epoch": 23.88653846153846, "grad_norm": 0.001242087222635746, "learning_rate": 1.0547665566014164e-07, "loss": 0.0, "step": 24842 }, { "epoch": 23.8875, "grad_norm": 0.0003349225444253534, "learning_rate": 1.0529628539889703e-07, "loss": 0.0, "step": 24843 }, { "epoch": 23.888461538461538, "grad_norm": 0.0010042247595265508, "learning_rate": 1.0511606867441459e-07, "loss": 0.0, "step": 24844 }, { "epoch": 23.889423076923077, "grad_norm": 0.0007835153373889625, "learning_rate": 1.0493600548948879e-07, "loss": 0.0, "step": 24845 }, { "epoch": 23.890384615384615, "grad_norm": 0.0012943672481924295, "learning_rate": 1.0475609584691515e-07, "loss": 0.0, "step": 24846 }, { "epoch": 23.891346153846154, "grad_norm": 0.001858919276855886, "learning_rate": 1.0457633974948367e-07, "loss": 0.0, "step": 24847 }, { "epoch": 23.892307692307693, "grad_norm": 0.0005481004482135177, "learning_rate": 1.0439673719998545e-07, "loss": 0.0, "step": 24848 }, { "epoch": 23.89326923076923, "grad_norm": 0.0006288021104410291, "learning_rate": 1.0421728820120713e-07, "loss": 0.0, "step": 24849 }, { "epoch": 23.89423076923077, "grad_norm": 0.001070283236913383, "learning_rate": 1.0403799275593207e-07, "loss": 0.0, "step": 24850 }, { "epoch": 23.89519230769231, "grad_norm": 0.0008411231101490557, "learning_rate": 1.0385885086694247e-07, "loss": 0.0, "step": 24851 }, { "epoch": 23.896153846153847, "grad_norm": 0.0014171043876558542, "learning_rate": 1.0367986253701945e-07, "loss": 0.0, "step": 24852 }, { "epoch": 23.897115384615386, "grad_norm": 0.0008284740033559501, "learning_rate": 1.0350102776893966e-07, "loss": 0.0, "step": 24853 }, { "epoch": 23.898076923076925, "grad_norm": 0.0005775967729277909, "learning_rate": 1.0332234656547757e-07, "loss": 0.0, "step": 24854 }, { "epoch": 23.89903846153846, "grad_norm": 0.000698692281730473, "learning_rate": 1.031438189294065e-07, "loss": 0.0, "step": 24855 }, { "epoch": 23.9, "grad_norm": 0.0005862206453457475, "learning_rate": 1.0296544486349758e-07, "loss": 0.0, "step": 24856 }, { "epoch": 23.900961538461537, "grad_norm": 0.002181109506636858, "learning_rate": 1.0278722437051636e-07, "loss": 0.0, "step": 24857 }, { "epoch": 23.901923076923076, "grad_norm": 0.001036583911627531, "learning_rate": 1.0260915745322842e-07, "loss": 0.0, "step": 24858 }, { "epoch": 23.902884615384615, "grad_norm": 0.0003518827725201845, "learning_rate": 1.0243124411439931e-07, "loss": 0.0, "step": 24859 }, { "epoch": 23.903846153846153, "grad_norm": 0.0008485629805363715, "learning_rate": 1.0225348435678795e-07, "loss": 0.0, "step": 24860 }, { "epoch": 23.904807692307692, "grad_norm": 0.00035857828333973885, "learning_rate": 1.0207587818315323e-07, "loss": 0.0, "step": 24861 }, { "epoch": 23.90576923076923, "grad_norm": 0.0008940501720644534, "learning_rate": 1.0189842559625073e-07, "loss": 0.0, "step": 24862 }, { "epoch": 23.90673076923077, "grad_norm": 0.0021526433993130922, "learning_rate": 1.0172112659883381e-07, "loss": 0.0, "step": 24863 }, { "epoch": 23.907692307692308, "grad_norm": 0.001237221178598702, "learning_rate": 1.015439811936536e-07, "loss": 0.0, "step": 24864 }, { "epoch": 23.908653846153847, "grad_norm": 0.00021638756152242422, "learning_rate": 1.0136698938346012e-07, "loss": 0.0, "step": 24865 }, { "epoch": 23.909615384615385, "grad_norm": 0.0007894097943790257, "learning_rate": 1.0119015117099785e-07, "loss": 0.0, "step": 24866 }, { "epoch": 23.910576923076924, "grad_norm": 0.0005103509756736457, "learning_rate": 1.0101346655901234e-07, "loss": 0.0, "step": 24867 }, { "epoch": 23.911538461538463, "grad_norm": 0.0005863442784175277, "learning_rate": 1.0083693555024476e-07, "loss": 0.0, "step": 24868 }, { "epoch": 23.9125, "grad_norm": 0.0011741019552573562, "learning_rate": 1.0066055814743292e-07, "loss": 0.0, "step": 24869 }, { "epoch": 23.91346153846154, "grad_norm": 0.0010550881270319223, "learning_rate": 1.0048433435331684e-07, "loss": 0.0, "step": 24870 }, { "epoch": 23.914423076923075, "grad_norm": 0.0008140244171954691, "learning_rate": 1.0030826417062878e-07, "loss": 0.0, "step": 24871 }, { "epoch": 23.915384615384614, "grad_norm": 0.0011693037813529372, "learning_rate": 1.0013234760209989e-07, "loss": 0.0, "step": 24872 }, { "epoch": 23.916346153846153, "grad_norm": 0.0009595170267857611, "learning_rate": 9.995658465046243e-08, "loss": 0.0, "step": 24873 }, { "epoch": 23.91730769230769, "grad_norm": 0.0017017738427966833, "learning_rate": 9.9780975318442e-08, "loss": 0.0, "step": 24874 }, { "epoch": 23.91826923076923, "grad_norm": 0.00022602856915909797, "learning_rate": 9.960551960876529e-08, "loss": 0.0, "step": 24875 }, { "epoch": 23.91923076923077, "grad_norm": 0.00042645385838113725, "learning_rate": 9.943021752415238e-08, "loss": 0.0, "step": 24876 }, { "epoch": 23.920192307692307, "grad_norm": 0.000634669850114733, "learning_rate": 9.925506906732441e-08, "loss": 0.0, "step": 24877 }, { "epoch": 23.921153846153846, "grad_norm": 0.0004887889372184873, "learning_rate": 9.90800742410003e-08, "loss": 0.0, "step": 24878 }, { "epoch": 23.922115384615385, "grad_norm": 0.00031012287945486605, "learning_rate": 9.890523304789346e-08, "loss": 0.0, "step": 24879 }, { "epoch": 23.923076923076923, "grad_norm": 0.0006480515585280955, "learning_rate": 9.873054549071836e-08, "loss": 0.0, "step": 24880 }, { "epoch": 23.924038461538462, "grad_norm": 0.0018651472637429833, "learning_rate": 9.85560115721862e-08, "loss": 0.0, "step": 24881 }, { "epoch": 23.925, "grad_norm": 0.0006329715833999217, "learning_rate": 9.838163129500367e-08, "loss": 0.0, "step": 24882 }, { "epoch": 23.92596153846154, "grad_norm": 0.0007222786080092192, "learning_rate": 9.82074046618775e-08, "loss": 0.0, "step": 24883 }, { "epoch": 23.926923076923078, "grad_norm": 0.0005805094842799008, "learning_rate": 9.80333316755111e-08, "loss": 0.0, "step": 24884 }, { "epoch": 23.927884615384617, "grad_norm": 0.00032717702561058104, "learning_rate": 9.785941233860563e-08, "loss": 0.0, "step": 24885 }, { "epoch": 23.928846153846155, "grad_norm": 0.0019505751552060246, "learning_rate": 9.768564665386005e-08, "loss": 0.0, "step": 24886 }, { "epoch": 23.92980769230769, "grad_norm": 0.001221930026076734, "learning_rate": 9.751203462396886e-08, "loss": 0.0, "step": 24887 }, { "epoch": 23.93076923076923, "grad_norm": 0.00042462622513994575, "learning_rate": 9.733857625162769e-08, "loss": 0.0, "step": 24888 }, { "epoch": 23.931730769230768, "grad_norm": 0.000580107094720006, "learning_rate": 9.716527153952882e-08, "loss": 0.0, "step": 24889 }, { "epoch": 23.932692307692307, "grad_norm": 0.0012322603724896908, "learning_rate": 9.69921204903601e-08, "loss": 0.0, "step": 24890 }, { "epoch": 23.933653846153845, "grad_norm": 0.00049710733583197, "learning_rate": 9.68191231068083e-08, "loss": 0.0, "step": 24891 }, { "epoch": 23.934615384615384, "grad_norm": 0.0023545261938124895, "learning_rate": 9.664627939155902e-08, "loss": 0.0, "step": 24892 }, { "epoch": 23.935576923076923, "grad_norm": 0.0007238674443215132, "learning_rate": 9.647358934729233e-08, "loss": 0.0, "step": 24893 }, { "epoch": 23.93653846153846, "grad_norm": 0.000498601293656975, "learning_rate": 9.630105297668946e-08, "loss": 0.0, "step": 24894 }, { "epoch": 23.9375, "grad_norm": 0.0021809833124279976, "learning_rate": 9.612867028242823e-08, "loss": 0.0, "step": 24895 }, { "epoch": 23.93846153846154, "grad_norm": 0.000588226830586791, "learning_rate": 9.595644126718206e-08, "loss": 0.0, "step": 24896 }, { "epoch": 23.939423076923077, "grad_norm": 0.001006165286526084, "learning_rate": 9.578436593362328e-08, "loss": 0.0, "step": 24897 }, { "epoch": 23.940384615384616, "grad_norm": 0.0008298594038933516, "learning_rate": 9.561244428442307e-08, "loss": 0.0, "step": 24898 }, { "epoch": 23.941346153846155, "grad_norm": 0.00021145548089407384, "learning_rate": 9.544067632224818e-08, "loss": 0.0, "step": 24899 }, { "epoch": 23.942307692307693, "grad_norm": 0.00076774024637416, "learning_rate": 9.526906204976539e-08, "loss": 0.0, "step": 24900 }, { "epoch": 23.943269230769232, "grad_norm": 0.0016268726903945208, "learning_rate": 9.509760146963587e-08, "loss": 0.0, "step": 24901 }, { "epoch": 23.94423076923077, "grad_norm": 0.0006664074608124793, "learning_rate": 9.492629458452085e-08, "loss": 0.0, "step": 24902 }, { "epoch": 23.94519230769231, "grad_norm": 0.0022184872068464756, "learning_rate": 9.47551413970793e-08, "loss": 0.0, "step": 24903 }, { "epoch": 23.946153846153845, "grad_norm": 0.0005327263497747481, "learning_rate": 9.45841419099669e-08, "loss": 0.0, "step": 24904 }, { "epoch": 23.947115384615383, "grad_norm": 0.0008773242589086294, "learning_rate": 9.441329612583594e-08, "loss": 0.0, "step": 24905 }, { "epoch": 23.948076923076922, "grad_norm": 0.0022806201595813036, "learning_rate": 9.424260404733765e-08, "loss": 0.0, "step": 24906 }, { "epoch": 23.94903846153846, "grad_norm": 0.001027543330565095, "learning_rate": 9.407206567712213e-08, "loss": 0.0, "step": 24907 }, { "epoch": 23.95, "grad_norm": 0.0013097850605845451, "learning_rate": 9.390168101783392e-08, "loss": 0.0, "step": 24908 }, { "epoch": 23.950961538461538, "grad_norm": 0.0004258358385413885, "learning_rate": 9.373145007211759e-08, "loss": 0.0, "step": 24909 }, { "epoch": 23.951923076923077, "grad_norm": 0.0007176173967309296, "learning_rate": 9.356137284261435e-08, "loss": 0.0, "step": 24910 }, { "epoch": 23.952884615384615, "grad_norm": 0.0007466429960913956, "learning_rate": 9.339144933196432e-08, "loss": 0.0, "step": 24911 }, { "epoch": 23.953846153846154, "grad_norm": 0.0006033446989022195, "learning_rate": 9.322167954280314e-08, "loss": 0.0, "step": 24912 }, { "epoch": 23.954807692307693, "grad_norm": 0.0010808779625222087, "learning_rate": 9.30520634777643e-08, "loss": 0.0, "step": 24913 }, { "epoch": 23.95576923076923, "grad_norm": 0.0010139880469068885, "learning_rate": 9.288260113948233e-08, "loss": 0.0, "step": 24914 }, { "epoch": 23.95673076923077, "grad_norm": 0.001891530235297978, "learning_rate": 9.271329253058514e-08, "loss": 0.0, "step": 24915 }, { "epoch": 23.95769230769231, "grad_norm": 0.000566825969144702, "learning_rate": 9.254413765369952e-08, "loss": 0.0, "step": 24916 }, { "epoch": 23.958653846153847, "grad_norm": 0.00034198208595626056, "learning_rate": 9.237513651145224e-08, "loss": 0.0, "step": 24917 }, { "epoch": 23.959615384615386, "grad_norm": 0.0017458089860156178, "learning_rate": 9.220628910646234e-08, "loss": 0.0, "step": 24918 }, { "epoch": 23.960576923076925, "grad_norm": 0.004561115987598896, "learning_rate": 9.203759544135326e-08, "loss": 0.0, "step": 24919 }, { "epoch": 23.96153846153846, "grad_norm": 0.0007101136143319309, "learning_rate": 9.18690555187407e-08, "loss": 0.0, "step": 24920 }, { "epoch": 23.9625, "grad_norm": 0.0010862421477213502, "learning_rate": 9.170066934124034e-08, "loss": 0.0, "step": 24921 }, { "epoch": 23.963461538461537, "grad_norm": 0.00048107560724020004, "learning_rate": 9.153243691146563e-08, "loss": 0.0, "step": 24922 }, { "epoch": 23.964423076923076, "grad_norm": 0.0014861846575513482, "learning_rate": 9.13643582320256e-08, "loss": 0.0, "step": 24923 }, { "epoch": 23.965384615384615, "grad_norm": 0.0007432579295709729, "learning_rate": 9.11964333055293e-08, "loss": 0.0, "step": 24924 }, { "epoch": 23.966346153846153, "grad_norm": 0.005507124587893486, "learning_rate": 9.10286621345835e-08, "loss": 0.0, "step": 24925 }, { "epoch": 23.967307692307692, "grad_norm": 0.001144313719123602, "learning_rate": 9.086104472178836e-08, "loss": 0.0, "step": 24926 }, { "epoch": 23.96826923076923, "grad_norm": 0.0009682445670478046, "learning_rate": 9.069358106974846e-08, "loss": 0.0, "step": 24927 }, { "epoch": 23.96923076923077, "grad_norm": 0.0006861975998617709, "learning_rate": 9.052627118106061e-08, "loss": 0.0, "step": 24928 }, { "epoch": 23.970192307692308, "grad_norm": 0.0037018582224845886, "learning_rate": 9.035911505832051e-08, "loss": 0.0, "step": 24929 }, { "epoch": 23.971153846153847, "grad_norm": 0.00044487870763987303, "learning_rate": 9.019211270412275e-08, "loss": 0.0, "step": 24930 }, { "epoch": 23.972115384615385, "grad_norm": 0.0006785400910302997, "learning_rate": 9.00252641210586e-08, "loss": 0.0, "step": 24931 }, { "epoch": 23.973076923076924, "grad_norm": 0.0009572604321874678, "learning_rate": 8.9858569311716e-08, "loss": 0.0, "step": 24932 }, { "epoch": 23.974038461538463, "grad_norm": 0.0013958164490759373, "learning_rate": 8.969202827868395e-08, "loss": 0.0, "step": 24933 }, { "epoch": 23.975, "grad_norm": 0.0009907216299325228, "learning_rate": 8.952564102454486e-08, "loss": 0.0, "step": 24934 }, { "epoch": 23.97596153846154, "grad_norm": 0.0008028767188079655, "learning_rate": 8.935940755188e-08, "loss": 0.0, "step": 24935 }, { "epoch": 23.976923076923075, "grad_norm": 0.0010995549382641912, "learning_rate": 8.919332786327061e-08, "loss": 0.0, "step": 24936 }, { "epoch": 23.977884615384614, "grad_norm": 0.0005049012834206223, "learning_rate": 8.902740196129244e-08, "loss": 0.0, "step": 24937 }, { "epoch": 23.978846153846153, "grad_norm": 0.0008497862727381289, "learning_rate": 8.886162984852121e-08, "loss": 0.0, "step": 24938 }, { "epoch": 23.97980769230769, "grad_norm": 0.002024609362706542, "learning_rate": 8.869601152752816e-08, "loss": 0.0, "step": 24939 }, { "epoch": 23.98076923076923, "grad_norm": 0.0011104641016572714, "learning_rate": 8.853054700088348e-08, "loss": 0.0, "step": 24940 }, { "epoch": 23.98173076923077, "grad_norm": 0.0010307845659554005, "learning_rate": 8.836523627115623e-08, "loss": 0.0, "step": 24941 }, { "epoch": 23.982692307692307, "grad_norm": 0.0006033883546479046, "learning_rate": 8.82000793409088e-08, "loss": 0.0, "step": 24942 }, { "epoch": 23.983653846153846, "grad_norm": 0.00039999550790525973, "learning_rate": 8.80350762127058e-08, "loss": 0.0, "step": 24943 }, { "epoch": 23.984615384615385, "grad_norm": 0.0008737704483792186, "learning_rate": 8.787022688910741e-08, "loss": 0.0, "step": 24944 }, { "epoch": 23.985576923076923, "grad_norm": 0.0004844720533583313, "learning_rate": 8.770553137267046e-08, "loss": 0.0, "step": 24945 }, { "epoch": 23.986538461538462, "grad_norm": 0.0005161667359061539, "learning_rate": 8.754098966595181e-08, "loss": 0.0, "step": 24946 }, { "epoch": 23.9875, "grad_norm": 0.0013342120219022036, "learning_rate": 8.737660177150498e-08, "loss": 0.0, "step": 24947 }, { "epoch": 23.98846153846154, "grad_norm": 0.0009582653874531388, "learning_rate": 8.721236769187902e-08, "loss": 0.0, "step": 24948 }, { "epoch": 23.989423076923078, "grad_norm": 0.0006217531627044082, "learning_rate": 8.704828742962412e-08, "loss": 0.0, "step": 24949 }, { "epoch": 23.990384615384617, "grad_norm": 0.0005766757531091571, "learning_rate": 8.688436098728603e-08, "loss": 0.0, "step": 24950 }, { "epoch": 23.991346153846155, "grad_norm": 0.0013816612772643566, "learning_rate": 8.672058836740716e-08, "loss": 0.0, "step": 24951 }, { "epoch": 23.99230769230769, "grad_norm": 0.0007301971781998873, "learning_rate": 8.65569695725299e-08, "loss": 0.0, "step": 24952 }, { "epoch": 23.99326923076923, "grad_norm": 0.008311285637319088, "learning_rate": 8.639350460519335e-08, "loss": 0.0001, "step": 24953 }, { "epoch": 23.994230769230768, "grad_norm": 0.0008823969983495772, "learning_rate": 8.623019346793215e-08, "loss": 0.0, "step": 24954 }, { "epoch": 23.995192307692307, "grad_norm": 0.0009539792081341147, "learning_rate": 8.606703616328316e-08, "loss": 0.0, "step": 24955 }, { "epoch": 23.996153846153845, "grad_norm": 0.001218514866195619, "learning_rate": 8.590403269377656e-08, "loss": 0.0, "step": 24956 }, { "epoch": 23.997115384615384, "grad_norm": 0.0009216830949299037, "learning_rate": 8.574118306194146e-08, "loss": 0.0, "step": 24957 }, { "epoch": 23.998076923076923, "grad_norm": 0.0006941990577615798, "learning_rate": 8.557848727030582e-08, "loss": 0.0, "step": 24958 }, { "epoch": 23.99903846153846, "grad_norm": 0.0013583487598225474, "learning_rate": 8.541594532139208e-08, "loss": 0.0, "step": 24959 }, { "epoch": 24.0, "grad_norm": 0.0006243421230465174, "learning_rate": 8.525355721772488e-08, "loss": 0.0, "step": 24960 }, { "epoch": 24.00096153846154, "grad_norm": 0.0017755810404196382, "learning_rate": 8.509132296182332e-08, "loss": 0.0, "step": 24961 }, { "epoch": 24.001923076923077, "grad_norm": 0.0011529304319992661, "learning_rate": 8.492924255620427e-08, "loss": 0.0, "step": 24962 }, { "epoch": 24.002884615384616, "grad_norm": 0.0012807766906917095, "learning_rate": 8.47673160033835e-08, "loss": 0.0, "step": 24963 }, { "epoch": 24.003846153846155, "grad_norm": 0.001243320293724537, "learning_rate": 8.460554330587234e-08, "loss": 0.0, "step": 24964 }, { "epoch": 24.004807692307693, "grad_norm": 0.0007334647816605866, "learning_rate": 8.44439244661821e-08, "loss": 0.0, "step": 24965 }, { "epoch": 24.005769230769232, "grad_norm": 0.0012357082450762391, "learning_rate": 8.428245948682079e-08, "loss": 0.0, "step": 24966 }, { "epoch": 24.00673076923077, "grad_norm": 0.00047827354865148664, "learning_rate": 8.412114837029306e-08, "loss": 0.0, "step": 24967 }, { "epoch": 24.00769230769231, "grad_norm": 0.0008659998420625925, "learning_rate": 8.395999111910135e-08, "loss": 0.0, "step": 24968 }, { "epoch": 24.008653846153845, "grad_norm": 0.0040152049623429775, "learning_rate": 8.379898773574924e-08, "loss": 0.0, "step": 24969 }, { "epoch": 24.009615384615383, "grad_norm": 0.0010020809713751078, "learning_rate": 8.363813822273137e-08, "loss": 0.0, "step": 24970 }, { "epoch": 24.010576923076922, "grad_norm": 0.0010815440909937024, "learning_rate": 8.347744258254686e-08, "loss": 0.0, "step": 24971 }, { "epoch": 24.01153846153846, "grad_norm": 0.0030601161997765303, "learning_rate": 8.331690081768707e-08, "loss": 0.0, "step": 24972 }, { "epoch": 24.0125, "grad_norm": 0.0015777574153617024, "learning_rate": 8.31565129306433e-08, "loss": 0.0, "step": 24973 }, { "epoch": 24.013461538461538, "grad_norm": 0.000568874878808856, "learning_rate": 8.299627892390583e-08, "loss": 0.0, "step": 24974 }, { "epoch": 24.014423076923077, "grad_norm": 0.0008344916859641671, "learning_rate": 8.28361987999604e-08, "loss": 0.0, "step": 24975 }, { "epoch": 24.015384615384615, "grad_norm": 0.0009590478730387986, "learning_rate": 8.267627256128952e-08, "loss": 0.0, "step": 24976 }, { "epoch": 24.016346153846154, "grad_norm": 0.0007517858175560832, "learning_rate": 8.251650021037671e-08, "loss": 0.0, "step": 24977 }, { "epoch": 24.017307692307693, "grad_norm": 0.0015046228654682636, "learning_rate": 8.235688174970003e-08, "loss": 0.0, "step": 24978 }, { "epoch": 24.01826923076923, "grad_norm": 0.002218818524852395, "learning_rate": 8.219741718173635e-08, "loss": 0.0, "step": 24979 }, { "epoch": 24.01923076923077, "grad_norm": 0.001228041248396039, "learning_rate": 8.203810650896038e-08, "loss": 0.0, "step": 24980 }, { "epoch": 24.02019230769231, "grad_norm": 0.0016022283816710114, "learning_rate": 8.187894973384458e-08, "loss": 0.0, "step": 24981 }, { "epoch": 24.021153846153847, "grad_norm": 0.0009439162677153945, "learning_rate": 8.171994685885698e-08, "loss": 0.0, "step": 24982 }, { "epoch": 24.022115384615386, "grad_norm": 0.0010257265530526638, "learning_rate": 8.156109788646782e-08, "loss": 0.0, "step": 24983 }, { "epoch": 24.023076923076925, "grad_norm": 0.0005079815164208412, "learning_rate": 8.140240281913848e-08, "loss": 0.0, "step": 24984 }, { "epoch": 24.02403846153846, "grad_norm": 0.0002943742147181183, "learning_rate": 8.124386165933473e-08, "loss": 0.0, "step": 24985 }, { "epoch": 24.025, "grad_norm": 0.0013500493951141834, "learning_rate": 8.108547440951354e-08, "loss": 0.0, "step": 24986 }, { "epoch": 24.025961538461537, "grad_norm": 0.0009245246765203774, "learning_rate": 8.092724107213512e-08, "loss": 0.0, "step": 24987 }, { "epoch": 24.026923076923076, "grad_norm": 0.0014376668259501457, "learning_rate": 8.076916164965421e-08, "loss": 0.0, "step": 24988 }, { "epoch": 24.027884615384615, "grad_norm": 0.0018099240260198712, "learning_rate": 8.061123614452215e-08, "loss": 0.0, "step": 24989 }, { "epoch": 24.028846153846153, "grad_norm": 0.0015401866985484958, "learning_rate": 8.045346455919035e-08, "loss": 0.0, "step": 24990 }, { "epoch": 24.029807692307692, "grad_norm": 0.00030430741026066244, "learning_rate": 8.029584689610903e-08, "loss": 0.0, "step": 24991 }, { "epoch": 24.03076923076923, "grad_norm": 0.002443095436319709, "learning_rate": 8.013838315772071e-08, "loss": 0.0, "step": 24992 }, { "epoch": 24.03173076923077, "grad_norm": 0.0005212076939642429, "learning_rate": 7.99810733464701e-08, "loss": 0.0, "step": 24993 }, { "epoch": 24.032692307692308, "grad_norm": 0.0004895137972198427, "learning_rate": 7.982391746479967e-08, "loss": 0.0, "step": 24994 }, { "epoch": 24.033653846153847, "grad_norm": 0.0019023416098207235, "learning_rate": 7.966691551514527e-08, "loss": 0.0, "step": 24995 }, { "epoch": 24.034615384615385, "grad_norm": 0.0004984152037650347, "learning_rate": 7.951006749994494e-08, "loss": 0.0, "step": 24996 }, { "epoch": 24.035576923076924, "grad_norm": 0.0010915044695138931, "learning_rate": 7.93533734216323e-08, "loss": 0.0, "step": 24997 }, { "epoch": 24.036538461538463, "grad_norm": 0.0010495504830032587, "learning_rate": 7.919683328263872e-08, "loss": 0.0, "step": 24998 }, { "epoch": 24.0375, "grad_norm": 0.0010714955860748887, "learning_rate": 7.904044708539338e-08, "loss": 0.0, "step": 24999 }, { "epoch": 24.03846153846154, "grad_norm": 0.001246951287612319, "learning_rate": 7.888421483232212e-08, "loss": 0.0, "step": 25000 }, { "epoch": 24.039423076923075, "grad_norm": 0.000795688945800066, "learning_rate": 7.872813652584854e-08, "loss": 0.0, "step": 25001 }, { "epoch": 24.040384615384614, "grad_norm": 0.0003517814038787037, "learning_rate": 7.857221216839739e-08, "loss": 0.0, "step": 25002 }, { "epoch": 24.041346153846153, "grad_norm": 0.00045394457993097603, "learning_rate": 7.841644176238561e-08, "loss": 0.0, "step": 25003 }, { "epoch": 24.04230769230769, "grad_norm": 0.0007109346915967762, "learning_rate": 7.826082531023238e-08, "loss": 0.0, "step": 25004 }, { "epoch": 24.04326923076923, "grad_norm": 0.0003701085224747658, "learning_rate": 7.810536281435022e-08, "loss": 0.0, "step": 25005 }, { "epoch": 24.04423076923077, "grad_norm": 0.0022951492574065924, "learning_rate": 7.795005427715274e-08, "loss": 0.0, "step": 25006 }, { "epoch": 24.045192307692307, "grad_norm": 0.0005757285980507731, "learning_rate": 7.779489970104803e-08, "loss": 0.0, "step": 25007 }, { "epoch": 24.046153846153846, "grad_norm": 0.0006398760015144944, "learning_rate": 7.763989908844749e-08, "loss": 0.0, "step": 25008 }, { "epoch": 24.047115384615385, "grad_norm": 0.0006939445738680661, "learning_rate": 7.748505244175252e-08, "loss": 0.0, "step": 25009 }, { "epoch": 24.048076923076923, "grad_norm": 0.0004087777924723923, "learning_rate": 7.733035976336789e-08, "loss": 0.0, "step": 25010 }, { "epoch": 24.049038461538462, "grad_norm": 0.0010495510650798678, "learning_rate": 7.717582105569276e-08, "loss": 0.0, "step": 25011 }, { "epoch": 24.05, "grad_norm": 0.0012076165294274688, "learning_rate": 7.702143632112636e-08, "loss": 0.0, "step": 25012 }, { "epoch": 24.05096153846154, "grad_norm": 0.0004356963618192822, "learning_rate": 7.68672055620634e-08, "loss": 0.0, "step": 25013 }, { "epoch": 24.051923076923078, "grad_norm": 0.0009560241596773267, "learning_rate": 7.671312878089754e-08, "loss": 0.0, "step": 25014 }, { "epoch": 24.052884615384617, "grad_norm": 0.0012154191499575973, "learning_rate": 7.655920598001909e-08, "loss": 0.0, "step": 25015 }, { "epoch": 24.053846153846155, "grad_norm": 0.0005451870383694768, "learning_rate": 7.640543716181725e-08, "loss": 0.0, "step": 25016 }, { "epoch": 24.05480769230769, "grad_norm": 0.0007290698122233152, "learning_rate": 7.625182232867789e-08, "loss": 0.0, "step": 25017 }, { "epoch": 24.05576923076923, "grad_norm": 0.0022496345918625593, "learning_rate": 7.609836148298356e-08, "loss": 0.0, "step": 25018 }, { "epoch": 24.056730769230768, "grad_norm": 0.0010670936899259686, "learning_rate": 7.594505462711677e-08, "loss": 0.0, "step": 25019 }, { "epoch": 24.057692307692307, "grad_norm": 0.0008899521199055016, "learning_rate": 7.579190176345563e-08, "loss": 0.0, "step": 25020 }, { "epoch": 24.058653846153845, "grad_norm": 0.0015923549653962255, "learning_rate": 7.563890289437825e-08, "loss": 0.0, "step": 25021 }, { "epoch": 24.059615384615384, "grad_norm": 0.001798966433852911, "learning_rate": 7.548605802225606e-08, "loss": 0.0, "step": 25022 }, { "epoch": 24.060576923076923, "grad_norm": 0.001492454670369625, "learning_rate": 7.53333671494616e-08, "loss": 0.0, "step": 25023 }, { "epoch": 24.06153846153846, "grad_norm": 0.0010754081886261702, "learning_rate": 7.518083027836631e-08, "loss": 0.0, "step": 25024 }, { "epoch": 24.0625, "grad_norm": 0.00047870291746221483, "learning_rate": 7.502844741133497e-08, "loss": 0.0, "step": 25025 }, { "epoch": 24.06346153846154, "grad_norm": 0.0010853748535737395, "learning_rate": 7.487621855073235e-08, "loss": 0.0, "step": 25026 }, { "epoch": 24.064423076923077, "grad_norm": 0.0009815115481615067, "learning_rate": 7.472414369892212e-08, "loss": 0.0, "step": 25027 }, { "epoch": 24.065384615384616, "grad_norm": 0.0015066699124872684, "learning_rate": 7.457222285826127e-08, "loss": 0.0, "step": 25028 }, { "epoch": 24.066346153846155, "grad_norm": 0.0011468209559097886, "learning_rate": 7.442045603110903e-08, "loss": 0.0, "step": 25029 }, { "epoch": 24.067307692307693, "grad_norm": 0.0009802350541576743, "learning_rate": 7.42688432198213e-08, "loss": 0.0, "step": 25030 }, { "epoch": 24.068269230769232, "grad_norm": 0.0010340731823816895, "learning_rate": 7.411738442674842e-08, "loss": 0.0, "step": 25031 }, { "epoch": 24.06923076923077, "grad_norm": 0.0011049341410398483, "learning_rate": 7.396607965424185e-08, "loss": 0.0, "step": 25032 }, { "epoch": 24.07019230769231, "grad_norm": 0.0004892962169833481, "learning_rate": 7.381492890464858e-08, "loss": 0.0, "step": 25033 }, { "epoch": 24.071153846153845, "grad_norm": 0.001346733421087265, "learning_rate": 7.366393218031564e-08, "loss": 0.0, "step": 25034 }, { "epoch": 24.072115384615383, "grad_norm": 0.0013829147210344672, "learning_rate": 7.351308948358449e-08, "loss": 0.0, "step": 25035 }, { "epoch": 24.073076923076922, "grad_norm": 0.00019850411626975983, "learning_rate": 7.336240081679657e-08, "loss": 0.0, "step": 25036 }, { "epoch": 24.07403846153846, "grad_norm": 0.0005849572480656207, "learning_rate": 7.321186618228893e-08, "loss": 0.0, "step": 25037 }, { "epoch": 24.075, "grad_norm": 0.0010875139851123095, "learning_rate": 7.306148558239967e-08, "loss": 0.0, "step": 25038 }, { "epoch": 24.075961538461538, "grad_norm": 0.0004990037996321917, "learning_rate": 7.291125901946027e-08, "loss": 0.0, "step": 25039 }, { "epoch": 24.076923076923077, "grad_norm": 0.00022434288985095918, "learning_rate": 7.276118649580221e-08, "loss": 0.0, "step": 25040 }, { "epoch": 24.077884615384615, "grad_norm": 0.0009609616827219725, "learning_rate": 7.261126801375474e-08, "loss": 0.0, "step": 25041 }, { "epoch": 24.078846153846154, "grad_norm": 0.0011185265611857176, "learning_rate": 7.246150357564374e-08, "loss": 0.0, "step": 25042 }, { "epoch": 24.079807692307693, "grad_norm": 0.0008747662650421262, "learning_rate": 7.231189318379294e-08, "loss": 0.0, "step": 25043 }, { "epoch": 24.08076923076923, "grad_norm": 0.0009970221435651183, "learning_rate": 7.216243684052493e-08, "loss": 0.0, "step": 25044 }, { "epoch": 24.08173076923077, "grad_norm": 0.0019463881617411971, "learning_rate": 7.201313454815784e-08, "loss": 0.0, "step": 25045 }, { "epoch": 24.08269230769231, "grad_norm": 0.0012085151392966509, "learning_rate": 7.18639863090087e-08, "loss": 0.0, "step": 25046 }, { "epoch": 24.083653846153847, "grad_norm": 0.0008551012142561376, "learning_rate": 7.171499212539124e-08, "loss": 0.0, "step": 25047 }, { "epoch": 24.084615384615386, "grad_norm": 0.0007392681436613202, "learning_rate": 7.156615199961803e-08, "loss": 0.0, "step": 25048 }, { "epoch": 24.085576923076925, "grad_norm": 0.002259110799059272, "learning_rate": 7.141746593399945e-08, "loss": 0.0, "step": 25049 }, { "epoch": 24.08653846153846, "grad_norm": 0.0016555157490074635, "learning_rate": 7.126893393084033e-08, "loss": 0.0, "step": 25050 }, { "epoch": 24.0875, "grad_norm": 0.003795668249949813, "learning_rate": 7.112055599244772e-08, "loss": 0.0, "step": 25051 }, { "epoch": 24.088461538461537, "grad_norm": 0.0004303113091737032, "learning_rate": 7.097233212112309e-08, "loss": 0.0, "step": 25052 }, { "epoch": 24.089423076923076, "grad_norm": 0.0001802901242626831, "learning_rate": 7.082426231916684e-08, "loss": 0.0, "step": 25053 }, { "epoch": 24.090384615384615, "grad_norm": 0.0007292859372682869, "learning_rate": 7.067634658887601e-08, "loss": 0.0, "step": 25054 }, { "epoch": 24.091346153846153, "grad_norm": 0.0010070721618831158, "learning_rate": 7.052858493254545e-08, "loss": 0.0, "step": 25055 }, { "epoch": 24.092307692307692, "grad_norm": 0.0008993262308649719, "learning_rate": 7.038097735246885e-08, "loss": 0.0, "step": 25056 }, { "epoch": 24.09326923076923, "grad_norm": 0.0006748438463546336, "learning_rate": 7.023352385093774e-08, "loss": 0.0, "step": 25057 }, { "epoch": 24.09423076923077, "grad_norm": 0.0010172576876357198, "learning_rate": 7.008622443023694e-08, "loss": 0.0, "step": 25058 }, { "epoch": 24.095192307692308, "grad_norm": 0.00047980935778468847, "learning_rate": 6.993907909265463e-08, "loss": 0.0, "step": 25059 }, { "epoch": 24.096153846153847, "grad_norm": 0.0009637849871069193, "learning_rate": 6.979208784047454e-08, "loss": 0.0, "step": 25060 }, { "epoch": 24.097115384615385, "grad_norm": 0.0004471864376682788, "learning_rate": 6.964525067597484e-08, "loss": 0.0, "step": 25061 }, { "epoch": 24.098076923076924, "grad_norm": 0.0007855298463255167, "learning_rate": 6.949856760143591e-08, "loss": 0.0, "step": 25062 }, { "epoch": 24.099038461538463, "grad_norm": 0.0008537719841115177, "learning_rate": 6.935203861913376e-08, "loss": 0.0, "step": 25063 }, { "epoch": 24.1, "grad_norm": 0.00038571315235458314, "learning_rate": 6.920566373134208e-08, "loss": 0.0, "step": 25064 }, { "epoch": 24.10096153846154, "grad_norm": 0.0007874121074564755, "learning_rate": 6.90594429403324e-08, "loss": 0.0, "step": 25065 }, { "epoch": 24.101923076923075, "grad_norm": 0.0006580897606909275, "learning_rate": 6.891337624837291e-08, "loss": 0.0, "step": 25066 }, { "epoch": 24.102884615384614, "grad_norm": 0.0004591697361320257, "learning_rate": 6.876746365772958e-08, "loss": 0.0, "step": 25067 }, { "epoch": 24.103846153846153, "grad_norm": 0.00035971379838883877, "learning_rate": 6.862170517066836e-08, "loss": 0.0, "step": 25068 }, { "epoch": 24.10480769230769, "grad_norm": 0.00045868282904848456, "learning_rate": 6.847610078944966e-08, "loss": 0.0, "step": 25069 }, { "epoch": 24.10576923076923, "grad_norm": 0.0002893525524996221, "learning_rate": 6.833065051633281e-08, "loss": 0.0, "step": 25070 }, { "epoch": 24.10673076923077, "grad_norm": 0.0014943594578653574, "learning_rate": 6.818535435357598e-08, "loss": 0.0, "step": 25071 }, { "epoch": 24.107692307692307, "grad_norm": 0.0006140876794233918, "learning_rate": 6.804021230343183e-08, "loss": 0.0, "step": 25072 }, { "epoch": 24.108653846153846, "grad_norm": 0.001255700713954866, "learning_rate": 6.78952243681541e-08, "loss": 0.0, "step": 25073 }, { "epoch": 24.109615384615385, "grad_norm": 0.000880335399415344, "learning_rate": 6.775039054999322e-08, "loss": 0.0, "step": 25074 }, { "epoch": 24.110576923076923, "grad_norm": 0.0006139151519164443, "learning_rate": 6.760571085119405e-08, "loss": 0.0, "step": 25075 }, { "epoch": 24.111538461538462, "grad_norm": 0.0006233176682144403, "learning_rate": 6.746118527400369e-08, "loss": 0.0, "step": 25076 }, { "epoch": 24.1125, "grad_norm": 0.000423771416535601, "learning_rate": 6.731681382066369e-08, "loss": 0.0, "step": 25077 }, { "epoch": 24.11346153846154, "grad_norm": 0.0009307300788350403, "learning_rate": 6.717259649341557e-08, "loss": 0.0, "step": 25078 }, { "epoch": 24.114423076923078, "grad_norm": 0.0010381870670244098, "learning_rate": 6.702853329449533e-08, "loss": 0.0, "step": 25079 }, { "epoch": 24.115384615384617, "grad_norm": 0.0006825218442827463, "learning_rate": 6.688462422614005e-08, "loss": 0.0, "step": 25080 }, { "epoch": 24.116346153846155, "grad_norm": 0.0007752046803943813, "learning_rate": 6.674086929058133e-08, "loss": 0.0, "step": 25081 }, { "epoch": 24.11730769230769, "grad_norm": 0.0013500581262633204, "learning_rate": 6.659726849005066e-08, "loss": 0.0, "step": 25082 }, { "epoch": 24.11826923076923, "grad_norm": 0.00034994943416677415, "learning_rate": 6.645382182677629e-08, "loss": 0.0, "step": 25083 }, { "epoch": 24.119230769230768, "grad_norm": 0.0010256464593112469, "learning_rate": 6.63105293029842e-08, "loss": 0.0, "step": 25084 }, { "epoch": 24.120192307692307, "grad_norm": 0.00031498013413511217, "learning_rate": 6.616739092089708e-08, "loss": 0.0, "step": 25085 }, { "epoch": 24.121153846153845, "grad_norm": 0.0005739583284594119, "learning_rate": 6.602440668273758e-08, "loss": 0.0, "step": 25086 }, { "epoch": 24.122115384615384, "grad_norm": 0.0004697110562119633, "learning_rate": 6.588157659072281e-08, "loss": 0.0, "step": 25087 }, { "epoch": 24.123076923076923, "grad_norm": 0.0009595052688382566, "learning_rate": 6.57389006470699e-08, "loss": 0.0, "step": 25088 }, { "epoch": 24.12403846153846, "grad_norm": 0.0006973458803258836, "learning_rate": 6.559637885399262e-08, "loss": 0.0, "step": 25089 }, { "epoch": 24.125, "grad_norm": 0.0010396911529824138, "learning_rate": 6.545401121370253e-08, "loss": 0.0, "step": 25090 }, { "epoch": 24.12596153846154, "grad_norm": 0.0010399933671578765, "learning_rate": 6.53117977284079e-08, "loss": 0.0, "step": 25091 }, { "epoch": 24.126923076923077, "grad_norm": 0.0003853319212794304, "learning_rate": 6.516973840031581e-08, "loss": 0.0, "step": 25092 }, { "epoch": 24.127884615384616, "grad_norm": 0.0007472092402167618, "learning_rate": 6.502783323163231e-08, "loss": 0.0, "step": 25093 }, { "epoch": 24.128846153846155, "grad_norm": 0.0018004787852987647, "learning_rate": 6.488608222455784e-08, "loss": 0.0, "step": 25094 }, { "epoch": 24.129807692307693, "grad_norm": 0.0005229623639024794, "learning_rate": 6.474448538129175e-08, "loss": 0.0, "step": 25095 }, { "epoch": 24.130769230769232, "grad_norm": 0.0030971127562224865, "learning_rate": 6.460304270403229e-08, "loss": 0.0, "step": 25096 }, { "epoch": 24.13173076923077, "grad_norm": 0.0006770396139472723, "learning_rate": 6.446175419497214e-08, "loss": 0.0, "step": 25097 }, { "epoch": 24.13269230769231, "grad_norm": 0.00036187635851092637, "learning_rate": 6.432061985630622e-08, "loss": 0.0, "step": 25098 }, { "epoch": 24.133653846153845, "grad_norm": 0.001390770892612636, "learning_rate": 6.417963969022389e-08, "loss": 0.0, "step": 25099 }, { "epoch": 24.134615384615383, "grad_norm": 0.0011086884187534451, "learning_rate": 6.403881369891119e-08, "loss": 0.0, "step": 25100 }, { "epoch": 24.135576923076922, "grad_norm": 0.004949001595377922, "learning_rate": 6.389814188455523e-08, "loss": 0.0001, "step": 25101 }, { "epoch": 24.13653846153846, "grad_norm": 0.0016779727302491665, "learning_rate": 6.375762424933652e-08, "loss": 0.0, "step": 25102 }, { "epoch": 24.1375, "grad_norm": 0.0023091575130820274, "learning_rate": 6.361726079543884e-08, "loss": 0.0, "step": 25103 }, { "epoch": 24.138461538461538, "grad_norm": 0.0016643765848129988, "learning_rate": 6.347705152503714e-08, "loss": 0.0, "step": 25104 }, { "epoch": 24.139423076923077, "grad_norm": 0.0010752312373369932, "learning_rate": 6.333699644030855e-08, "loss": 0.0, "step": 25105 }, { "epoch": 24.140384615384615, "grad_norm": 0.0010155120398849249, "learning_rate": 6.319709554342579e-08, "loss": 0.0, "step": 25106 }, { "epoch": 24.141346153846154, "grad_norm": 0.00035268563078716397, "learning_rate": 6.305734883656046e-08, "loss": 0.0, "step": 25107 }, { "epoch": 24.142307692307693, "grad_norm": 0.0008823483367450535, "learning_rate": 6.29177563218808e-08, "loss": 0.0, "step": 25108 }, { "epoch": 24.14326923076923, "grad_norm": 0.0014320601476356387, "learning_rate": 6.277831800155176e-08, "loss": 0.0, "step": 25109 }, { "epoch": 24.14423076923077, "grad_norm": 0.00052735983626917, "learning_rate": 6.263903387773829e-08, "loss": 0.0, "step": 25110 }, { "epoch": 24.14519230769231, "grad_norm": 0.000900436716619879, "learning_rate": 6.249990395260086e-08, "loss": 0.0, "step": 25111 }, { "epoch": 24.146153846153847, "grad_norm": 0.0007913164445199072, "learning_rate": 6.236092822829887e-08, "loss": 0.0, "step": 25112 }, { "epoch": 24.147115384615386, "grad_norm": 0.0007405247888527811, "learning_rate": 6.222210670698836e-08, "loss": 0.0, "step": 25113 }, { "epoch": 24.148076923076925, "grad_norm": 0.0010572667233645916, "learning_rate": 6.208343939082429e-08, "loss": 0.0, "step": 25114 }, { "epoch": 24.14903846153846, "grad_norm": 0.0005563811864703894, "learning_rate": 6.194492628195714e-08, "loss": 0.0, "step": 25115 }, { "epoch": 24.15, "grad_norm": 0.001202680403366685, "learning_rate": 6.180656738253743e-08, "loss": 0.0, "step": 25116 }, { "epoch": 24.150961538461537, "grad_norm": 0.0008975893724709749, "learning_rate": 6.166836269471122e-08, "loss": 0.0, "step": 25117 }, { "epoch": 24.151923076923076, "grad_norm": 0.0002801164810080081, "learning_rate": 6.153031222062456e-08, "loss": 0.0, "step": 25118 }, { "epoch": 24.152884615384615, "grad_norm": 0.001296771690249443, "learning_rate": 6.139241596241686e-08, "loss": 0.0, "step": 25119 }, { "epoch": 24.153846153846153, "grad_norm": 0.0003690269950311631, "learning_rate": 6.125467392222972e-08, "loss": 0.0, "step": 25120 }, { "epoch": 24.154807692307692, "grad_norm": 0.0010654290672391653, "learning_rate": 6.111708610220035e-08, "loss": 0.0, "step": 25121 }, { "epoch": 24.15576923076923, "grad_norm": 0.0005275186849758029, "learning_rate": 6.09796525044637e-08, "loss": 0.0, "step": 25122 }, { "epoch": 24.15673076923077, "grad_norm": 0.0032238527201116085, "learning_rate": 6.084237313115249e-08, "loss": 0.0, "step": 25123 }, { "epoch": 24.157692307692308, "grad_norm": 0.0006706694257445633, "learning_rate": 6.070524798439614e-08, "loss": 0.0, "step": 25124 }, { "epoch": 24.158653846153847, "grad_norm": 0.000771314196754247, "learning_rate": 6.056827706632185e-08, "loss": 0.0, "step": 25125 }, { "epoch": 24.159615384615385, "grad_norm": 0.0007848986424505711, "learning_rate": 6.043146037905789e-08, "loss": 0.0, "step": 25126 }, { "epoch": 24.160576923076924, "grad_norm": 0.0006292562466114759, "learning_rate": 6.02947979247237e-08, "loss": 0.0, "step": 25127 }, { "epoch": 24.161538461538463, "grad_norm": 0.0005923593416810036, "learning_rate": 6.015828970544312e-08, "loss": 0.0, "step": 25128 }, { "epoch": 24.1625, "grad_norm": 0.005469387862831354, "learning_rate": 6.002193572333226e-08, "loss": 0.0, "step": 25129 }, { "epoch": 24.16346153846154, "grad_norm": 0.0009511318639852107, "learning_rate": 5.988573598050717e-08, "loss": 0.0, "step": 25130 }, { "epoch": 24.164423076923075, "grad_norm": 0.0013520941138267517, "learning_rate": 5.974969047908063e-08, "loss": 0.0, "step": 25131 }, { "epoch": 24.165384615384614, "grad_norm": 0.0005355558823794127, "learning_rate": 5.96137992211665e-08, "loss": 0.0, "step": 25132 }, { "epoch": 24.166346153846153, "grad_norm": 0.0007152373436838388, "learning_rate": 5.9478062208870865e-08, "loss": 0.0, "step": 25133 }, { "epoch": 24.16730769230769, "grad_norm": 0.0005550857167690992, "learning_rate": 5.934247944429983e-08, "loss": 0.0, "step": 25134 }, { "epoch": 24.16826923076923, "grad_norm": 0.0011505120201036334, "learning_rate": 5.920705092955836e-08, "loss": 0.0, "step": 25135 }, { "epoch": 24.16923076923077, "grad_norm": 0.000865309382788837, "learning_rate": 5.907177666674813e-08, "loss": 0.0, "step": 25136 }, { "epoch": 24.170192307692307, "grad_norm": 0.0013534331228584051, "learning_rate": 5.893665665796744e-08, "loss": 0.0, "step": 25137 }, { "epoch": 24.171153846153846, "grad_norm": 0.0006687747081741691, "learning_rate": 5.880169090531351e-08, "loss": 0.0, "step": 25138 }, { "epoch": 24.172115384615385, "grad_norm": 0.0010203677229583263, "learning_rate": 5.8666879410879116e-08, "loss": 0.0, "step": 25139 }, { "epoch": 24.173076923076923, "grad_norm": 0.0002710982516873628, "learning_rate": 5.8532222176758136e-08, "loss": 0.0, "step": 25140 }, { "epoch": 24.174038461538462, "grad_norm": 0.0003568126121535897, "learning_rate": 5.8397719205038894e-08, "loss": 0.0, "step": 25141 }, { "epoch": 24.175, "grad_norm": 0.00021519087022170424, "learning_rate": 5.826337049780972e-08, "loss": 0.0, "step": 25142 }, { "epoch": 24.17596153846154, "grad_norm": 0.0010767076164484024, "learning_rate": 5.81291760571534e-08, "loss": 0.0, "step": 25143 }, { "epoch": 24.176923076923078, "grad_norm": 0.002832565689459443, "learning_rate": 5.799513588515382e-08, "loss": 0.0, "step": 25144 }, { "epoch": 24.177884615384617, "grad_norm": 0.0006156284362077713, "learning_rate": 5.786124998388931e-08, "loss": 0.0, "step": 25145 }, { "epoch": 24.178846153846155, "grad_norm": 0.0009374971268698573, "learning_rate": 5.772751835543933e-08, "loss": 0.0, "step": 25146 }, { "epoch": 24.17980769230769, "grad_norm": 0.0003516261058393866, "learning_rate": 5.7593941001877763e-08, "loss": 0.0, "step": 25147 }, { "epoch": 24.18076923076923, "grad_norm": 0.00029515998903661966, "learning_rate": 5.7460517925277406e-08, "loss": 0.0, "step": 25148 }, { "epoch": 24.181730769230768, "grad_norm": 0.00030500281718559563, "learning_rate": 5.7327249127708816e-08, "loss": 0.0, "step": 25149 }, { "epoch": 24.182692307692307, "grad_norm": 0.0010518046328797936, "learning_rate": 5.719413461124035e-08, "loss": 0.0, "step": 25150 }, { "epoch": 24.183653846153845, "grad_norm": 0.0008377751801162958, "learning_rate": 5.7061174377937015e-08, "loss": 0.0, "step": 25151 }, { "epoch": 24.184615384615384, "grad_norm": 0.0017210207879543304, "learning_rate": 5.69283684298616e-08, "loss": 0.0, "step": 25152 }, { "epoch": 24.185576923076923, "grad_norm": 0.0006711579044349492, "learning_rate": 5.6795716769075803e-08, "loss": 0.0, "step": 25153 }, { "epoch": 24.18653846153846, "grad_norm": 0.0004917531041428447, "learning_rate": 5.666321939763797e-08, "loss": 0.0, "step": 25154 }, { "epoch": 24.1875, "grad_norm": 0.0010651391930878162, "learning_rate": 5.653087631760312e-08, "loss": 0.0, "step": 25155 }, { "epoch": 24.18846153846154, "grad_norm": 0.0003046815691050142, "learning_rate": 5.6398687531025175e-08, "loss": 0.0, "step": 25156 }, { "epoch": 24.189423076923077, "grad_norm": 0.0009051614906638861, "learning_rate": 5.6266653039955823e-08, "loss": 0.0, "step": 25157 }, { "epoch": 24.190384615384616, "grad_norm": 0.0012081338791176677, "learning_rate": 5.613477284644231e-08, "loss": 0.0, "step": 25158 }, { "epoch": 24.191346153846155, "grad_norm": 0.0005311804125085473, "learning_rate": 5.6003046952534114e-08, "loss": 0.0, "step": 25159 }, { "epoch": 24.192307692307693, "grad_norm": 0.0021329852752387524, "learning_rate": 5.587147536027182e-08, "loss": 0.0, "step": 25160 }, { "epoch": 24.193269230769232, "grad_norm": 0.0007429540273733437, "learning_rate": 5.5740058071699354e-08, "loss": 0.0, "step": 25161 }, { "epoch": 24.19423076923077, "grad_norm": 0.0012188286054879427, "learning_rate": 5.560879508885508e-08, "loss": 0.0, "step": 25162 }, { "epoch": 24.19519230769231, "grad_norm": 0.0009321841644123197, "learning_rate": 5.5477686413776264e-08, "loss": 0.0, "step": 25163 }, { "epoch": 24.196153846153845, "grad_norm": 0.002618612488731742, "learning_rate": 5.534673204849572e-08, "loss": 0.0, "step": 25164 }, { "epoch": 24.197115384615383, "grad_norm": 0.0012012138031423092, "learning_rate": 5.521593199504738e-08, "loss": 0.0, "step": 25165 }, { "epoch": 24.198076923076922, "grad_norm": 0.0013850399991497397, "learning_rate": 5.508528625546072e-08, "loss": 0.0, "step": 25166 }, { "epoch": 24.19903846153846, "grad_norm": 0.0005348498816601932, "learning_rate": 5.4954794831761915e-08, "loss": 0.0, "step": 25167 }, { "epoch": 24.2, "grad_norm": 0.0009280929807573557, "learning_rate": 5.4824457725975996e-08, "loss": 0.0, "step": 25168 }, { "epoch": 24.200961538461538, "grad_norm": 0.00043022542377002537, "learning_rate": 5.46942749401258e-08, "loss": 0.0, "step": 25169 }, { "epoch": 24.201923076923077, "grad_norm": 0.0018833187641575933, "learning_rate": 5.456424647623193e-08, "loss": 0.0, "step": 25170 }, { "epoch": 24.202884615384615, "grad_norm": 0.0004021455242764205, "learning_rate": 5.443437233631166e-08, "loss": 0.0, "step": 25171 }, { "epoch": 24.203846153846154, "grad_norm": 0.0009931697277352214, "learning_rate": 5.4304652522378934e-08, "loss": 0.0, "step": 25172 }, { "epoch": 24.204807692307693, "grad_norm": 0.0005883018602617085, "learning_rate": 5.4175087036448805e-08, "loss": 0.0, "step": 25173 }, { "epoch": 24.20576923076923, "grad_norm": 0.0005653055268339813, "learning_rate": 5.404567588052967e-08, "loss": 0.0, "step": 25174 }, { "epoch": 24.20673076923077, "grad_norm": 0.000748679565731436, "learning_rate": 5.391641905663103e-08, "loss": 0.0, "step": 25175 }, { "epoch": 24.20769230769231, "grad_norm": 0.0009794289944693446, "learning_rate": 5.378731656675906e-08, "loss": 0.0, "step": 25176 }, { "epoch": 24.208653846153847, "grad_norm": 0.0009377860114909708, "learning_rate": 5.365836841291439e-08, "loss": 0.0, "step": 25177 }, { "epoch": 24.209615384615386, "grad_norm": 0.00046715670032426715, "learning_rate": 5.3529574597100954e-08, "loss": 0.0, "step": 25178 }, { "epoch": 24.210576923076925, "grad_norm": 0.0005111639620736241, "learning_rate": 5.3400935121316054e-08, "loss": 0.0, "step": 25179 }, { "epoch": 24.21153846153846, "grad_norm": 0.0006568444659933448, "learning_rate": 5.327244998755476e-08, "loss": 0.0, "step": 25180 }, { "epoch": 24.2125, "grad_norm": 0.0006678763311356306, "learning_rate": 5.314411919781326e-08, "loss": 0.0, "step": 25181 }, { "epoch": 24.213461538461537, "grad_norm": 0.0008369697607122362, "learning_rate": 5.301594275407995e-08, "loss": 0.0, "step": 25182 }, { "epoch": 24.214423076923076, "grad_norm": 0.0006385593442246318, "learning_rate": 5.288792065834547e-08, "loss": 0.0, "step": 25183 }, { "epoch": 24.215384615384615, "grad_norm": 0.0005634988192468882, "learning_rate": 5.2760052912597116e-08, "loss": 0.0, "step": 25184 }, { "epoch": 24.216346153846153, "grad_norm": 0.0009794229408726096, "learning_rate": 5.263233951881663e-08, "loss": 0.0, "step": 25185 }, { "epoch": 24.217307692307692, "grad_norm": 0.0022615708876401186, "learning_rate": 5.250478047898688e-08, "loss": 0.0, "step": 25186 }, { "epoch": 24.21826923076923, "grad_norm": 0.0003224053652957082, "learning_rate": 5.23773757950885e-08, "loss": 0.0, "step": 25187 }, { "epoch": 24.21923076923077, "grad_norm": 0.001446221605874598, "learning_rate": 5.225012546909658e-08, "loss": 0.0, "step": 25188 }, { "epoch": 24.220192307692308, "grad_norm": 0.0017047636210918427, "learning_rate": 5.21230295029862e-08, "loss": 0.0, "step": 25189 }, { "epoch": 24.221153846153847, "grad_norm": 0.0007976395427249372, "learning_rate": 5.199608789873134e-08, "loss": 0.0, "step": 25190 }, { "epoch": 24.222115384615385, "grad_norm": 0.0003721293469425291, "learning_rate": 5.1869300658298207e-08, "loss": 0.0, "step": 25191 }, { "epoch": 24.223076923076924, "grad_norm": 0.0008039133390411735, "learning_rate": 5.174266778365744e-08, "loss": 0.0, "step": 25192 }, { "epoch": 24.224038461538463, "grad_norm": 0.001087529817596078, "learning_rate": 5.161618927677192e-08, "loss": 0.0, "step": 25193 }, { "epoch": 24.225, "grad_norm": 0.00039744129753671587, "learning_rate": 5.148986513960452e-08, "loss": 0.0, "step": 25194 }, { "epoch": 24.22596153846154, "grad_norm": 0.0007671384955756366, "learning_rate": 5.1363695374117004e-08, "loss": 0.0, "step": 25195 }, { "epoch": 24.226923076923075, "grad_norm": 0.0005207561771385372, "learning_rate": 5.123767998226559e-08, "loss": 0.0, "step": 25196 }, { "epoch": 24.227884615384614, "grad_norm": 0.0030316305346786976, "learning_rate": 5.1111818966006477e-08, "loss": 0.0, "step": 25197 }, { "epoch": 24.228846153846153, "grad_norm": 0.0009511820389889181, "learning_rate": 5.098611232729256e-08, "loss": 0.0, "step": 25198 }, { "epoch": 24.22980769230769, "grad_norm": 0.00035611065686680377, "learning_rate": 5.0860560068073384e-08, "loss": 0.0, "step": 25199 }, { "epoch": 24.23076923076923, "grad_norm": 0.000470780476462096, "learning_rate": 5.0735162190297396e-08, "loss": 0.0, "step": 25200 }, { "epoch": 24.23173076923077, "grad_norm": 0.0008561601280234754, "learning_rate": 5.060991869591192e-08, "loss": 0.0, "step": 25201 }, { "epoch": 24.232692307692307, "grad_norm": 0.001136090955697, "learning_rate": 5.0484829586858744e-08, "loss": 0.0, "step": 25202 }, { "epoch": 24.233653846153846, "grad_norm": 0.00035479621146805584, "learning_rate": 5.035989486508075e-08, "loss": 0.0, "step": 25203 }, { "epoch": 24.234615384615385, "grad_norm": 0.0020083191338926554, "learning_rate": 5.023511453251417e-08, "loss": 0.0, "step": 25204 }, { "epoch": 24.235576923076923, "grad_norm": 0.000978279858827591, "learning_rate": 5.011048859109635e-08, "loss": 0.0, "step": 25205 }, { "epoch": 24.236538461538462, "grad_norm": 0.0012759828241541982, "learning_rate": 4.99860170427624e-08, "loss": 0.0, "step": 25206 }, { "epoch": 24.2375, "grad_norm": 0.0005008053267374635, "learning_rate": 4.9861699889441895e-08, "loss": 0.0, "step": 25207 }, { "epoch": 24.23846153846154, "grad_norm": 0.00072961684782058, "learning_rate": 4.9737537133063284e-08, "loss": 0.0, "step": 25208 }, { "epoch": 24.239423076923078, "grad_norm": 0.0005621140589937568, "learning_rate": 4.9613528775556143e-08, "loss": 0.0, "step": 25209 }, { "epoch": 24.240384615384617, "grad_norm": 0.0005598692805506289, "learning_rate": 4.948967481884226e-08, "loss": 0.0, "step": 25210 }, { "epoch": 24.241346153846155, "grad_norm": 0.0003738937375601381, "learning_rate": 4.9365975264844547e-08, "loss": 0.0, "step": 25211 }, { "epoch": 24.24230769230769, "grad_norm": 0.0006662938394583762, "learning_rate": 4.9242430115481464e-08, "loss": 0.0, "step": 25212 }, { "epoch": 24.24326923076923, "grad_norm": 0.0002885175636038184, "learning_rate": 4.911903937267148e-08, "loss": 0.0, "step": 25213 }, { "epoch": 24.244230769230768, "grad_norm": 0.0006818466354161501, "learning_rate": 4.899580303832863e-08, "loss": 0.0, "step": 25214 }, { "epoch": 24.245192307692307, "grad_norm": 0.0024656502064317465, "learning_rate": 4.887272111436359e-08, "loss": 0.0, "step": 25215 }, { "epoch": 24.246153846153845, "grad_norm": 0.0008681509061716497, "learning_rate": 4.874979360268928e-08, "loss": 0.0, "step": 25216 }, { "epoch": 24.247115384615384, "grad_norm": 0.0011587837943807244, "learning_rate": 4.8627020505210845e-08, "loss": 0.0, "step": 25217 }, { "epoch": 24.248076923076923, "grad_norm": 0.0006911902455613017, "learning_rate": 4.8504401823834535e-08, "loss": 0.0, "step": 25218 }, { "epoch": 24.24903846153846, "grad_norm": 0.0077262273989617825, "learning_rate": 4.8381937560462166e-08, "loss": 0.0001, "step": 25219 }, { "epoch": 24.25, "grad_norm": 0.0005786925903521478, "learning_rate": 4.825962771699555e-08, "loss": 0.0, "step": 25220 }, { "epoch": 24.25096153846154, "grad_norm": 0.000997366150841117, "learning_rate": 4.813747229532984e-08, "loss": 0.0, "step": 25221 }, { "epoch": 24.251923076923077, "grad_norm": 0.0008313031867146492, "learning_rate": 4.80154712973635e-08, "loss": 0.0, "step": 25222 }, { "epoch": 24.252884615384616, "grad_norm": 0.0005229791859164834, "learning_rate": 4.7893624724988375e-08, "loss": 0.0, "step": 25223 }, { "epoch": 24.253846153846155, "grad_norm": 0.0004374323470983654, "learning_rate": 4.777193258009405e-08, "loss": 0.0, "step": 25224 }, { "epoch": 24.254807692307693, "grad_norm": 0.0018791883485391736, "learning_rate": 4.765039486457013e-08, "loss": 0.0, "step": 25225 }, { "epoch": 24.255769230769232, "grad_norm": 0.0007527749985456467, "learning_rate": 4.7529011580302876e-08, "loss": 0.0, "step": 25226 }, { "epoch": 24.25673076923077, "grad_norm": 0.0032023245003074408, "learning_rate": 4.7407782729175235e-08, "loss": 0.0, "step": 25227 }, { "epoch": 24.25769230769231, "grad_norm": 0.0009868217166513205, "learning_rate": 4.728670831306792e-08, "loss": 0.0, "step": 25228 }, { "epoch": 24.258653846153845, "grad_norm": 0.001304006204009056, "learning_rate": 4.716578833386054e-08, "loss": 0.0, "step": 25229 }, { "epoch": 24.259615384615383, "grad_norm": 0.0005004910635761917, "learning_rate": 4.7045022793428264e-08, "loss": 0.0, "step": 25230 }, { "epoch": 24.260576923076922, "grad_norm": 0.0009654698660597205, "learning_rate": 4.692441169364625e-08, "loss": 0.0, "step": 25231 }, { "epoch": 24.26153846153846, "grad_norm": 0.0005727861425839365, "learning_rate": 4.680395503638413e-08, "loss": 0.0, "step": 25232 }, { "epoch": 24.2625, "grad_norm": 0.0011134297819808125, "learning_rate": 4.6683652823513725e-08, "loss": 0.0, "step": 25233 }, { "epoch": 24.263461538461538, "grad_norm": 0.002275398001074791, "learning_rate": 4.6563505056900216e-08, "loss": 0.0, "step": 25234 }, { "epoch": 24.264423076923077, "grad_norm": 0.001035485416650772, "learning_rate": 4.6443511738407664e-08, "loss": 0.0, "step": 25235 }, { "epoch": 24.265384615384615, "grad_norm": 0.002197813708335161, "learning_rate": 4.6323672869899025e-08, "loss": 0.0, "step": 25236 }, { "epoch": 24.266346153846154, "grad_norm": 0.0008918671519495547, "learning_rate": 4.6203988453232816e-08, "loss": 0.0, "step": 25237 }, { "epoch": 24.267307692307693, "grad_norm": 0.0032074495684355497, "learning_rate": 4.608445849026644e-08, "loss": 0.0, "step": 25238 }, { "epoch": 24.26826923076923, "grad_norm": 0.0009369868203066289, "learning_rate": 4.596508298285618e-08, "loss": 0.0, "step": 25239 }, { "epoch": 24.26923076923077, "grad_norm": 0.000321859959512949, "learning_rate": 4.5845861932851676e-08, "loss": 0.0, "step": 25240 }, { "epoch": 24.27019230769231, "grad_norm": 0.00108805438503623, "learning_rate": 4.572679534210478e-08, "loss": 0.0, "step": 25241 }, { "epoch": 24.271153846153847, "grad_norm": 0.0014305796939879656, "learning_rate": 4.56078832124629e-08, "loss": 0.0, "step": 25242 }, { "epoch": 24.272115384615386, "grad_norm": 0.0007819881429895759, "learning_rate": 4.548912554577012e-08, "loss": 0.0, "step": 25243 }, { "epoch": 24.273076923076925, "grad_norm": 0.0010077384067699313, "learning_rate": 4.537052234387052e-08, "loss": 0.0, "step": 25244 }, { "epoch": 24.27403846153846, "grad_norm": 0.0012807162711396813, "learning_rate": 4.525207360860373e-08, "loss": 0.0, "step": 25245 }, { "epoch": 24.275, "grad_norm": 0.0007756990962661803, "learning_rate": 4.513377934180829e-08, "loss": 0.0, "step": 25246 }, { "epoch": 24.275961538461537, "grad_norm": 0.0007163684349507093, "learning_rate": 4.501563954531829e-08, "loss": 0.0, "step": 25247 }, { "epoch": 24.276923076923076, "grad_norm": 0.0004306285409256816, "learning_rate": 4.4897654220970036e-08, "loss": 0.0, "step": 25248 }, { "epoch": 24.277884615384615, "grad_norm": 0.00014018962974660099, "learning_rate": 4.4779823370590945e-08, "loss": 0.0, "step": 25249 }, { "epoch": 24.278846153846153, "grad_norm": 0.002099673030897975, "learning_rate": 4.466214699601068e-08, "loss": 0.0, "step": 25250 }, { "epoch": 24.279807692307692, "grad_norm": 0.000656349235214293, "learning_rate": 4.454462509905555e-08, "loss": 0.0, "step": 25251 }, { "epoch": 24.28076923076923, "grad_norm": 0.0012577984016388655, "learning_rate": 4.442725768154854e-08, "loss": 0.0, "step": 25252 }, { "epoch": 24.28173076923077, "grad_norm": 0.0003787975001614541, "learning_rate": 4.431004474531264e-08, "loss": 0.0, "step": 25253 }, { "epoch": 24.282692307692308, "grad_norm": 0.0005541041027754545, "learning_rate": 4.4192986292164176e-08, "loss": 0.0, "step": 25254 }, { "epoch": 24.283653846153847, "grad_norm": 0.0008496571099385619, "learning_rate": 4.4076082323920576e-08, "loss": 0.0, "step": 25255 }, { "epoch": 24.284615384615385, "grad_norm": 0.0008811856969259679, "learning_rate": 4.3959332842395955e-08, "loss": 0.0, "step": 25256 }, { "epoch": 24.285576923076924, "grad_norm": 0.0014877000357955694, "learning_rate": 4.384273784940218e-08, "loss": 0.0, "step": 25257 }, { "epoch": 24.286538461538463, "grad_norm": 0.0006025375914759934, "learning_rate": 4.372629734674783e-08, "loss": 0.0, "step": 25258 }, { "epoch": 24.2875, "grad_norm": 0.0010447471868246794, "learning_rate": 4.361001133624032e-08, "loss": 0.0, "step": 25259 }, { "epoch": 24.28846153846154, "grad_norm": 0.0025811176747083664, "learning_rate": 4.349387981968378e-08, "loss": 0.0, "step": 25260 }, { "epoch": 24.289423076923075, "grad_norm": 0.001520349527709186, "learning_rate": 4.33779027988801e-08, "loss": 0.0, "step": 25261 }, { "epoch": 24.290384615384614, "grad_norm": 0.00024971275706775486, "learning_rate": 4.326208027562895e-08, "loss": 0.0, "step": 25262 }, { "epoch": 24.291346153846153, "grad_norm": 0.0014065172290429473, "learning_rate": 4.3146412251726665e-08, "loss": 0.0, "step": 25263 }, { "epoch": 24.29230769230769, "grad_norm": 0.0019320573192089796, "learning_rate": 4.3030898728970706e-08, "loss": 0.0, "step": 25264 }, { "epoch": 24.29326923076923, "grad_norm": 0.0011879787780344486, "learning_rate": 4.291553970915074e-08, "loss": 0.0, "step": 25265 }, { "epoch": 24.29423076923077, "grad_norm": 0.0010056827450171113, "learning_rate": 4.2800335194058686e-08, "loss": 0.0, "step": 25266 }, { "epoch": 24.295192307692307, "grad_norm": 0.00045893757487647235, "learning_rate": 4.2685285185480876e-08, "loss": 0.0, "step": 25267 }, { "epoch": 24.296153846153846, "grad_norm": 0.0009237296762876213, "learning_rate": 4.257038968520366e-08, "loss": 0.0, "step": 25268 }, { "epoch": 24.297115384615385, "grad_norm": 0.002347154775634408, "learning_rate": 4.2455648695008956e-08, "loss": 0.0, "step": 25269 }, { "epoch": 24.298076923076923, "grad_norm": 0.0006795157096348703, "learning_rate": 4.2341062216677554e-08, "loss": 0.0, "step": 25270 }, { "epoch": 24.299038461538462, "grad_norm": 0.0011134848464280367, "learning_rate": 4.222663025198692e-08, "loss": 0.0, "step": 25271 }, { "epoch": 24.3, "grad_norm": 0.00040170125430449843, "learning_rate": 4.211235280271453e-08, "loss": 0.0, "step": 25272 }, { "epoch": 24.30096153846154, "grad_norm": 0.0013369781663641334, "learning_rate": 4.199822987063118e-08, "loss": 0.0, "step": 25273 }, { "epoch": 24.301923076923078, "grad_norm": 0.0006156760500743985, "learning_rate": 4.188426145750879e-08, "loss": 0.0, "step": 25274 }, { "epoch": 24.302884615384617, "grad_norm": 0.0007819269667379558, "learning_rate": 4.177044756511706e-08, "loss": 0.0, "step": 25275 }, { "epoch": 24.303846153846155, "grad_norm": 0.0005257902084849775, "learning_rate": 4.165678819522012e-08, "loss": 0.0, "step": 25276 }, { "epoch": 24.30480769230769, "grad_norm": 0.0004626769805327058, "learning_rate": 4.154328334958324e-08, "loss": 0.0, "step": 25277 }, { "epoch": 24.30576923076923, "grad_norm": 0.0005127664189785719, "learning_rate": 4.1429933029966116e-08, "loss": 0.0, "step": 25278 }, { "epoch": 24.306730769230768, "grad_norm": 0.0010701980208978057, "learning_rate": 4.1316737238128455e-08, "loss": 0.0, "step": 25279 }, { "epoch": 24.307692307692307, "grad_norm": 0.0004811414983123541, "learning_rate": 4.120369597582663e-08, "loss": 0.0, "step": 25280 }, { "epoch": 24.308653846153845, "grad_norm": 0.0011868461733683944, "learning_rate": 4.109080924481479e-08, "loss": 0.0, "step": 25281 }, { "epoch": 24.309615384615384, "grad_norm": 0.0010104341199621558, "learning_rate": 4.0978077046844865e-08, "loss": 0.0, "step": 25282 }, { "epoch": 24.310576923076923, "grad_norm": 0.0006551945116370916, "learning_rate": 4.0865499383665465e-08, "loss": 0.0, "step": 25283 }, { "epoch": 24.31153846153846, "grad_norm": 0.0009156382875517011, "learning_rate": 4.0753076257024073e-08, "loss": 0.0, "step": 25284 }, { "epoch": 24.3125, "grad_norm": 0.0009569255053065717, "learning_rate": 4.064080766866485e-08, "loss": 0.0, "step": 25285 }, { "epoch": 24.31346153846154, "grad_norm": 0.0016347308410331607, "learning_rate": 4.052869362032974e-08, "loss": 0.0, "step": 25286 }, { "epoch": 24.314423076923077, "grad_norm": 0.0011001540115103126, "learning_rate": 4.041673411375846e-08, "loss": 0.0, "step": 25287 }, { "epoch": 24.315384615384616, "grad_norm": 0.0010219140676781535, "learning_rate": 4.030492915068851e-08, "loss": 0.0, "step": 25288 }, { "epoch": 24.316346153846155, "grad_norm": 0.0005150413489900529, "learning_rate": 4.0193278732855166e-08, "loss": 0.0, "step": 25289 }, { "epoch": 24.317307692307693, "grad_norm": 0.0007539433427155018, "learning_rate": 4.008178286199038e-08, "loss": 0.0, "step": 25290 }, { "epoch": 24.318269230769232, "grad_norm": 0.001567569561302662, "learning_rate": 3.9970441539823876e-08, "loss": 0.0, "step": 25291 }, { "epoch": 24.31923076923077, "grad_norm": 0.0005388990393839777, "learning_rate": 3.9859254768084277e-08, "loss": 0.0, "step": 25292 }, { "epoch": 24.32019230769231, "grad_norm": 0.0011185879120603204, "learning_rate": 3.974822254849575e-08, "loss": 0.0, "step": 25293 }, { "epoch": 24.321153846153845, "grad_norm": 0.0008898744126781821, "learning_rate": 3.963734488278248e-08, "loss": 0.0, "step": 25294 }, { "epoch": 24.322115384615383, "grad_norm": 0.001238455530256033, "learning_rate": 3.95266217726642e-08, "loss": 0.0, "step": 25295 }, { "epoch": 24.323076923076922, "grad_norm": 0.0004724975733552128, "learning_rate": 3.941605321985842e-08, "loss": 0.0, "step": 25296 }, { "epoch": 24.32403846153846, "grad_norm": 0.00038677072734571993, "learning_rate": 3.930563922608266e-08, "loss": 0.0, "step": 25297 }, { "epoch": 24.325, "grad_norm": 0.0009394937660545111, "learning_rate": 3.919537979304777e-08, "loss": 0.0, "step": 25298 }, { "epoch": 24.325961538461538, "grad_norm": 0.0005127909244038165, "learning_rate": 3.908527492246683e-08, "loss": 0.0, "step": 25299 }, { "epoch": 24.326923076923077, "grad_norm": 0.0012976048747077584, "learning_rate": 3.897532461604736e-08, "loss": 0.0, "step": 25300 }, { "epoch": 24.327884615384615, "grad_norm": 0.0007919911877252162, "learning_rate": 3.886552887549577e-08, "loss": 0.0, "step": 25301 }, { "epoch": 24.328846153846154, "grad_norm": 0.0008498458191752434, "learning_rate": 3.875588770251515e-08, "loss": 0.0, "step": 25302 }, { "epoch": 24.329807692307693, "grad_norm": 0.001826144871301949, "learning_rate": 3.8646401098807465e-08, "loss": 0.0, "step": 25303 }, { "epoch": 24.33076923076923, "grad_norm": 0.0009660786017775536, "learning_rate": 3.853706906607135e-08, "loss": 0.0, "step": 25304 }, { "epoch": 24.33173076923077, "grad_norm": 0.0008615932310931385, "learning_rate": 3.8427891606003244e-08, "loss": 0.0, "step": 25305 }, { "epoch": 24.33269230769231, "grad_norm": 0.00035441783256828785, "learning_rate": 3.831886872029844e-08, "loss": 0.0, "step": 25306 }, { "epoch": 24.333653846153847, "grad_norm": 0.0008206996135413647, "learning_rate": 3.82100004106456e-08, "loss": 0.0, "step": 25307 }, { "epoch": 24.334615384615386, "grad_norm": 0.0019831040408462286, "learning_rate": 3.81012866787378e-08, "loss": 0.0, "step": 25308 }, { "epoch": 24.335576923076925, "grad_norm": 0.0012706302804872394, "learning_rate": 3.7992727526259264e-08, "loss": 0.0, "step": 25309 }, { "epoch": 24.33653846153846, "grad_norm": 0.00042788396240212023, "learning_rate": 3.788432295489641e-08, "loss": 0.0, "step": 25310 }, { "epoch": 24.3375, "grad_norm": 0.0006394302472472191, "learning_rate": 3.777607296633012e-08, "loss": 0.0, "step": 25311 }, { "epoch": 24.338461538461537, "grad_norm": 0.0009507711511105299, "learning_rate": 3.7667977562239056e-08, "loss": 0.0, "step": 25312 }, { "epoch": 24.339423076923076, "grad_norm": 0.0004895640886388719, "learning_rate": 3.7560036744302977e-08, "loss": 0.0, "step": 25313 }, { "epoch": 24.340384615384615, "grad_norm": 0.0007218208629637957, "learning_rate": 3.74522505141961e-08, "loss": 0.0, "step": 25314 }, { "epoch": 24.341346153846153, "grad_norm": 0.0012321569956839085, "learning_rate": 3.734461887359042e-08, "loss": 0.0, "step": 25315 }, { "epoch": 24.342307692307692, "grad_norm": 0.0012864582240581512, "learning_rate": 3.723714182415572e-08, "loss": 0.0, "step": 25316 }, { "epoch": 24.34326923076923, "grad_norm": 0.00039885391015559435, "learning_rate": 3.712981936756066e-08, "loss": 0.0, "step": 25317 }, { "epoch": 24.34423076923077, "grad_norm": 0.0009860455757007003, "learning_rate": 3.7022651505469464e-08, "loss": 0.0, "step": 25318 }, { "epoch": 24.345192307692308, "grad_norm": 0.0007016914314590394, "learning_rate": 3.691563823954636e-08, "loss": 0.0, "step": 25319 }, { "epoch": 24.346153846153847, "grad_norm": 0.002501068403944373, "learning_rate": 3.680877957145112e-08, "loss": 0.0, "step": 25320 }, { "epoch": 24.347115384615385, "grad_norm": 0.001304049277678132, "learning_rate": 3.670207550284133e-08, "loss": 0.0, "step": 25321 }, { "epoch": 24.348076923076924, "grad_norm": 0.000910359900444746, "learning_rate": 3.659552603537453e-08, "loss": 0.0, "step": 25322 }, { "epoch": 24.349038461538463, "grad_norm": 0.0007663524593226612, "learning_rate": 3.6489131170702744e-08, "loss": 0.0, "step": 25323 }, { "epoch": 24.35, "grad_norm": 0.0013309628702700138, "learning_rate": 3.6382890910475755e-08, "loss": 0.0, "step": 25324 }, { "epoch": 24.35096153846154, "grad_norm": 0.0005995515384711325, "learning_rate": 3.627680525634558e-08, "loss": 0.0, "step": 25325 }, { "epoch": 24.351923076923075, "grad_norm": 0.0018459019483998418, "learning_rate": 3.617087420995424e-08, "loss": 0.0, "step": 25326 }, { "epoch": 24.352884615384614, "grad_norm": 0.00043490028474479914, "learning_rate": 3.6065097772948197e-08, "loss": 0.0, "step": 25327 }, { "epoch": 24.353846153846153, "grad_norm": 0.0003481998573988676, "learning_rate": 3.5959475946968357e-08, "loss": 0.0, "step": 25328 }, { "epoch": 24.35480769230769, "grad_norm": 0.0007692968938499689, "learning_rate": 3.585400873365341e-08, "loss": 0.0, "step": 25329 }, { "epoch": 24.35576923076923, "grad_norm": 0.0005878778174519539, "learning_rate": 3.5748696134639825e-08, "loss": 0.0, "step": 25330 }, { "epoch": 24.35673076923077, "grad_norm": 0.0007175913779065013, "learning_rate": 3.564353815156074e-08, "loss": 0.0, "step": 25331 }, { "epoch": 24.357692307692307, "grad_norm": 0.0013565628323704004, "learning_rate": 3.5538534786049297e-08, "loss": 0.0, "step": 25332 }, { "epoch": 24.358653846153846, "grad_norm": 0.0006519793532788754, "learning_rate": 3.543368603973529e-08, "loss": 0.0, "step": 25333 }, { "epoch": 24.359615384615385, "grad_norm": 0.000614727265201509, "learning_rate": 3.5328991914244104e-08, "loss": 0.0, "step": 25334 }, { "epoch": 24.360576923076923, "grad_norm": 0.0007708574994467199, "learning_rate": 3.5224452411201094e-08, "loss": 0.0, "step": 25335 }, { "epoch": 24.361538461538462, "grad_norm": 0.0007493027369491756, "learning_rate": 3.51200675322283e-08, "loss": 0.0, "step": 25336 }, { "epoch": 24.3625, "grad_norm": 0.0033793984912335873, "learning_rate": 3.501583727894553e-08, "loss": 0.0, "step": 25337 }, { "epoch": 24.36346153846154, "grad_norm": 0.0004738588468171656, "learning_rate": 3.491176165297039e-08, "loss": 0.0, "step": 25338 }, { "epoch": 24.364423076923078, "grad_norm": 0.0011377198388800025, "learning_rate": 3.4807840655917133e-08, "loss": 0.0, "step": 25339 }, { "epoch": 24.365384615384617, "grad_norm": 0.0009572862181812525, "learning_rate": 3.4704074289398926e-08, "loss": 0.0, "step": 25340 }, { "epoch": 24.366346153846155, "grad_norm": 0.001240848796442151, "learning_rate": 3.4600462555025585e-08, "loss": 0.0, "step": 25341 }, { "epoch": 24.36730769230769, "grad_norm": 0.0008175995317287743, "learning_rate": 3.449700545440471e-08, "loss": 0.0, "step": 25342 }, { "epoch": 24.36826923076923, "grad_norm": 0.0008572958176955581, "learning_rate": 3.4393702989142796e-08, "loss": 0.0, "step": 25343 }, { "epoch": 24.369230769230768, "grad_norm": 0.0007218713872134686, "learning_rate": 3.42905551608419e-08, "loss": 0.0, "step": 25344 }, { "epoch": 24.370192307692307, "grad_norm": 0.0002278570318594575, "learning_rate": 3.4187561971101844e-08, "loss": 0.0, "step": 25345 }, { "epoch": 24.371153846153845, "grad_norm": 0.001548395841382444, "learning_rate": 3.408472342152136e-08, "loss": 0.0, "step": 25346 }, { "epoch": 24.372115384615384, "grad_norm": 0.0007431862759403884, "learning_rate": 3.3982039513696943e-08, "loss": 0.0, "step": 25347 }, { "epoch": 24.373076923076923, "grad_norm": 0.001289655570872128, "learning_rate": 3.387951024922065e-08, "loss": 0.0, "step": 25348 }, { "epoch": 24.37403846153846, "grad_norm": 0.0004644707078114152, "learning_rate": 3.377713562968454e-08, "loss": 0.0, "step": 25349 }, { "epoch": 24.375, "grad_norm": 0.0004191526095382869, "learning_rate": 3.367491565667735e-08, "loss": 0.0, "step": 25350 }, { "epoch": 24.37596153846154, "grad_norm": 0.0006727027357555926, "learning_rate": 3.357285033178337e-08, "loss": 0.0, "step": 25351 }, { "epoch": 24.376923076923077, "grad_norm": 0.001133024343289435, "learning_rate": 3.3470939656589096e-08, "loss": 0.0, "step": 25352 }, { "epoch": 24.377884615384616, "grad_norm": 0.0006491997628472745, "learning_rate": 3.3369183632673275e-08, "loss": 0.0, "step": 25353 }, { "epoch": 24.378846153846155, "grad_norm": 0.0017535975202918053, "learning_rate": 3.326758226161575e-08, "loss": 0.0, "step": 25354 }, { "epoch": 24.379807692307693, "grad_norm": 0.0008324553491547704, "learning_rate": 3.316613554499526e-08, "loss": 0.0, "step": 25355 }, { "epoch": 24.380769230769232, "grad_norm": 0.0009109401144087315, "learning_rate": 3.306484348438277e-08, "loss": 0.0, "step": 25356 }, { "epoch": 24.38173076923077, "grad_norm": 0.00048484618309885263, "learning_rate": 3.2963706081350357e-08, "loss": 0.0, "step": 25357 }, { "epoch": 24.38269230769231, "grad_norm": 0.001172614749521017, "learning_rate": 3.286272333747009e-08, "loss": 0.0, "step": 25358 }, { "epoch": 24.383653846153845, "grad_norm": 0.00047488819109275937, "learning_rate": 3.2761895254306285e-08, "loss": 0.0, "step": 25359 }, { "epoch": 24.384615384615383, "grad_norm": 0.00037208019057288766, "learning_rate": 3.266122183342435e-08, "loss": 0.0, "step": 25360 }, { "epoch": 24.385576923076922, "grad_norm": 0.0012407660251483321, "learning_rate": 3.256070307638637e-08, "loss": 0.0, "step": 25361 }, { "epoch": 24.38653846153846, "grad_norm": 0.0019291974604129791, "learning_rate": 3.24603389847522e-08, "loss": 0.0, "step": 25362 }, { "epoch": 24.3875, "grad_norm": 0.0008466910803690553, "learning_rate": 3.23601295600795e-08, "loss": 0.0, "step": 25363 }, { "epoch": 24.388461538461538, "grad_norm": 0.0012160325422883034, "learning_rate": 3.226007480392368e-08, "loss": 0.0, "step": 25364 }, { "epoch": 24.389423076923077, "grad_norm": 0.00030953114037401974, "learning_rate": 3.2160174717834615e-08, "loss": 0.0, "step": 25365 }, { "epoch": 24.390384615384615, "grad_norm": 0.0010206170845776796, "learning_rate": 3.2060429303366615e-08, "loss": 0.0, "step": 25366 }, { "epoch": 24.391346153846154, "grad_norm": 0.001550857676193118, "learning_rate": 3.196083856206289e-08, "loss": 0.0, "step": 25367 }, { "epoch": 24.392307692307693, "grad_norm": 0.0013647483428940177, "learning_rate": 3.1861402495472206e-08, "loss": 0.0, "step": 25368 }, { "epoch": 24.39326923076923, "grad_norm": 0.0014427603455260396, "learning_rate": 3.1762121105136653e-08, "loss": 0.0, "step": 25369 }, { "epoch": 24.39423076923077, "grad_norm": 0.0014688108349218965, "learning_rate": 3.166299439259723e-08, "loss": 0.0, "step": 25370 }, { "epoch": 24.39519230769231, "grad_norm": 0.0006640184437856078, "learning_rate": 3.156402235938938e-08, "loss": 0.0, "step": 25371 }, { "epoch": 24.396153846153847, "grad_norm": 0.0007181665278039873, "learning_rate": 3.1465205007052965e-08, "loss": 0.0, "step": 25372 }, { "epoch": 24.397115384615386, "grad_norm": 0.0009753854246810079, "learning_rate": 3.136654233711789e-08, "loss": 0.0, "step": 25373 }, { "epoch": 24.398076923076925, "grad_norm": 0.002495105378329754, "learning_rate": 3.126803435111736e-08, "loss": 0.0, "step": 25374 }, { "epoch": 24.39903846153846, "grad_norm": 0.0006441203877329826, "learning_rate": 3.1169681050579045e-08, "loss": 0.0, "step": 25375 }, { "epoch": 24.4, "grad_norm": 0.0008565362077206373, "learning_rate": 3.10714824370284e-08, "loss": 0.0, "step": 25376 }, { "epoch": 24.400961538461537, "grad_norm": 0.001152749522589147, "learning_rate": 3.097343851198975e-08, "loss": 0.0, "step": 25377 }, { "epoch": 24.401923076923076, "grad_norm": 0.0008598045096732676, "learning_rate": 3.0875549276985216e-08, "loss": 0.0, "step": 25378 }, { "epoch": 24.402884615384615, "grad_norm": 0.0008231409010477364, "learning_rate": 3.077781473353358e-08, "loss": 0.0, "step": 25379 }, { "epoch": 24.403846153846153, "grad_norm": 0.0005750774289481342, "learning_rate": 3.068023488315031e-08, "loss": 0.0, "step": 25380 }, { "epoch": 24.404807692307692, "grad_norm": 0.0006514211418107152, "learning_rate": 3.0582809727349725e-08, "loss": 0.0, "step": 25381 }, { "epoch": 24.40576923076923, "grad_norm": 0.0009133580606430769, "learning_rate": 3.048553926764508e-08, "loss": 0.0, "step": 25382 }, { "epoch": 24.40673076923077, "grad_norm": 0.0006898543215356767, "learning_rate": 3.038842350554405e-08, "loss": 0.0, "step": 25383 }, { "epoch": 24.407692307692308, "grad_norm": 0.0007738003041595221, "learning_rate": 3.029146244255432e-08, "loss": 0.0, "step": 25384 }, { "epoch": 24.408653846153847, "grad_norm": 0.001312734792008996, "learning_rate": 3.019465608018024e-08, "loss": 0.0, "step": 25385 }, { "epoch": 24.409615384615385, "grad_norm": 0.0009565275977365673, "learning_rate": 3.0098004419923946e-08, "loss": 0.0, "step": 25386 }, { "epoch": 24.410576923076924, "grad_norm": 0.0008501563570462167, "learning_rate": 3.000150746328534e-08, "loss": 0.0, "step": 25387 }, { "epoch": 24.411538461538463, "grad_norm": 0.0011775660095736384, "learning_rate": 2.990516521176212e-08, "loss": 0.0, "step": 25388 }, { "epoch": 24.4125, "grad_norm": 0.0003275549679528922, "learning_rate": 2.9808977666848646e-08, "loss": 0.0, "step": 25389 }, { "epoch": 24.41346153846154, "grad_norm": 0.001004941645078361, "learning_rate": 2.9712944830038172e-08, "loss": 0.0, "step": 25390 }, { "epoch": 24.414423076923075, "grad_norm": 0.0010361175518482924, "learning_rate": 2.9617066702819498e-08, "loss": 0.0, "step": 25391 }, { "epoch": 24.415384615384614, "grad_norm": 0.0004958432982675731, "learning_rate": 2.9521343286681438e-08, "loss": 0.0, "step": 25392 }, { "epoch": 24.416346153846153, "grad_norm": 0.0009188648546114564, "learning_rate": 2.9425774583109467e-08, "loss": 0.0, "step": 25393 }, { "epoch": 24.41730769230769, "grad_norm": 0.00042634233250282705, "learning_rate": 2.933036059358685e-08, "loss": 0.0, "step": 25394 }, { "epoch": 24.41826923076923, "grad_norm": 0.000593376811593771, "learning_rate": 2.9235101319592396e-08, "loss": 0.0, "step": 25395 }, { "epoch": 24.41923076923077, "grad_norm": 0.0007468101102858782, "learning_rate": 2.9139996762606037e-08, "loss": 0.0, "step": 25396 }, { "epoch": 24.420192307692307, "grad_norm": 0.0009867618791759014, "learning_rate": 2.904504692410215e-08, "loss": 0.0, "step": 25397 }, { "epoch": 24.421153846153846, "grad_norm": 0.00033276990870945156, "learning_rate": 2.8950251805553997e-08, "loss": 0.0, "step": 25398 }, { "epoch": 24.422115384615385, "grad_norm": 0.0008484157733619213, "learning_rate": 2.8855611408434846e-08, "loss": 0.0, "step": 25399 }, { "epoch": 24.423076923076923, "grad_norm": 0.001497788354754448, "learning_rate": 2.8761125734211304e-08, "loss": 0.0, "step": 25400 }, { "epoch": 24.424038461538462, "grad_norm": 0.0008799142087809741, "learning_rate": 2.866679478434886e-08, "loss": 0.0, "step": 25401 }, { "epoch": 24.425, "grad_norm": 0.00044249079655855894, "learning_rate": 2.8572618560313014e-08, "loss": 0.0, "step": 25402 }, { "epoch": 24.42596153846154, "grad_norm": 0.2775566875934601, "learning_rate": 2.8478597063564817e-08, "loss": 0.0017, "step": 25403 }, { "epoch": 24.426923076923078, "grad_norm": 0.0008612599340267479, "learning_rate": 2.838473029556199e-08, "loss": 0.0, "step": 25404 }, { "epoch": 24.427884615384617, "grad_norm": 0.001106104115024209, "learning_rate": 2.8291018257762258e-08, "loss": 0.0, "step": 25405 }, { "epoch": 24.428846153846155, "grad_norm": 0.0006500178133137524, "learning_rate": 2.8197460951618903e-08, "loss": 0.0, "step": 25406 }, { "epoch": 24.42980769230769, "grad_norm": 0.0006158186588436365, "learning_rate": 2.81040583785841e-08, "loss": 0.0, "step": 25407 }, { "epoch": 24.43076923076923, "grad_norm": 0.0009907431667670608, "learning_rate": 2.801081054010779e-08, "loss": 0.0, "step": 25408 }, { "epoch": 24.431730769230768, "grad_norm": 0.0005674066487699747, "learning_rate": 2.7917717437635495e-08, "loss": 0.0, "step": 25409 }, { "epoch": 24.432692307692307, "grad_norm": 0.0035725960042327642, "learning_rate": 2.782477907261272e-08, "loss": 0.0, "step": 25410 }, { "epoch": 24.433653846153845, "grad_norm": 0.0010057729668915272, "learning_rate": 2.773199544648164e-08, "loss": 0.0, "step": 25411 }, { "epoch": 24.434615384615384, "grad_norm": 0.0005369811551645398, "learning_rate": 2.7639366560681112e-08, "loss": 0.0, "step": 25412 }, { "epoch": 24.435576923076923, "grad_norm": 0.000561563647352159, "learning_rate": 2.7546892416649985e-08, "loss": 0.0, "step": 25413 }, { "epoch": 24.43653846153846, "grad_norm": 0.0011080640833824873, "learning_rate": 2.745457301582155e-08, "loss": 0.0, "step": 25414 }, { "epoch": 24.4375, "grad_norm": 0.0016338101122528315, "learning_rate": 2.736240835962911e-08, "loss": 0.0, "step": 25415 }, { "epoch": 24.43846153846154, "grad_norm": 0.0008257930167019367, "learning_rate": 2.7270398449502634e-08, "loss": 0.0, "step": 25416 }, { "epoch": 24.439423076923077, "grad_norm": 0.0004952652379870415, "learning_rate": 2.7178543286870972e-08, "loss": 0.0, "step": 25417 }, { "epoch": 24.440384615384616, "grad_norm": 0.0005847163847647607, "learning_rate": 2.7086842873157437e-08, "loss": 0.0, "step": 25418 }, { "epoch": 24.441346153846155, "grad_norm": 0.0006344991852529347, "learning_rate": 2.6995297209786443e-08, "loss": 0.0, "step": 25419 }, { "epoch": 24.442307692307693, "grad_norm": 0.0005093113868497312, "learning_rate": 2.690390629817796e-08, "loss": 0.0, "step": 25420 }, { "epoch": 24.443269230769232, "grad_norm": 0.002497698413208127, "learning_rate": 2.6812670139749753e-08, "loss": 0.0, "step": 25421 }, { "epoch": 24.44423076923077, "grad_norm": 0.0016098406631499529, "learning_rate": 2.6721588735918458e-08, "loss": 0.0, "step": 25422 }, { "epoch": 24.44519230769231, "grad_norm": 0.0007176419603638351, "learning_rate": 2.663066208809628e-08, "loss": 0.0, "step": 25423 }, { "epoch": 24.446153846153845, "grad_norm": 0.0014268881641328335, "learning_rate": 2.6539890197695428e-08, "loss": 0.0, "step": 25424 }, { "epoch": 24.447115384615383, "grad_norm": 0.0014127438189461827, "learning_rate": 2.644927306612366e-08, "loss": 0.0, "step": 25425 }, { "epoch": 24.448076923076922, "grad_norm": 0.000846280250698328, "learning_rate": 2.6358810694786518e-08, "loss": 0.0, "step": 25426 }, { "epoch": 24.44903846153846, "grad_norm": 0.0008354291203431785, "learning_rate": 2.6268503085089547e-08, "loss": 0.0, "step": 25427 }, { "epoch": 24.45, "grad_norm": 0.00034638363285921514, "learning_rate": 2.6178350238431626e-08, "loss": 0.0, "step": 25428 }, { "epoch": 24.450961538461538, "grad_norm": 0.000370700319763273, "learning_rate": 2.608835215621275e-08, "loss": 0.0, "step": 25429 }, { "epoch": 24.451923076923077, "grad_norm": 0.0005081333802081645, "learning_rate": 2.5998508839830684e-08, "loss": 0.0, "step": 25430 }, { "epoch": 24.452884615384615, "grad_norm": 0.0007207861635833979, "learning_rate": 2.590882029067765e-08, "loss": 0.0, "step": 25431 }, { "epoch": 24.453846153846154, "grad_norm": 0.0019340789876878262, "learning_rate": 2.581928651014698e-08, "loss": 0.0, "step": 25432 }, { "epoch": 24.454807692307693, "grad_norm": 0.0007118671201169491, "learning_rate": 2.572990749962645e-08, "loss": 0.0, "step": 25433 }, { "epoch": 24.45576923076923, "grad_norm": 0.001452557509765029, "learning_rate": 2.564068326050273e-08, "loss": 0.0, "step": 25434 }, { "epoch": 24.45673076923077, "grad_norm": 0.000465210439870134, "learning_rate": 2.555161379416249e-08, "loss": 0.0, "step": 25435 }, { "epoch": 24.45769230769231, "grad_norm": 0.0011717063607648015, "learning_rate": 2.5462699101986844e-08, "loss": 0.0, "step": 25436 }, { "epoch": 24.458653846153847, "grad_norm": 0.0011428209254518151, "learning_rate": 2.537393918535358e-08, "loss": 0.0, "step": 25437 }, { "epoch": 24.459615384615386, "grad_norm": 0.0010625259019434452, "learning_rate": 2.5285334045643818e-08, "loss": 0.0, "step": 25438 }, { "epoch": 24.460576923076925, "grad_norm": 0.0007304985192604363, "learning_rate": 2.5196883684228678e-08, "loss": 0.0, "step": 25439 }, { "epoch": 24.46153846153846, "grad_norm": 0.0012345616705715656, "learning_rate": 2.5108588102482623e-08, "loss": 0.0, "step": 25440 }, { "epoch": 24.4625, "grad_norm": 0.001007005455903709, "learning_rate": 2.5020447301774554e-08, "loss": 0.0, "step": 25441 }, { "epoch": 24.463461538461537, "grad_norm": 0.0012593015562742949, "learning_rate": 2.4932461283473376e-08, "loss": 0.0, "step": 25442 }, { "epoch": 24.464423076923076, "grad_norm": 0.0016488408436998725, "learning_rate": 2.4844630048944664e-08, "loss": 0.0, "step": 25443 }, { "epoch": 24.465384615384615, "grad_norm": 0.001141935819759965, "learning_rate": 2.475695359955066e-08, "loss": 0.0, "step": 25444 }, { "epoch": 24.466346153846153, "grad_norm": 0.000930309877730906, "learning_rate": 2.466943193665028e-08, "loss": 0.0, "step": 25445 }, { "epoch": 24.467307692307692, "grad_norm": 0.0008606049814261496, "learning_rate": 2.4582065061604656e-08, "loss": 0.0, "step": 25446 }, { "epoch": 24.46826923076923, "grad_norm": 0.0004987599677406251, "learning_rate": 2.4494852975767146e-08, "loss": 0.0, "step": 25447 }, { "epoch": 24.46923076923077, "grad_norm": 0.001011617248877883, "learning_rate": 2.440779568049112e-08, "loss": 0.0, "step": 25448 }, { "epoch": 24.470192307692308, "grad_norm": 0.0010169432498514652, "learning_rate": 2.4320893177128822e-08, "loss": 0.0, "step": 25449 }, { "epoch": 24.471153846153847, "grad_norm": 0.0007761292508803308, "learning_rate": 2.423414546702807e-08, "loss": 0.0, "step": 25450 }, { "epoch": 24.472115384615385, "grad_norm": 0.0005139135173521936, "learning_rate": 2.4147552551534448e-08, "loss": 0.0, "step": 25451 }, { "epoch": 24.473076923076924, "grad_norm": 0.0010410206159576774, "learning_rate": 2.4061114431992437e-08, "loss": 0.0, "step": 25452 }, { "epoch": 24.474038461538463, "grad_norm": 0.00033773729228414595, "learning_rate": 2.397483110974319e-08, "loss": 0.0, "step": 25453 }, { "epoch": 24.475, "grad_norm": 0.0017064338317140937, "learning_rate": 2.3888702586124523e-08, "loss": 0.0, "step": 25454 }, { "epoch": 24.47596153846154, "grad_norm": 0.0007274829549714923, "learning_rate": 2.3802728862474256e-08, "loss": 0.0, "step": 25455 }, { "epoch": 24.476923076923075, "grad_norm": 0.0014818834606558084, "learning_rate": 2.3716909940125766e-08, "loss": 0.0, "step": 25456 }, { "epoch": 24.477884615384614, "grad_norm": 0.0004701901925727725, "learning_rate": 2.363124582041021e-08, "loss": 0.0, "step": 25457 }, { "epoch": 24.478846153846153, "grad_norm": 0.0008609393262304366, "learning_rate": 2.3545736504657634e-08, "loss": 0.0, "step": 25458 }, { "epoch": 24.47980769230769, "grad_norm": 0.0015303435502573848, "learning_rate": 2.346038199419476e-08, "loss": 0.0, "step": 25459 }, { "epoch": 24.48076923076923, "grad_norm": 0.00029411393916234374, "learning_rate": 2.3375182290346077e-08, "loss": 0.0, "step": 25460 }, { "epoch": 24.48173076923077, "grad_norm": 0.000912351009901613, "learning_rate": 2.3290137394433865e-08, "loss": 0.0, "step": 25461 }, { "epoch": 24.482692307692307, "grad_norm": 0.0034174583852291107, "learning_rate": 2.320524730777707e-08, "loss": 0.0, "step": 25462 }, { "epoch": 24.483653846153846, "grad_norm": 0.0011033170158043504, "learning_rate": 2.312051203169352e-08, "loss": 0.0, "step": 25463 }, { "epoch": 24.484615384615385, "grad_norm": 0.0006318888044916093, "learning_rate": 2.3035931567497728e-08, "loss": 0.0, "step": 25464 }, { "epoch": 24.485576923076923, "grad_norm": 0.0009506659116595984, "learning_rate": 2.295150591650197e-08, "loss": 0.0, "step": 25465 }, { "epoch": 24.486538461538462, "grad_norm": 0.0006985394866205752, "learning_rate": 2.2867235080016315e-08, "loss": 0.0, "step": 25466 }, { "epoch": 24.4875, "grad_norm": 0.0010759619763121009, "learning_rate": 2.278311905934971e-08, "loss": 0.0, "step": 25467 }, { "epoch": 24.48846153846154, "grad_norm": 0.0011601453879848123, "learning_rate": 2.2699157855805566e-08, "loss": 0.0, "step": 25468 }, { "epoch": 24.489423076923078, "grad_norm": 0.0005934149958193302, "learning_rate": 2.2615351470687276e-08, "loss": 0.0, "step": 25469 }, { "epoch": 24.490384615384617, "grad_norm": 0.0007165910792537034, "learning_rate": 2.2531699905296022e-08, "loss": 0.0, "step": 25470 }, { "epoch": 24.491346153846155, "grad_norm": 0.0034615376498550177, "learning_rate": 2.244820316092855e-08, "loss": 0.0, "step": 25471 }, { "epoch": 24.49230769230769, "grad_norm": 0.0012259468203410506, "learning_rate": 2.2364861238881596e-08, "loss": 0.0, "step": 25472 }, { "epoch": 24.49326923076923, "grad_norm": 0.0005075720837339759, "learning_rate": 2.2281674140448574e-08, "loss": 0.0, "step": 25473 }, { "epoch": 24.494230769230768, "grad_norm": 0.001119246706366539, "learning_rate": 2.2198641866919558e-08, "loss": 0.0, "step": 25474 }, { "epoch": 24.495192307692307, "grad_norm": 0.0004734208923764527, "learning_rate": 2.2115764419582408e-08, "loss": 0.0, "step": 25475 }, { "epoch": 24.496153846153845, "grad_norm": 0.0010408966336399317, "learning_rate": 2.2033041799723877e-08, "loss": 0.0, "step": 25476 }, { "epoch": 24.497115384615384, "grad_norm": 0.0033112105447798967, "learning_rate": 2.1950474008628487e-08, "loss": 0.0, "step": 25477 }, { "epoch": 24.498076923076923, "grad_norm": 0.000794184161350131, "learning_rate": 2.1868061047575217e-08, "loss": 0.0, "step": 25478 }, { "epoch": 24.49903846153846, "grad_norm": 0.0015878317644819617, "learning_rate": 2.1785802917845266e-08, "loss": 0.0, "step": 25479 }, { "epoch": 24.5, "grad_norm": 0.0009177214815281332, "learning_rate": 2.170369962071317e-08, "loss": 0.0, "step": 25480 }, { "epoch": 24.50096153846154, "grad_norm": 0.0009492821991443634, "learning_rate": 2.1621751157454574e-08, "loss": 0.0, "step": 25481 }, { "epoch": 24.501923076923077, "grad_norm": 0.00036590464878827333, "learning_rate": 2.153995752933957e-08, "loss": 0.0, "step": 25482 }, { "epoch": 24.502884615384616, "grad_norm": 0.0011147035984322429, "learning_rate": 2.1458318737638263e-08, "loss": 0.0, "step": 25483 }, { "epoch": 24.503846153846155, "grad_norm": 0.0011770139681175351, "learning_rate": 2.1376834783616295e-08, "loss": 0.0, "step": 25484 }, { "epoch": 24.504807692307693, "grad_norm": 0.0005620643496513367, "learning_rate": 2.129550566854044e-08, "loss": 0.0, "step": 25485 }, { "epoch": 24.505769230769232, "grad_norm": 0.0007998557412065566, "learning_rate": 2.1214331393669686e-08, "loss": 0.0, "step": 25486 }, { "epoch": 24.50673076923077, "grad_norm": 0.0011036513606086373, "learning_rate": 2.1133311960266358e-08, "loss": 0.0, "step": 25487 }, { "epoch": 24.50769230769231, "grad_norm": 0.001019862713292241, "learning_rate": 2.1052447369586117e-08, "loss": 0.0, "step": 25488 }, { "epoch": 24.508653846153845, "grad_norm": 0.00030817085644230247, "learning_rate": 2.0971737622883515e-08, "loss": 0.0, "step": 25489 }, { "epoch": 24.509615384615383, "grad_norm": 0.0009098626906052232, "learning_rate": 2.0891182721411996e-08, "loss": 0.0, "step": 25490 }, { "epoch": 24.510576923076922, "grad_norm": 0.0008408831781707704, "learning_rate": 2.081078266642056e-08, "loss": 0.0, "step": 25491 }, { "epoch": 24.51153846153846, "grad_norm": 0.0009458540007472038, "learning_rate": 2.0730537459157096e-08, "loss": 0.0, "step": 25492 }, { "epoch": 24.5125, "grad_norm": 0.0006911200471222401, "learning_rate": 2.0650447100867277e-08, "loss": 0.0, "step": 25493 }, { "epoch": 24.513461538461538, "grad_norm": 0.001192893018014729, "learning_rate": 2.057051159279344e-08, "loss": 0.0, "step": 25494 }, { "epoch": 24.514423076923077, "grad_norm": 0.0019065248779952526, "learning_rate": 2.049073093617571e-08, "loss": 0.0, "step": 25495 }, { "epoch": 24.515384615384615, "grad_norm": 0.0018395690713077784, "learning_rate": 2.0411105132253084e-08, "loss": 0.0, "step": 25496 }, { "epoch": 24.516346153846154, "grad_norm": 0.0011459155939519405, "learning_rate": 2.033163418226014e-08, "loss": 0.0, "step": 25497 }, { "epoch": 24.517307692307693, "grad_norm": 0.001098669832572341, "learning_rate": 2.0252318087429223e-08, "loss": 0.0, "step": 25498 }, { "epoch": 24.51826923076923, "grad_norm": 0.000664596795104444, "learning_rate": 2.017315684899379e-08, "loss": 0.0, "step": 25499 }, { "epoch": 24.51923076923077, "grad_norm": 0.000614721851889044, "learning_rate": 2.009415046817953e-08, "loss": 0.0, "step": 25500 }, { "epoch": 24.52019230769231, "grad_norm": 0.001702758832834661, "learning_rate": 2.0015298946213236e-08, "loss": 0.0, "step": 25501 }, { "epoch": 24.521153846153847, "grad_norm": 0.00017609240603633225, "learning_rate": 1.9936602284318375e-08, "loss": 0.0, "step": 25502 }, { "epoch": 24.522115384615386, "grad_norm": 0.000310873962007463, "learning_rate": 1.98580604837173e-08, "loss": 0.0, "step": 25503 }, { "epoch": 24.523076923076925, "grad_norm": 0.0007057772018015385, "learning_rate": 1.9779673545626822e-08, "loss": 0.0, "step": 25504 }, { "epoch": 24.52403846153846, "grad_norm": 0.0007623846177011728, "learning_rate": 1.970144147126485e-08, "loss": 0.0, "step": 25505 }, { "epoch": 24.525, "grad_norm": 0.000430117710493505, "learning_rate": 1.9623364261843748e-08, "loss": 0.0, "step": 25506 }, { "epoch": 24.525961538461537, "grad_norm": 0.0007224526489153504, "learning_rate": 1.9545441918576992e-08, "loss": 0.0, "step": 25507 }, { "epoch": 24.526923076923076, "grad_norm": 0.0006352785276249051, "learning_rate": 1.9467674442672502e-08, "loss": 0.0, "step": 25508 }, { "epoch": 24.527884615384615, "grad_norm": 0.002379322424530983, "learning_rate": 1.9390061835337093e-08, "loss": 0.0, "step": 25509 }, { "epoch": 24.528846153846153, "grad_norm": 0.0008441328536719084, "learning_rate": 1.9312604097775357e-08, "loss": 0.0, "step": 25510 }, { "epoch": 24.529807692307692, "grad_norm": 0.0009017887059599161, "learning_rate": 1.9235301231188554e-08, "loss": 0.0, "step": 25511 }, { "epoch": 24.53076923076923, "grad_norm": 0.0027017362881451845, "learning_rate": 1.915815323677683e-08, "loss": 0.0, "step": 25512 }, { "epoch": 24.53173076923077, "grad_norm": 0.0012077029095962644, "learning_rate": 1.9081160115738127e-08, "loss": 0.0, "step": 25513 }, { "epoch": 24.532692307692308, "grad_norm": 0.0007615408394485712, "learning_rate": 1.9004321869264818e-08, "loss": 0.0, "step": 25514 }, { "epoch": 24.533653846153847, "grad_norm": 0.0006210038554854691, "learning_rate": 1.8927638498551502e-08, "loss": 0.0, "step": 25515 }, { "epoch": 24.534615384615385, "grad_norm": 0.002034724922850728, "learning_rate": 1.8851110004786123e-08, "loss": 0.0, "step": 25516 }, { "epoch": 24.535576923076924, "grad_norm": 0.002089484129101038, "learning_rate": 1.8774736389157723e-08, "loss": 0.0, "step": 25517 }, { "epoch": 24.536538461538463, "grad_norm": 0.0003714096383191645, "learning_rate": 1.869851765285091e-08, "loss": 0.0, "step": 25518 }, { "epoch": 24.5375, "grad_norm": 0.0011167603079229593, "learning_rate": 1.862245379704919e-08, "loss": 0.0, "step": 25519 }, { "epoch": 24.53846153846154, "grad_norm": 0.0006213411688804626, "learning_rate": 1.85465448229305e-08, "loss": 0.0, "step": 25520 }, { "epoch": 24.539423076923075, "grad_norm": 0.00068945053499192, "learning_rate": 1.8470790731675014e-08, "loss": 0.0, "step": 25521 }, { "epoch": 24.540384615384614, "grad_norm": 0.0016703902510926127, "learning_rate": 1.8395191524457345e-08, "loss": 0.0, "step": 25522 }, { "epoch": 24.541346153846153, "grad_norm": 0.0004033934965264052, "learning_rate": 1.8319747202451e-08, "loss": 0.0, "step": 25523 }, { "epoch": 24.54230769230769, "grad_norm": 0.0011404796969145536, "learning_rate": 1.824445776682504e-08, "loss": 0.0, "step": 25524 }, { "epoch": 24.54326923076923, "grad_norm": 0.0006557903834618628, "learning_rate": 1.8169323218749646e-08, "loss": 0.0, "step": 25525 }, { "epoch": 24.54423076923077, "grad_norm": 0.00042451429180800915, "learning_rate": 1.8094343559390547e-08, "loss": 0.0, "step": 25526 }, { "epoch": 24.545192307692307, "grad_norm": 0.0006782087148167193, "learning_rate": 1.801951878991015e-08, "loss": 0.0, "step": 25527 }, { "epoch": 24.546153846153846, "grad_norm": 0.0007900851196609437, "learning_rate": 1.7944848911470857e-08, "loss": 0.0, "step": 25528 }, { "epoch": 24.547115384615385, "grad_norm": 0.0009649450657889247, "learning_rate": 1.787033392522952e-08, "loss": 0.0, "step": 25529 }, { "epoch": 24.548076923076923, "grad_norm": 0.0008741266210563481, "learning_rate": 1.7795973832344103e-08, "loss": 0.0, "step": 25530 }, { "epoch": 24.549038461538462, "grad_norm": 0.0007057355251163244, "learning_rate": 1.7721768633967018e-08, "loss": 0.0, "step": 25531 }, { "epoch": 24.55, "grad_norm": 0.0010295966640114784, "learning_rate": 1.7647718331251785e-08, "loss": 0.0, "step": 25532 }, { "epoch": 24.55096153846154, "grad_norm": 0.0009577125310897827, "learning_rate": 1.7573822925345264e-08, "loss": 0.0, "step": 25533 }, { "epoch": 24.551923076923078, "grad_norm": 0.001829656190238893, "learning_rate": 1.7500082417394316e-08, "loss": 0.0, "step": 25534 }, { "epoch": 24.552884615384617, "grad_norm": 0.0009524852503091097, "learning_rate": 1.742649680854469e-08, "loss": 0.0, "step": 25535 }, { "epoch": 24.553846153846155, "grad_norm": 0.0016069088596850634, "learning_rate": 1.7353066099936588e-08, "loss": 0.0, "step": 25536 }, { "epoch": 24.55480769230769, "grad_norm": 0.0015389773761853576, "learning_rate": 1.727979029271132e-08, "loss": 0.0, "step": 25537 }, { "epoch": 24.55576923076923, "grad_norm": 0.0003427818592172116, "learning_rate": 1.720666938800353e-08, "loss": 0.0, "step": 25538 }, { "epoch": 24.556730769230768, "grad_norm": 0.0007569898734800518, "learning_rate": 1.7133703386950086e-08, "loss": 0.0, "step": 25539 }, { "epoch": 24.557692307692307, "grad_norm": 0.0006562841590493917, "learning_rate": 1.7060892290682307e-08, "loss": 0.0, "step": 25540 }, { "epoch": 24.558653846153845, "grad_norm": 0.0008987165056169033, "learning_rate": 1.698823610032929e-08, "loss": 0.0, "step": 25541 }, { "epoch": 24.559615384615384, "grad_norm": 0.001001390628516674, "learning_rate": 1.6915734817019025e-08, "loss": 0.0, "step": 25542 }, { "epoch": 24.560576923076923, "grad_norm": 0.0015841268468648195, "learning_rate": 1.6843388441876163e-08, "loss": 0.0, "step": 25543 }, { "epoch": 24.56153846153846, "grad_norm": 0.0018803399289026856, "learning_rate": 1.677119697602425e-08, "loss": 0.0, "step": 25544 }, { "epoch": 24.5625, "grad_norm": 0.0005819652578793466, "learning_rate": 1.6699160420583504e-08, "loss": 0.0, "step": 25545 }, { "epoch": 24.56346153846154, "grad_norm": 0.0009512042161077261, "learning_rate": 1.66272787766697e-08, "loss": 0.0, "step": 25546 }, { "epoch": 24.564423076923077, "grad_norm": 0.0004726045881398022, "learning_rate": 1.6555552045400826e-08, "loss": 0.0, "step": 25547 }, { "epoch": 24.565384615384616, "grad_norm": 0.002950345166027546, "learning_rate": 1.648398022788822e-08, "loss": 0.0, "step": 25548 }, { "epoch": 24.566346153846155, "grad_norm": 0.0013335528783500195, "learning_rate": 1.6412563325243214e-08, "loss": 0.0, "step": 25549 }, { "epoch": 24.567307692307693, "grad_norm": 0.0006386932218447328, "learning_rate": 1.6341301338573813e-08, "loss": 0.0, "step": 25550 }, { "epoch": 24.568269230769232, "grad_norm": 0.0011740149930119514, "learning_rate": 1.6270194268985796e-08, "loss": 0.0, "step": 25551 }, { "epoch": 24.56923076923077, "grad_norm": 0.0012192405993118882, "learning_rate": 1.6199242117582724e-08, "loss": 0.0, "step": 25552 }, { "epoch": 24.57019230769231, "grad_norm": 0.0010894647566601634, "learning_rate": 1.6128444885464834e-08, "loss": 0.0, "step": 25553 }, { "epoch": 24.571153846153845, "grad_norm": 0.0005649881204590201, "learning_rate": 1.605780257373124e-08, "loss": 0.0, "step": 25554 }, { "epoch": 24.572115384615383, "grad_norm": 0.0006335375364869833, "learning_rate": 1.5987315183477736e-08, "loss": 0.0, "step": 25555 }, { "epoch": 24.573076923076922, "grad_norm": 0.0006573044811375439, "learning_rate": 1.5916982715799e-08, "loss": 0.0, "step": 25556 }, { "epoch": 24.57403846153846, "grad_norm": 0.0016693775542080402, "learning_rate": 1.5846805171785273e-08, "loss": 0.0, "step": 25557 }, { "epoch": 24.575, "grad_norm": 0.00049783376744017, "learning_rate": 1.5776782552525682e-08, "loss": 0.0, "step": 25558 }, { "epoch": 24.575961538461538, "grad_norm": 0.0007912390865385532, "learning_rate": 1.5706914859107136e-08, "loss": 0.0, "step": 25559 }, { "epoch": 24.576923076923077, "grad_norm": 0.0006522313342429698, "learning_rate": 1.5637202092614322e-08, "loss": 0.0, "step": 25560 }, { "epoch": 24.577884615384615, "grad_norm": 0.001425872789695859, "learning_rate": 1.55676442541286e-08, "loss": 0.0, "step": 25561 }, { "epoch": 24.578846153846154, "grad_norm": 0.0008131824433803558, "learning_rate": 1.5498241344727992e-08, "loss": 0.0, "step": 25562 }, { "epoch": 24.579807692307693, "grad_norm": 0.000632460811175406, "learning_rate": 1.542899336549164e-08, "loss": 0.0, "step": 25563 }, { "epoch": 24.58076923076923, "grad_norm": 0.0011055109789595008, "learning_rate": 1.5359900317492017e-08, "loss": 0.0, "step": 25564 }, { "epoch": 24.58173076923077, "grad_norm": 0.0009298823424614966, "learning_rate": 1.5290962201802707e-08, "loss": 0.0, "step": 25565 }, { "epoch": 24.58269230769231, "grad_norm": 0.00196900125592947, "learning_rate": 1.5222179019492855e-08, "loss": 0.0, "step": 25566 }, { "epoch": 24.583653846153847, "grad_norm": 0.0012127418303862214, "learning_rate": 1.5153550771630498e-08, "loss": 0.0, "step": 25567 }, { "epoch": 24.584615384615386, "grad_norm": 0.0010087700793519616, "learning_rate": 1.508507745927923e-08, "loss": 0.0, "step": 25568 }, { "epoch": 24.585576923076925, "grad_norm": 0.001118469750508666, "learning_rate": 1.501675908350153e-08, "loss": 0.0, "step": 25569 }, { "epoch": 24.58653846153846, "grad_norm": 0.0009830392664298415, "learning_rate": 1.4948595645358777e-08, "loss": 0.0, "step": 25570 }, { "epoch": 24.5875, "grad_norm": 0.0006476953276433051, "learning_rate": 1.4880587145907899e-08, "loss": 0.0, "step": 25571 }, { "epoch": 24.588461538461537, "grad_norm": 0.0016223205020651221, "learning_rate": 1.481273358620472e-08, "loss": 0.0, "step": 25572 }, { "epoch": 24.589423076923076, "grad_norm": 0.0012372361961752176, "learning_rate": 1.474503496730173e-08, "loss": 0.0, "step": 25573 }, { "epoch": 24.590384615384615, "grad_norm": 0.00038493494503200054, "learning_rate": 1.4677491290249202e-08, "loss": 0.0, "step": 25574 }, { "epoch": 24.591346153846153, "grad_norm": 0.0007626680308021605, "learning_rate": 1.4610102556095185e-08, "loss": 0.0, "step": 25575 }, { "epoch": 24.592307692307692, "grad_norm": 0.0004000522312708199, "learning_rate": 1.454286876588551e-08, "loss": 0.0, "step": 25576 }, { "epoch": 24.59326923076923, "grad_norm": 0.0005016293143853545, "learning_rate": 1.4475789920663785e-08, "loss": 0.0, "step": 25577 }, { "epoch": 24.59423076923077, "grad_norm": 0.0016491328133270144, "learning_rate": 1.440886602147029e-08, "loss": 0.0, "step": 25578 }, { "epoch": 24.595192307692308, "grad_norm": 0.0006357961101457477, "learning_rate": 1.4342097069345307e-08, "loss": 0.0, "step": 25579 }, { "epoch": 24.596153846153847, "grad_norm": 0.0023918827064335346, "learning_rate": 1.4275483065321338e-08, "loss": 0.0, "step": 25580 }, { "epoch": 24.597115384615385, "grad_norm": 0.0007928182603791356, "learning_rate": 1.4209024010435334e-08, "loss": 0.0, "step": 25581 }, { "epoch": 24.598076923076924, "grad_norm": 0.0015035058604553342, "learning_rate": 1.4142719905717583e-08, "loss": 0.0, "step": 25582 }, { "epoch": 24.599038461538463, "grad_norm": 0.0013775010593235493, "learning_rate": 1.407657075219615e-08, "loss": 0.0, "step": 25583 }, { "epoch": 24.6, "grad_norm": 0.0010126838460564613, "learning_rate": 1.4010576550897993e-08, "loss": 0.0, "step": 25584 }, { "epoch": 24.60096153846154, "grad_norm": 0.0008234791457653046, "learning_rate": 1.3944737302846733e-08, "loss": 0.0, "step": 25585 }, { "epoch": 24.601923076923075, "grad_norm": 0.0005183672765269876, "learning_rate": 1.387905300906489e-08, "loss": 0.0, "step": 25586 }, { "epoch": 24.602884615384614, "grad_norm": 0.0007649236940778792, "learning_rate": 1.381352367057165e-08, "loss": 0.0, "step": 25587 }, { "epoch": 24.603846153846153, "grad_norm": 0.0005080911214463413, "learning_rate": 1.3748149288382862e-08, "loss": 0.0, "step": 25588 }, { "epoch": 24.60480769230769, "grad_norm": 0.0012070992961525917, "learning_rate": 1.3682929863514383e-08, "loss": 0.0, "step": 25589 }, { "epoch": 24.60576923076923, "grad_norm": 0.0011100186966359615, "learning_rate": 1.3617865396976516e-08, "loss": 0.0, "step": 25590 }, { "epoch": 24.60673076923077, "grad_norm": 0.0014561822172254324, "learning_rate": 1.3552955889779561e-08, "loss": 0.0, "step": 25591 }, { "epoch": 24.607692307692307, "grad_norm": 0.0005298244650475681, "learning_rate": 1.3488201342931606e-08, "loss": 0.0, "step": 25592 }, { "epoch": 24.608653846153846, "grad_norm": 0.000735777139198035, "learning_rate": 1.3423601757436289e-08, "loss": 0.0, "step": 25593 }, { "epoch": 24.609615384615385, "grad_norm": 0.0007998833316378295, "learning_rate": 1.3359157134296142e-08, "loss": 0.0, "step": 25594 }, { "epoch": 24.610576923076923, "grad_norm": 0.0010959106730297208, "learning_rate": 1.3294867474511475e-08, "loss": 0.0, "step": 25595 }, { "epoch": 24.611538461538462, "grad_norm": 0.0011193535756319761, "learning_rate": 1.323073277908038e-08, "loss": 0.0, "step": 25596 }, { "epoch": 24.6125, "grad_norm": 2.328505797777325e-05, "learning_rate": 1.3166753048996505e-08, "loss": 0.0, "step": 25597 }, { "epoch": 24.61346153846154, "grad_norm": 0.0015701641095802188, "learning_rate": 1.3102928285254612e-08, "loss": 0.0, "step": 25598 }, { "epoch": 24.614423076923078, "grad_norm": 0.0027241213247179985, "learning_rate": 1.303925848884391e-08, "loss": 0.0, "step": 25599 }, { "epoch": 24.615384615384617, "grad_norm": 0.000245193688897416, "learning_rate": 1.2975743660752493e-08, "loss": 0.0, "step": 25600 }, { "epoch": 24.616346153846155, "grad_norm": 0.0018609551480039954, "learning_rate": 1.2912383801965133e-08, "loss": 0.0, "step": 25601 }, { "epoch": 24.61730769230769, "grad_norm": 0.000439704570453614, "learning_rate": 1.2849178913466599e-08, "loss": 0.0, "step": 25602 }, { "epoch": 24.61826923076923, "grad_norm": 0.0008610845543444157, "learning_rate": 1.2786128996237213e-08, "loss": 0.0, "step": 25603 }, { "epoch": 24.619230769230768, "grad_norm": 0.0017752951243892312, "learning_rate": 1.2723234051253975e-08, "loss": 0.0, "step": 25604 }, { "epoch": 24.620192307692307, "grad_norm": 0.00111860828474164, "learning_rate": 1.266049407949499e-08, "loss": 0.0, "step": 25605 }, { "epoch": 24.621153846153845, "grad_norm": 0.0008996708784252405, "learning_rate": 1.2597909081931702e-08, "loss": 0.0, "step": 25606 }, { "epoch": 24.622115384615384, "grad_norm": 0.0006031312514096498, "learning_rate": 1.253547905953667e-08, "loss": 0.0, "step": 25607 }, { "epoch": 24.623076923076923, "grad_norm": 0.0008205677731893957, "learning_rate": 1.2473204013278007e-08, "loss": 0.0, "step": 25608 }, { "epoch": 24.62403846153846, "grad_norm": 0.0013254957739263773, "learning_rate": 1.2411083944122715e-08, "loss": 0.0, "step": 25609 }, { "epoch": 24.625, "grad_norm": 0.0016624190611764789, "learning_rate": 1.234911885303336e-08, "loss": 0.0, "step": 25610 }, { "epoch": 24.62596153846154, "grad_norm": 0.0012917869025841355, "learning_rate": 1.2287308740972503e-08, "loss": 0.0, "step": 25611 }, { "epoch": 24.626923076923077, "grad_norm": 0.0011412707390263677, "learning_rate": 1.222565360889938e-08, "loss": 0.0, "step": 25612 }, { "epoch": 24.627884615384616, "grad_norm": 0.0010258511174470186, "learning_rate": 1.216415345776989e-08, "loss": 0.0, "step": 25613 }, { "epoch": 24.628846153846155, "grad_norm": 0.0011037534568458796, "learning_rate": 1.2102808288538825e-08, "loss": 0.0, "step": 25614 }, { "epoch": 24.629807692307693, "grad_norm": 0.0007005169754847884, "learning_rate": 1.2041618102158758e-08, "loss": 0.0, "step": 25615 }, { "epoch": 24.630769230769232, "grad_norm": 0.00046618710621260107, "learning_rate": 1.198058289957782e-08, "loss": 0.0, "step": 25616 }, { "epoch": 24.63173076923077, "grad_norm": 0.0007277419790625572, "learning_rate": 1.1919702681744138e-08, "loss": 0.0, "step": 25617 }, { "epoch": 24.63269230769231, "grad_norm": 0.0016700203996151686, "learning_rate": 1.1858977449601406e-08, "loss": 0.0, "step": 25618 }, { "epoch": 24.633653846153845, "grad_norm": 0.0016239157412201166, "learning_rate": 1.179840720409331e-08, "loss": 0.0, "step": 25619 }, { "epoch": 24.634615384615383, "grad_norm": 0.0013343840837478638, "learning_rate": 1.173799194615799e-08, "loss": 0.0, "step": 25620 }, { "epoch": 24.635576923076922, "grad_norm": 0.0019524561939761043, "learning_rate": 1.1677731676733584e-08, "loss": 0.0, "step": 25621 }, { "epoch": 24.63653846153846, "grad_norm": 0.0005762857617810369, "learning_rate": 1.161762639675712e-08, "loss": 0.0, "step": 25622 }, { "epoch": 24.6375, "grad_norm": 0.0010553308529779315, "learning_rate": 1.1557676107157856e-08, "loss": 0.0, "step": 25623 }, { "epoch": 24.638461538461538, "grad_norm": 0.0016884184442460537, "learning_rate": 1.1497880808868379e-08, "loss": 0.0, "step": 25624 }, { "epoch": 24.639423076923077, "grad_norm": 0.000788430857937783, "learning_rate": 1.1438240502815723e-08, "loss": 0.0, "step": 25625 }, { "epoch": 24.640384615384615, "grad_norm": 0.003273717127740383, "learning_rate": 1.1378755189925817e-08, "loss": 0.0, "step": 25626 }, { "epoch": 24.641346153846154, "grad_norm": 0.0009696393390186131, "learning_rate": 1.1319424871121254e-08, "loss": 0.0, "step": 25627 }, { "epoch": 24.642307692307693, "grad_norm": 0.000723343575373292, "learning_rate": 1.126024954732352e-08, "loss": 0.0, "step": 25628 }, { "epoch": 24.64326923076923, "grad_norm": 0.0012504846090450883, "learning_rate": 1.120122921944855e-08, "loss": 0.0, "step": 25629 }, { "epoch": 24.64423076923077, "grad_norm": 0.0010442467173561454, "learning_rate": 1.1142363888414498e-08, "loss": 0.0, "step": 25630 }, { "epoch": 24.64519230769231, "grad_norm": 0.001323465257883072, "learning_rate": 1.1083653555133966e-08, "loss": 0.0, "step": 25631 }, { "epoch": 24.646153846153847, "grad_norm": 0.0006271330639719963, "learning_rate": 1.102509822051845e-08, "loss": 0.0, "step": 25632 }, { "epoch": 24.647115384615386, "grad_norm": 0.0013156394707038999, "learning_rate": 1.0966697885476108e-08, "loss": 0.0, "step": 25633 }, { "epoch": 24.648076923076925, "grad_norm": 0.0006954723503440619, "learning_rate": 1.0908452550912885e-08, "loss": 0.0, "step": 25634 }, { "epoch": 24.64903846153846, "grad_norm": 0.0010440470650792122, "learning_rate": 1.0850362217732502e-08, "loss": 0.0, "step": 25635 }, { "epoch": 24.65, "grad_norm": 0.0007002275087870657, "learning_rate": 1.079242688683757e-08, "loss": 0.0, "step": 25636 }, { "epoch": 24.650961538461537, "grad_norm": 0.001077895169146359, "learning_rate": 1.0734646559125151e-08, "loss": 0.0, "step": 25637 }, { "epoch": 24.651923076923076, "grad_norm": 0.0015156229492276907, "learning_rate": 1.0677021235493412e-08, "loss": 0.0, "step": 25638 }, { "epoch": 24.652884615384615, "grad_norm": 0.0003879532450810075, "learning_rate": 1.0619550916836086e-08, "loss": 0.0, "step": 25639 }, { "epoch": 24.653846153846153, "grad_norm": 0.0016718393890187144, "learning_rate": 1.056223560404468e-08, "loss": 0.0, "step": 25640 }, { "epoch": 24.654807692307692, "grad_norm": 0.0007640171679668128, "learning_rate": 1.0505075298008483e-08, "loss": 0.0, "step": 25641 }, { "epoch": 24.65576923076923, "grad_norm": 0.00046366348396986723, "learning_rate": 1.0448069999614563e-08, "loss": 0.0, "step": 25642 }, { "epoch": 24.65673076923077, "grad_norm": 0.000829530821647495, "learning_rate": 1.0391219709748878e-08, "loss": 0.0, "step": 25643 }, { "epoch": 24.657692307692308, "grad_norm": 0.0003558435710147023, "learning_rate": 1.0334524429290727e-08, "loss": 0.0, "step": 25644 }, { "epoch": 24.658653846153847, "grad_norm": 0.0004872244317084551, "learning_rate": 1.0277984159122734e-08, "loss": 0.0, "step": 25645 }, { "epoch": 24.659615384615385, "grad_norm": 0.0010073744924739003, "learning_rate": 1.0221598900119756e-08, "loss": 0.0, "step": 25646 }, { "epoch": 24.660576923076924, "grad_norm": 0.0016016780864447355, "learning_rate": 1.0165368653158868e-08, "loss": 0.0, "step": 25647 }, { "epoch": 24.661538461538463, "grad_norm": 0.000539724831469357, "learning_rate": 1.0109293419111598e-08, "loss": 0.0, "step": 25648 }, { "epoch": 24.6625, "grad_norm": 0.0006216756301000714, "learning_rate": 1.005337319884725e-08, "loss": 0.0, "step": 25649 }, { "epoch": 24.66346153846154, "grad_norm": 0.00042210493120364845, "learning_rate": 9.997607993235125e-09, "loss": 0.0, "step": 25650 }, { "epoch": 24.664423076923075, "grad_norm": 0.0007752892561256886, "learning_rate": 9.941997803140092e-09, "loss": 0.0, "step": 25651 }, { "epoch": 24.665384615384614, "grad_norm": 0.002058696700260043, "learning_rate": 9.886542629423679e-09, "loss": 0.0, "step": 25652 }, { "epoch": 24.666346153846153, "grad_norm": 0.0013409974053502083, "learning_rate": 9.831242472948532e-09, "loss": 0.0, "step": 25653 }, { "epoch": 24.66730769230769, "grad_norm": 0.0005205539637245238, "learning_rate": 9.776097334571743e-09, "loss": 0.0, "step": 25654 }, { "epoch": 24.66826923076923, "grad_norm": 0.0006976986769586802, "learning_rate": 9.721107215148184e-09, "loss": 0.0, "step": 25655 }, { "epoch": 24.66923076923077, "grad_norm": 0.0011780550703406334, "learning_rate": 9.666272115532726e-09, "loss": 0.0, "step": 25656 }, { "epoch": 24.670192307692307, "grad_norm": 0.0011506967712193727, "learning_rate": 9.61159203657469e-09, "loss": 0.0, "step": 25657 }, { "epoch": 24.671153846153846, "grad_norm": 0.0008334522135555744, "learning_rate": 9.557066979123398e-09, "loss": 0.0, "step": 25658 }, { "epoch": 24.672115384615385, "grad_norm": 0.0004930668510496616, "learning_rate": 9.502696944024837e-09, "loss": 0.0, "step": 25659 }, { "epoch": 24.673076923076923, "grad_norm": 0.0014945659786462784, "learning_rate": 9.448481932123888e-09, "loss": 0.0, "step": 25660 }, { "epoch": 24.674038461538462, "grad_norm": 0.0013117088237777352, "learning_rate": 9.394421944258769e-09, "loss": 0.0, "step": 25661 }, { "epoch": 24.675, "grad_norm": 0.0008580014109611511, "learning_rate": 9.34051698127103e-09, "loss": 0.0, "step": 25662 }, { "epoch": 24.67596153846154, "grad_norm": 0.0009875206742435694, "learning_rate": 9.286767043996669e-09, "loss": 0.0, "step": 25663 }, { "epoch": 24.676923076923078, "grad_norm": 0.0023573648650199175, "learning_rate": 9.233172133268353e-09, "loss": 0.0, "step": 25664 }, { "epoch": 24.677884615384617, "grad_norm": 0.0008481436525471509, "learning_rate": 9.17973224991986e-09, "loss": 0.0, "step": 25665 }, { "epoch": 24.678846153846155, "grad_norm": 0.0008532859501428902, "learning_rate": 9.126447394778304e-09, "loss": 0.0, "step": 25666 }, { "epoch": 24.67980769230769, "grad_norm": 0.0006677847122773528, "learning_rate": 9.073317568670804e-09, "loss": 0.0, "step": 25667 }, { "epoch": 24.68076923076923, "grad_norm": 0.0002177521528210491, "learning_rate": 9.020342772423363e-09, "loss": 0.0, "step": 25668 }, { "epoch": 24.681730769230768, "grad_norm": 0.000951416848693043, "learning_rate": 8.967523006857549e-09, "loss": 0.0, "step": 25669 }, { "epoch": 24.682692307692307, "grad_norm": 0.0026780131738632917, "learning_rate": 8.914858272791593e-09, "loss": 0.0, "step": 25670 }, { "epoch": 24.683653846153845, "grad_norm": 0.00047596843796782196, "learning_rate": 8.862348571043733e-09, "loss": 0.0, "step": 25671 }, { "epoch": 24.684615384615384, "grad_norm": 0.0003991757403127849, "learning_rate": 8.80999390242887e-09, "loss": 0.0, "step": 25672 }, { "epoch": 24.685576923076923, "grad_norm": 0.0006766283768229187, "learning_rate": 8.75779426775858e-09, "loss": 0.0, "step": 25673 }, { "epoch": 24.68653846153846, "grad_norm": 0.0007751787197776139, "learning_rate": 8.705749667843322e-09, "loss": 0.0, "step": 25674 }, { "epoch": 24.6875, "grad_norm": 0.0008791781147010624, "learning_rate": 8.65386010349134e-09, "loss": 0.0, "step": 25675 }, { "epoch": 24.68846153846154, "grad_norm": 0.0005085612065158784, "learning_rate": 8.602125575506438e-09, "loss": 0.0, "step": 25676 }, { "epoch": 24.689423076923077, "grad_norm": 0.0013214916689321399, "learning_rate": 8.550546084692413e-09, "loss": 0.0, "step": 25677 }, { "epoch": 24.690384615384616, "grad_norm": 0.0008459845557808876, "learning_rate": 8.499121631849739e-09, "loss": 0.0, "step": 25678 }, { "epoch": 24.691346153846155, "grad_norm": 0.0007541768136434257, "learning_rate": 8.447852217775554e-09, "loss": 0.0, "step": 25679 }, { "epoch": 24.692307692307693, "grad_norm": 0.0003834780945908278, "learning_rate": 8.39673784326589e-09, "loss": 0.0, "step": 25680 }, { "epoch": 24.693269230769232, "grad_norm": 0.0007146447896957397, "learning_rate": 8.345778509114556e-09, "loss": 0.0, "step": 25681 }, { "epoch": 24.69423076923077, "grad_norm": 0.0017502815462648869, "learning_rate": 8.294974216110918e-09, "loss": 0.0, "step": 25682 }, { "epoch": 24.69519230769231, "grad_norm": 0.0005745739326812327, "learning_rate": 8.244324965044348e-09, "loss": 0.0, "step": 25683 }, { "epoch": 24.696153846153845, "grad_norm": 0.00019703769066836685, "learning_rate": 8.193830756699773e-09, "loss": 0.0, "step": 25684 }, { "epoch": 24.697115384615383, "grad_norm": 0.00048542130389250815, "learning_rate": 8.143491591862118e-09, "loss": 0.0, "step": 25685 }, { "epoch": 24.698076923076922, "grad_norm": 0.0006601531058549881, "learning_rate": 8.093307471310763e-09, "loss": 0.0, "step": 25686 }, { "epoch": 24.69903846153846, "grad_norm": 0.005520964507013559, "learning_rate": 8.043278395826193e-09, "loss": 0.0, "step": 25687 }, { "epoch": 24.7, "grad_norm": 0.0016844132915139198, "learning_rate": 7.993404366184454e-09, "loss": 0.0, "step": 25688 }, { "epoch": 24.700961538461538, "grad_norm": 0.00419404124841094, "learning_rate": 7.94368538315715e-09, "loss": 0.0, "step": 25689 }, { "epoch": 24.701923076923077, "grad_norm": 0.001174792181700468, "learning_rate": 7.89412144751922e-09, "loss": 0.0, "step": 25690 }, { "epoch": 24.702884615384615, "grad_norm": 0.0004306278715375811, "learning_rate": 7.844712560036715e-09, "loss": 0.0, "step": 25691 }, { "epoch": 24.703846153846154, "grad_norm": 0.0011772202560678124, "learning_rate": 7.79545872147902e-09, "loss": 0.0, "step": 25692 }, { "epoch": 24.704807692307693, "grad_norm": 0.001449553412385285, "learning_rate": 7.74635993260775e-09, "loss": 0.0, "step": 25693 }, { "epoch": 24.70576923076923, "grad_norm": 0.00043979319161735475, "learning_rate": 7.697416194186735e-09, "loss": 0.0, "step": 25694 }, { "epoch": 24.70673076923077, "grad_norm": 0.0019610601011663675, "learning_rate": 7.64862750697426e-09, "loss": 0.0, "step": 25695 }, { "epoch": 24.70769230769231, "grad_norm": 0.000860361207742244, "learning_rate": 7.599993871728606e-09, "loss": 0.0, "step": 25696 }, { "epoch": 24.708653846153847, "grad_norm": 0.0013477226020768285, "learning_rate": 7.551515289203615e-09, "loss": 0.0, "step": 25697 }, { "epoch": 24.709615384615386, "grad_norm": 0.00046838505659252405, "learning_rate": 7.503191760150908e-09, "loss": 0.0, "step": 25698 }, { "epoch": 24.710576923076925, "grad_norm": 0.001400194363668561, "learning_rate": 7.455023285320996e-09, "loss": 0.0, "step": 25699 }, { "epoch": 24.71153846153846, "grad_norm": 0.0012645465321838856, "learning_rate": 7.407009865462167e-09, "loss": 0.0, "step": 25700 }, { "epoch": 24.7125, "grad_norm": 0.0005345438839867711, "learning_rate": 7.359151501318274e-09, "loss": 0.0, "step": 25701 }, { "epoch": 24.713461538461537, "grad_norm": 0.0010056545725092292, "learning_rate": 7.311448193632053e-09, "loss": 0.0, "step": 25702 }, { "epoch": 24.714423076923076, "grad_norm": 0.0014384150272235274, "learning_rate": 7.263899943144026e-09, "loss": 0.0, "step": 25703 }, { "epoch": 24.715384615384615, "grad_norm": 0.001452089287340641, "learning_rate": 7.216506750592489e-09, "loss": 0.0, "step": 25704 }, { "epoch": 24.716346153846153, "grad_norm": 0.00048612436512485147, "learning_rate": 7.16926861671241e-09, "loss": 0.0, "step": 25705 }, { "epoch": 24.717307692307692, "grad_norm": 0.0015329498564824462, "learning_rate": 7.122185542235427e-09, "loss": 0.0, "step": 25706 }, { "epoch": 24.71826923076923, "grad_norm": 0.001154343830421567, "learning_rate": 7.075257527894286e-09, "loss": 0.0, "step": 25707 }, { "epoch": 24.71923076923077, "grad_norm": 0.00029646759503521025, "learning_rate": 7.028484574416184e-09, "loss": 0.0, "step": 25708 }, { "epoch": 24.720192307692308, "grad_norm": 0.0023805873934179544, "learning_rate": 6.981866682527206e-09, "loss": 0.0, "step": 25709 }, { "epoch": 24.721153846153847, "grad_norm": 0.0009303589467890561, "learning_rate": 6.935403852950107e-09, "loss": 0.0, "step": 25710 }, { "epoch": 24.722115384615385, "grad_norm": 0.0005157692357897758, "learning_rate": 6.889096086406533e-09, "loss": 0.0, "step": 25711 }, { "epoch": 24.723076923076924, "grad_norm": 0.001888495171442628, "learning_rate": 6.842943383614798e-09, "loss": 0.0, "step": 25712 }, { "epoch": 24.724038461538463, "grad_norm": 0.0004465526435524225, "learning_rate": 6.7969457452909944e-09, "loss": 0.0, "step": 25713 }, { "epoch": 24.725, "grad_norm": 0.0005879847449250519, "learning_rate": 6.7511031721489985e-09, "loss": 0.0, "step": 25714 }, { "epoch": 24.72596153846154, "grad_norm": 0.0011098743416368961, "learning_rate": 6.7054156648993505e-09, "loss": 0.0, "step": 25715 }, { "epoch": 24.726923076923075, "grad_norm": 0.0007334336405619979, "learning_rate": 6.6598832242525945e-09, "loss": 0.0, "step": 25716 }, { "epoch": 24.727884615384614, "grad_norm": 0.0009329229360446334, "learning_rate": 6.6145058509137216e-09, "loss": 0.0, "step": 25717 }, { "epoch": 24.728846153846153, "grad_norm": 0.00036570988595485687, "learning_rate": 6.569283545587724e-09, "loss": 0.0, "step": 25718 }, { "epoch": 24.72980769230769, "grad_norm": 0.0007939388160593808, "learning_rate": 6.524216308975151e-09, "loss": 0.0, "step": 25719 }, { "epoch": 24.73076923076923, "grad_norm": 0.0008246770012192428, "learning_rate": 6.479304141777665e-09, "loss": 0.0, "step": 25720 }, { "epoch": 24.73173076923077, "grad_norm": 0.001549884444102645, "learning_rate": 6.4345470446902645e-09, "loss": 0.0, "step": 25721 }, { "epoch": 24.732692307692307, "grad_norm": 0.00085652299458161, "learning_rate": 6.38994501840795e-09, "loss": 0.0, "step": 25722 }, { "epoch": 24.733653846153846, "grad_norm": 7.958606875035912e-05, "learning_rate": 6.345498063622391e-09, "loss": 0.0, "step": 25723 }, { "epoch": 24.734615384615385, "grad_norm": 0.0007850682595744729, "learning_rate": 6.301206181024144e-09, "loss": 0.0, "step": 25724 }, { "epoch": 24.735576923076923, "grad_norm": 0.0009189587435685098, "learning_rate": 6.257069371299329e-09, "loss": 0.0, "step": 25725 }, { "epoch": 24.736538461538462, "grad_norm": 0.0007807688089087605, "learning_rate": 6.213087635134063e-09, "loss": 0.0, "step": 25726 }, { "epoch": 24.7375, "grad_norm": 0.00043574353912845254, "learning_rate": 6.169260973210023e-09, "loss": 0.0, "step": 25727 }, { "epoch": 24.73846153846154, "grad_norm": 0.0009906684281304479, "learning_rate": 6.125589386207775e-09, "loss": 0.0, "step": 25728 }, { "epoch": 24.739423076923078, "grad_norm": 0.0008154571987688541, "learning_rate": 6.082072874804556e-09, "loss": 0.0, "step": 25729 }, { "epoch": 24.740384615384617, "grad_norm": 0.0016154948389157653, "learning_rate": 6.0387114396764925e-09, "loss": 0.0, "step": 25730 }, { "epoch": 24.741346153846155, "grad_norm": 0.0010477149626240134, "learning_rate": 5.995505081495267e-09, "loss": 0.0, "step": 25731 }, { "epoch": 24.74230769230769, "grad_norm": 0.00041538747609592974, "learning_rate": 5.952453800931457e-09, "loss": 0.0, "step": 25732 }, { "epoch": 24.74326923076923, "grad_norm": 0.0011633561225607991, "learning_rate": 5.9095575986534146e-09, "loss": 0.0, "step": 25733 }, { "epoch": 24.744230769230768, "grad_norm": 0.0009234375902451575, "learning_rate": 5.866816475326165e-09, "loss": 0.0, "step": 25734 }, { "epoch": 24.745192307692307, "grad_norm": 0.0017328251851722598, "learning_rate": 5.82423043161473e-09, "loss": 0.0, "step": 25735 }, { "epoch": 24.746153846153845, "grad_norm": 0.0009961071191355586, "learning_rate": 5.781799468177473e-09, "loss": 0.0, "step": 25736 }, { "epoch": 24.747115384615384, "grad_norm": 0.00255921995267272, "learning_rate": 5.739523585674978e-09, "loss": 0.0, "step": 25737 }, { "epoch": 24.748076923076923, "grad_norm": 0.0005759606137871742, "learning_rate": 5.697402784762273e-09, "loss": 0.0, "step": 25738 }, { "epoch": 24.74903846153846, "grad_norm": 0.0005674654967151582, "learning_rate": 5.655437066092173e-09, "loss": 0.0, "step": 25739 }, { "epoch": 24.75, "grad_norm": 0.0005590803921222687, "learning_rate": 5.613626430317487e-09, "loss": 0.0, "step": 25740 }, { "epoch": 24.75096153846154, "grad_norm": 0.0005568470223806798, "learning_rate": 5.571970878085475e-09, "loss": 0.0, "step": 25741 }, { "epoch": 24.751923076923077, "grad_norm": 0.0004773522960022092, "learning_rate": 5.530470410043398e-09, "loss": 0.0, "step": 25742 }, { "epoch": 24.752884615384616, "grad_norm": 0.0019050569972023368, "learning_rate": 5.489125026835185e-09, "loss": 0.0, "step": 25743 }, { "epoch": 24.753846153846155, "grad_norm": 0.0006995639414526522, "learning_rate": 5.447934729101434e-09, "loss": 0.0, "step": 25744 }, { "epoch": 24.754807692307693, "grad_norm": 0.0006749228341504931, "learning_rate": 5.4068995174827446e-09, "loss": 0.0, "step": 25745 }, { "epoch": 24.755769230769232, "grad_norm": 0.0006820600247010589, "learning_rate": 5.366019392615273e-09, "loss": 0.0, "step": 25746 }, { "epoch": 24.75673076923077, "grad_norm": 0.0006653371965512633, "learning_rate": 5.325294355132959e-09, "loss": 0.0, "step": 25747 }, { "epoch": 24.75769230769231, "grad_norm": 0.0008298803586512804, "learning_rate": 5.284724405668629e-09, "loss": 0.0, "step": 25748 }, { "epoch": 24.758653846153845, "grad_norm": 0.00019963855447713286, "learning_rate": 5.2443095448506674e-09, "loss": 0.0, "step": 25749 }, { "epoch": 24.759615384615383, "grad_norm": 0.0007467447430826724, "learning_rate": 5.204049773307462e-09, "loss": 0.0, "step": 25750 }, { "epoch": 24.760576923076922, "grad_norm": 0.000657491444144398, "learning_rate": 5.163945091661848e-09, "loss": 0.0, "step": 25751 }, { "epoch": 24.76153846153846, "grad_norm": 0.001621123286895454, "learning_rate": 5.123995500538881e-09, "loss": 0.0, "step": 25752 }, { "epoch": 24.7625, "grad_norm": 0.001061426242813468, "learning_rate": 5.084201000555844e-09, "loss": 0.0, "step": 25753 }, { "epoch": 24.763461538461538, "grad_norm": 0.0008012299076654017, "learning_rate": 5.044561592331132e-09, "loss": 0.0, "step": 25754 }, { "epoch": 24.764423076923077, "grad_norm": 0.0006617407198064029, "learning_rate": 5.005077276480918e-09, "loss": 0.0, "step": 25755 }, { "epoch": 24.765384615384615, "grad_norm": 0.005597893614321947, "learning_rate": 4.965748053615827e-09, "loss": 0.0001, "step": 25756 }, { "epoch": 24.766346153846154, "grad_norm": 0.00042401597602292895, "learning_rate": 4.926573924347589e-09, "loss": 0.0, "step": 25757 }, { "epoch": 24.767307692307693, "grad_norm": 0.002742060227319598, "learning_rate": 4.887554889284607e-09, "loss": 0.0, "step": 25758 }, { "epoch": 24.76826923076923, "grad_norm": 0.001611987012438476, "learning_rate": 4.848690949029733e-09, "loss": 0.0, "step": 25759 }, { "epoch": 24.76923076923077, "grad_norm": 0.0003433028468862176, "learning_rate": 4.8099821041891485e-09, "loss": 0.0, "step": 25760 }, { "epoch": 24.77019230769231, "grad_norm": 0.0016669867327436805, "learning_rate": 4.771428355362373e-09, "loss": 0.0, "step": 25761 }, { "epoch": 24.771153846153847, "grad_norm": 0.0007461035274900496, "learning_rate": 4.733029703146708e-09, "loss": 0.0, "step": 25762 }, { "epoch": 24.772115384615386, "grad_norm": 0.000701019074767828, "learning_rate": 4.694786148138342e-09, "loss": 0.0, "step": 25763 }, { "epoch": 24.773076923076925, "grad_norm": 0.000428862840635702, "learning_rate": 4.656697690931245e-09, "loss": 0.0, "step": 25764 }, { "epoch": 24.77403846153846, "grad_norm": 0.0009228098788298666, "learning_rate": 4.618764332116055e-09, "loss": 0.0, "step": 25765 }, { "epoch": 24.775, "grad_norm": 0.0018815190996974707, "learning_rate": 4.5809860722823005e-09, "loss": 0.0, "step": 25766 }, { "epoch": 24.775961538461537, "grad_norm": 0.0005548481713049114, "learning_rate": 4.54336291201507e-09, "loss": 0.0, "step": 25767 }, { "epoch": 24.776923076923076, "grad_norm": 0.0009227479458786547, "learning_rate": 4.50589485189834e-09, "loss": 0.0, "step": 25768 }, { "epoch": 24.777884615384615, "grad_norm": 0.0012482521124184132, "learning_rate": 4.468581892513868e-09, "loss": 0.0, "step": 25769 }, { "epoch": 24.778846153846153, "grad_norm": 0.001012604683637619, "learning_rate": 4.43142403444008e-09, "loss": 0.0, "step": 25770 }, { "epoch": 24.779807692307692, "grad_norm": 0.0010958565399050713, "learning_rate": 4.394421278254291e-09, "loss": 0.0, "step": 25771 }, { "epoch": 24.78076923076923, "grad_norm": 0.0009661269723437726, "learning_rate": 4.357573624530487e-09, "loss": 0.0, "step": 25772 }, { "epoch": 24.78173076923077, "grad_norm": 0.0014135157689452171, "learning_rate": 4.3208810738404326e-09, "loss": 0.0, "step": 25773 }, { "epoch": 24.782692307692308, "grad_norm": 0.0016219940735027194, "learning_rate": 4.284343626753673e-09, "loss": 0.0, "step": 25774 }, { "epoch": 24.783653846153847, "grad_norm": 0.0022138969507068396, "learning_rate": 4.247961283835311e-09, "loss": 0.0, "step": 25775 }, { "epoch": 24.784615384615385, "grad_norm": 0.0005686668446287513, "learning_rate": 4.2117340456526716e-09, "loss": 0.0, "step": 25776 }, { "epoch": 24.785576923076924, "grad_norm": 0.0008209210936911404, "learning_rate": 4.1756619127664155e-09, "loss": 0.0, "step": 25777 }, { "epoch": 24.786538461538463, "grad_norm": 0.0009844722226262093, "learning_rate": 4.139744885736097e-09, "loss": 0.0, "step": 25778 }, { "epoch": 24.7875, "grad_norm": 0.0007814890705049038, "learning_rate": 4.103982965120157e-09, "loss": 0.0, "step": 25779 }, { "epoch": 24.78846153846154, "grad_norm": 0.001035931520164013, "learning_rate": 4.068376151471487e-09, "loss": 0.0, "step": 25780 }, { "epoch": 24.789423076923075, "grad_norm": 0.0012224200181663036, "learning_rate": 4.032924445345199e-09, "loss": 0.0, "step": 25781 }, { "epoch": 24.790384615384614, "grad_norm": 0.00043337728129699826, "learning_rate": 3.997627847289742e-09, "loss": 0.0, "step": 25782 }, { "epoch": 24.791346153846153, "grad_norm": 0.0014118680264800787, "learning_rate": 3.9624863578524575e-09, "loss": 0.0, "step": 25783 }, { "epoch": 24.79230769230769, "grad_norm": 0.0011468473821878433, "learning_rate": 3.927499977580685e-09, "loss": 0.0, "step": 25784 }, { "epoch": 24.79326923076923, "grad_norm": 0.0005251751863397658, "learning_rate": 3.892668707015101e-09, "loss": 0.0, "step": 25785 }, { "epoch": 24.79423076923077, "grad_norm": 0.0002603397297207266, "learning_rate": 3.857992546697498e-09, "loss": 0.0, "step": 25786 }, { "epoch": 24.795192307692307, "grad_norm": 0.0005392803577706218, "learning_rate": 3.823471497165221e-09, "loss": 0.0, "step": 25787 }, { "epoch": 24.796153846153846, "grad_norm": 0.0015534699195995927, "learning_rate": 3.789105558954509e-09, "loss": 0.0, "step": 25788 }, { "epoch": 24.797115384615385, "grad_norm": 0.0010513056768104434, "learning_rate": 3.754894732598269e-09, "loss": 0.0, "step": 25789 }, { "epoch": 24.798076923076923, "grad_norm": 0.001484969281591475, "learning_rate": 3.720839018627187e-09, "loss": 0.0, "step": 25790 }, { "epoch": 24.799038461538462, "grad_norm": 0.0010470767738297582, "learning_rate": 3.68693841756973e-09, "loss": 0.0, "step": 25791 }, { "epoch": 24.8, "grad_norm": 0.0005608694045804441, "learning_rate": 3.653192929953253e-09, "loss": 0.0, "step": 25792 }, { "epoch": 24.80096153846154, "grad_norm": 0.0009583549690432847, "learning_rate": 3.6196025562995616e-09, "loss": 0.0, "step": 25793 }, { "epoch": 24.801923076923078, "grad_norm": 0.0006717926007695496, "learning_rate": 3.5861672971304606e-09, "loss": 0.0, "step": 25794 }, { "epoch": 24.802884615384617, "grad_norm": 0.0013639488024637103, "learning_rate": 3.552887152964424e-09, "loss": 0.0, "step": 25795 }, { "epoch": 24.803846153846155, "grad_norm": 0.0005084422882646322, "learning_rate": 3.5197621243199253e-09, "loss": 0.0, "step": 25796 }, { "epoch": 24.80480769230769, "grad_norm": 0.0010357709834352136, "learning_rate": 3.486792211707668e-09, "loss": 0.0, "step": 25797 }, { "epoch": 24.80576923076923, "grad_norm": 0.0006406524917110801, "learning_rate": 3.4539774156427953e-09, "loss": 0.0, "step": 25798 }, { "epoch": 24.806730769230768, "grad_norm": 0.00048203818732872605, "learning_rate": 3.4213177366315685e-09, "loss": 0.0, "step": 25799 }, { "epoch": 24.807692307692307, "grad_norm": 0.0008701033657416701, "learning_rate": 3.3888131751813604e-09, "loss": 0.0, "step": 25800 }, { "epoch": 24.808653846153845, "grad_norm": 0.0016654559876769781, "learning_rate": 3.3564637317984318e-09, "loss": 0.0, "step": 25801 }, { "epoch": 24.809615384615384, "grad_norm": 0.0009357051458209753, "learning_rate": 3.3242694069823833e-09, "loss": 0.0, "step": 25802 }, { "epoch": 24.810576923076923, "grad_norm": 0.0014488527085632086, "learning_rate": 3.292230201233926e-09, "loss": 0.0, "step": 25803 }, { "epoch": 24.81153846153846, "grad_norm": 0.001114789629355073, "learning_rate": 3.260346115050439e-09, "loss": 0.0, "step": 25804 }, { "epoch": 24.8125, "grad_norm": 0.0007781395688652992, "learning_rate": 3.228617148925972e-09, "loss": 0.0, "step": 25805 }, { "epoch": 24.81346153846154, "grad_norm": 0.0006861589499749243, "learning_rate": 3.1970433033534644e-09, "loss": 0.0, "step": 25806 }, { "epoch": 24.814423076923077, "grad_norm": 0.00030378648079931736, "learning_rate": 3.165624578822524e-09, "loss": 0.0, "step": 25807 }, { "epoch": 24.815384615384616, "grad_norm": 0.00047022924991324544, "learning_rate": 3.1343609758205386e-09, "loss": 0.0, "step": 25808 }, { "epoch": 24.816346153846155, "grad_norm": 0.0004879695479758084, "learning_rate": 3.103252494832676e-09, "loss": 0.0, "step": 25809 }, { "epoch": 24.817307692307693, "grad_norm": 0.0005277423188090324, "learning_rate": 3.072299136341883e-09, "loss": 0.0, "step": 25810 }, { "epoch": 24.818269230769232, "grad_norm": 0.0005400915397331119, "learning_rate": 3.0415009008288864e-09, "loss": 0.0, "step": 25811 }, { "epoch": 24.81923076923077, "grad_norm": 0.000431246095104143, "learning_rate": 3.010857788771082e-09, "loss": 0.0, "step": 25812 }, { "epoch": 24.82019230769231, "grad_norm": 0.0007377053261734545, "learning_rate": 2.9803698006436454e-09, "loss": 0.0, "step": 25813 }, { "epoch": 24.821153846153845, "grad_norm": 0.002113529248163104, "learning_rate": 2.9500369369195313e-09, "loss": 0.0, "step": 25814 }, { "epoch": 24.822115384615383, "grad_norm": 0.0005062022246420383, "learning_rate": 2.9198591980705847e-09, "loss": 0.0, "step": 25815 }, { "epoch": 24.823076923076922, "grad_norm": 0.0007071529398672283, "learning_rate": 2.8898365845642094e-09, "loss": 0.0, "step": 25816 }, { "epoch": 24.82403846153846, "grad_norm": 0.0006572200800292194, "learning_rate": 2.859969096865589e-09, "loss": 0.0, "step": 25817 }, { "epoch": 24.825, "grad_norm": 0.000701550452504307, "learning_rate": 2.8302567354399068e-09, "loss": 0.0, "step": 25818 }, { "epoch": 24.825961538461538, "grad_norm": 0.0006160765187814832, "learning_rate": 2.800699500746795e-09, "loss": 0.0, "step": 25819 }, { "epoch": 24.826923076923077, "grad_norm": 0.0005969548947177827, "learning_rate": 2.7712973932458864e-09, "loss": 0.0, "step": 25820 }, { "epoch": 24.827884615384615, "grad_norm": 0.0009193068253807724, "learning_rate": 2.742050413392372e-09, "loss": 0.0, "step": 25821 }, { "epoch": 24.828846153846154, "grad_norm": 0.00039735561585985124, "learning_rate": 2.7129585616414435e-09, "loss": 0.0, "step": 25822 }, { "epoch": 24.829807692307693, "grad_norm": 0.00047814100980758667, "learning_rate": 2.6840218384427406e-09, "loss": 0.0, "step": 25823 }, { "epoch": 24.83076923076923, "grad_norm": 0.001297735027037561, "learning_rate": 2.6552402442470148e-09, "loss": 0.0, "step": 25824 }, { "epoch": 24.83173076923077, "grad_norm": 0.0010729783680289984, "learning_rate": 2.626613779499465e-09, "loss": 0.0, "step": 25825 }, { "epoch": 24.83269230769231, "grad_norm": 0.001673477585427463, "learning_rate": 2.598142444645291e-09, "loss": 0.0, "step": 25826 }, { "epoch": 24.833653846153847, "grad_norm": 0.005728693678975105, "learning_rate": 2.5698262401263607e-09, "loss": 0.0, "step": 25827 }, { "epoch": 24.834615384615386, "grad_norm": 0.0004996838979423046, "learning_rate": 2.5416651663801027e-09, "loss": 0.0, "step": 25828 }, { "epoch": 24.835576923076925, "grad_norm": 0.0007372979307547212, "learning_rate": 2.5136592238461653e-09, "loss": 0.0, "step": 25829 }, { "epoch": 24.83653846153846, "grad_norm": 0.002684794832020998, "learning_rate": 2.485808412958646e-09, "loss": 0.0, "step": 25830 }, { "epoch": 24.8375, "grad_norm": 0.0012765696737915277, "learning_rate": 2.4581127341483102e-09, "loss": 0.0, "step": 25831 }, { "epoch": 24.838461538461537, "grad_norm": 0.000990741653367877, "learning_rate": 2.430572187844815e-09, "loss": 0.0, "step": 25832 }, { "epoch": 24.839423076923076, "grad_norm": 0.0012628461699932814, "learning_rate": 2.403186774477817e-09, "loss": 0.0, "step": 25833 }, { "epoch": 24.840384615384615, "grad_norm": 0.0013836139114573598, "learning_rate": 2.3759564944692e-09, "loss": 0.0, "step": 25834 }, { "epoch": 24.841346153846153, "grad_norm": 0.0008126309257932007, "learning_rate": 2.348881348244181e-09, "loss": 0.0, "step": 25835 }, { "epoch": 24.842307692307692, "grad_norm": 0.0006483219913206995, "learning_rate": 2.3219613362213123e-09, "loss": 0.0, "step": 25836 }, { "epoch": 24.84326923076923, "grad_norm": 0.0009207541006617248, "learning_rate": 2.295196458819149e-09, "loss": 0.0, "step": 25837 }, { "epoch": 24.84423076923077, "grad_norm": 0.0005376639892347157, "learning_rate": 2.268586716451804e-09, "loss": 0.0, "step": 25838 }, { "epoch": 24.845192307692308, "grad_norm": 0.0008629841031506658, "learning_rate": 2.2421321095345005e-09, "loss": 0.0, "step": 25839 }, { "epoch": 24.846153846153847, "grad_norm": 0.0007839998579584062, "learning_rate": 2.215832638474691e-09, "loss": 0.0, "step": 25840 }, { "epoch": 24.847115384615385, "grad_norm": 0.0007072086445987225, "learning_rate": 2.1896883036831573e-09, "loss": 0.0, "step": 25841 }, { "epoch": 24.848076923076924, "grad_norm": 0.0010984573746100068, "learning_rate": 2.163699105564021e-09, "loss": 0.0, "step": 25842 }, { "epoch": 24.849038461538463, "grad_norm": 0.00094426708528772, "learning_rate": 2.137865044520293e-09, "loss": 0.0, "step": 25843 }, { "epoch": 24.85, "grad_norm": 0.002072230214253068, "learning_rate": 2.112186120953874e-09, "loss": 0.0, "step": 25844 }, { "epoch": 24.85096153846154, "grad_norm": 0.0009834017837420106, "learning_rate": 2.0866623352633343e-09, "loss": 0.0, "step": 25845 }, { "epoch": 24.851923076923075, "grad_norm": 0.00108050974085927, "learning_rate": 2.0612936878439125e-09, "loss": 0.0, "step": 25846 }, { "epoch": 24.852884615384614, "grad_norm": 0.0007653390639461577, "learning_rate": 2.036080179089739e-09, "loss": 0.0, "step": 25847 }, { "epoch": 24.853846153846153, "grad_norm": 0.0011114411754533648, "learning_rate": 2.0110218093927216e-09, "loss": 0.0, "step": 25848 }, { "epoch": 24.85480769230769, "grad_norm": 0.00047793929115869105, "learning_rate": 1.986118579140328e-09, "loss": 0.0, "step": 25849 }, { "epoch": 24.85576923076923, "grad_norm": 0.0014420184306800365, "learning_rate": 1.9613704887189166e-09, "loss": 0.0, "step": 25850 }, { "epoch": 24.85673076923077, "grad_norm": 0.00073765660636127, "learning_rate": 1.9367775385137345e-09, "loss": 0.0, "step": 25851 }, { "epoch": 24.857692307692307, "grad_norm": 0.0006593603175133467, "learning_rate": 1.912339728905588e-09, "loss": 0.0, "step": 25852 }, { "epoch": 24.858653846153846, "grad_norm": 0.00037559884367510676, "learning_rate": 1.888057060274173e-09, "loss": 0.0, "step": 25853 }, { "epoch": 24.859615384615385, "grad_norm": 0.0003983958449680358, "learning_rate": 1.8639295329958562e-09, "loss": 0.0, "step": 25854 }, { "epoch": 24.860576923076923, "grad_norm": 0.0008936417871154845, "learning_rate": 1.8399571474447819e-09, "loss": 0.0, "step": 25855 }, { "epoch": 24.861538461538462, "grad_norm": 0.0010110274888575077, "learning_rate": 1.8161399039939854e-09, "loss": 0.0, "step": 25856 }, { "epoch": 24.8625, "grad_norm": 0.0004935690085403621, "learning_rate": 1.7924778030120605e-09, "loss": 0.0, "step": 25857 }, { "epoch": 24.86346153846154, "grad_norm": 0.0007613765192218125, "learning_rate": 1.768970844866491e-09, "loss": 0.0, "step": 25858 }, { "epoch": 24.864423076923078, "grad_norm": 0.000760484195780009, "learning_rate": 1.7456190299214305e-09, "loss": 0.0, "step": 25859 }, { "epoch": 24.865384615384617, "grad_norm": 0.0012125269277021289, "learning_rate": 1.7224223585410316e-09, "loss": 0.0, "step": 25860 }, { "epoch": 24.866346153846155, "grad_norm": 0.0014173678355291486, "learning_rate": 1.6993808310827864e-09, "loss": 0.0, "step": 25861 }, { "epoch": 24.86730769230769, "grad_norm": 0.0012785153230652213, "learning_rate": 1.6764944479064072e-09, "loss": 0.0, "step": 25862 }, { "epoch": 24.86826923076923, "grad_norm": 0.0009429100900888443, "learning_rate": 1.6537632093660549e-09, "loss": 0.0, "step": 25863 }, { "epoch": 24.869230769230768, "grad_norm": 0.0007233782089315355, "learning_rate": 1.6311871158136706e-09, "loss": 0.0, "step": 25864 }, { "epoch": 24.870192307692307, "grad_norm": 0.002075585536658764, "learning_rate": 1.6087661676011945e-09, "loss": 0.0, "step": 25865 }, { "epoch": 24.871153846153845, "grad_norm": 0.0005694555584341288, "learning_rate": 1.5865003650761268e-09, "loss": 0.0, "step": 25866 }, { "epoch": 24.872115384615384, "grad_norm": 0.0013392339460551739, "learning_rate": 1.5643897085826365e-09, "loss": 0.0, "step": 25867 }, { "epoch": 24.873076923076923, "grad_norm": 0.0006695911870338023, "learning_rate": 1.542434198466003e-09, "loss": 0.0, "step": 25868 }, { "epoch": 24.87403846153846, "grad_norm": 0.0008240691968239844, "learning_rate": 1.520633835063734e-09, "loss": 0.0, "step": 25869 }, { "epoch": 24.875, "grad_norm": 0.005388363264501095, "learning_rate": 1.4989886187177783e-09, "loss": 0.0, "step": 25870 }, { "epoch": 24.87596153846154, "grad_norm": 0.000959142460487783, "learning_rate": 1.4774985497612027e-09, "loss": 0.0, "step": 25871 }, { "epoch": 24.876923076923077, "grad_norm": 0.0031671959441155195, "learning_rate": 1.4561636285292947e-09, "loss": 0.0, "step": 25872 }, { "epoch": 24.877884615384616, "grad_norm": 0.00032130457111634314, "learning_rate": 1.4349838553517902e-09, "loss": 0.0, "step": 25873 }, { "epoch": 24.878846153846155, "grad_norm": 0.0004382371553219855, "learning_rate": 1.413959230558426e-09, "loss": 0.0, "step": 25874 }, { "epoch": 24.879807692307693, "grad_norm": 0.001203116844408214, "learning_rate": 1.393089754474497e-09, "loss": 0.0, "step": 25875 }, { "epoch": 24.880769230769232, "grad_norm": 0.00519489636644721, "learning_rate": 1.3723754274241884e-09, "loss": 0.0, "step": 25876 }, { "epoch": 24.88173076923077, "grad_norm": 0.002428292063996196, "learning_rate": 1.351816249729465e-09, "loss": 0.0, "step": 25877 }, { "epoch": 24.88269230769231, "grad_norm": 0.0006961915642023087, "learning_rate": 1.331412221708961e-09, "loss": 0.0, "step": 25878 }, { "epoch": 24.883653846153845, "grad_norm": 0.0009400551789440215, "learning_rate": 1.3111633436779792e-09, "loss": 0.0, "step": 25879 }, { "epoch": 24.884615384615383, "grad_norm": 0.00047268340131267905, "learning_rate": 1.2910696159529334e-09, "loss": 0.0, "step": 25880 }, { "epoch": 24.885576923076922, "grad_norm": 0.0011449536541476846, "learning_rate": 1.2711310388446862e-09, "loss": 0.0, "step": 25881 }, { "epoch": 24.88653846153846, "grad_norm": 0.0017103514401242137, "learning_rate": 1.2513476126629898e-09, "loss": 0.0, "step": 25882 }, { "epoch": 24.8875, "grad_norm": 0.0005565276369452477, "learning_rate": 1.2317193377131553e-09, "loss": 0.0, "step": 25883 }, { "epoch": 24.888461538461538, "grad_norm": 0.001450211158953607, "learning_rate": 1.2122462143016044e-09, "loss": 0.0, "step": 25884 }, { "epoch": 24.889423076923077, "grad_norm": 0.00130070757586509, "learning_rate": 1.192928242730318e-09, "loss": 0.0, "step": 25885 }, { "epoch": 24.890384615384615, "grad_norm": 0.0004808922531083226, "learning_rate": 1.1737654232979456e-09, "loss": 0.0, "step": 25886 }, { "epoch": 24.891346153846154, "grad_norm": 0.005759015213698149, "learning_rate": 1.1547577563031375e-09, "loss": 0.0001, "step": 25887 }, { "epoch": 24.892307692307693, "grad_norm": 0.0011072478955611587, "learning_rate": 1.1359052420401028e-09, "loss": 0.0, "step": 25888 }, { "epoch": 24.89326923076923, "grad_norm": 0.0007407792727462947, "learning_rate": 1.1172078808008301e-09, "loss": 0.0, "step": 25889 }, { "epoch": 24.89423076923077, "grad_norm": 0.0007305745384655893, "learning_rate": 1.098665672876198e-09, "loss": 0.0, "step": 25890 }, { "epoch": 24.89519230769231, "grad_norm": 0.0030074664391577244, "learning_rate": 1.0802786185548642e-09, "loss": 0.0, "step": 25891 }, { "epoch": 24.896153846153847, "grad_norm": 0.0004779959563165903, "learning_rate": 1.062046718121046e-09, "loss": 0.0, "step": 25892 }, { "epoch": 24.897115384615386, "grad_norm": 0.0018063958268612623, "learning_rate": 1.0439699718567398e-09, "loss": 0.0, "step": 25893 }, { "epoch": 24.898076923076925, "grad_norm": 0.0006639065104536712, "learning_rate": 1.0260483800439425e-09, "loss": 0.0, "step": 25894 }, { "epoch": 24.89903846153846, "grad_norm": 0.0002389630681136623, "learning_rate": 1.0082819429602097e-09, "loss": 0.0, "step": 25895 }, { "epoch": 24.9, "grad_norm": 0.0006955024437047541, "learning_rate": 9.90670660880877e-10, "loss": 0.0, "step": 25896 }, { "epoch": 24.900961538461537, "grad_norm": 0.0012665798421949148, "learning_rate": 9.73214534080169e-10, "loss": 0.0, "step": 25897 }, { "epoch": 24.901923076923076, "grad_norm": 0.0013949359999969602, "learning_rate": 9.559135628278704e-10, "loss": 0.0, "step": 25898 }, { "epoch": 24.902884615384615, "grad_norm": 0.0022273175418376923, "learning_rate": 9.38767747393765e-10, "loss": 0.0, "step": 25899 }, { "epoch": 24.903846153846153, "grad_norm": 0.0009104502969421446, "learning_rate": 9.217770880420862e-10, "loss": 0.0, "step": 25900 }, { "epoch": 24.904807692307692, "grad_norm": 0.0015751069877296686, "learning_rate": 9.049415850381771e-10, "loss": 0.0, "step": 25901 }, { "epoch": 24.90576923076923, "grad_norm": 0.0006150514236651361, "learning_rate": 8.882612386429401e-10, "loss": 0.0, "step": 25902 }, { "epoch": 24.90673076923077, "grad_norm": 0.00031078531173989177, "learning_rate": 8.717360491139471e-10, "loss": 0.0, "step": 25903 }, { "epoch": 24.907692307692308, "grad_norm": 0.00033877865644171834, "learning_rate": 8.553660167087696e-10, "loss": 0.0, "step": 25904 }, { "epoch": 24.908653846153847, "grad_norm": 0.0013598705409094691, "learning_rate": 8.391511416816489e-10, "loss": 0.0, "step": 25905 }, { "epoch": 24.909615384615385, "grad_norm": 0.0012742904946208, "learning_rate": 8.230914242834953e-10, "loss": 0.0, "step": 25906 }, { "epoch": 24.910576923076924, "grad_norm": 0.0007859163451939821, "learning_rate": 8.071868647629988e-10, "loss": 0.0, "step": 25907 }, { "epoch": 24.911538461538463, "grad_norm": 0.0005098391557112336, "learning_rate": 7.914374633688493e-10, "loss": 0.0, "step": 25908 }, { "epoch": 24.9125, "grad_norm": 0.0009046766208484769, "learning_rate": 7.758432203430755e-10, "loss": 0.0, "step": 25909 }, { "epoch": 24.91346153846154, "grad_norm": 0.0005956253735348582, "learning_rate": 7.604041359299264e-10, "loss": 0.0, "step": 25910 }, { "epoch": 24.914423076923075, "grad_norm": 0.0004548263968899846, "learning_rate": 7.451202103669897e-10, "loss": 0.0, "step": 25911 }, { "epoch": 24.915384615384614, "grad_norm": 0.0008011744939722121, "learning_rate": 7.299914438929634e-10, "loss": 0.0, "step": 25912 }, { "epoch": 24.916346153846153, "grad_norm": 0.0007234879303723574, "learning_rate": 7.150178367421046e-10, "loss": 0.0, "step": 25913 }, { "epoch": 24.91730769230769, "grad_norm": 0.001539037679322064, "learning_rate": 7.001993891464498e-10, "loss": 0.0, "step": 25914 }, { "epoch": 24.91826923076923, "grad_norm": 0.0011021646205335855, "learning_rate": 6.855361013358153e-10, "loss": 0.0, "step": 25915 }, { "epoch": 24.91923076923077, "grad_norm": 0.0006484467885456979, "learning_rate": 6.710279735377967e-10, "loss": 0.0, "step": 25916 }, { "epoch": 24.920192307692307, "grad_norm": 0.00042626200593076646, "learning_rate": 6.566750059777694e-10, "loss": 0.0, "step": 25917 }, { "epoch": 24.921153846153846, "grad_norm": 0.0009331488981842995, "learning_rate": 6.424771988788881e-10, "loss": 0.0, "step": 25918 }, { "epoch": 24.922115384615385, "grad_norm": 0.001298532821238041, "learning_rate": 6.28434552460977e-10, "loss": 0.0, "step": 25919 }, { "epoch": 24.923076923076923, "grad_norm": 0.0005281537305563688, "learning_rate": 6.145470669416398e-10, "loss": 0.0, "step": 25920 }, { "epoch": 24.924038461538462, "grad_norm": 0.0006481883465312421, "learning_rate": 6.008147425373701e-10, "loss": 0.0, "step": 25921 }, { "epoch": 24.925, "grad_norm": 0.0006619080086238682, "learning_rate": 5.872375794602203e-10, "loss": 0.0, "step": 25922 }, { "epoch": 24.92596153846154, "grad_norm": 0.0016532072331756353, "learning_rate": 5.738155779211329e-10, "loss": 0.0, "step": 25923 }, { "epoch": 24.926923076923078, "grad_norm": 0.0006823893636465073, "learning_rate": 5.605487381277197e-10, "loss": 0.0, "step": 25924 }, { "epoch": 24.927884615384617, "grad_norm": 0.0014534881338477135, "learning_rate": 5.474370602875923e-10, "loss": 0.0, "step": 25925 }, { "epoch": 24.928846153846155, "grad_norm": 0.0013972927117720246, "learning_rate": 5.344805446028112e-10, "loss": 0.0, "step": 25926 }, { "epoch": 24.92980769230769, "grad_norm": 0.0008696612785570323, "learning_rate": 5.216791912743269e-10, "loss": 0.0, "step": 25927 }, { "epoch": 24.93076923076923, "grad_norm": 0.0007248648325912654, "learning_rate": 5.090330005019795e-10, "loss": 0.0, "step": 25928 }, { "epoch": 24.931730769230768, "grad_norm": 0.001652426435612142, "learning_rate": 4.965419724811682e-10, "loss": 0.0, "step": 25929 }, { "epoch": 24.932692307692307, "grad_norm": 0.0005311682471074164, "learning_rate": 4.842061074061821e-10, "loss": 0.0, "step": 25930 }, { "epoch": 24.933653846153845, "grad_norm": 0.0011809649877250195, "learning_rate": 4.720254054679796e-10, "loss": 0.0, "step": 25931 }, { "epoch": 24.934615384615384, "grad_norm": 0.0007759283180348575, "learning_rate": 4.599998668552985e-10, "loss": 0.0, "step": 25932 }, { "epoch": 24.935576923076923, "grad_norm": 0.00262436899356544, "learning_rate": 4.4812949175465634e-10, "loss": 0.0, "step": 25933 }, { "epoch": 24.93653846153846, "grad_norm": 0.00103056023363024, "learning_rate": 4.364142803514604e-10, "loss": 0.0, "step": 25934 }, { "epoch": 24.9375, "grad_norm": 0.0007566973799839616, "learning_rate": 4.248542328266769e-10, "loss": 0.0, "step": 25935 }, { "epoch": 24.93846153846154, "grad_norm": 0.0014609865611419082, "learning_rate": 4.134493493601621e-10, "loss": 0.0, "step": 25936 }, { "epoch": 24.939423076923077, "grad_norm": 0.0005568729247897863, "learning_rate": 4.021996301273312e-10, "loss": 0.0, "step": 25937 }, { "epoch": 24.940384615384616, "grad_norm": 0.0007273258524946868, "learning_rate": 3.9110507530470965e-10, "loss": 0.0, "step": 25938 }, { "epoch": 24.941346153846155, "grad_norm": 0.0006676138727925718, "learning_rate": 3.8016568506327177e-10, "loss": 0.0, "step": 25939 }, { "epoch": 24.942307692307693, "grad_norm": 0.0026724545750766993, "learning_rate": 3.6938145957399197e-10, "loss": 0.0, "step": 25940 }, { "epoch": 24.943269230769232, "grad_norm": 0.0008632023818790913, "learning_rate": 3.5875239900229343e-10, "loss": 0.0, "step": 25941 }, { "epoch": 24.94423076923077, "grad_norm": 0.0010238389950245619, "learning_rate": 3.482785035147096e-10, "loss": 0.0, "step": 25942 }, { "epoch": 24.94519230769231, "grad_norm": 0.0005624432233162224, "learning_rate": 3.379597732733331e-10, "loss": 0.0, "step": 25943 }, { "epoch": 24.946153846153845, "grad_norm": 0.0006548817036673427, "learning_rate": 3.277962084369257e-10, "loss": 0.0, "step": 25944 }, { "epoch": 24.947115384615383, "grad_norm": 0.0013713666703552008, "learning_rate": 3.177878091653597e-10, "loss": 0.0, "step": 25945 }, { "epoch": 24.948076923076922, "grad_norm": 0.0007904343074187636, "learning_rate": 3.0793457561295593e-10, "loss": 0.0, "step": 25946 }, { "epoch": 24.94903846153846, "grad_norm": 0.0010681377025321126, "learning_rate": 2.9823650793292524e-10, "loss": 0.0, "step": 25947 }, { "epoch": 24.95, "grad_norm": 0.001980321714654565, "learning_rate": 2.8869360627514776e-10, "loss": 0.0, "step": 25948 }, { "epoch": 24.950961538461538, "grad_norm": 0.004179542884230614, "learning_rate": 2.7930587078839333e-10, "loss": 0.0, "step": 25949 }, { "epoch": 24.951923076923077, "grad_norm": 0.0004167706938460469, "learning_rate": 2.700733016181012e-10, "loss": 0.0, "step": 25950 }, { "epoch": 24.952884615384615, "grad_norm": 0.0008045819704420865, "learning_rate": 2.609958989074901e-10, "loss": 0.0, "step": 25951 }, { "epoch": 24.953846153846154, "grad_norm": 0.0005786445108242333, "learning_rate": 2.5207366279644816e-10, "loss": 0.0, "step": 25952 }, { "epoch": 24.954807692307693, "grad_norm": 0.0008538138354197145, "learning_rate": 2.433065934248635e-10, "loss": 0.0, "step": 25953 }, { "epoch": 24.95576923076923, "grad_norm": 0.0009123935597017407, "learning_rate": 2.346946909281833e-10, "loss": 0.0, "step": 25954 }, { "epoch": 24.95673076923077, "grad_norm": 0.00043282058322802186, "learning_rate": 2.2623795544074455e-10, "loss": 0.0, "step": 25955 }, { "epoch": 24.95769230769231, "grad_norm": 0.0018858587136492133, "learning_rate": 2.1793638709244337e-10, "loss": 0.0, "step": 25956 }, { "epoch": 24.958653846153847, "grad_norm": 0.00036334272590465844, "learning_rate": 2.0978998601206558e-10, "loss": 0.0, "step": 25957 }, { "epoch": 24.959615384615386, "grad_norm": 0.0011800170177593827, "learning_rate": 2.0179875232728685e-10, "loss": 0.0, "step": 25958 }, { "epoch": 24.960576923076925, "grad_norm": 0.0008923831046558917, "learning_rate": 1.9396268616245218e-10, "loss": 0.0, "step": 25959 }, { "epoch": 24.96153846153846, "grad_norm": 0.0014346158131957054, "learning_rate": 1.8628178763746563e-10, "loss": 0.0, "step": 25960 }, { "epoch": 24.9625, "grad_norm": 0.000687259656842798, "learning_rate": 1.787560568722313e-10, "loss": 0.0, "step": 25961 }, { "epoch": 24.963461538461537, "grad_norm": 0.001018340466544032, "learning_rate": 1.713854939833226e-10, "loss": 0.0, "step": 25962 }, { "epoch": 24.964423076923076, "grad_norm": 0.0007101314258761704, "learning_rate": 1.641700990850925e-10, "loss": 0.0, "step": 25963 }, { "epoch": 24.965384615384615, "grad_norm": 0.0005594558315351605, "learning_rate": 1.5710987229078378e-10, "loss": 0.0, "step": 25964 }, { "epoch": 24.966346153846153, "grad_norm": 0.0007812751573510468, "learning_rate": 1.5020481370808805e-10, "loss": 0.0, "step": 25965 }, { "epoch": 24.967307692307692, "grad_norm": 0.0015795397339388728, "learning_rate": 1.4345492344580714e-10, "loss": 0.0, "step": 25966 }, { "epoch": 24.96826923076923, "grad_norm": 0.0010327999480068684, "learning_rate": 1.368602016071918e-10, "loss": 0.0, "step": 25967 }, { "epoch": 24.96923076923077, "grad_norm": 0.0008187770727090538, "learning_rate": 1.3042064829549284e-10, "loss": 0.0, "step": 25968 }, { "epoch": 24.970192307692308, "grad_norm": 0.0011936924420297146, "learning_rate": 1.2413626361063025e-10, "loss": 0.0, "step": 25969 }, { "epoch": 24.971153846153847, "grad_norm": 0.0008858708897605538, "learning_rate": 1.1800704765030367e-10, "loss": 0.0, "step": 25970 }, { "epoch": 24.972115384615385, "grad_norm": 0.00034462331677787006, "learning_rate": 1.1203300050888211e-10, "loss": 0.0, "step": 25971 }, { "epoch": 24.973076923076924, "grad_norm": 0.0014988832408562303, "learning_rate": 1.0621412227851401e-10, "loss": 0.0, "step": 25972 }, { "epoch": 24.974038461538463, "grad_norm": 0.0011425450211390853, "learning_rate": 1.0055041305134794e-10, "loss": 0.0, "step": 25973 }, { "epoch": 24.975, "grad_norm": 0.0007376342546194792, "learning_rate": 9.504187291398126e-11, "loss": 0.0, "step": 25974 }, { "epoch": 24.97596153846154, "grad_norm": 0.0017108925385400653, "learning_rate": 8.968850195190116e-11, "loss": 0.0, "step": 25975 }, { "epoch": 24.976923076923075, "grad_norm": 0.0005626562051475048, "learning_rate": 8.449030024837435e-11, "loss": 0.0, "step": 25976 }, { "epoch": 24.977884615384614, "grad_norm": 0.0008815870387479663, "learning_rate": 7.944726788444711e-11, "loss": 0.0, "step": 25977 }, { "epoch": 24.978846153846153, "grad_norm": 0.0010314631508663297, "learning_rate": 7.45594049389453e-11, "loss": 0.0, "step": 25978 }, { "epoch": 24.97980769230769, "grad_norm": 0.0012231121072545648, "learning_rate": 6.982671148514364e-11, "loss": 0.0, "step": 25979 }, { "epoch": 24.98076923076923, "grad_norm": 0.000727129343431443, "learning_rate": 6.52491875996475e-11, "loss": 0.0, "step": 25980 }, { "epoch": 24.98173076923077, "grad_norm": 0.0011350588174536824, "learning_rate": 6.082683335129069e-11, "loss": 0.0, "step": 25981 }, { "epoch": 24.982692307692307, "grad_norm": 0.0004600471875164658, "learning_rate": 5.65596488100173e-11, "loss": 0.0, "step": 25982 }, { "epoch": 24.983653846153846, "grad_norm": 0.00270383944734931, "learning_rate": 5.244763404133046e-11, "loss": 0.0, "step": 25983 }, { "epoch": 24.984615384615385, "grad_norm": 0.0010831952095031738, "learning_rate": 4.84907891085129e-11, "loss": 0.0, "step": 25984 }, { "epoch": 24.985576923076923, "grad_norm": 0.0002486066077835858, "learning_rate": 4.4689114073737105e-11, "loss": 0.0, "step": 25985 }, { "epoch": 24.986538461538462, "grad_norm": 0.00036492120125330985, "learning_rate": 4.104260899584489e-11, "loss": 0.0, "step": 25986 }, { "epoch": 24.9875, "grad_norm": 0.0011547188041731715, "learning_rate": 3.7551273932567856e-11, "loss": 0.0, "step": 25987 }, { "epoch": 24.98846153846154, "grad_norm": 0.000393575377529487, "learning_rate": 3.421510893608648e-11, "loss": 0.0, "step": 25988 }, { "epoch": 24.989423076923078, "grad_norm": 0.0006385951419360936, "learning_rate": 3.103411405858125e-11, "loss": 0.0, "step": 25989 }, { "epoch": 24.990384615384617, "grad_norm": 0.001300725736655295, "learning_rate": 2.800828935001221e-11, "loss": 0.0, "step": 25990 }, { "epoch": 24.991346153846155, "grad_norm": 0.0011983910808339715, "learning_rate": 2.5137634858118933e-11, "loss": 0.0, "step": 25991 }, { "epoch": 24.99230769230769, "grad_norm": 0.000499838381074369, "learning_rate": 2.24221506250899e-11, "loss": 0.0, "step": 25992 }, { "epoch": 24.99326923076923, "grad_norm": 0.37512239813804626, "learning_rate": 1.9861836695334036e-11, "loss": 0.0034, "step": 25993 }, { "epoch": 24.994230769230768, "grad_norm": 0.0014894865453243256, "learning_rate": 1.7456693107709143e-11, "loss": 0.0, "step": 25994 }, { "epoch": 24.995192307692307, "grad_norm": 0.0007897305767983198, "learning_rate": 1.5206719899962808e-11, "loss": 0.0, "step": 25995 }, { "epoch": 24.996153846153845, "grad_norm": 0.0013135324697941542, "learning_rate": 1.311191710651194e-11, "loss": 0.0, "step": 25996 }, { "epoch": 24.997115384615384, "grad_norm": 0.0006317392690107226, "learning_rate": 1.117228475955301e-11, "loss": 0.0, "step": 25997 }, { "epoch": 24.998076923076923, "grad_norm": 0.0007768241339363158, "learning_rate": 9.387822889062037e-12, "loss": 0.0, "step": 25998 }, { "epoch": 24.99903846153846, "grad_norm": 0.0009762974223122001, "learning_rate": 7.758531523904821e-12, "loss": 0.0, "step": 25999 }, { "epoch": 25.0, "grad_norm": 0.001177560188807547, "learning_rate": 6.284410689616494e-12, "loss": 0.0, "step": 26000 } ], "logging_steps": 1.0, "max_steps": 26000, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.366224254416978e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }