| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 7.0, |
| "eval_steps": 500, |
| "global_step": 875, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008, |
| "grad_norm": 3.784468173980713, |
| "learning_rate": 0.0, |
| "loss": 0.8231, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 3.725552797317505, |
| "learning_rate": 1.7543859649122808e-07, |
| "loss": 1.0003, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 3.5788533687591553, |
| "learning_rate": 3.5087719298245616e-07, |
| "loss": 0.8452, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 3.4994688034057617, |
| "learning_rate": 5.263157894736843e-07, |
| "loss": 0.9015, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 3.340575695037842, |
| "learning_rate": 7.017543859649123e-07, |
| "loss": 0.91, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 3.4169387817382812, |
| "learning_rate": 8.771929824561404e-07, |
| "loss": 0.8981, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 3.199388027191162, |
| "learning_rate": 1.0526315789473685e-06, |
| "loss": 0.8082, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 3.5631041526794434, |
| "learning_rate": 1.2280701754385965e-06, |
| "loss": 1.0333, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 2.9914121627807617, |
| "learning_rate": 1.4035087719298246e-06, |
| "loss": 0.9455, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.418433904647827, |
| "learning_rate": 1.5789473684210526e-06, |
| "loss": 0.7966, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 2.4177350997924805, |
| "learning_rate": 1.7543859649122807e-06, |
| "loss": 0.9202, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.7621887922286987, |
| "learning_rate": 1.929824561403509e-06, |
| "loss": 0.8102, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 2.0125510692596436, |
| "learning_rate": 2.105263157894737e-06, |
| "loss": 0.8969, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 1.7342252731323242, |
| "learning_rate": 2.280701754385965e-06, |
| "loss": 0.8587, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.712204098701477, |
| "learning_rate": 2.456140350877193e-06, |
| "loss": 0.8708, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 1.412792682647705, |
| "learning_rate": 2.631578947368421e-06, |
| "loss": 0.8413, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 1.5133228302001953, |
| "learning_rate": 2.8070175438596493e-06, |
| "loss": 0.7527, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 1.6985952854156494, |
| "learning_rate": 2.9824561403508774e-06, |
| "loss": 0.9278, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 1.118333101272583, |
| "learning_rate": 3.157894736842105e-06, |
| "loss": 0.7364, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.1672853231430054, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.8443, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 1.0605567693710327, |
| "learning_rate": 3.5087719298245615e-06, |
| "loss": 0.714, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 1.2411671876907349, |
| "learning_rate": 3.6842105263157896e-06, |
| "loss": 0.8215, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 1.144175410270691, |
| "learning_rate": 3.859649122807018e-06, |
| "loss": 0.6955, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 1.3835996389389038, |
| "learning_rate": 4.035087719298246e-06, |
| "loss": 0.7968, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.1685224771499634, |
| "learning_rate": 4.210526315789474e-06, |
| "loss": 0.7425, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 1.1630430221557617, |
| "learning_rate": 4.385964912280702e-06, |
| "loss": 0.7122, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 1.054180383682251, |
| "learning_rate": 4.56140350877193e-06, |
| "loss": 0.7967, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 1.0310646295547485, |
| "learning_rate": 4.736842105263158e-06, |
| "loss": 0.6822, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 1.1047680377960205, |
| "learning_rate": 4.912280701754386e-06, |
| "loss": 0.739, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.712615966796875, |
| "learning_rate": 5.087719298245615e-06, |
| "loss": 0.7789, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 1.1467419862747192, |
| "learning_rate": 5.263157894736842e-06, |
| "loss": 0.7517, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.9748108386993408, |
| "learning_rate": 5.438596491228071e-06, |
| "loss": 0.7997, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 1.1908164024353027, |
| "learning_rate": 5.6140350877192985e-06, |
| "loss": 0.7893, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 1.160500168800354, |
| "learning_rate": 5.789473684210527e-06, |
| "loss": 0.7667, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.0798025131225586, |
| "learning_rate": 5.964912280701755e-06, |
| "loss": 0.6695, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 1.128517746925354, |
| "learning_rate": 6.140350877192983e-06, |
| "loss": 0.7281, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 1.1588135957717896, |
| "learning_rate": 6.31578947368421e-06, |
| "loss": 0.6856, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.9977500438690186, |
| "learning_rate": 6.491228070175439e-06, |
| "loss": 0.7085, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 1.0899662971496582, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.7503, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.0250076055526733, |
| "learning_rate": 6.842105263157896e-06, |
| "loss": 0.7469, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 1.0345467329025269, |
| "learning_rate": 7.017543859649123e-06, |
| "loss": 0.7034, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.9953707456588745, |
| "learning_rate": 7.192982456140352e-06, |
| "loss": 0.8141, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 1.167507290840149, |
| "learning_rate": 7.368421052631579e-06, |
| "loss": 0.7411, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.9854204058647156, |
| "learning_rate": 7.5438596491228074e-06, |
| "loss": 0.7219, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.1585136651992798, |
| "learning_rate": 7.719298245614036e-06, |
| "loss": 0.7666, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.9028245210647583, |
| "learning_rate": 7.894736842105265e-06, |
| "loss": 0.677, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 1.0796241760253906, |
| "learning_rate": 8.070175438596492e-06, |
| "loss": 0.7579, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.8502395153045654, |
| "learning_rate": 8.24561403508772e-06, |
| "loss": 0.6178, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 1.0221744775772095, |
| "learning_rate": 8.421052631578948e-06, |
| "loss": 0.7143, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.1040499210357666, |
| "learning_rate": 8.596491228070176e-06, |
| "loss": 0.739, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 1.3160624504089355, |
| "learning_rate": 8.771929824561405e-06, |
| "loss": 0.7673, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.9868547916412354, |
| "learning_rate": 8.947368421052632e-06, |
| "loss": 0.6158, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.97400963306427, |
| "learning_rate": 9.12280701754386e-06, |
| "loss": 0.7602, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.9297606945037842, |
| "learning_rate": 9.298245614035088e-06, |
| "loss": 0.6972, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.9819108247756958, |
| "learning_rate": 9.473684210526315e-06, |
| "loss": 0.6661, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.9263789057731628, |
| "learning_rate": 9.649122807017545e-06, |
| "loss": 0.6548, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 1.1375892162322998, |
| "learning_rate": 9.824561403508772e-06, |
| "loss": 0.7532, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.9523381590843201, |
| "learning_rate": 1e-05, |
| "loss": 0.7023, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 1.0329796075820923, |
| "learning_rate": 9.999978367986988e-06, |
| "loss": 0.6951, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.9789192080497742, |
| "learning_rate": 9.999913472135126e-06, |
| "loss": 0.7022, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 1.0567715167999268, |
| "learning_rate": 9.999805313005946e-06, |
| "loss": 0.7192, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.9230039715766907, |
| "learning_rate": 9.99965389153533e-06, |
| "loss": 0.6258, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 1.0457018613815308, |
| "learning_rate": 9.999459209033495e-06, |
| "loss": 0.7205, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.8816724419593811, |
| "learning_rate": 9.999221267184993e-06, |
| "loss": 0.6797, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.7928981184959412, |
| "learning_rate": 9.998940068048688e-06, |
| "loss": 0.6074, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.911626398563385, |
| "learning_rate": 9.998615614057743e-06, |
| "loss": 0.5725, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 1.0122913122177124, |
| "learning_rate": 9.998247908019594e-06, |
| "loss": 0.7628, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.9864280819892883, |
| "learning_rate": 9.997836953115927e-06, |
| "loss": 0.6184, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 1.4457077980041504, |
| "learning_rate": 9.997382752902658e-06, |
| "loss": 0.7471, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.9342664480209351, |
| "learning_rate": 9.996885311309892e-06, |
| "loss": 0.7304, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 1.1148232221603394, |
| "learning_rate": 9.996344632641895e-06, |
| "loss": 0.8129, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 1.0656917095184326, |
| "learning_rate": 9.995760721577053e-06, |
| "loss": 0.7355, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 1.0649811029434204, |
| "learning_rate": 9.995133583167833e-06, |
| "loss": 0.7288, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 1.1274362802505493, |
| "learning_rate": 9.994463222840748e-06, |
| "loss": 0.7243, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.8427581191062927, |
| "learning_rate": 9.993749646396286e-06, |
| "loss": 0.5633, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.98853600025177, |
| "learning_rate": 9.992992860008893e-06, |
| "loss": 0.7045, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.995302677154541, |
| "learning_rate": 9.99219287022689e-06, |
| "loss": 0.6268, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.9867312908172607, |
| "learning_rate": 9.991349683972435e-06, |
| "loss": 0.6828, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.9813309907913208, |
| "learning_rate": 9.990463308541452e-06, |
| "loss": 0.6601, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.9621861577033997, |
| "learning_rate": 9.989533751603578e-06, |
| "loss": 0.6984, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.988609790802002, |
| "learning_rate": 9.988561021202083e-06, |
| "loss": 0.6877, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.9378901720046997, |
| "learning_rate": 9.987545125753818e-06, |
| "loss": 0.6282, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 0.932298481464386, |
| "learning_rate": 9.986486074049131e-06, |
| "loss": 0.7014, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.9098712205886841, |
| "learning_rate": 9.985383875251783e-06, |
| "loss": 0.6461, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.9377619624137878, |
| "learning_rate": 9.98423853889889e-06, |
| "loss": 0.6283, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 1.2829806804656982, |
| "learning_rate": 9.983050074900824e-06, |
| "loss": 0.6713, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.981778085231781, |
| "learning_rate": 9.98181849354113e-06, |
| "loss": 0.643, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 1.2414823770523071, |
| "learning_rate": 9.980543805476447e-06, |
| "loss": 0.7538, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 1.1841117143630981, |
| "learning_rate": 9.979226021736396e-06, |
| "loss": 0.8119, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.5399696826934814, |
| "learning_rate": 9.977865153723508e-06, |
| "loss": 0.726, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 0.8497399687767029, |
| "learning_rate": 9.976461213213104e-06, |
| "loss": 0.6109, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.9964956045150757, |
| "learning_rate": 9.975014212353212e-06, |
| "loss": 0.6957, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.9907211065292358, |
| "learning_rate": 9.973524163664447e-06, |
| "loss": 0.7035, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 1.0158443450927734, |
| "learning_rate": 9.971991080039912e-06, |
| "loss": 0.764, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.0125267505645752, |
| "learning_rate": 9.970414974745077e-06, |
| "loss": 0.6104, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 1.0134661197662354, |
| "learning_rate": 9.968795861417676e-06, |
| "loss": 0.6558, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 1.1835541725158691, |
| "learning_rate": 9.967133754067581e-06, |
| "loss": 0.7853, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.9032031893730164, |
| "learning_rate": 9.965428667076687e-06, |
| "loss": 0.7235, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 1.2750600576400757, |
| "learning_rate": 9.963680615198774e-06, |
| "loss": 0.6768, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.0533162355422974, |
| "learning_rate": 9.961889613559396e-06, |
| "loss": 0.6036, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.9049885869026184, |
| "learning_rate": 9.960055677655743e-06, |
| "loss": 0.5954, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 1.2661690711975098, |
| "learning_rate": 9.958178823356503e-06, |
| "loss": 0.6608, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.957478940486908, |
| "learning_rate": 9.956259066901733e-06, |
| "loss": 0.6722, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 1.0017534494400024, |
| "learning_rate": 9.954296424902709e-06, |
| "loss": 0.699, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.9536829590797424, |
| "learning_rate": 9.95229091434179e-06, |
| "loss": 0.7978, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.9466567635536194, |
| "learning_rate": 9.950242552572272e-06, |
| "loss": 0.6377, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.8663733601570129, |
| "learning_rate": 9.948151357318228e-06, |
| "loss": 0.6943, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.9885082840919495, |
| "learning_rate": 9.946017346674362e-06, |
| "loss": 0.8259, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.9989089369773865, |
| "learning_rate": 9.943840539105853e-06, |
| "loss": 0.7219, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.9034218192100525, |
| "learning_rate": 9.941620953448195e-06, |
| "loss": 0.6568, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.9170519709587097, |
| "learning_rate": 9.939358608907026e-06, |
| "loss": 0.6996, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 1.0670710802078247, |
| "learning_rate": 9.937053525057977e-06, |
| "loss": 0.6088, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 1.0475540161132812, |
| "learning_rate": 9.934705721846487e-06, |
| "loss": 0.7299, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.8856591582298279, |
| "learning_rate": 9.932315219587641e-06, |
| "loss": 0.6026, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.9878740906715393, |
| "learning_rate": 9.92988203896599e-06, |
| "loss": 0.5906, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.9897602796554565, |
| "learning_rate": 9.927406201035368e-06, |
| "loss": 0.6622, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 1.0537142753601074, |
| "learning_rate": 9.924887727218724e-06, |
| "loss": 0.696, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.9410306811332703, |
| "learning_rate": 9.922326639307918e-06, |
| "loss": 0.6053, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.9250759482383728, |
| "learning_rate": 9.919722959463545e-06, |
| "loss": 0.702, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.9259200692176819, |
| "learning_rate": 9.917076710214739e-06, |
| "loss": 0.6032, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 1.0778374671936035, |
| "learning_rate": 9.914387914458983e-06, |
| "loss": 0.6911, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.8513089418411255, |
| "learning_rate": 9.911656595461899e-06, |
| "loss": 0.6422, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 1.1916236877441406, |
| "learning_rate": 9.908882776857057e-06, |
| "loss": 0.7315, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.9031611680984497, |
| "learning_rate": 9.906066482645774e-06, |
| "loss": 0.6034, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8987178802490234, |
| "learning_rate": 9.903207737196892e-06, |
| "loss": 0.7292, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 1.241353988647461, |
| "learning_rate": 9.900306565246579e-06, |
| "loss": 0.7679, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.016, |
| "grad_norm": 1.0967661142349243, |
| "learning_rate": 9.89736299189811e-06, |
| "loss": 0.6499, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 0.8532967567443848, |
| "learning_rate": 9.894377042621654e-06, |
| "loss": 0.6416, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.032, |
| "grad_norm": 0.951801061630249, |
| "learning_rate": 9.891348743254046e-06, |
| "loss": 0.6654, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 1.057618498802185, |
| "learning_rate": 9.888278119998573e-06, |
| "loss": 0.623, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.048, |
| "grad_norm": 0.8845769166946411, |
| "learning_rate": 9.885165199424738e-06, |
| "loss": 0.5969, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 0.8258888721466064, |
| "learning_rate": 9.882010008468038e-06, |
| "loss": 0.4811, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 0.9165515899658203, |
| "learning_rate": 9.878812574429722e-06, |
| "loss": 0.6168, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 0.8288542032241821, |
| "learning_rate": 9.875572924976568e-06, |
| "loss": 0.5462, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 1.180208444595337, |
| "learning_rate": 9.87229108814063e-06, |
| "loss": 0.5903, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 0.9072532653808594, |
| "learning_rate": 9.868967092319003e-06, |
| "loss": 0.5509, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.096, |
| "grad_norm": 1.2131155729293823, |
| "learning_rate": 9.865600966273576e-06, |
| "loss": 0.5167, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 1.149257779121399, |
| "learning_rate": 9.86219273913078e-06, |
| "loss": 0.5793, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.112, |
| "grad_norm": 0.8809230923652649, |
| "learning_rate": 9.858742440381343e-06, |
| "loss": 0.5457, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.8877855539321899, |
| "learning_rate": 9.855250099880026e-06, |
| "loss": 0.5863, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.1280000000000001, |
| "grad_norm": 1.0398492813110352, |
| "learning_rate": 9.851715747845372e-06, |
| "loss": 0.6525, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 0.9238712191581726, |
| "learning_rate": 9.848139414859441e-06, |
| "loss": 0.586, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.144, |
| "grad_norm": 0.9197869896888733, |
| "learning_rate": 9.844521131867546e-06, |
| "loss": 0.4541, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 0.9942979216575623, |
| "learning_rate": 9.840860930177984e-06, |
| "loss": 0.655, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 1.1098957061767578, |
| "learning_rate": 9.837158841461767e-06, |
| "loss": 0.603, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 0.9269440770149231, |
| "learning_rate": 9.833414897752346e-06, |
| "loss": 0.5672, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.176, |
| "grad_norm": 1.0234994888305664, |
| "learning_rate": 9.829629131445342e-06, |
| "loss": 0.5307, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 0.9524175524711609, |
| "learning_rate": 9.825801575298248e-06, |
| "loss": 0.5078, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.192, |
| "grad_norm": 0.9059958457946777, |
| "learning_rate": 9.821932262430164e-06, |
| "loss": 0.5077, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.9674916863441467, |
| "learning_rate": 9.818021226321502e-06, |
| "loss": 0.554, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.208, |
| "grad_norm": 0.9286289215087891, |
| "learning_rate": 9.814068500813692e-06, |
| "loss": 0.5633, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 0.9656635522842407, |
| "learning_rate": 9.8100741201089e-06, |
| "loss": 0.5439, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.224, |
| "grad_norm": 0.9533044099807739, |
| "learning_rate": 9.806038118769724e-06, |
| "loss": 0.5286, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.232, |
| "grad_norm": 0.9019532203674316, |
| "learning_rate": 9.801960531718898e-06, |
| "loss": 0.477, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.9925954341888428, |
| "learning_rate": 9.797841394238987e-06, |
| "loss": 0.6048, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.248, |
| "grad_norm": 1.058506965637207, |
| "learning_rate": 9.793680741972084e-06, |
| "loss": 0.5777, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.256, |
| "grad_norm": 0.9570199847221375, |
| "learning_rate": 9.789478610919508e-06, |
| "loss": 0.5514, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.264, |
| "grad_norm": 0.9736652970314026, |
| "learning_rate": 9.785235037441473e-06, |
| "loss": 0.5932, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.272, |
| "grad_norm": 0.9769409894943237, |
| "learning_rate": 9.780950058256802e-06, |
| "loss": 0.519, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.854789137840271, |
| "learning_rate": 9.77662371044258e-06, |
| "loss": 0.5516, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.288, |
| "grad_norm": 1.084175944328308, |
| "learning_rate": 9.77225603143385e-06, |
| "loss": 0.7489, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.296, |
| "grad_norm": 0.9743218421936035, |
| "learning_rate": 9.767847059023292e-06, |
| "loss": 0.6241, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.304, |
| "grad_norm": 0.7581115365028381, |
| "learning_rate": 9.763396831360884e-06, |
| "loss": 0.586, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.312, |
| "grad_norm": 0.9065977931022644, |
| "learning_rate": 9.75890538695358e-06, |
| "loss": 0.592, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.892532467842102, |
| "learning_rate": 9.75437276466497e-06, |
| "loss": 0.6491, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.328, |
| "grad_norm": 1.1423097848892212, |
| "learning_rate": 9.749799003714954e-06, |
| "loss": 0.5284, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.336, |
| "grad_norm": 0.9437663555145264, |
| "learning_rate": 9.745184143679398e-06, |
| "loss": 0.5462, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 1.9607871770858765, |
| "learning_rate": 9.74052822448978e-06, |
| "loss": 0.6264, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.3519999999999999, |
| "grad_norm": 0.952648937702179, |
| "learning_rate": 9.735831286432869e-06, |
| "loss": 0.6131, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.9836501479148865, |
| "learning_rate": 9.731093370150349e-06, |
| "loss": 0.5708, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.3679999999999999, |
| "grad_norm": 0.9113344550132751, |
| "learning_rate": 9.72631451663849e-06, |
| "loss": 0.441, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.376, |
| "grad_norm": 0.7625031471252441, |
| "learning_rate": 9.721494767247779e-06, |
| "loss": 0.5014, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.384, |
| "grad_norm": 0.841316819190979, |
| "learning_rate": 9.71663416368257e-06, |
| "loss": 0.5227, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.392, |
| "grad_norm": 0.9969580173492432, |
| "learning_rate": 9.71173274800072e-06, |
| "loss": 0.5641, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.746874988079071, |
| "learning_rate": 9.70679056261322e-06, |
| "loss": 0.5006, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 1.0710760354995728, |
| "learning_rate": 9.70180765028384e-06, |
| "loss": 0.5583, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.416, |
| "grad_norm": 0.9600836038589478, |
| "learning_rate": 9.696784054128749e-06, |
| "loss": 0.6547, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.424, |
| "grad_norm": 0.9146630764007568, |
| "learning_rate": 9.691719817616148e-06, |
| "loss": 0.6432, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.432, |
| "grad_norm": 1.3010084629058838, |
| "learning_rate": 9.686614984565888e-06, |
| "loss": 0.6419, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.8703446388244629, |
| "learning_rate": 9.681469599149093e-06, |
| "loss": 0.6048, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.448, |
| "grad_norm": 0.9326883554458618, |
| "learning_rate": 9.676283705887783e-06, |
| "loss": 0.5797, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.456, |
| "grad_norm": 0.8628938794136047, |
| "learning_rate": 9.671057349654481e-06, |
| "loss": 0.5471, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.464, |
| "grad_norm": 0.8387150168418884, |
| "learning_rate": 9.66579057567183e-06, |
| "loss": 0.4629, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.472, |
| "grad_norm": 0.9188330769538879, |
| "learning_rate": 9.660483429512198e-06, |
| "loss": 0.5265, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.8853181600570679, |
| "learning_rate": 9.65513595709729e-06, |
| "loss": 0.5754, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.488, |
| "grad_norm": 0.8418251872062683, |
| "learning_rate": 9.649748204697741e-06, |
| "loss": 0.5653, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.496, |
| "grad_norm": 1.0010942220687866, |
| "learning_rate": 9.644320218932723e-06, |
| "loss": 0.6375, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.504, |
| "grad_norm": 0.9441695213317871, |
| "learning_rate": 9.63885204676954e-06, |
| "loss": 0.5651, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.512, |
| "grad_norm": 0.774837076663971, |
| "learning_rate": 9.63334373552322e-06, |
| "loss": 0.4978, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 1.0337467193603516, |
| "learning_rate": 9.627795332856107e-06, |
| "loss": 0.7022, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.528, |
| "grad_norm": 1.0459073781967163, |
| "learning_rate": 9.622206886777448e-06, |
| "loss": 0.6576, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 0.8906083106994629, |
| "learning_rate": 9.616578445642982e-06, |
| "loss": 0.5239, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.544, |
| "grad_norm": 0.7993283271789551, |
| "learning_rate": 9.61091005815451e-06, |
| "loss": 0.5833, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.552, |
| "grad_norm": 0.9584801197052002, |
| "learning_rate": 9.605201773359485e-06, |
| "loss": 0.5593, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 1.0783772468566895, |
| "learning_rate": 9.599453640650585e-06, |
| "loss": 0.5807, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.568, |
| "grad_norm": 0.9267758727073669, |
| "learning_rate": 9.59366570976528e-06, |
| "loss": 0.5253, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.576, |
| "grad_norm": 0.879656195640564, |
| "learning_rate": 9.587838030785413e-06, |
| "loss": 0.5637, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.584, |
| "grad_norm": 0.7291843891143799, |
| "learning_rate": 9.581970654136752e-06, |
| "loss": 0.4746, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.592, |
| "grad_norm": 1.1042965650558472, |
| "learning_rate": 9.576063630588563e-06, |
| "loss": 0.5335, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.89472496509552, |
| "learning_rate": 9.570117011253173e-06, |
| "loss": 0.5244, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.608, |
| "grad_norm": 0.899229109287262, |
| "learning_rate": 9.56413084758552e-06, |
| "loss": 0.5962, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.616, |
| "grad_norm": 0.9164184331893921, |
| "learning_rate": 9.55810519138271e-06, |
| "loss": 0.5423, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.624, |
| "grad_norm": 0.949131429195404, |
| "learning_rate": 9.552040094783575e-06, |
| "loss": 0.6698, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.6320000000000001, |
| "grad_norm": 1.300114393234253, |
| "learning_rate": 9.545935610268213e-06, |
| "loss": 0.7455, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 0.9564195871353149, |
| "learning_rate": 9.53979179065754e-06, |
| "loss": 0.6041, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.6480000000000001, |
| "grad_norm": 0.8021354675292969, |
| "learning_rate": 9.533608689112827e-06, |
| "loss": 0.5842, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.6560000000000001, |
| "grad_norm": 0.8193942904472351, |
| "learning_rate": 9.527386359135254e-06, |
| "loss": 0.4656, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 0.9571919441223145, |
| "learning_rate": 9.521124854565425e-06, |
| "loss": 0.6065, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.6720000000000002, |
| "grad_norm": 0.6706253886222839, |
| "learning_rate": 9.514824229582922e-06, |
| "loss": 0.5117, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 0.896847665309906, |
| "learning_rate": 9.508484538705823e-06, |
| "loss": 0.4671, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.688, |
| "grad_norm": 1.0854939222335815, |
| "learning_rate": 9.50210583679024e-06, |
| "loss": 0.5669, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.696, |
| "grad_norm": 0.83414626121521, |
| "learning_rate": 9.495688179029838e-06, |
| "loss": 0.5015, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 0.8416348695755005, |
| "learning_rate": 9.48923162095536e-06, |
| "loss": 0.5467, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.712, |
| "grad_norm": 0.8415360450744629, |
| "learning_rate": 9.482736218434144e-06, |
| "loss": 0.5002, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.8762681484222412, |
| "learning_rate": 9.476202027669644e-06, |
| "loss": 0.5915, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.728, |
| "grad_norm": 0.9399668574333191, |
| "learning_rate": 9.469629105200937e-06, |
| "loss": 0.6083, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.736, |
| "grad_norm": 0.8347932696342468, |
| "learning_rate": 9.463017507902245e-06, |
| "loss": 0.5588, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.744, |
| "grad_norm": 0.8872931003570557, |
| "learning_rate": 9.45636729298243e-06, |
| "loss": 0.5585, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.752, |
| "grad_norm": 0.9400362968444824, |
| "learning_rate": 9.449678517984503e-06, |
| "loss": 0.4949, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.9362907409667969, |
| "learning_rate": 9.442951240785135e-06, |
| "loss": 0.4697, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.768, |
| "grad_norm": 0.9293828010559082, |
| "learning_rate": 9.436185519594145e-06, |
| "loss": 0.5465, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.776, |
| "grad_norm": 0.9569793939590454, |
| "learning_rate": 9.429381412954e-06, |
| "loss": 0.5107, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.784, |
| "grad_norm": 0.8464429974555969, |
| "learning_rate": 9.422538979739307e-06, |
| "loss": 0.5828, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 0.9483685493469238, |
| "learning_rate": 9.415658279156312e-06, |
| "loss": 0.5515, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.8740932941436768, |
| "learning_rate": 9.408739370742372e-06, |
| "loss": 0.5731, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.808, |
| "grad_norm": 0.8768803477287292, |
| "learning_rate": 9.401782314365458e-06, |
| "loss": 0.5504, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.8159999999999998, |
| "grad_norm": 0.9519500732421875, |
| "learning_rate": 9.39478717022362e-06, |
| "loss": 0.5498, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.8239999999999998, |
| "grad_norm": 1.1657662391662598, |
| "learning_rate": 9.387753998844482e-06, |
| "loss": 0.7251, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.8319999999999999, |
| "grad_norm": 0.9255201816558838, |
| "learning_rate": 9.380682861084703e-06, |
| "loss": 0.5701, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 0.8490859270095825, |
| "learning_rate": 9.37357381812946e-06, |
| "loss": 0.5303, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.8479999999999999, |
| "grad_norm": 0.9158905148506165, |
| "learning_rate": 9.366426931491917e-06, |
| "loss": 0.6711, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.8559999999999999, |
| "grad_norm": 0.8813873529434204, |
| "learning_rate": 9.359242263012693e-06, |
| "loss": 0.5088, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.8639999999999999, |
| "grad_norm": 0.9613426327705383, |
| "learning_rate": 9.352019874859326e-06, |
| "loss": 0.5749, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.8719999999999999, |
| "grad_norm": 0.7892547845840454, |
| "learning_rate": 9.344759829525734e-06, |
| "loss": 0.4288, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.9048332571983337, |
| "learning_rate": 9.33746218983167e-06, |
| "loss": 0.5524, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.888, |
| "grad_norm": 1.0441476106643677, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.5697, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.896, |
| "grad_norm": 0.8699061274528503, |
| "learning_rate": 9.32275438026711e-06, |
| "loss": 0.5447, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.904, |
| "grad_norm": 0.8975421786308289, |
| "learning_rate": 9.315344337660422e-06, |
| "loss": 0.4788, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.912, |
| "grad_norm": 0.8610514402389526, |
| "learning_rate": 9.307896955219787e-06, |
| "loss": 0.5311, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.9291175007820129, |
| "learning_rate": 9.300412297385954e-06, |
| "loss": 0.4566, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.928, |
| "grad_norm": 0.8736976981163025, |
| "learning_rate": 9.29289042892221e-06, |
| "loss": 0.5223, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.936, |
| "grad_norm": 0.9177389144897461, |
| "learning_rate": 9.285331414913816e-06, |
| "loss": 0.5391, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.944, |
| "grad_norm": 0.8681254386901855, |
| "learning_rate": 9.277735320767449e-06, |
| "loss": 0.4491, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.952, |
| "grad_norm": 0.9195399284362793, |
| "learning_rate": 9.270102212210632e-06, |
| "loss": 0.5366, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 1.0078879594802856, |
| "learning_rate": 9.262432155291167e-06, |
| "loss": 0.5684, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.968, |
| "grad_norm": 0.9095841646194458, |
| "learning_rate": 9.254725216376562e-06, |
| "loss": 0.559, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.976, |
| "grad_norm": 0.8708402514457703, |
| "learning_rate": 9.246981462153456e-06, |
| "loss": 0.5294, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.984, |
| "grad_norm": 0.8832100033760071, |
| "learning_rate": 9.239200959627048e-06, |
| "loss": 0.652, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.992, |
| "grad_norm": 0.8539401292800903, |
| "learning_rate": 9.231383776120512e-06, |
| "loss": 0.5354, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8361132144927979, |
| "learning_rate": 9.223529979274411e-06, |
| "loss": 0.5124, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.008, |
| "grad_norm": 1.3811569213867188, |
| "learning_rate": 9.215639637046121e-06, |
| "loss": 0.5122, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.016, |
| "grad_norm": 1.0794579982757568, |
| "learning_rate": 9.207712817709237e-06, |
| "loss": 0.4453, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.024, |
| "grad_norm": 0.8161504864692688, |
| "learning_rate": 9.19974958985298e-06, |
| "loss": 0.4507, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.032, |
| "grad_norm": 0.934089183807373, |
| "learning_rate": 9.191750022381613e-06, |
| "loss": 0.3668, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 0.9390188455581665, |
| "learning_rate": 9.183714184513832e-06, |
| "loss": 0.4384, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.048, |
| "grad_norm": 1.3397053480148315, |
| "learning_rate": 9.175642145782179e-06, |
| "loss": 0.4417, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.056, |
| "grad_norm": 0.9079061150550842, |
| "learning_rate": 9.16753397603243e-06, |
| "loss": 0.4005, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.064, |
| "grad_norm": 1.0313668251037598, |
| "learning_rate": 9.159389745423003e-06, |
| "loss": 0.4249, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.072, |
| "grad_norm": 0.6768733859062195, |
| "learning_rate": 9.151209524424333e-06, |
| "loss": 0.3698, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 0.8785944581031799, |
| "learning_rate": 9.142993383818284e-06, |
| "loss": 0.35, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.088, |
| "grad_norm": 1.2951953411102295, |
| "learning_rate": 9.134741394697517e-06, |
| "loss": 0.5264, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.096, |
| "grad_norm": 0.9182500839233398, |
| "learning_rate": 9.126453628464889e-06, |
| "loss": 0.3614, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.104, |
| "grad_norm": 1.2565897703170776, |
| "learning_rate": 9.118130156832823e-06, |
| "loss": 0.4656, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.112, |
| "grad_norm": 0.857447624206543, |
| "learning_rate": 9.109771051822702e-06, |
| "loss": 0.388, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 0.8584538102149963, |
| "learning_rate": 9.10137638576423e-06, |
| "loss": 0.3833, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 1.0366853475570679, |
| "learning_rate": 9.09294623129482e-06, |
| "loss": 0.3688, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.136, |
| "grad_norm": 0.8696836233139038, |
| "learning_rate": 9.084480661358954e-06, |
| "loss": 0.4356, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.144, |
| "grad_norm": 0.8424058556556702, |
| "learning_rate": 9.07597974920756e-06, |
| "loss": 0.4523, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.152, |
| "grad_norm": 0.8936377763748169, |
| "learning_rate": 9.067443568397378e-06, |
| "loss": 0.4195, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 0.7706946730613708, |
| "learning_rate": 9.058872192790314e-06, |
| "loss": 0.4114, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.168, |
| "grad_norm": 0.8530108332633972, |
| "learning_rate": 9.05026569655281e-06, |
| "loss": 0.4485, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.176, |
| "grad_norm": 0.9020074009895325, |
| "learning_rate": 9.041624154155208e-06, |
| "loss": 0.4264, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.184, |
| "grad_norm": 0.6583293676376343, |
| "learning_rate": 9.032947640371086e-06, |
| "loss": 0.3326, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 0.8414630889892578, |
| "learning_rate": 9.02423623027663e-06, |
| "loss": 0.3968, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.9140536785125732, |
| "learning_rate": 9.01548999924997e-06, |
| "loss": 0.3917, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.208, |
| "grad_norm": 0.8281865119934082, |
| "learning_rate": 9.006709022970547e-06, |
| "loss": 0.4069, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.216, |
| "grad_norm": 0.7317336201667786, |
| "learning_rate": 8.997893377418432e-06, |
| "loss": 0.4185, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.224, |
| "grad_norm": 1.2639456987380981, |
| "learning_rate": 8.98904313887369e-06, |
| "loss": 0.3394, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.232, |
| "grad_norm": 0.8909697532653809, |
| "learning_rate": 8.980158383915714e-06, |
| "loss": 0.4178, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 0.8912489414215088, |
| "learning_rate": 8.971239189422555e-06, |
| "loss": 0.4751, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.248, |
| "grad_norm": 0.8743604421615601, |
| "learning_rate": 8.962285632570266e-06, |
| "loss": 0.438, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.2560000000000002, |
| "grad_norm": 0.9688166379928589, |
| "learning_rate": 8.953297790832231e-06, |
| "loss": 0.4437, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.2640000000000002, |
| "grad_norm": 0.9198762774467468, |
| "learning_rate": 8.944275741978495e-06, |
| "loss": 0.4303, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.2720000000000002, |
| "grad_norm": 0.8178912997245789, |
| "learning_rate": 8.935219564075087e-06, |
| "loss": 0.4714, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 1.30088472366333, |
| "learning_rate": 8.92612933548335e-06, |
| "loss": 0.3948, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.288, |
| "grad_norm": 1.0883452892303467, |
| "learning_rate": 8.917005134859263e-06, |
| "loss": 0.3697, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.296, |
| "grad_norm": 1.3085649013519287, |
| "learning_rate": 8.907847041152757e-06, |
| "loss": 0.4761, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.304, |
| "grad_norm": 0.8386606574058533, |
| "learning_rate": 8.89865513360703e-06, |
| "loss": 0.3663, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.312, |
| "grad_norm": 0.8883110284805298, |
| "learning_rate": 8.889429491757872e-06, |
| "loss": 0.4755, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.8336955308914185, |
| "learning_rate": 8.88017019543296e-06, |
| "loss": 0.3742, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.328, |
| "grad_norm": 0.7842932939529419, |
| "learning_rate": 8.870877324751186e-06, |
| "loss": 0.3338, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.336, |
| "grad_norm": 0.8498310446739197, |
| "learning_rate": 8.861550960121946e-06, |
| "loss": 0.4814, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.344, |
| "grad_norm": 0.785335123538971, |
| "learning_rate": 8.852191182244456e-06, |
| "loss": 0.3093, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.352, |
| "grad_norm": 0.758841872215271, |
| "learning_rate": 8.842798072107055e-06, |
| "loss": 0.4555, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 0.8311530947685242, |
| "learning_rate": 8.833371710986493e-06, |
| "loss": 0.3642, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.368, |
| "grad_norm": 0.8695397973060608, |
| "learning_rate": 8.823912180447237e-06, |
| "loss": 0.3611, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.376, |
| "grad_norm": 0.9586389660835266, |
| "learning_rate": 8.81441956234076e-06, |
| "loss": 0.4375, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.384, |
| "grad_norm": 1.3389581441879272, |
| "learning_rate": 8.804893938804839e-06, |
| "loss": 0.4707, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.392, |
| "grad_norm": 0.8372800946235657, |
| "learning_rate": 8.795335392262841e-06, |
| "loss": 0.4676, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.9432985186576843, |
| "learning_rate": 8.785744005423003e-06, |
| "loss": 0.5151, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.408, |
| "grad_norm": 0.7734491229057312, |
| "learning_rate": 8.77611986127773e-06, |
| "loss": 0.4515, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.416, |
| "grad_norm": 0.7202035784721375, |
| "learning_rate": 8.766463043102864e-06, |
| "loss": 0.3825, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.424, |
| "grad_norm": 0.9535182118415833, |
| "learning_rate": 8.756773634456975e-06, |
| "loss": 0.5219, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.432, |
| "grad_norm": 0.7026752233505249, |
| "learning_rate": 8.747051719180626e-06, |
| "loss": 0.36, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 0.7914669513702393, |
| "learning_rate": 8.737297381395657e-06, |
| "loss": 0.4205, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 0.7313674688339233, |
| "learning_rate": 8.727510705504453e-06, |
| "loss": 0.4484, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.456, |
| "grad_norm": 0.8241586089134216, |
| "learning_rate": 8.717691776189214e-06, |
| "loss": 0.3453, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.464, |
| "grad_norm": 0.9311320185661316, |
| "learning_rate": 8.707840678411223e-06, |
| "loss": 0.3825, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.472, |
| "grad_norm": 0.8270311951637268, |
| "learning_rate": 8.69795749741011e-06, |
| "loss": 0.4327, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 0.8111954927444458, |
| "learning_rate": 8.688042318703111e-06, |
| "loss": 0.3797, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.488, |
| "grad_norm": 0.8930568695068359, |
| "learning_rate": 8.678095228084343e-06, |
| "loss": 0.3948, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.496, |
| "grad_norm": 1.5294451713562012, |
| "learning_rate": 8.66811631162404e-06, |
| "loss": 0.4458, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.504, |
| "grad_norm": 1.0527986288070679, |
| "learning_rate": 8.65810565566782e-06, |
| "loss": 0.37, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 0.79234379529953, |
| "learning_rate": 8.648063346835943e-06, |
| "loss": 0.4103, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 0.8093475103378296, |
| "learning_rate": 8.637989472022548e-06, |
| "loss": 0.3947, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.528, |
| "grad_norm": 0.9096853733062744, |
| "learning_rate": 8.627884118394913e-06, |
| "loss": 0.3167, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.536, |
| "grad_norm": 0.7235214114189148, |
| "learning_rate": 8.617747373392697e-06, |
| "loss": 0.405, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.544, |
| "grad_norm": 0.8731074929237366, |
| "learning_rate": 8.607579324727175e-06, |
| "loss": 0.4647, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.552, |
| "grad_norm": 0.8666344881057739, |
| "learning_rate": 8.597380060380493e-06, |
| "loss": 0.3891, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 1.1291053295135498, |
| "learning_rate": 8.5871496686049e-06, |
| "loss": 0.461, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.568, |
| "grad_norm": 0.9848377108573914, |
| "learning_rate": 8.576888237921983e-06, |
| "loss": 0.4388, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.576, |
| "grad_norm": 0.8755123019218445, |
| "learning_rate": 8.566595857121902e-06, |
| "loss": 0.4386, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.584, |
| "grad_norm": 1.2679446935653687, |
| "learning_rate": 8.556272615262623e-06, |
| "loss": 0.3827, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.592, |
| "grad_norm": 1.340916395187378, |
| "learning_rate": 8.545918601669147e-06, |
| "loss": 0.4543, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.7855986952781677, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 0.3923, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.608, |
| "grad_norm": 0.829623818397522, |
| "learning_rate": 8.525118617910144e-06, |
| "loss": 0.4529, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.616, |
| "grad_norm": 0.932386040687561, |
| "learning_rate": 8.514672827722824e-06, |
| "loss": 0.4716, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.624, |
| "grad_norm": 0.864211916923523, |
| "learning_rate": 8.504196625756166e-06, |
| "loss": 0.3825, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.632, |
| "grad_norm": 0.7904481291770935, |
| "learning_rate": 8.493690102658703e-06, |
| "loss": 0.3582, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 0.6928600072860718, |
| "learning_rate": 8.483153349341336e-06, |
| "loss": 0.3512, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.648, |
| "grad_norm": 0.7888721227645874, |
| "learning_rate": 8.472586456976534e-06, |
| "loss": 0.4814, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.656, |
| "grad_norm": 0.8113727569580078, |
| "learning_rate": 8.461989516997565e-06, |
| "loss": 0.4022, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.664, |
| "grad_norm": 0.9936776757240295, |
| "learning_rate": 8.45136262109768e-06, |
| "loss": 0.6013, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.672, |
| "grad_norm": 0.6121838688850403, |
| "learning_rate": 8.440705861229344e-06, |
| "loss": 0.3144, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 0.9799290299415588, |
| "learning_rate": 8.430019329603423e-06, |
| "loss": 0.4724, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.6879999999999997, |
| "grad_norm": 0.8076691627502441, |
| "learning_rate": 8.41930311868839e-06, |
| "loss": 0.404, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.6959999999999997, |
| "grad_norm": 0.9041216373443604, |
| "learning_rate": 8.408557321209534e-06, |
| "loss": 0.4434, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.7039999999999997, |
| "grad_norm": 0.8356102108955383, |
| "learning_rate": 8.397782030148147e-06, |
| "loss": 0.4153, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.7119999999999997, |
| "grad_norm": 1.0063518285751343, |
| "learning_rate": 8.386977338740724e-06, |
| "loss": 0.421, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 0.741091787815094, |
| "learning_rate": 8.376143340478153e-06, |
| "loss": 0.3746, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.7279999999999998, |
| "grad_norm": 0.992077112197876, |
| "learning_rate": 8.365280129104912e-06, |
| "loss": 0.4036, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.7359999999999998, |
| "grad_norm": 0.8580658435821533, |
| "learning_rate": 8.354387798618254e-06, |
| "loss": 0.3937, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.7439999999999998, |
| "grad_norm": 0.9947491884231567, |
| "learning_rate": 8.34346644326739e-06, |
| "loss": 0.3879, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.752, |
| "grad_norm": 0.8819236159324646, |
| "learning_rate": 8.332516157552684e-06, |
| "loss": 0.4296, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 0.7174811363220215, |
| "learning_rate": 8.321537036224822e-06, |
| "loss": 0.3896, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 0.7424646019935608, |
| "learning_rate": 8.310529174284004e-06, |
| "loss": 0.3654, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.776, |
| "grad_norm": 0.9448553323745728, |
| "learning_rate": 8.299492666979114e-06, |
| "loss": 0.3425, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.784, |
| "grad_norm": 0.7875136137008667, |
| "learning_rate": 8.288427609806899e-06, |
| "loss": 0.3703, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.792, |
| "grad_norm": 0.9348728060722351, |
| "learning_rate": 8.277334098511147e-06, |
| "loss": 0.4224, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.7805924415588379, |
| "learning_rate": 8.266212229081846e-06, |
| "loss": 0.3679, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.808, |
| "grad_norm": 0.8468799591064453, |
| "learning_rate": 8.255062097754371e-06, |
| "loss": 0.4477, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.816, |
| "grad_norm": 0.7618088126182556, |
| "learning_rate": 8.243883801008632e-06, |
| "loss": 0.4523, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.824, |
| "grad_norm": 0.7418726086616516, |
| "learning_rate": 8.232677435568252e-06, |
| "loss": 0.4286, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 0.7962421774864197, |
| "learning_rate": 8.221443098399733e-06, |
| "loss": 0.3583, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 0.9815137982368469, |
| "learning_rate": 8.210180886711603e-06, |
| "loss": 0.4421, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.848, |
| "grad_norm": 0.8846978545188904, |
| "learning_rate": 8.198890897953586e-06, |
| "loss": 0.4625, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.856, |
| "grad_norm": 0.7743475437164307, |
| "learning_rate": 8.187573229815757e-06, |
| "loss": 0.3382, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.864, |
| "grad_norm": 0.9756518602371216, |
| "learning_rate": 8.176227980227693e-06, |
| "loss": 0.4967, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.872, |
| "grad_norm": 0.919818103313446, |
| "learning_rate": 8.164855247357628e-06, |
| "loss": 0.4667, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 0.7409064173698425, |
| "learning_rate": 8.153455129611605e-06, |
| "loss": 0.3253, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.888, |
| "grad_norm": 0.8657407760620117, |
| "learning_rate": 8.142027725632622e-06, |
| "loss": 0.4967, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.896, |
| "grad_norm": 0.8034350275993347, |
| "learning_rate": 8.130573134299782e-06, |
| "loss": 0.4267, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.904, |
| "grad_norm": 0.8715494871139526, |
| "learning_rate": 8.119091454727427e-06, |
| "loss": 0.4406, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.912, |
| "grad_norm": 0.6950966715812683, |
| "learning_rate": 8.107582786264299e-06, |
| "loss": 0.3652, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 0.8034759163856506, |
| "learning_rate": 8.09604722849266e-06, |
| "loss": 0.4174, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.928, |
| "grad_norm": 0.9261465072631836, |
| "learning_rate": 8.084484881227449e-06, |
| "loss": 0.348, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.936, |
| "grad_norm": 0.804601788520813, |
| "learning_rate": 8.072895844515398e-06, |
| "loss": 0.4119, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.944, |
| "grad_norm": 0.9597411751747131, |
| "learning_rate": 8.061280218634192e-06, |
| "loss": 0.4271, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.952, |
| "grad_norm": 0.9602273106575012, |
| "learning_rate": 8.049638104091575e-06, |
| "loss": 0.4448, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 0.8276174664497375, |
| "learning_rate": 8.037969601624495e-06, |
| "loss": 0.3886, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.968, |
| "grad_norm": 0.9530600309371948, |
| "learning_rate": 8.026274812198235e-06, |
| "loss": 0.4353, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.976, |
| "grad_norm": 0.8712935447692871, |
| "learning_rate": 8.014553837005527e-06, |
| "loss": 0.4336, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.984, |
| "grad_norm": 0.7948423624038696, |
| "learning_rate": 8.002806777465685e-06, |
| "loss": 0.4402, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.992, |
| "grad_norm": 0.8660122156143188, |
| "learning_rate": 7.99103373522373e-06, |
| "loss": 0.3639, |
| "step": 374 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.9771859049797058, |
| "learning_rate": 7.9792348121495e-06, |
| "loss": 0.4096, |
| "step": 375 |
| }, |
| { |
| "epoch": 3.008, |
| "grad_norm": 0.9802374243736267, |
| "learning_rate": 7.967410110336782e-06, |
| "loss": 0.336, |
| "step": 376 |
| }, |
| { |
| "epoch": 3.016, |
| "grad_norm": 1.075129747390747, |
| "learning_rate": 7.955559732102414e-06, |
| "loss": 0.2861, |
| "step": 377 |
| }, |
| { |
| "epoch": 3.024, |
| "grad_norm": 0.8979294896125793, |
| "learning_rate": 7.943683779985412e-06, |
| "loss": 0.3531, |
| "step": 378 |
| }, |
| { |
| "epoch": 3.032, |
| "grad_norm": 0.9654936790466309, |
| "learning_rate": 7.931782356746076e-06, |
| "loss": 0.2724, |
| "step": 379 |
| }, |
| { |
| "epoch": 3.04, |
| "grad_norm": 0.9817936420440674, |
| "learning_rate": 7.919855565365102e-06, |
| "loss": 0.2525, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.048, |
| "grad_norm": 1.1742961406707764, |
| "learning_rate": 7.907903509042696e-06, |
| "loss": 0.2952, |
| "step": 381 |
| }, |
| { |
| "epoch": 3.056, |
| "grad_norm": 0.9033184051513672, |
| "learning_rate": 7.895926291197667e-06, |
| "loss": 0.2767, |
| "step": 382 |
| }, |
| { |
| "epoch": 3.064, |
| "grad_norm": 0.7659105062484741, |
| "learning_rate": 7.883924015466554e-06, |
| "loss": 0.2231, |
| "step": 383 |
| }, |
| { |
| "epoch": 3.072, |
| "grad_norm": 0.8166160583496094, |
| "learning_rate": 7.871896785702707e-06, |
| "loss": 0.2325, |
| "step": 384 |
| }, |
| { |
| "epoch": 3.08, |
| "grad_norm": 0.7707303762435913, |
| "learning_rate": 7.859844705975405e-06, |
| "loss": 0.1768, |
| "step": 385 |
| }, |
| { |
| "epoch": 3.088, |
| "grad_norm": 1.0003777742385864, |
| "learning_rate": 7.847767880568944e-06, |
| "loss": 0.3651, |
| "step": 386 |
| }, |
| { |
| "epoch": 3.096, |
| "grad_norm": 0.845194399356842, |
| "learning_rate": 7.835666413981744e-06, |
| "loss": 0.2742, |
| "step": 387 |
| }, |
| { |
| "epoch": 3.104, |
| "grad_norm": 0.7469092607498169, |
| "learning_rate": 7.823540410925434e-06, |
| "loss": 0.2141, |
| "step": 388 |
| }, |
| { |
| "epoch": 3.112, |
| "grad_norm": 0.8582166433334351, |
| "learning_rate": 7.811389976323963e-06, |
| "loss": 0.2813, |
| "step": 389 |
| }, |
| { |
| "epoch": 3.12, |
| "grad_norm": 0.8023694753646851, |
| "learning_rate": 7.799215215312667e-06, |
| "loss": 0.2098, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.128, |
| "grad_norm": 0.8036847114562988, |
| "learning_rate": 7.787016233237387e-06, |
| "loss": 0.2091, |
| "step": 391 |
| }, |
| { |
| "epoch": 3.136, |
| "grad_norm": 0.9070097804069519, |
| "learning_rate": 7.774793135653537e-06, |
| "loss": 0.2785, |
| "step": 392 |
| }, |
| { |
| "epoch": 3.144, |
| "grad_norm": 0.8162369132041931, |
| "learning_rate": 7.7625460283252e-06, |
| "loss": 0.2755, |
| "step": 393 |
| }, |
| { |
| "epoch": 3.152, |
| "grad_norm": 0.8396410942077637, |
| "learning_rate": 7.750275017224208e-06, |
| "loss": 0.3056, |
| "step": 394 |
| }, |
| { |
| "epoch": 3.16, |
| "grad_norm": 0.7635102272033691, |
| "learning_rate": 7.737980208529232e-06, |
| "loss": 0.2045, |
| "step": 395 |
| }, |
| { |
| "epoch": 3.168, |
| "grad_norm": 0.7944246530532837, |
| "learning_rate": 7.725661708624855e-06, |
| "loss": 0.2589, |
| "step": 396 |
| }, |
| { |
| "epoch": 3.176, |
| "grad_norm": 0.8792198300361633, |
| "learning_rate": 7.713319624100657e-06, |
| "loss": 0.2622, |
| "step": 397 |
| }, |
| { |
| "epoch": 3.184, |
| "grad_norm": 0.8552615642547607, |
| "learning_rate": 7.700954061750295e-06, |
| "loss": 0.2594, |
| "step": 398 |
| }, |
| { |
| "epoch": 3.192, |
| "grad_norm": 0.7373338341712952, |
| "learning_rate": 7.688565128570564e-06, |
| "loss": 0.2032, |
| "step": 399 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 1.1373937129974365, |
| "learning_rate": 7.676152931760496e-06, |
| "loss": 0.3086, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.208, |
| "grad_norm": 0.9050979614257812, |
| "learning_rate": 7.663717578720412e-06, |
| "loss": 0.2816, |
| "step": 401 |
| }, |
| { |
| "epoch": 3.216, |
| "grad_norm": 0.8409582376480103, |
| "learning_rate": 7.651259177050996e-06, |
| "loss": 0.2636, |
| "step": 402 |
| }, |
| { |
| "epoch": 3.224, |
| "grad_norm": 0.8253751397132874, |
| "learning_rate": 7.638777834552372e-06, |
| "loss": 0.2511, |
| "step": 403 |
| }, |
| { |
| "epoch": 3.232, |
| "grad_norm": 0.9281233549118042, |
| "learning_rate": 7.626273659223166e-06, |
| "loss": 0.2718, |
| "step": 404 |
| }, |
| { |
| "epoch": 3.24, |
| "grad_norm": 0.745572566986084, |
| "learning_rate": 7.61374675925957e-06, |
| "loss": 0.2602, |
| "step": 405 |
| }, |
| { |
| "epoch": 3.248, |
| "grad_norm": 0.7207661867141724, |
| "learning_rate": 7.601197243054411e-06, |
| "loss": 0.2428, |
| "step": 406 |
| }, |
| { |
| "epoch": 3.2560000000000002, |
| "grad_norm": 0.804275631904602, |
| "learning_rate": 7.588625219196208e-06, |
| "loss": 0.239, |
| "step": 407 |
| }, |
| { |
| "epoch": 3.2640000000000002, |
| "grad_norm": 0.730238676071167, |
| "learning_rate": 7.576030796468233e-06, |
| "loss": 0.2988, |
| "step": 408 |
| }, |
| { |
| "epoch": 3.2720000000000002, |
| "grad_norm": 0.7774324417114258, |
| "learning_rate": 7.563414083847573e-06, |
| "loss": 0.2133, |
| "step": 409 |
| }, |
| { |
| "epoch": 3.2800000000000002, |
| "grad_norm": 0.9310129880905151, |
| "learning_rate": 7.5507751905041885e-06, |
| "loss": 0.3192, |
| "step": 410 |
| }, |
| { |
| "epoch": 3.288, |
| "grad_norm": 0.9868288636207581, |
| "learning_rate": 7.538114225799955e-06, |
| "loss": 0.3086, |
| "step": 411 |
| }, |
| { |
| "epoch": 3.296, |
| "grad_norm": 0.7649028301239014, |
| "learning_rate": 7.525431299287737e-06, |
| "loss": 0.2505, |
| "step": 412 |
| }, |
| { |
| "epoch": 3.304, |
| "grad_norm": 0.7084949016571045, |
| "learning_rate": 7.512726520710429e-06, |
| "loss": 0.2534, |
| "step": 413 |
| }, |
| { |
| "epoch": 3.312, |
| "grad_norm": 0.8004085421562195, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.2705, |
| "step": 414 |
| }, |
| { |
| "epoch": 3.32, |
| "grad_norm": 0.8937138319015503, |
| "learning_rate": 7.4872518472765594e-06, |
| "loss": 0.1749, |
| "step": 415 |
| }, |
| { |
| "epoch": 3.328, |
| "grad_norm": 0.792624831199646, |
| "learning_rate": 7.474482172847391e-06, |
| "loss": 0.2763, |
| "step": 416 |
| }, |
| { |
| "epoch": 3.336, |
| "grad_norm": 0.8165427446365356, |
| "learning_rate": 7.461691087205993e-06, |
| "loss": 0.3184, |
| "step": 417 |
| }, |
| { |
| "epoch": 3.344, |
| "grad_norm": 0.7113141417503357, |
| "learning_rate": 7.4488787010311425e-06, |
| "loss": 0.2336, |
| "step": 418 |
| }, |
| { |
| "epoch": 3.352, |
| "grad_norm": 0.9322896599769592, |
| "learning_rate": 7.436045125185923e-06, |
| "loss": 0.3307, |
| "step": 419 |
| }, |
| { |
| "epoch": 3.36, |
| "grad_norm": 0.7401679754257202, |
| "learning_rate": 7.423190470716761e-06, |
| "loss": 0.2605, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.368, |
| "grad_norm": 0.8005726337432861, |
| "learning_rate": 7.4103148488524824e-06, |
| "loss": 0.2684, |
| "step": 421 |
| }, |
| { |
| "epoch": 3.376, |
| "grad_norm": 0.7309401631355286, |
| "learning_rate": 7.3974183710033334e-06, |
| "loss": 0.2729, |
| "step": 422 |
| }, |
| { |
| "epoch": 3.384, |
| "grad_norm": 0.9116814136505127, |
| "learning_rate": 7.384501148760024e-06, |
| "loss": 0.3469, |
| "step": 423 |
| }, |
| { |
| "epoch": 3.392, |
| "grad_norm": 0.8202308416366577, |
| "learning_rate": 7.371563293892761e-06, |
| "loss": 0.3268, |
| "step": 424 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 0.8762120604515076, |
| "learning_rate": 7.3586049183502875e-06, |
| "loss": 0.2884, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.408, |
| "grad_norm": 0.7986353635787964, |
| "learning_rate": 7.345626134258897e-06, |
| "loss": 0.2388, |
| "step": 426 |
| }, |
| { |
| "epoch": 3.416, |
| "grad_norm": 0.880988597869873, |
| "learning_rate": 7.3326270539214826e-06, |
| "loss": 0.2725, |
| "step": 427 |
| }, |
| { |
| "epoch": 3.424, |
| "grad_norm": 0.8958916664123535, |
| "learning_rate": 7.319607789816555e-06, |
| "loss": 0.251, |
| "step": 428 |
| }, |
| { |
| "epoch": 3.432, |
| "grad_norm": 0.753807008266449, |
| "learning_rate": 7.306568454597269e-06, |
| "loss": 0.2943, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.44, |
| "grad_norm": 0.8259589672088623, |
| "learning_rate": 7.293509161090453e-06, |
| "loss": 0.255, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.448, |
| "grad_norm": 0.725982666015625, |
| "learning_rate": 7.28043002229563e-06, |
| "loss": 0.244, |
| "step": 431 |
| }, |
| { |
| "epoch": 3.456, |
| "grad_norm": 0.8970488905906677, |
| "learning_rate": 7.2673311513840395e-06, |
| "loss": 0.3301, |
| "step": 432 |
| }, |
| { |
| "epoch": 3.464, |
| "grad_norm": 0.7942942380905151, |
| "learning_rate": 7.2542126616976596e-06, |
| "loss": 0.2654, |
| "step": 433 |
| }, |
| { |
| "epoch": 3.472, |
| "grad_norm": 0.9445860981941223, |
| "learning_rate": 7.241074666748228e-06, |
| "loss": 0.2441, |
| "step": 434 |
| }, |
| { |
| "epoch": 3.48, |
| "grad_norm": 0.9545169472694397, |
| "learning_rate": 7.227917280216254e-06, |
| "loss": 0.2609, |
| "step": 435 |
| }, |
| { |
| "epoch": 3.488, |
| "grad_norm": 0.8117399215698242, |
| "learning_rate": 7.214740615950041e-06, |
| "loss": 0.3145, |
| "step": 436 |
| }, |
| { |
| "epoch": 3.496, |
| "grad_norm": 0.8447906970977783, |
| "learning_rate": 7.201544787964698e-06, |
| "loss": 0.2937, |
| "step": 437 |
| }, |
| { |
| "epoch": 3.504, |
| "grad_norm": 0.8552675247192383, |
| "learning_rate": 7.188329910441154e-06, |
| "loss": 0.2547, |
| "step": 438 |
| }, |
| { |
| "epoch": 3.512, |
| "grad_norm": 0.7751868367195129, |
| "learning_rate": 7.175096097725169e-06, |
| "loss": 0.3318, |
| "step": 439 |
| }, |
| { |
| "epoch": 3.52, |
| "grad_norm": 2.058791160583496, |
| "learning_rate": 7.161843464326349e-06, |
| "loss": 0.2861, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.528, |
| "grad_norm": 0.8021336197853088, |
| "learning_rate": 7.148572124917148e-06, |
| "loss": 0.3147, |
| "step": 441 |
| }, |
| { |
| "epoch": 3.536, |
| "grad_norm": 0.9582990407943726, |
| "learning_rate": 7.135282194331881e-06, |
| "loss": 0.3402, |
| "step": 442 |
| }, |
| { |
| "epoch": 3.544, |
| "grad_norm": 0.7796058058738708, |
| "learning_rate": 7.121973787565727e-06, |
| "loss": 0.2842, |
| "step": 443 |
| }, |
| { |
| "epoch": 3.552, |
| "grad_norm": 0.6856812834739685, |
| "learning_rate": 7.1086470197737405e-06, |
| "loss": 0.2571, |
| "step": 444 |
| }, |
| { |
| "epoch": 3.56, |
| "grad_norm": 0.7525624632835388, |
| "learning_rate": 7.095302006269842e-06, |
| "loss": 0.2708, |
| "step": 445 |
| }, |
| { |
| "epoch": 3.568, |
| "grad_norm": 0.6989469528198242, |
| "learning_rate": 7.0819388625258385e-06, |
| "loss": 0.2104, |
| "step": 446 |
| }, |
| { |
| "epoch": 3.576, |
| "grad_norm": 0.861801028251648, |
| "learning_rate": 7.06855770417041e-06, |
| "loss": 0.2718, |
| "step": 447 |
| }, |
| { |
| "epoch": 3.584, |
| "grad_norm": 0.719370424747467, |
| "learning_rate": 7.05515864698811e-06, |
| "loss": 0.2787, |
| "step": 448 |
| }, |
| { |
| "epoch": 3.592, |
| "grad_norm": 0.6387084126472473, |
| "learning_rate": 7.041741806918372e-06, |
| "loss": 0.2023, |
| "step": 449 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.7619750499725342, |
| "learning_rate": 7.028307300054499e-06, |
| "loss": 0.2164, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.608, |
| "grad_norm": 0.8766146302223206, |
| "learning_rate": 7.014855242642662e-06, |
| "loss": 0.3432, |
| "step": 451 |
| }, |
| { |
| "epoch": 3.616, |
| "grad_norm": 0.9562556147575378, |
| "learning_rate": 7.0013857510808934e-06, |
| "loss": 0.3581, |
| "step": 452 |
| }, |
| { |
| "epoch": 3.624, |
| "grad_norm": 0.7040983438491821, |
| "learning_rate": 6.987898941918082e-06, |
| "loss": 0.2897, |
| "step": 453 |
| }, |
| { |
| "epoch": 3.632, |
| "grad_norm": 0.8851937055587769, |
| "learning_rate": 6.974394931852957e-06, |
| "loss": 0.2532, |
| "step": 454 |
| }, |
| { |
| "epoch": 3.64, |
| "grad_norm": 0.9052807092666626, |
| "learning_rate": 6.960873837733089e-06, |
| "loss": 0.3085, |
| "step": 455 |
| }, |
| { |
| "epoch": 3.648, |
| "grad_norm": 0.8503252267837524, |
| "learning_rate": 6.94733577655387e-06, |
| "loss": 0.3077, |
| "step": 456 |
| }, |
| { |
| "epoch": 3.656, |
| "grad_norm": 0.7360372543334961, |
| "learning_rate": 6.933780865457508e-06, |
| "loss": 0.2262, |
| "step": 457 |
| }, |
| { |
| "epoch": 3.664, |
| "grad_norm": 0.813845157623291, |
| "learning_rate": 6.920209221732007e-06, |
| "loss": 0.3619, |
| "step": 458 |
| }, |
| { |
| "epoch": 3.672, |
| "grad_norm": 0.8746098875999451, |
| "learning_rate": 6.90662096281016e-06, |
| "loss": 0.3011, |
| "step": 459 |
| }, |
| { |
| "epoch": 3.68, |
| "grad_norm": 0.828517496585846, |
| "learning_rate": 6.893016206268518e-06, |
| "loss": 0.3173, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.6879999999999997, |
| "grad_norm": 0.7118755578994751, |
| "learning_rate": 6.879395069826394e-06, |
| "loss": 0.3181, |
| "step": 461 |
| }, |
| { |
| "epoch": 3.6959999999999997, |
| "grad_norm": 1.315356969833374, |
| "learning_rate": 6.865757671344827e-06, |
| "loss": 0.3079, |
| "step": 462 |
| }, |
| { |
| "epoch": 3.7039999999999997, |
| "grad_norm": 0.8539767861366272, |
| "learning_rate": 6.85210412882557e-06, |
| "loss": 0.3023, |
| "step": 463 |
| }, |
| { |
| "epoch": 3.7119999999999997, |
| "grad_norm": 0.7354301810264587, |
| "learning_rate": 6.838434560410064e-06, |
| "loss": 0.2199, |
| "step": 464 |
| }, |
| { |
| "epoch": 3.7199999999999998, |
| "grad_norm": 0.8764749765396118, |
| "learning_rate": 6.824749084378428e-06, |
| "loss": 0.3109, |
| "step": 465 |
| }, |
| { |
| "epoch": 3.7279999999999998, |
| "grad_norm": 0.8957481980323792, |
| "learning_rate": 6.811047819148413e-06, |
| "loss": 0.3485, |
| "step": 466 |
| }, |
| { |
| "epoch": 3.7359999999999998, |
| "grad_norm": 0.9940058588981628, |
| "learning_rate": 6.7973308832744035e-06, |
| "loss": 0.2532, |
| "step": 467 |
| }, |
| { |
| "epoch": 3.7439999999999998, |
| "grad_norm": 0.7741367816925049, |
| "learning_rate": 6.783598395446371e-06, |
| "loss": 0.2186, |
| "step": 468 |
| }, |
| { |
| "epoch": 3.752, |
| "grad_norm": 0.7404496073722839, |
| "learning_rate": 6.769850474488859e-06, |
| "loss": 0.2587, |
| "step": 469 |
| }, |
| { |
| "epoch": 3.76, |
| "grad_norm": 0.9231681227684021, |
| "learning_rate": 6.756087239359948e-06, |
| "loss": 0.3029, |
| "step": 470 |
| }, |
| { |
| "epoch": 3.768, |
| "grad_norm": 0.7261999845504761, |
| "learning_rate": 6.742308809150232e-06, |
| "loss": 0.2329, |
| "step": 471 |
| }, |
| { |
| "epoch": 3.776, |
| "grad_norm": 0.8459004163742065, |
| "learning_rate": 6.728515303081782e-06, |
| "loss": 0.2336, |
| "step": 472 |
| }, |
| { |
| "epoch": 3.784, |
| "grad_norm": 0.9713183641433716, |
| "learning_rate": 6.714706840507122e-06, |
| "loss": 0.266, |
| "step": 473 |
| }, |
| { |
| "epoch": 3.792, |
| "grad_norm": 0.7366663217544556, |
| "learning_rate": 6.700883540908185e-06, |
| "loss": 0.3162, |
| "step": 474 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 0.8059794306755066, |
| "learning_rate": 6.687045523895292e-06, |
| "loss": 0.2596, |
| "step": 475 |
| }, |
| { |
| "epoch": 3.808, |
| "grad_norm": 0.9890598654747009, |
| "learning_rate": 6.673192909206109e-06, |
| "loss": 0.3798, |
| "step": 476 |
| }, |
| { |
| "epoch": 3.816, |
| "grad_norm": 0.7894034385681152, |
| "learning_rate": 6.6593258167046115e-06, |
| "loss": 0.3121, |
| "step": 477 |
| }, |
| { |
| "epoch": 3.824, |
| "grad_norm": 0.8938712477684021, |
| "learning_rate": 6.64544436638005e-06, |
| "loss": 0.2633, |
| "step": 478 |
| }, |
| { |
| "epoch": 3.832, |
| "grad_norm": 0.738814115524292, |
| "learning_rate": 6.63154867834591e-06, |
| "loss": 0.2544, |
| "step": 479 |
| }, |
| { |
| "epoch": 3.84, |
| "grad_norm": 0.9438901543617249, |
| "learning_rate": 6.617638872838874e-06, |
| "loss": 0.2366, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.848, |
| "grad_norm": 1.0699201822280884, |
| "learning_rate": 6.603715070217779e-06, |
| "loss": 0.3156, |
| "step": 481 |
| }, |
| { |
| "epoch": 3.856, |
| "grad_norm": 0.753672182559967, |
| "learning_rate": 6.589777390962575e-06, |
| "loss": 0.3096, |
| "step": 482 |
| }, |
| { |
| "epoch": 3.864, |
| "grad_norm": 1.0672876834869385, |
| "learning_rate": 6.5758259556732896e-06, |
| "loss": 0.2431, |
| "step": 483 |
| }, |
| { |
| "epoch": 3.872, |
| "grad_norm": 0.756453275680542, |
| "learning_rate": 6.561860885068972e-06, |
| "loss": 0.2151, |
| "step": 484 |
| }, |
| { |
| "epoch": 3.88, |
| "grad_norm": 0.8135131001472473, |
| "learning_rate": 6.547882299986658e-06, |
| "loss": 0.3089, |
| "step": 485 |
| }, |
| { |
| "epoch": 3.888, |
| "grad_norm": 0.7398325204849243, |
| "learning_rate": 6.53389032138032e-06, |
| "loss": 0.326, |
| "step": 486 |
| }, |
| { |
| "epoch": 3.896, |
| "grad_norm": 0.8907092809677124, |
| "learning_rate": 6.519885070319827e-06, |
| "loss": 0.2609, |
| "step": 487 |
| }, |
| { |
| "epoch": 3.904, |
| "grad_norm": 0.9507347345352173, |
| "learning_rate": 6.505866667989884e-06, |
| "loss": 0.3, |
| "step": 488 |
| }, |
| { |
| "epoch": 3.912, |
| "grad_norm": 0.7755032777786255, |
| "learning_rate": 6.491835235688999e-06, |
| "loss": 0.2554, |
| "step": 489 |
| }, |
| { |
| "epoch": 3.92, |
| "grad_norm": 0.9234123229980469, |
| "learning_rate": 6.477790894828422e-06, |
| "loss": 0.3371, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.928, |
| "grad_norm": 0.811630129814148, |
| "learning_rate": 6.463733766931096e-06, |
| "loss": 0.2166, |
| "step": 491 |
| }, |
| { |
| "epoch": 3.936, |
| "grad_norm": 0.6761499047279358, |
| "learning_rate": 6.449663973630613e-06, |
| "loss": 0.2017, |
| "step": 492 |
| }, |
| { |
| "epoch": 3.944, |
| "grad_norm": 0.7352895736694336, |
| "learning_rate": 6.435581636670154e-06, |
| "loss": 0.2395, |
| "step": 493 |
| }, |
| { |
| "epoch": 3.952, |
| "grad_norm": 0.8231508731842041, |
| "learning_rate": 6.421486877901436e-06, |
| "loss": 0.2746, |
| "step": 494 |
| }, |
| { |
| "epoch": 3.96, |
| "grad_norm": 0.7367643713951111, |
| "learning_rate": 6.407379819283661e-06, |
| "loss": 0.2449, |
| "step": 495 |
| }, |
| { |
| "epoch": 3.968, |
| "grad_norm": 0.7580301761627197, |
| "learning_rate": 6.393260582882462e-06, |
| "loss": 0.2871, |
| "step": 496 |
| }, |
| { |
| "epoch": 3.976, |
| "grad_norm": 0.9079432487487793, |
| "learning_rate": 6.379129290868837e-06, |
| "loss": 0.2737, |
| "step": 497 |
| }, |
| { |
| "epoch": 3.984, |
| "grad_norm": 0.9549089074134827, |
| "learning_rate": 6.364986065518106e-06, |
| "loss": 0.4168, |
| "step": 498 |
| }, |
| { |
| "epoch": 3.992, |
| "grad_norm": 0.900466799736023, |
| "learning_rate": 6.350831029208844e-06, |
| "loss": 0.3325, |
| "step": 499 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.8359453678131104, |
| "learning_rate": 6.336664304421818e-06, |
| "loss": 0.2557, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.008, |
| "grad_norm": 1.3857921361923218, |
| "learning_rate": 6.322486013738942e-06, |
| "loss": 0.2166, |
| "step": 501 |
| }, |
| { |
| "epoch": 4.016, |
| "grad_norm": 1.0960663557052612, |
| "learning_rate": 6.308296279842204e-06, |
| "loss": 0.1362, |
| "step": 502 |
| }, |
| { |
| "epoch": 4.024, |
| "grad_norm": 1.209594488143921, |
| "learning_rate": 6.294095225512604e-06, |
| "loss": 0.1746, |
| "step": 503 |
| }, |
| { |
| "epoch": 4.032, |
| "grad_norm": 0.7824774980545044, |
| "learning_rate": 6.279882973629101e-06, |
| "loss": 0.1169, |
| "step": 504 |
| }, |
| { |
| "epoch": 4.04, |
| "grad_norm": 1.3840663433074951, |
| "learning_rate": 6.265659647167542e-06, |
| "loss": 0.2345, |
| "step": 505 |
| }, |
| { |
| "epoch": 4.048, |
| "grad_norm": 1.2887827157974243, |
| "learning_rate": 6.2514253691996e-06, |
| "loss": 0.18, |
| "step": 506 |
| }, |
| { |
| "epoch": 4.056, |
| "grad_norm": 1.167967677116394, |
| "learning_rate": 6.237180262891709e-06, |
| "loss": 0.1821, |
| "step": 507 |
| }, |
| { |
| "epoch": 4.064, |
| "grad_norm": 1.2175976037979126, |
| "learning_rate": 6.222924451504001e-06, |
| "loss": 0.1129, |
| "step": 508 |
| }, |
| { |
| "epoch": 4.072, |
| "grad_norm": 1.063623070716858, |
| "learning_rate": 6.208658058389232e-06, |
| "loss": 0.1169, |
| "step": 509 |
| }, |
| { |
| "epoch": 4.08, |
| "grad_norm": 0.8346629738807678, |
| "learning_rate": 6.194381206991723e-06, |
| "loss": 0.1423, |
| "step": 510 |
| }, |
| { |
| "epoch": 4.088, |
| "grad_norm": 0.9246945381164551, |
| "learning_rate": 6.180094020846291e-06, |
| "loss": 0.141, |
| "step": 511 |
| }, |
| { |
| "epoch": 4.096, |
| "grad_norm": 0.8086335062980652, |
| "learning_rate": 6.165796623577171e-06, |
| "loss": 0.0987, |
| "step": 512 |
| }, |
| { |
| "epoch": 4.104, |
| "grad_norm": 0.7477990984916687, |
| "learning_rate": 6.15148913889696e-06, |
| "loss": 0.0986, |
| "step": 513 |
| }, |
| { |
| "epoch": 4.112, |
| "grad_norm": 0.836835503578186, |
| "learning_rate": 6.1371716906055336e-06, |
| "loss": 0.1293, |
| "step": 514 |
| }, |
| { |
| "epoch": 4.12, |
| "grad_norm": 0.9984874725341797, |
| "learning_rate": 6.122844402588982e-06, |
| "loss": 0.1184, |
| "step": 515 |
| }, |
| { |
| "epoch": 4.128, |
| "grad_norm": 0.7542848587036133, |
| "learning_rate": 6.10850739881854e-06, |
| "loss": 0.1256, |
| "step": 516 |
| }, |
| { |
| "epoch": 4.136, |
| "grad_norm": 1.12161123752594, |
| "learning_rate": 6.094160803349508e-06, |
| "loss": 0.1174, |
| "step": 517 |
| }, |
| { |
| "epoch": 4.144, |
| "grad_norm": 0.9414368867874146, |
| "learning_rate": 6.079804740320181e-06, |
| "loss": 0.1302, |
| "step": 518 |
| }, |
| { |
| "epoch": 4.152, |
| "grad_norm": 0.9736228585243225, |
| "learning_rate": 6.065439333950776e-06, |
| "loss": 0.1686, |
| "step": 519 |
| }, |
| { |
| "epoch": 4.16, |
| "grad_norm": 0.6382215023040771, |
| "learning_rate": 6.051064708542357e-06, |
| "loss": 0.0873, |
| "step": 520 |
| }, |
| { |
| "epoch": 4.168, |
| "grad_norm": 0.7092307209968567, |
| "learning_rate": 6.036680988475756e-06, |
| "loss": 0.0862, |
| "step": 521 |
| }, |
| { |
| "epoch": 4.176, |
| "grad_norm": 0.7188888788223267, |
| "learning_rate": 6.022288298210502e-06, |
| "loss": 0.1247, |
| "step": 522 |
| }, |
| { |
| "epoch": 4.184, |
| "grad_norm": 0.7763389945030212, |
| "learning_rate": 6.00788676228374e-06, |
| "loss": 0.0686, |
| "step": 523 |
| }, |
| { |
| "epoch": 4.192, |
| "grad_norm": 0.7515356540679932, |
| "learning_rate": 5.993476505309154e-06, |
| "loss": 0.1354, |
| "step": 524 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 0.9005271792411804, |
| "learning_rate": 5.979057651975893e-06, |
| "loss": 0.1756, |
| "step": 525 |
| }, |
| { |
| "epoch": 4.208, |
| "grad_norm": 0.7862653136253357, |
| "learning_rate": 5.964630327047485e-06, |
| "loss": 0.1146, |
| "step": 526 |
| }, |
| { |
| "epoch": 4.216, |
| "grad_norm": 0.8641824126243591, |
| "learning_rate": 5.9501946553607615e-06, |
| "loss": 0.1673, |
| "step": 527 |
| }, |
| { |
| "epoch": 4.224, |
| "grad_norm": 1.1484317779541016, |
| "learning_rate": 5.935750761824777e-06, |
| "loss": 0.1787, |
| "step": 528 |
| }, |
| { |
| "epoch": 4.232, |
| "grad_norm": 0.9002270102500916, |
| "learning_rate": 5.921298771419731e-06, |
| "loss": 0.1587, |
| "step": 529 |
| }, |
| { |
| "epoch": 4.24, |
| "grad_norm": 0.951327383518219, |
| "learning_rate": 5.906838809195879e-06, |
| "loss": 0.1974, |
| "step": 530 |
| }, |
| { |
| "epoch": 4.248, |
| "grad_norm": 0.7668549418449402, |
| "learning_rate": 5.8923710002724595e-06, |
| "loss": 0.1783, |
| "step": 531 |
| }, |
| { |
| "epoch": 4.256, |
| "grad_norm": 0.773178219795227, |
| "learning_rate": 5.877895469836604e-06, |
| "loss": 0.1061, |
| "step": 532 |
| }, |
| { |
| "epoch": 4.264, |
| "grad_norm": 0.7705480456352234, |
| "learning_rate": 5.863412343142258e-06, |
| "loss": 0.142, |
| "step": 533 |
| }, |
| { |
| "epoch": 4.272, |
| "grad_norm": 0.9089683294296265, |
| "learning_rate": 5.848921745509094e-06, |
| "loss": 0.1697, |
| "step": 534 |
| }, |
| { |
| "epoch": 4.28, |
| "grad_norm": 0.7634876370429993, |
| "learning_rate": 5.8344238023214305e-06, |
| "loss": 0.1261, |
| "step": 535 |
| }, |
| { |
| "epoch": 4.288, |
| "grad_norm": 0.7811418175697327, |
| "learning_rate": 5.819918639027149e-06, |
| "loss": 0.122, |
| "step": 536 |
| }, |
| { |
| "epoch": 4.296, |
| "grad_norm": 0.7748238444328308, |
| "learning_rate": 5.805406381136598e-06, |
| "loss": 0.1597, |
| "step": 537 |
| }, |
| { |
| "epoch": 4.304, |
| "grad_norm": 0.8066184520721436, |
| "learning_rate": 5.790887154221521e-06, |
| "loss": 0.1219, |
| "step": 538 |
| }, |
| { |
| "epoch": 4.312, |
| "grad_norm": 0.7770021557807922, |
| "learning_rate": 5.776361083913959e-06, |
| "loss": 0.1352, |
| "step": 539 |
| }, |
| { |
| "epoch": 4.32, |
| "grad_norm": 0.7021104097366333, |
| "learning_rate": 5.7618282959051685e-06, |
| "loss": 0.1294, |
| "step": 540 |
| }, |
| { |
| "epoch": 4.328, |
| "grad_norm": 0.7303136587142944, |
| "learning_rate": 5.747288915944533e-06, |
| "loss": 0.1032, |
| "step": 541 |
| }, |
| { |
| "epoch": 4.336, |
| "grad_norm": 0.758567750453949, |
| "learning_rate": 5.7327430698384775e-06, |
| "loss": 0.1577, |
| "step": 542 |
| }, |
| { |
| "epoch": 4.344, |
| "grad_norm": 0.8246206641197205, |
| "learning_rate": 5.718190883449373e-06, |
| "loss": 0.0904, |
| "step": 543 |
| }, |
| { |
| "epoch": 4.352, |
| "grad_norm": 0.9011938571929932, |
| "learning_rate": 5.703632482694453e-06, |
| "loss": 0.1311, |
| "step": 544 |
| }, |
| { |
| "epoch": 4.36, |
| "grad_norm": 0.6359648704528809, |
| "learning_rate": 5.689067993544726e-06, |
| "loss": 0.1199, |
| "step": 545 |
| }, |
| { |
| "epoch": 4.368, |
| "grad_norm": 0.8066957592964172, |
| "learning_rate": 5.674497542023875e-06, |
| "loss": 0.1491, |
| "step": 546 |
| }, |
| { |
| "epoch": 4.376, |
| "grad_norm": 0.7746307253837585, |
| "learning_rate": 5.659921254207183e-06, |
| "loss": 0.1438, |
| "step": 547 |
| }, |
| { |
| "epoch": 4.384, |
| "grad_norm": 0.7862228751182556, |
| "learning_rate": 5.645339256220427e-06, |
| "loss": 0.145, |
| "step": 548 |
| }, |
| { |
| "epoch": 4.392, |
| "grad_norm": 0.7591133117675781, |
| "learning_rate": 5.630751674238796e-06, |
| "loss": 0.1081, |
| "step": 549 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 1.0325382947921753, |
| "learning_rate": 5.616158634485793e-06, |
| "loss": 0.2272, |
| "step": 550 |
| }, |
| { |
| "epoch": 4.408, |
| "grad_norm": 0.6715402007102966, |
| "learning_rate": 5.601560263232153e-06, |
| "loss": 0.0886, |
| "step": 551 |
| }, |
| { |
| "epoch": 4.416, |
| "grad_norm": 0.7422818541526794, |
| "learning_rate": 5.5869566867947344e-06, |
| "loss": 0.1198, |
| "step": 552 |
| }, |
| { |
| "epoch": 4.424, |
| "grad_norm": 0.9160724878311157, |
| "learning_rate": 5.572348031535442e-06, |
| "loss": 0.1184, |
| "step": 553 |
| }, |
| { |
| "epoch": 4.432, |
| "grad_norm": 0.7711536288261414, |
| "learning_rate": 5.557734423860122e-06, |
| "loss": 0.1254, |
| "step": 554 |
| }, |
| { |
| "epoch": 4.44, |
| "grad_norm": 0.9296648502349854, |
| "learning_rate": 5.543115990217478e-06, |
| "loss": 0.2222, |
| "step": 555 |
| }, |
| { |
| "epoch": 4.448, |
| "grad_norm": 0.920160710811615, |
| "learning_rate": 5.528492857097966e-06, |
| "loss": 0.1424, |
| "step": 556 |
| }, |
| { |
| "epoch": 4.456, |
| "grad_norm": 0.788571298122406, |
| "learning_rate": 5.513865151032709e-06, |
| "loss": 0.1811, |
| "step": 557 |
| }, |
| { |
| "epoch": 4.464, |
| "grad_norm": 0.9120768904685974, |
| "learning_rate": 5.499232998592399e-06, |
| "loss": 0.116, |
| "step": 558 |
| }, |
| { |
| "epoch": 4.4719999999999995, |
| "grad_norm": 0.7307674884796143, |
| "learning_rate": 5.484596526386198e-06, |
| "loss": 0.1415, |
| "step": 559 |
| }, |
| { |
| "epoch": 4.48, |
| "grad_norm": 0.9699017405509949, |
| "learning_rate": 5.469955861060653e-06, |
| "loss": 0.2032, |
| "step": 560 |
| }, |
| { |
| "epoch": 4.4879999999999995, |
| "grad_norm": 0.8792822957038879, |
| "learning_rate": 5.455311129298586e-06, |
| "loss": 0.1764, |
| "step": 561 |
| }, |
| { |
| "epoch": 4.496, |
| "grad_norm": 0.8328776359558105, |
| "learning_rate": 5.44066245781801e-06, |
| "loss": 0.1446, |
| "step": 562 |
| }, |
| { |
| "epoch": 4.504, |
| "grad_norm": 0.7818514704704285, |
| "learning_rate": 5.426009973371026e-06, |
| "loss": 0.14, |
| "step": 563 |
| }, |
| { |
| "epoch": 4.5120000000000005, |
| "grad_norm": 0.8472930788993835, |
| "learning_rate": 5.4113538027427245e-06, |
| "loss": 0.1308, |
| "step": 564 |
| }, |
| { |
| "epoch": 4.52, |
| "grad_norm": 2.571410894393921, |
| "learning_rate": 5.396694072750099e-06, |
| "loss": 0.1595, |
| "step": 565 |
| }, |
| { |
| "epoch": 4.5280000000000005, |
| "grad_norm": 0.7498548030853271, |
| "learning_rate": 5.382030910240936e-06, |
| "loss": 0.1357, |
| "step": 566 |
| }, |
| { |
| "epoch": 4.536, |
| "grad_norm": 0.9282652735710144, |
| "learning_rate": 5.367364442092724e-06, |
| "loss": 0.1469, |
| "step": 567 |
| }, |
| { |
| "epoch": 4.5440000000000005, |
| "grad_norm": 0.8957253694534302, |
| "learning_rate": 5.352694795211555e-06, |
| "loss": 0.1435, |
| "step": 568 |
| }, |
| { |
| "epoch": 4.552, |
| "grad_norm": 0.7313421964645386, |
| "learning_rate": 5.338022096531028e-06, |
| "loss": 0.1084, |
| "step": 569 |
| }, |
| { |
| "epoch": 4.5600000000000005, |
| "grad_norm": 0.8108712434768677, |
| "learning_rate": 5.3233464730111426e-06, |
| "loss": 0.119, |
| "step": 570 |
| }, |
| { |
| "epoch": 4.568, |
| "grad_norm": 0.7092931270599365, |
| "learning_rate": 5.308668051637213e-06, |
| "loss": 0.1398, |
| "step": 571 |
| }, |
| { |
| "epoch": 4.576, |
| "grad_norm": 0.7072190046310425, |
| "learning_rate": 5.29398695941876e-06, |
| "loss": 0.1216, |
| "step": 572 |
| }, |
| { |
| "epoch": 4.584, |
| "grad_norm": 1.016250729560852, |
| "learning_rate": 5.279303323388413e-06, |
| "loss": 0.2615, |
| "step": 573 |
| }, |
| { |
| "epoch": 4.592, |
| "grad_norm": 0.6790833473205566, |
| "learning_rate": 5.2646172706008154e-06, |
| "loss": 0.0958, |
| "step": 574 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 0.5775589942932129, |
| "learning_rate": 5.249928928131523e-06, |
| "loss": 0.097, |
| "step": 575 |
| }, |
| { |
| "epoch": 4.608, |
| "grad_norm": 0.7333958745002747, |
| "learning_rate": 5.235238423075899e-06, |
| "loss": 0.1343, |
| "step": 576 |
| }, |
| { |
| "epoch": 4.616, |
| "grad_norm": 0.8037298917770386, |
| "learning_rate": 5.220545882548024e-06, |
| "loss": 0.116, |
| "step": 577 |
| }, |
| { |
| "epoch": 4.624, |
| "grad_norm": 0.8875942230224609, |
| "learning_rate": 5.20585143367959e-06, |
| "loss": 0.1063, |
| "step": 578 |
| }, |
| { |
| "epoch": 4.632, |
| "grad_norm": 0.8265597820281982, |
| "learning_rate": 5.191155203618796e-06, |
| "loss": 0.1544, |
| "step": 579 |
| }, |
| { |
| "epoch": 4.64, |
| "grad_norm": 0.9249614477157593, |
| "learning_rate": 5.176457319529264e-06, |
| "loss": 0.1271, |
| "step": 580 |
| }, |
| { |
| "epoch": 4.648, |
| "grad_norm": 0.914107084274292, |
| "learning_rate": 5.161757908588917e-06, |
| "loss": 0.116, |
| "step": 581 |
| }, |
| { |
| "epoch": 4.656, |
| "grad_norm": 0.7554004192352295, |
| "learning_rate": 5.147057097988898e-06, |
| "loss": 0.1269, |
| "step": 582 |
| }, |
| { |
| "epoch": 4.664, |
| "grad_norm": 0.9406664371490479, |
| "learning_rate": 5.132355014932455e-06, |
| "loss": 0.1091, |
| "step": 583 |
| }, |
| { |
| "epoch": 4.672, |
| "grad_norm": 0.7658833265304565, |
| "learning_rate": 5.1176517866338495e-06, |
| "loss": 0.1301, |
| "step": 584 |
| }, |
| { |
| "epoch": 4.68, |
| "grad_norm": 0.6699630618095398, |
| "learning_rate": 5.102947540317254e-06, |
| "loss": 0.0935, |
| "step": 585 |
| }, |
| { |
| "epoch": 4.688, |
| "grad_norm": 0.8095746636390686, |
| "learning_rate": 5.088242403215644e-06, |
| "loss": 0.1779, |
| "step": 586 |
| }, |
| { |
| "epoch": 4.696, |
| "grad_norm": 0.7161609530448914, |
| "learning_rate": 5.073536502569708e-06, |
| "loss": 0.1411, |
| "step": 587 |
| }, |
| { |
| "epoch": 4.704, |
| "grad_norm": 0.7674558162689209, |
| "learning_rate": 5.058829965626742e-06, |
| "loss": 0.1494, |
| "step": 588 |
| }, |
| { |
| "epoch": 4.712, |
| "grad_norm": 0.8620008826255798, |
| "learning_rate": 5.0441229196395416e-06, |
| "loss": 0.1443, |
| "step": 589 |
| }, |
| { |
| "epoch": 4.72, |
| "grad_norm": 0.7442160248756409, |
| "learning_rate": 5.029415491865311e-06, |
| "loss": 0.1328, |
| "step": 590 |
| }, |
| { |
| "epoch": 4.728, |
| "grad_norm": 0.7636533975601196, |
| "learning_rate": 5.014707809564562e-06, |
| "loss": 0.1444, |
| "step": 591 |
| }, |
| { |
| "epoch": 4.736, |
| "grad_norm": 0.8142823576927185, |
| "learning_rate": 5e-06, |
| "loss": 0.1867, |
| "step": 592 |
| }, |
| { |
| "epoch": 4.744, |
| "grad_norm": 0.9690331816673279, |
| "learning_rate": 4.98529219043544e-06, |
| "loss": 0.1497, |
| "step": 593 |
| }, |
| { |
| "epoch": 4.752, |
| "grad_norm": 1.0349537134170532, |
| "learning_rate": 4.97058450813469e-06, |
| "loss": 0.1369, |
| "step": 594 |
| }, |
| { |
| "epoch": 4.76, |
| "grad_norm": 0.9176954627037048, |
| "learning_rate": 4.955877080360462e-06, |
| "loss": 0.1553, |
| "step": 595 |
| }, |
| { |
| "epoch": 4.768, |
| "grad_norm": 0.7095694541931152, |
| "learning_rate": 4.94117003437326e-06, |
| "loss": 0.1247, |
| "step": 596 |
| }, |
| { |
| "epoch": 4.776, |
| "grad_norm": 0.8611987829208374, |
| "learning_rate": 4.926463497430293e-06, |
| "loss": 0.1724, |
| "step": 597 |
| }, |
| { |
| "epoch": 4.784, |
| "grad_norm": 0.7742799520492554, |
| "learning_rate": 4.911757596784358e-06, |
| "loss": 0.123, |
| "step": 598 |
| }, |
| { |
| "epoch": 4.792, |
| "grad_norm": 0.580224335193634, |
| "learning_rate": 4.897052459682749e-06, |
| "loss": 0.1059, |
| "step": 599 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.7499905228614807, |
| "learning_rate": 4.882348213366152e-06, |
| "loss": 0.1635, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.808, |
| "grad_norm": 0.8561443090438843, |
| "learning_rate": 4.867644985067548e-06, |
| "loss": 0.0817, |
| "step": 601 |
| }, |
| { |
| "epoch": 4.816, |
| "grad_norm": 0.9102675318717957, |
| "learning_rate": 4.8529429020111035e-06, |
| "loss": 0.2103, |
| "step": 602 |
| }, |
| { |
| "epoch": 4.824, |
| "grad_norm": 0.7099835872650146, |
| "learning_rate": 4.838242091411085e-06, |
| "loss": 0.1099, |
| "step": 603 |
| }, |
| { |
| "epoch": 4.832, |
| "grad_norm": 0.897228479385376, |
| "learning_rate": 4.823542680470738e-06, |
| "loss": 0.1302, |
| "step": 604 |
| }, |
| { |
| "epoch": 4.84, |
| "grad_norm": 0.6506624817848206, |
| "learning_rate": 4.808844796381205e-06, |
| "loss": 0.12, |
| "step": 605 |
| }, |
| { |
| "epoch": 4.848, |
| "grad_norm": 0.8063788414001465, |
| "learning_rate": 4.794148566320412e-06, |
| "loss": 0.1609, |
| "step": 606 |
| }, |
| { |
| "epoch": 4.856, |
| "grad_norm": 0.7618057727813721, |
| "learning_rate": 4.779454117451978e-06, |
| "loss": 0.1449, |
| "step": 607 |
| }, |
| { |
| "epoch": 4.864, |
| "grad_norm": 0.8923940658569336, |
| "learning_rate": 4.7647615769241e-06, |
| "loss": 0.1854, |
| "step": 608 |
| }, |
| { |
| "epoch": 4.872, |
| "grad_norm": 0.7307261824607849, |
| "learning_rate": 4.750071071868478e-06, |
| "loss": 0.1354, |
| "step": 609 |
| }, |
| { |
| "epoch": 4.88, |
| "grad_norm": 0.7396050691604614, |
| "learning_rate": 4.7353827293991845e-06, |
| "loss": 0.097, |
| "step": 610 |
| }, |
| { |
| "epoch": 4.888, |
| "grad_norm": 0.7688437700271606, |
| "learning_rate": 4.720696676611589e-06, |
| "loss": 0.1447, |
| "step": 611 |
| }, |
| { |
| "epoch": 4.896, |
| "grad_norm": 0.9390592575073242, |
| "learning_rate": 4.706013040581242e-06, |
| "loss": 0.1153, |
| "step": 612 |
| }, |
| { |
| "epoch": 4.904, |
| "grad_norm": 0.7500796914100647, |
| "learning_rate": 4.691331948362789e-06, |
| "loss": 0.1404, |
| "step": 613 |
| }, |
| { |
| "epoch": 4.912, |
| "grad_norm": 0.913267195224762, |
| "learning_rate": 4.676653526988858e-06, |
| "loss": 0.1348, |
| "step": 614 |
| }, |
| { |
| "epoch": 4.92, |
| "grad_norm": 0.8776205778121948, |
| "learning_rate": 4.661977903468974e-06, |
| "loss": 0.1285, |
| "step": 615 |
| }, |
| { |
| "epoch": 4.928, |
| "grad_norm": 1.010945200920105, |
| "learning_rate": 4.647305204788445e-06, |
| "loss": 0.1646, |
| "step": 616 |
| }, |
| { |
| "epoch": 4.936, |
| "grad_norm": 0.7763349413871765, |
| "learning_rate": 4.632635557907277e-06, |
| "loss": 0.1549, |
| "step": 617 |
| }, |
| { |
| "epoch": 4.944, |
| "grad_norm": 0.7637826800346375, |
| "learning_rate": 4.617969089759066e-06, |
| "loss": 0.1077, |
| "step": 618 |
| }, |
| { |
| "epoch": 4.952, |
| "grad_norm": 0.9262024760246277, |
| "learning_rate": 4.603305927249902e-06, |
| "loss": 0.185, |
| "step": 619 |
| }, |
| { |
| "epoch": 4.96, |
| "grad_norm": 0.821549117565155, |
| "learning_rate": 4.588646197257278e-06, |
| "loss": 0.1219, |
| "step": 620 |
| }, |
| { |
| "epoch": 4.968, |
| "grad_norm": 0.7055884003639221, |
| "learning_rate": 4.573990026628976e-06, |
| "loss": 0.118, |
| "step": 621 |
| }, |
| { |
| "epoch": 4.976, |
| "grad_norm": 0.7518096566200256, |
| "learning_rate": 4.559337542181993e-06, |
| "loss": 0.168, |
| "step": 622 |
| }, |
| { |
| "epoch": 4.984, |
| "grad_norm": 0.7408725023269653, |
| "learning_rate": 4.544688870701416e-06, |
| "loss": 0.1333, |
| "step": 623 |
| }, |
| { |
| "epoch": 4.992, |
| "grad_norm": 0.6920803189277649, |
| "learning_rate": 4.53004413893935e-06, |
| "loss": 0.1177, |
| "step": 624 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.8042945861816406, |
| "learning_rate": 4.5154034736138035e-06, |
| "loss": 0.1387, |
| "step": 625 |
| }, |
| { |
| "epoch": 5.008, |
| "grad_norm": 0.8056797981262207, |
| "learning_rate": 4.500767001407604e-06, |
| "loss": 0.0511, |
| "step": 626 |
| }, |
| { |
| "epoch": 5.016, |
| "grad_norm": 0.8114173412322998, |
| "learning_rate": 4.486134848967292e-06, |
| "loss": 0.0592, |
| "step": 627 |
| }, |
| { |
| "epoch": 5.024, |
| "grad_norm": 0.6595902442932129, |
| "learning_rate": 4.471507142902036e-06, |
| "loss": 0.0703, |
| "step": 628 |
| }, |
| { |
| "epoch": 5.032, |
| "grad_norm": 0.7525659799575806, |
| "learning_rate": 4.4568840097825225e-06, |
| "loss": 0.053, |
| "step": 629 |
| }, |
| { |
| "epoch": 5.04, |
| "grad_norm": 0.7577898502349854, |
| "learning_rate": 4.4422655761398785e-06, |
| "loss": 0.0562, |
| "step": 630 |
| }, |
| { |
| "epoch": 5.048, |
| "grad_norm": 0.66656893491745, |
| "learning_rate": 4.427651968464559e-06, |
| "loss": 0.031, |
| "step": 631 |
| }, |
| { |
| "epoch": 5.056, |
| "grad_norm": 1.139257788658142, |
| "learning_rate": 4.413043313205266e-06, |
| "loss": 0.0474, |
| "step": 632 |
| }, |
| { |
| "epoch": 5.064, |
| "grad_norm": 1.2135770320892334, |
| "learning_rate": 4.3984397367678475e-06, |
| "loss": 0.0669, |
| "step": 633 |
| }, |
| { |
| "epoch": 5.072, |
| "grad_norm": 4.926527976989746, |
| "learning_rate": 4.383841365514208e-06, |
| "loss": 0.0528, |
| "step": 634 |
| }, |
| { |
| "epoch": 5.08, |
| "grad_norm": 0.8709739446640015, |
| "learning_rate": 4.369248325761205e-06, |
| "loss": 0.0477, |
| "step": 635 |
| }, |
| { |
| "epoch": 5.088, |
| "grad_norm": 0.8318364024162292, |
| "learning_rate": 4.354660743779575e-06, |
| "loss": 0.0323, |
| "step": 636 |
| }, |
| { |
| "epoch": 5.096, |
| "grad_norm": 0.6887449026107788, |
| "learning_rate": 4.340078745792818e-06, |
| "loss": 0.0566, |
| "step": 637 |
| }, |
| { |
| "epoch": 5.104, |
| "grad_norm": 0.7675641775131226, |
| "learning_rate": 4.325502457976126e-06, |
| "loss": 0.0501, |
| "step": 638 |
| }, |
| { |
| "epoch": 5.112, |
| "grad_norm": 0.5305010080337524, |
| "learning_rate": 4.310932006455276e-06, |
| "loss": 0.04, |
| "step": 639 |
| }, |
| { |
| "epoch": 5.12, |
| "grad_norm": 0.6297786831855774, |
| "learning_rate": 4.296367517305548e-06, |
| "loss": 0.0405, |
| "step": 640 |
| }, |
| { |
| "epoch": 5.128, |
| "grad_norm": 0.7022202014923096, |
| "learning_rate": 4.281809116550629e-06, |
| "loss": 0.0622, |
| "step": 641 |
| }, |
| { |
| "epoch": 5.136, |
| "grad_norm": 0.5648162961006165, |
| "learning_rate": 4.267256930161523e-06, |
| "loss": 0.0484, |
| "step": 642 |
| }, |
| { |
| "epoch": 5.144, |
| "grad_norm": 0.6618186235427856, |
| "learning_rate": 4.252711084055468e-06, |
| "loss": 0.0682, |
| "step": 643 |
| }, |
| { |
| "epoch": 5.152, |
| "grad_norm": 0.6455008387565613, |
| "learning_rate": 4.238171704094833e-06, |
| "loss": 0.0664, |
| "step": 644 |
| }, |
| { |
| "epoch": 5.16, |
| "grad_norm": 0.6835476160049438, |
| "learning_rate": 4.223638916086044e-06, |
| "loss": 0.049, |
| "step": 645 |
| }, |
| { |
| "epoch": 5.168, |
| "grad_norm": 0.7162488102912903, |
| "learning_rate": 4.209112845778481e-06, |
| "loss": 0.0539, |
| "step": 646 |
| }, |
| { |
| "epoch": 5.176, |
| "grad_norm": 0.7715399265289307, |
| "learning_rate": 4.194593618863404e-06, |
| "loss": 0.0636, |
| "step": 647 |
| }, |
| { |
| "epoch": 5.184, |
| "grad_norm": 0.6754415035247803, |
| "learning_rate": 4.180081360972852e-06, |
| "loss": 0.0603, |
| "step": 648 |
| }, |
| { |
| "epoch": 5.192, |
| "grad_norm": 0.8835145831108093, |
| "learning_rate": 4.165576197678571e-06, |
| "loss": 0.0217, |
| "step": 649 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 0.7698906660079956, |
| "learning_rate": 4.151078254490908e-06, |
| "loss": 0.0442, |
| "step": 650 |
| }, |
| { |
| "epoch": 5.208, |
| "grad_norm": 0.8498547077178955, |
| "learning_rate": 4.136587656857744e-06, |
| "loss": 0.0706, |
| "step": 651 |
| }, |
| { |
| "epoch": 5.216, |
| "grad_norm": 0.5944578051567078, |
| "learning_rate": 4.122104530163397e-06, |
| "loss": 0.0354, |
| "step": 652 |
| }, |
| { |
| "epoch": 5.224, |
| "grad_norm": 0.7195687890052795, |
| "learning_rate": 4.107628999727542e-06, |
| "loss": 0.0572, |
| "step": 653 |
| }, |
| { |
| "epoch": 5.232, |
| "grad_norm": 0.8516260981559753, |
| "learning_rate": 4.09316119080412e-06, |
| "loss": 0.068, |
| "step": 654 |
| }, |
| { |
| "epoch": 5.24, |
| "grad_norm": 0.7528324127197266, |
| "learning_rate": 4.0787012285802695e-06, |
| "loss": 0.0809, |
| "step": 655 |
| }, |
| { |
| "epoch": 5.248, |
| "grad_norm": 1.21733820438385, |
| "learning_rate": 4.064249238175223e-06, |
| "loss": 0.0333, |
| "step": 656 |
| }, |
| { |
| "epoch": 5.256, |
| "grad_norm": 0.6287680864334106, |
| "learning_rate": 4.04980534463924e-06, |
| "loss": 0.0385, |
| "step": 657 |
| }, |
| { |
| "epoch": 5.264, |
| "grad_norm": 0.7100169658660889, |
| "learning_rate": 4.035369672952516e-06, |
| "loss": 0.0469, |
| "step": 658 |
| }, |
| { |
| "epoch": 5.272, |
| "grad_norm": 0.8435373306274414, |
| "learning_rate": 4.020942348024108e-06, |
| "loss": 0.0836, |
| "step": 659 |
| }, |
| { |
| "epoch": 5.28, |
| "grad_norm": 0.7471747994422913, |
| "learning_rate": 4.0065234946908456e-06, |
| "loss": 0.057, |
| "step": 660 |
| }, |
| { |
| "epoch": 5.288, |
| "grad_norm": 0.6374616026878357, |
| "learning_rate": 3.992113237716261e-06, |
| "loss": 0.0501, |
| "step": 661 |
| }, |
| { |
| "epoch": 5.296, |
| "grad_norm": 1.1847115755081177, |
| "learning_rate": 3.977711701789499e-06, |
| "loss": 0.0746, |
| "step": 662 |
| }, |
| { |
| "epoch": 5.304, |
| "grad_norm": 0.5774708986282349, |
| "learning_rate": 3.963319011524246e-06, |
| "loss": 0.0347, |
| "step": 663 |
| }, |
| { |
| "epoch": 5.312, |
| "grad_norm": 0.6785467863082886, |
| "learning_rate": 3.948935291457645e-06, |
| "loss": 0.0482, |
| "step": 664 |
| }, |
| { |
| "epoch": 5.32, |
| "grad_norm": 0.55971360206604, |
| "learning_rate": 3.934560666049226e-06, |
| "loss": 0.0369, |
| "step": 665 |
| }, |
| { |
| "epoch": 5.328, |
| "grad_norm": 0.5369053483009338, |
| "learning_rate": 3.920195259679822e-06, |
| "loss": 0.0352, |
| "step": 666 |
| }, |
| { |
| "epoch": 5.336, |
| "grad_norm": 0.48250868916511536, |
| "learning_rate": 3.905839196650494e-06, |
| "loss": 0.0462, |
| "step": 667 |
| }, |
| { |
| "epoch": 5.344, |
| "grad_norm": 0.5979182720184326, |
| "learning_rate": 3.891492601181462e-06, |
| "loss": 0.0447, |
| "step": 668 |
| }, |
| { |
| "epoch": 5.352, |
| "grad_norm": 0.7114503979682922, |
| "learning_rate": 3.877155597411019e-06, |
| "loss": 0.0316, |
| "step": 669 |
| }, |
| { |
| "epoch": 5.36, |
| "grad_norm": 0.6035363078117371, |
| "learning_rate": 3.862828309394469e-06, |
| "loss": 0.0582, |
| "step": 670 |
| }, |
| { |
| "epoch": 5.368, |
| "grad_norm": 0.8595693111419678, |
| "learning_rate": 3.8485108611030415e-06, |
| "loss": 0.0796, |
| "step": 671 |
| }, |
| { |
| "epoch": 5.376, |
| "grad_norm": 0.6660138964653015, |
| "learning_rate": 3.834203376422831e-06, |
| "loss": 0.058, |
| "step": 672 |
| }, |
| { |
| "epoch": 5.384, |
| "grad_norm": 0.6106982231140137, |
| "learning_rate": 3.8199059791537105e-06, |
| "loss": 0.0403, |
| "step": 673 |
| }, |
| { |
| "epoch": 5.392, |
| "grad_norm": 0.6765463352203369, |
| "learning_rate": 3.805618793008279e-06, |
| "loss": 0.0393, |
| "step": 674 |
| }, |
| { |
| "epoch": 5.4, |
| "grad_norm": 0.7175659537315369, |
| "learning_rate": 3.7913419416107692e-06, |
| "loss": 0.0592, |
| "step": 675 |
| }, |
| { |
| "epoch": 5.408, |
| "grad_norm": 0.7095440030097961, |
| "learning_rate": 3.777075548496001e-06, |
| "loss": 0.0448, |
| "step": 676 |
| }, |
| { |
| "epoch": 5.416, |
| "grad_norm": 0.858165979385376, |
| "learning_rate": 3.7628197371082916e-06, |
| "loss": 0.0502, |
| "step": 677 |
| }, |
| { |
| "epoch": 5.424, |
| "grad_norm": 0.6832898855209351, |
| "learning_rate": 3.7485746308004013e-06, |
| "loss": 0.0405, |
| "step": 678 |
| }, |
| { |
| "epoch": 5.432, |
| "grad_norm": 0.6566227078437805, |
| "learning_rate": 3.7343403528324574e-06, |
| "loss": 0.0572, |
| "step": 679 |
| }, |
| { |
| "epoch": 5.44, |
| "grad_norm": 0.9831671118736267, |
| "learning_rate": 3.7201170263709004e-06, |
| "loss": 0.0922, |
| "step": 680 |
| }, |
| { |
| "epoch": 5.448, |
| "grad_norm": 0.6447628140449524, |
| "learning_rate": 3.705904774487396e-06, |
| "loss": 0.0433, |
| "step": 681 |
| }, |
| { |
| "epoch": 5.456, |
| "grad_norm": 0.7340384721755981, |
| "learning_rate": 3.6917037201577977e-06, |
| "loss": 0.0593, |
| "step": 682 |
| }, |
| { |
| "epoch": 5.464, |
| "grad_norm": 0.6065418124198914, |
| "learning_rate": 3.6775139862610577e-06, |
| "loss": 0.0731, |
| "step": 683 |
| }, |
| { |
| "epoch": 5.4719999999999995, |
| "grad_norm": 1.329861044883728, |
| "learning_rate": 3.6633356955781827e-06, |
| "loss": 0.0302, |
| "step": 684 |
| }, |
| { |
| "epoch": 5.48, |
| "grad_norm": 0.5943688154220581, |
| "learning_rate": 3.649168970791157e-06, |
| "loss": 0.0446, |
| "step": 685 |
| }, |
| { |
| "epoch": 5.4879999999999995, |
| "grad_norm": 0.6232944130897522, |
| "learning_rate": 3.635013934481895e-06, |
| "loss": 0.052, |
| "step": 686 |
| }, |
| { |
| "epoch": 5.496, |
| "grad_norm": 1.0451732873916626, |
| "learning_rate": 3.620870709131163e-06, |
| "loss": 0.0594, |
| "step": 687 |
| }, |
| { |
| "epoch": 5.504, |
| "grad_norm": 0.628891110420227, |
| "learning_rate": 3.6067394171175397e-06, |
| "loss": 0.0583, |
| "step": 688 |
| }, |
| { |
| "epoch": 5.5120000000000005, |
| "grad_norm": 0.7899133563041687, |
| "learning_rate": 3.5926201807163384e-06, |
| "loss": 0.0299, |
| "step": 689 |
| }, |
| { |
| "epoch": 5.52, |
| "grad_norm": 0.7041831612586975, |
| "learning_rate": 3.578513122098566e-06, |
| "loss": 0.0344, |
| "step": 690 |
| }, |
| { |
| "epoch": 5.5280000000000005, |
| "grad_norm": 0.6907998919487, |
| "learning_rate": 3.564418363329848e-06, |
| "loss": 0.0616, |
| "step": 691 |
| }, |
| { |
| "epoch": 5.536, |
| "grad_norm": 0.6608344912528992, |
| "learning_rate": 3.5503360263693887e-06, |
| "loss": 0.0471, |
| "step": 692 |
| }, |
| { |
| "epoch": 5.5440000000000005, |
| "grad_norm": 0.7428660988807678, |
| "learning_rate": 3.5362662330689067e-06, |
| "loss": 0.0541, |
| "step": 693 |
| }, |
| { |
| "epoch": 5.552, |
| "grad_norm": 0.9560613036155701, |
| "learning_rate": 3.5222091051715803e-06, |
| "loss": 0.0642, |
| "step": 694 |
| }, |
| { |
| "epoch": 5.5600000000000005, |
| "grad_norm": 0.7475034594535828, |
| "learning_rate": 3.5081647643110028e-06, |
| "loss": 0.0732, |
| "step": 695 |
| }, |
| { |
| "epoch": 5.568, |
| "grad_norm": 0.6867515444755554, |
| "learning_rate": 3.4941333320101173e-06, |
| "loss": 0.0565, |
| "step": 696 |
| }, |
| { |
| "epoch": 5.576, |
| "grad_norm": 0.5686975717544556, |
| "learning_rate": 3.480114929680176e-06, |
| "loss": 0.0384, |
| "step": 697 |
| }, |
| { |
| "epoch": 5.584, |
| "grad_norm": 0.6070277690887451, |
| "learning_rate": 3.466109678619681e-06, |
| "loss": 0.0484, |
| "step": 698 |
| }, |
| { |
| "epoch": 5.592, |
| "grad_norm": 0.6168974041938782, |
| "learning_rate": 3.4521177000133456e-06, |
| "loss": 0.0508, |
| "step": 699 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 0.6029306650161743, |
| "learning_rate": 3.4381391149310294e-06, |
| "loss": 0.0502, |
| "step": 700 |
| }, |
| { |
| "epoch": 5.608, |
| "grad_norm": 0.8586588501930237, |
| "learning_rate": 3.4241740443267112e-06, |
| "loss": 0.0787, |
| "step": 701 |
| }, |
| { |
| "epoch": 5.616, |
| "grad_norm": 0.5512130856513977, |
| "learning_rate": 3.4102226090374246e-06, |
| "loss": 0.0357, |
| "step": 702 |
| }, |
| { |
| "epoch": 5.624, |
| "grad_norm": 0.7166364789009094, |
| "learning_rate": 3.3962849297822225e-06, |
| "loss": 0.0565, |
| "step": 703 |
| }, |
| { |
| "epoch": 5.632, |
| "grad_norm": 0.7719413042068481, |
| "learning_rate": 3.3823611271611266e-06, |
| "loss": 0.0626, |
| "step": 704 |
| }, |
| { |
| "epoch": 5.64, |
| "grad_norm": 1.0928401947021484, |
| "learning_rate": 3.368451321654091e-06, |
| "loss": 0.0686, |
| "step": 705 |
| }, |
| { |
| "epoch": 5.648, |
| "grad_norm": 0.7056580781936646, |
| "learning_rate": 3.35455563361995e-06, |
| "loss": 0.0646, |
| "step": 706 |
| }, |
| { |
| "epoch": 5.656, |
| "grad_norm": 0.5838544368743896, |
| "learning_rate": 3.3406741832953893e-06, |
| "loss": 0.0367, |
| "step": 707 |
| }, |
| { |
| "epoch": 5.664, |
| "grad_norm": 1.1275421380996704, |
| "learning_rate": 3.3268070907938915e-06, |
| "loss": 0.0488, |
| "step": 708 |
| }, |
| { |
| "epoch": 5.672, |
| "grad_norm": 0.8408868312835693, |
| "learning_rate": 3.3129544761047093e-06, |
| "loss": 0.0597, |
| "step": 709 |
| }, |
| { |
| "epoch": 5.68, |
| "grad_norm": 0.571863055229187, |
| "learning_rate": 3.2991164590918162e-06, |
| "loss": 0.0518, |
| "step": 710 |
| }, |
| { |
| "epoch": 5.688, |
| "grad_norm": 0.6888249516487122, |
| "learning_rate": 3.2852931594928804e-06, |
| "loss": 0.0757, |
| "step": 711 |
| }, |
| { |
| "epoch": 5.696, |
| "grad_norm": 0.5743219256401062, |
| "learning_rate": 3.271484696918218e-06, |
| "loss": 0.0268, |
| "step": 712 |
| }, |
| { |
| "epoch": 5.704, |
| "grad_norm": 0.7105786800384521, |
| "learning_rate": 3.2576911908497695e-06, |
| "loss": 0.0561, |
| "step": 713 |
| }, |
| { |
| "epoch": 5.712, |
| "grad_norm": 0.5829000473022461, |
| "learning_rate": 3.2439127606400546e-06, |
| "loss": 0.0453, |
| "step": 714 |
| }, |
| { |
| "epoch": 5.72, |
| "grad_norm": 0.6392514705657959, |
| "learning_rate": 3.2301495255111426e-06, |
| "loss": 0.0542, |
| "step": 715 |
| }, |
| { |
| "epoch": 5.728, |
| "grad_norm": 0.8063925504684448, |
| "learning_rate": 3.2164016045536306e-06, |
| "loss": 0.0693, |
| "step": 716 |
| }, |
| { |
| "epoch": 5.736, |
| "grad_norm": 0.6815555691719055, |
| "learning_rate": 3.202669116725598e-06, |
| "loss": 0.0289, |
| "step": 717 |
| }, |
| { |
| "epoch": 5.744, |
| "grad_norm": 0.6942662000656128, |
| "learning_rate": 3.1889521808515888e-06, |
| "loss": 0.0458, |
| "step": 718 |
| }, |
| { |
| "epoch": 5.752, |
| "grad_norm": 0.8222205638885498, |
| "learning_rate": 3.1752509156215738e-06, |
| "loss": 0.0606, |
| "step": 719 |
| }, |
| { |
| "epoch": 5.76, |
| "grad_norm": 0.597169816493988, |
| "learning_rate": 3.1615654395899377e-06, |
| "loss": 0.0595, |
| "step": 720 |
| }, |
| { |
| "epoch": 5.768, |
| "grad_norm": 0.542059600353241, |
| "learning_rate": 3.1478958711744324e-06, |
| "loss": 0.0302, |
| "step": 721 |
| }, |
| { |
| "epoch": 5.776, |
| "grad_norm": 0.6608166694641113, |
| "learning_rate": 3.1342423286551756e-06, |
| "loss": 0.0638, |
| "step": 722 |
| }, |
| { |
| "epoch": 5.784, |
| "grad_norm": 0.6509668231010437, |
| "learning_rate": 3.120604930173608e-06, |
| "loss": 0.0356, |
| "step": 723 |
| }, |
| { |
| "epoch": 5.792, |
| "grad_norm": 0.6087043285369873, |
| "learning_rate": 3.1069837937314846e-06, |
| "loss": 0.0617, |
| "step": 724 |
| }, |
| { |
| "epoch": 5.8, |
| "grad_norm": 0.6988217830657959, |
| "learning_rate": 3.093379037189842e-06, |
| "loss": 0.0546, |
| "step": 725 |
| }, |
| { |
| "epoch": 5.808, |
| "grad_norm": 0.5948909521102905, |
| "learning_rate": 3.0797907782679944e-06, |
| "loss": 0.0678, |
| "step": 726 |
| }, |
| { |
| "epoch": 5.816, |
| "grad_norm": 0.6464372873306274, |
| "learning_rate": 3.0662191345424925e-06, |
| "loss": 0.0562, |
| "step": 727 |
| }, |
| { |
| "epoch": 5.824, |
| "grad_norm": 1.200032353401184, |
| "learning_rate": 3.0526642234461313e-06, |
| "loss": 0.1153, |
| "step": 728 |
| }, |
| { |
| "epoch": 5.832, |
| "grad_norm": 0.7848719954490662, |
| "learning_rate": 3.039126162266912e-06, |
| "loss": 0.0774, |
| "step": 729 |
| }, |
| { |
| "epoch": 5.84, |
| "grad_norm": 0.7536558508872986, |
| "learning_rate": 3.0256050681470446e-06, |
| "loss": 0.0878, |
| "step": 730 |
| }, |
| { |
| "epoch": 5.848, |
| "grad_norm": 0.7371671795845032, |
| "learning_rate": 3.012101058081919e-06, |
| "loss": 0.0515, |
| "step": 731 |
| }, |
| { |
| "epoch": 5.856, |
| "grad_norm": 0.6955927014350891, |
| "learning_rate": 2.9986142489191074e-06, |
| "loss": 0.0448, |
| "step": 732 |
| }, |
| { |
| "epoch": 5.864, |
| "grad_norm": 0.6752610206604004, |
| "learning_rate": 2.9851447573573383e-06, |
| "loss": 0.0673, |
| "step": 733 |
| }, |
| { |
| "epoch": 5.872, |
| "grad_norm": 0.672664225101471, |
| "learning_rate": 2.971692699945502e-06, |
| "loss": 0.051, |
| "step": 734 |
| }, |
| { |
| "epoch": 5.88, |
| "grad_norm": 0.570433497428894, |
| "learning_rate": 2.958258193081629e-06, |
| "loss": 0.0477, |
| "step": 735 |
| }, |
| { |
| "epoch": 5.888, |
| "grad_norm": 0.6220299601554871, |
| "learning_rate": 2.9448413530118912e-06, |
| "loss": 0.0465, |
| "step": 736 |
| }, |
| { |
| "epoch": 5.896, |
| "grad_norm": 0.7739304900169373, |
| "learning_rate": 2.9314422958295906e-06, |
| "loss": 0.0554, |
| "step": 737 |
| }, |
| { |
| "epoch": 5.904, |
| "grad_norm": 0.5828734040260315, |
| "learning_rate": 2.9180611374741623e-06, |
| "loss": 0.0534, |
| "step": 738 |
| }, |
| { |
| "epoch": 5.912, |
| "grad_norm": 0.47254109382629395, |
| "learning_rate": 2.904697993730159e-06, |
| "loss": 0.0344, |
| "step": 739 |
| }, |
| { |
| "epoch": 5.92, |
| "grad_norm": 0.6161917448043823, |
| "learning_rate": 2.891352980226262e-06, |
| "loss": 0.0551, |
| "step": 740 |
| }, |
| { |
| "epoch": 5.928, |
| "grad_norm": 0.6161420345306396, |
| "learning_rate": 2.8780262124342755e-06, |
| "loss": 0.0627, |
| "step": 741 |
| }, |
| { |
| "epoch": 5.936, |
| "grad_norm": 0.7250885963439941, |
| "learning_rate": 2.8647178056681197e-06, |
| "loss": 0.0488, |
| "step": 742 |
| }, |
| { |
| "epoch": 5.944, |
| "grad_norm": 0.6412259340286255, |
| "learning_rate": 2.8514278750828537e-06, |
| "loss": 0.0541, |
| "step": 743 |
| }, |
| { |
| "epoch": 5.952, |
| "grad_norm": 0.5730807781219482, |
| "learning_rate": 2.838156535673652e-06, |
| "loss": 0.0416, |
| "step": 744 |
| }, |
| { |
| "epoch": 5.96, |
| "grad_norm": 0.714428722858429, |
| "learning_rate": 2.8249039022748315e-06, |
| "loss": 0.0723, |
| "step": 745 |
| }, |
| { |
| "epoch": 5.968, |
| "grad_norm": 0.625837504863739, |
| "learning_rate": 2.8116700895588473e-06, |
| "loss": 0.0464, |
| "step": 746 |
| }, |
| { |
| "epoch": 5.976, |
| "grad_norm": 0.6484919786453247, |
| "learning_rate": 2.798455212035305e-06, |
| "loss": 0.0542, |
| "step": 747 |
| }, |
| { |
| "epoch": 5.984, |
| "grad_norm": 0.930560290813446, |
| "learning_rate": 2.785259384049959e-06, |
| "loss": 0.0837, |
| "step": 748 |
| }, |
| { |
| "epoch": 5.992, |
| "grad_norm": 0.539526641368866, |
| "learning_rate": 2.7720827197837475e-06, |
| "loss": 0.0355, |
| "step": 749 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.653667151927948, |
| "learning_rate": 2.7589253332517736e-06, |
| "loss": 0.0448, |
| "step": 750 |
| }, |
| { |
| "epoch": 6.008, |
| "grad_norm": 0.37607839703559875, |
| "learning_rate": 2.745787338302341e-06, |
| "loss": 0.0128, |
| "step": 751 |
| }, |
| { |
| "epoch": 6.016, |
| "grad_norm": 0.5014586448669434, |
| "learning_rate": 2.7326688486159613e-06, |
| "loss": 0.0292, |
| "step": 752 |
| }, |
| { |
| "epoch": 6.024, |
| "grad_norm": 0.544358491897583, |
| "learning_rate": 2.7195699777043723e-06, |
| "loss": 0.0326, |
| "step": 753 |
| }, |
| { |
| "epoch": 6.032, |
| "grad_norm": 0.2762000262737274, |
| "learning_rate": 2.706490838909547e-06, |
| "loss": 0.0077, |
| "step": 754 |
| }, |
| { |
| "epoch": 6.04, |
| "grad_norm": 0.47887730598449707, |
| "learning_rate": 2.6934315454027323e-06, |
| "loss": 0.0224, |
| "step": 755 |
| }, |
| { |
| "epoch": 6.048, |
| "grad_norm": 0.45757901668548584, |
| "learning_rate": 2.680392210183446e-06, |
| "loss": 0.0205, |
| "step": 756 |
| }, |
| { |
| "epoch": 6.056, |
| "grad_norm": 0.37720590829849243, |
| "learning_rate": 2.6673729460785174e-06, |
| "loss": 0.0232, |
| "step": 757 |
| }, |
| { |
| "epoch": 6.064, |
| "grad_norm": 0.48954445123672485, |
| "learning_rate": 2.6543738657411033e-06, |
| "loss": 0.0196, |
| "step": 758 |
| }, |
| { |
| "epoch": 6.072, |
| "grad_norm": 0.4824325740337372, |
| "learning_rate": 2.6413950816497146e-06, |
| "loss": 0.0121, |
| "step": 759 |
| }, |
| { |
| "epoch": 6.08, |
| "grad_norm": 0.4821329712867737, |
| "learning_rate": 2.628436706107238e-06, |
| "loss": 0.0182, |
| "step": 760 |
| }, |
| { |
| "epoch": 6.088, |
| "grad_norm": 0.42847394943237305, |
| "learning_rate": 2.6154988512399784e-06, |
| "loss": 0.0147, |
| "step": 761 |
| }, |
| { |
| "epoch": 6.096, |
| "grad_norm": 0.6512643098831177, |
| "learning_rate": 2.6025816289966703e-06, |
| "loss": 0.0169, |
| "step": 762 |
| }, |
| { |
| "epoch": 6.104, |
| "grad_norm": 0.5844804644584656, |
| "learning_rate": 2.5896851511475184e-06, |
| "loss": 0.0202, |
| "step": 763 |
| }, |
| { |
| "epoch": 6.112, |
| "grad_norm": 0.44382765889167786, |
| "learning_rate": 2.5768095292832412e-06, |
| "loss": 0.0214, |
| "step": 764 |
| }, |
| { |
| "epoch": 6.12, |
| "grad_norm": 0.5254351496696472, |
| "learning_rate": 2.5639548748140803e-06, |
| "loss": 0.0287, |
| "step": 765 |
| }, |
| { |
| "epoch": 6.128, |
| "grad_norm": 0.41182348132133484, |
| "learning_rate": 2.5511212989688587e-06, |
| "loss": 0.0228, |
| "step": 766 |
| }, |
| { |
| "epoch": 6.136, |
| "grad_norm": 0.411668598651886, |
| "learning_rate": 2.5383089127940087e-06, |
| "loss": 0.0133, |
| "step": 767 |
| }, |
| { |
| "epoch": 6.144, |
| "grad_norm": 0.44052034616470337, |
| "learning_rate": 2.525517827152614e-06, |
| "loss": 0.015, |
| "step": 768 |
| }, |
| { |
| "epoch": 6.152, |
| "grad_norm": 0.4560883939266205, |
| "learning_rate": 2.5127481527234397e-06, |
| "loss": 0.0193, |
| "step": 769 |
| }, |
| { |
| "epoch": 6.16, |
| "grad_norm": 0.5043846964836121, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.024, |
| "step": 770 |
| }, |
| { |
| "epoch": 6.168, |
| "grad_norm": 0.3570071756839752, |
| "learning_rate": 2.487273479289574e-06, |
| "loss": 0.01, |
| "step": 771 |
| }, |
| { |
| "epoch": 6.176, |
| "grad_norm": 0.34656181931495667, |
| "learning_rate": 2.4745687007122636e-06, |
| "loss": 0.0118, |
| "step": 772 |
| }, |
| { |
| "epoch": 6.184, |
| "grad_norm": 0.44094350934028625, |
| "learning_rate": 2.4618857742000463e-06, |
| "loss": 0.024, |
| "step": 773 |
| }, |
| { |
| "epoch": 6.192, |
| "grad_norm": 0.4078379273414612, |
| "learning_rate": 2.449224809495815e-06, |
| "loss": 0.0267, |
| "step": 774 |
| }, |
| { |
| "epoch": 6.2, |
| "grad_norm": 0.45042550563812256, |
| "learning_rate": 2.436585916152426e-06, |
| "loss": 0.0248, |
| "step": 775 |
| }, |
| { |
| "epoch": 6.208, |
| "grad_norm": 0.5565236210823059, |
| "learning_rate": 2.423969203531768e-06, |
| "loss": 0.022, |
| "step": 776 |
| }, |
| { |
| "epoch": 6.216, |
| "grad_norm": 0.33686888217926025, |
| "learning_rate": 2.411374780803793e-06, |
| "loss": 0.0074, |
| "step": 777 |
| }, |
| { |
| "epoch": 6.224, |
| "grad_norm": 0.4109979569911957, |
| "learning_rate": 2.3988027569455895e-06, |
| "loss": 0.0114, |
| "step": 778 |
| }, |
| { |
| "epoch": 6.232, |
| "grad_norm": 0.5003446340560913, |
| "learning_rate": 2.3862532407404306e-06, |
| "loss": 0.0297, |
| "step": 779 |
| }, |
| { |
| "epoch": 6.24, |
| "grad_norm": 0.30241599678993225, |
| "learning_rate": 2.373726340776837e-06, |
| "loss": 0.0081, |
| "step": 780 |
| }, |
| { |
| "epoch": 6.248, |
| "grad_norm": 0.5940093398094177, |
| "learning_rate": 2.361222165447628e-06, |
| "loss": 0.0258, |
| "step": 781 |
| }, |
| { |
| "epoch": 6.256, |
| "grad_norm": 0.37417200207710266, |
| "learning_rate": 2.348740822949006e-06, |
| "loss": 0.011, |
| "step": 782 |
| }, |
| { |
| "epoch": 6.264, |
| "grad_norm": 0.36139291524887085, |
| "learning_rate": 2.33628242127959e-06, |
| "loss": 0.0195, |
| "step": 783 |
| }, |
| { |
| "epoch": 6.272, |
| "grad_norm": 0.4464220702648163, |
| "learning_rate": 2.323847068239504e-06, |
| "loss": 0.0182, |
| "step": 784 |
| }, |
| { |
| "epoch": 6.28, |
| "grad_norm": 0.3683570623397827, |
| "learning_rate": 2.3114348714294355e-06, |
| "loss": 0.0148, |
| "step": 785 |
| }, |
| { |
| "epoch": 6.288, |
| "grad_norm": 0.5403426885604858, |
| "learning_rate": 2.2990459382497086e-06, |
| "loss": 0.0123, |
| "step": 786 |
| }, |
| { |
| "epoch": 6.296, |
| "grad_norm": 0.47578224539756775, |
| "learning_rate": 2.2866803758993446e-06, |
| "loss": 0.0207, |
| "step": 787 |
| }, |
| { |
| "epoch": 6.304, |
| "grad_norm": 0.4909017086029053, |
| "learning_rate": 2.274338291375147e-06, |
| "loss": 0.0128, |
| "step": 788 |
| }, |
| { |
| "epoch": 6.312, |
| "grad_norm": 0.40206748247146606, |
| "learning_rate": 2.262019791470772e-06, |
| "loss": 0.0104, |
| "step": 789 |
| }, |
| { |
| "epoch": 6.32, |
| "grad_norm": 0.6169834733009338, |
| "learning_rate": 2.2497249827757933e-06, |
| "loss": 0.0316, |
| "step": 790 |
| }, |
| { |
| "epoch": 6.328, |
| "grad_norm": 0.36974218487739563, |
| "learning_rate": 2.2374539716748034e-06, |
| "loss": 0.008, |
| "step": 791 |
| }, |
| { |
| "epoch": 6.336, |
| "grad_norm": 0.455277681350708, |
| "learning_rate": 2.225206864346465e-06, |
| "loss": 0.0236, |
| "step": 792 |
| }, |
| { |
| "epoch": 6.344, |
| "grad_norm": 0.45521119236946106, |
| "learning_rate": 2.2129837667626147e-06, |
| "loss": 0.0184, |
| "step": 793 |
| }, |
| { |
| "epoch": 6.352, |
| "grad_norm": 0.542340099811554, |
| "learning_rate": 2.2007847846873342e-06, |
| "loss": 0.0257, |
| "step": 794 |
| }, |
| { |
| "epoch": 6.36, |
| "grad_norm": 0.4681219458580017, |
| "learning_rate": 2.188610023676041e-06, |
| "loss": 0.0293, |
| "step": 795 |
| }, |
| { |
| "epoch": 6.368, |
| "grad_norm": 0.4694705307483673, |
| "learning_rate": 2.176459589074566e-06, |
| "loss": 0.0147, |
| "step": 796 |
| }, |
| { |
| "epoch": 6.376, |
| "grad_norm": 0.4986151456832886, |
| "learning_rate": 2.164333586018259e-06, |
| "loss": 0.0242, |
| "step": 797 |
| }, |
| { |
| "epoch": 6.384, |
| "grad_norm": 0.3367967903614044, |
| "learning_rate": 2.1522321194310577e-06, |
| "loss": 0.0098, |
| "step": 798 |
| }, |
| { |
| "epoch": 6.392, |
| "grad_norm": 0.430538535118103, |
| "learning_rate": 2.1401552940245962e-06, |
| "loss": 0.0192, |
| "step": 799 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 0.6271944046020508, |
| "learning_rate": 2.1281032142972933e-06, |
| "loss": 0.0229, |
| "step": 800 |
| }, |
| { |
| "epoch": 6.408, |
| "grad_norm": 0.33021512627601624, |
| "learning_rate": 2.1160759845334483e-06, |
| "loss": 0.0168, |
| "step": 801 |
| }, |
| { |
| "epoch": 6.416, |
| "grad_norm": 0.5376062989234924, |
| "learning_rate": 2.1040737088023323e-06, |
| "loss": 0.0154, |
| "step": 802 |
| }, |
| { |
| "epoch": 6.424, |
| "grad_norm": 0.5122645497322083, |
| "learning_rate": 2.0920964909573065e-06, |
| "loss": 0.0268, |
| "step": 803 |
| }, |
| { |
| "epoch": 6.432, |
| "grad_norm": 0.37193113565444946, |
| "learning_rate": 2.080144434634898e-06, |
| "loss": 0.0133, |
| "step": 804 |
| }, |
| { |
| "epoch": 6.44, |
| "grad_norm": 0.32150623202323914, |
| "learning_rate": 2.068217643253925e-06, |
| "loss": 0.0122, |
| "step": 805 |
| }, |
| { |
| "epoch": 6.448, |
| "grad_norm": 0.4551410675048828, |
| "learning_rate": 2.056316220014588e-06, |
| "loss": 0.0253, |
| "step": 806 |
| }, |
| { |
| "epoch": 6.456, |
| "grad_norm": 0.37979602813720703, |
| "learning_rate": 2.0444402678975876e-06, |
| "loss": 0.0175, |
| "step": 807 |
| }, |
| { |
| "epoch": 6.464, |
| "grad_norm": 0.38569003343582153, |
| "learning_rate": 2.0325898896632178e-06, |
| "loss": 0.0099, |
| "step": 808 |
| }, |
| { |
| "epoch": 6.4719999999999995, |
| "grad_norm": 0.4271693527698517, |
| "learning_rate": 2.0207651878505e-06, |
| "loss": 0.0129, |
| "step": 809 |
| }, |
| { |
| "epoch": 6.48, |
| "grad_norm": 0.3800446689128876, |
| "learning_rate": 2.0089662647762716e-06, |
| "loss": 0.0128, |
| "step": 810 |
| }, |
| { |
| "epoch": 6.4879999999999995, |
| "grad_norm": 0.4142042398452759, |
| "learning_rate": 1.997193222534316e-06, |
| "loss": 0.0158, |
| "step": 811 |
| }, |
| { |
| "epoch": 6.496, |
| "grad_norm": 0.46278542280197144, |
| "learning_rate": 1.9854461629944764e-06, |
| "loss": 0.0166, |
| "step": 812 |
| }, |
| { |
| "epoch": 6.504, |
| "grad_norm": 0.40608370304107666, |
| "learning_rate": 1.9737251878017678e-06, |
| "loss": 0.0205, |
| "step": 813 |
| }, |
| { |
| "epoch": 6.5120000000000005, |
| "grad_norm": 0.5644993185997009, |
| "learning_rate": 1.962030398375506e-06, |
| "loss": 0.0194, |
| "step": 814 |
| }, |
| { |
| "epoch": 6.52, |
| "grad_norm": 0.4298612177371979, |
| "learning_rate": 1.950361895908427e-06, |
| "loss": 0.0157, |
| "step": 815 |
| }, |
| { |
| "epoch": 6.5280000000000005, |
| "grad_norm": 0.8214244246482849, |
| "learning_rate": 1.9387197813658092e-06, |
| "loss": 0.0338, |
| "step": 816 |
| }, |
| { |
| "epoch": 6.536, |
| "grad_norm": 0.4823768436908722, |
| "learning_rate": 1.927104155484602e-06, |
| "loss": 0.0161, |
| "step": 817 |
| }, |
| { |
| "epoch": 6.5440000000000005, |
| "grad_norm": 0.45921590924263, |
| "learning_rate": 1.915515118772555e-06, |
| "loss": 0.0148, |
| "step": 818 |
| }, |
| { |
| "epoch": 6.552, |
| "grad_norm": 0.5129720568656921, |
| "learning_rate": 1.9039527715073424e-06, |
| "loss": 0.0163, |
| "step": 819 |
| }, |
| { |
| "epoch": 6.5600000000000005, |
| "grad_norm": 0.38792961835861206, |
| "learning_rate": 1.8924172137357038e-06, |
| "loss": 0.0155, |
| "step": 820 |
| }, |
| { |
| "epoch": 6.568, |
| "grad_norm": 0.5734500885009766, |
| "learning_rate": 1.8809085452725744e-06, |
| "loss": 0.0195, |
| "step": 821 |
| }, |
| { |
| "epoch": 6.576, |
| "grad_norm": 0.42012128233909607, |
| "learning_rate": 1.8694268657002197e-06, |
| "loss": 0.0179, |
| "step": 822 |
| }, |
| { |
| "epoch": 6.584, |
| "grad_norm": 0.6506406664848328, |
| "learning_rate": 1.8579722743673773e-06, |
| "loss": 0.0227, |
| "step": 823 |
| }, |
| { |
| "epoch": 6.592, |
| "grad_norm": 0.4907991886138916, |
| "learning_rate": 1.8465448703883959e-06, |
| "loss": 0.011, |
| "step": 824 |
| }, |
| { |
| "epoch": 6.6, |
| "grad_norm": 0.4725576639175415, |
| "learning_rate": 1.8351447526423728e-06, |
| "loss": 0.0149, |
| "step": 825 |
| }, |
| { |
| "epoch": 6.608, |
| "grad_norm": 0.4036758840084076, |
| "learning_rate": 1.8237720197723075e-06, |
| "loss": 0.0184, |
| "step": 826 |
| }, |
| { |
| "epoch": 6.616, |
| "grad_norm": 0.3650066554546356, |
| "learning_rate": 1.812426770184243e-06, |
| "loss": 0.015, |
| "step": 827 |
| }, |
| { |
| "epoch": 6.624, |
| "grad_norm": 0.38544753193855286, |
| "learning_rate": 1.8011091020464138e-06, |
| "loss": 0.0155, |
| "step": 828 |
| }, |
| { |
| "epoch": 6.632, |
| "grad_norm": 0.444048672914505, |
| "learning_rate": 1.789819113288397e-06, |
| "loss": 0.0177, |
| "step": 829 |
| }, |
| { |
| "epoch": 6.64, |
| "grad_norm": 0.5285355448722839, |
| "learning_rate": 1.7785569016002686e-06, |
| "loss": 0.0277, |
| "step": 830 |
| }, |
| { |
| "epoch": 6.648, |
| "grad_norm": 0.5375143885612488, |
| "learning_rate": 1.7673225644317487e-06, |
| "loss": 0.0203, |
| "step": 831 |
| }, |
| { |
| "epoch": 6.656, |
| "grad_norm": 0.32890117168426514, |
| "learning_rate": 1.75611619899137e-06, |
| "loss": 0.0144, |
| "step": 832 |
| }, |
| { |
| "epoch": 6.664, |
| "grad_norm": 0.3891383409500122, |
| "learning_rate": 1.7449379022456297e-06, |
| "loss": 0.013, |
| "step": 833 |
| }, |
| { |
| "epoch": 6.672, |
| "grad_norm": 0.4745618402957916, |
| "learning_rate": 1.7337877709181527e-06, |
| "loss": 0.0185, |
| "step": 834 |
| }, |
| { |
| "epoch": 6.68, |
| "grad_norm": 0.38062867522239685, |
| "learning_rate": 1.7226659014888548e-06, |
| "loss": 0.0102, |
| "step": 835 |
| }, |
| { |
| "epoch": 6.688, |
| "grad_norm": 0.787895917892456, |
| "learning_rate": 1.711572390193102e-06, |
| "loss": 0.0302, |
| "step": 836 |
| }, |
| { |
| "epoch": 6.696, |
| "grad_norm": 0.5411037802696228, |
| "learning_rate": 1.7005073330208881e-06, |
| "loss": 0.0249, |
| "step": 837 |
| }, |
| { |
| "epoch": 6.704, |
| "grad_norm": 0.4542792737483978, |
| "learning_rate": 1.689470825715998e-06, |
| "loss": 0.0112, |
| "step": 838 |
| }, |
| { |
| "epoch": 6.712, |
| "grad_norm": 0.5368099808692932, |
| "learning_rate": 1.6784629637751814e-06, |
| "loss": 0.0197, |
| "step": 839 |
| }, |
| { |
| "epoch": 6.72, |
| "grad_norm": 0.3907606899738312, |
| "learning_rate": 1.6674838424473172e-06, |
| "loss": 0.0131, |
| "step": 840 |
| }, |
| { |
| "epoch": 6.728, |
| "grad_norm": 0.49019739031791687, |
| "learning_rate": 1.6565335567326112e-06, |
| "loss": 0.0216, |
| "step": 841 |
| }, |
| { |
| "epoch": 6.736, |
| "grad_norm": 0.2934483587741852, |
| "learning_rate": 1.6456122013817477e-06, |
| "loss": 0.005, |
| "step": 842 |
| }, |
| { |
| "epoch": 6.744, |
| "grad_norm": 0.8866119384765625, |
| "learning_rate": 1.6347198708950884e-06, |
| "loss": 0.0194, |
| "step": 843 |
| }, |
| { |
| "epoch": 6.752, |
| "grad_norm": 0.7154141664505005, |
| "learning_rate": 1.6238566595218475e-06, |
| "loss": 0.0268, |
| "step": 844 |
| }, |
| { |
| "epoch": 6.76, |
| "grad_norm": 0.3840312361717224, |
| "learning_rate": 1.6130226612592787e-06, |
| "loss": 0.0081, |
| "step": 845 |
| }, |
| { |
| "epoch": 6.768, |
| "grad_norm": 0.513756275177002, |
| "learning_rate": 1.6022179698518525e-06, |
| "loss": 0.0203, |
| "step": 846 |
| }, |
| { |
| "epoch": 6.776, |
| "grad_norm": 0.7530612349510193, |
| "learning_rate": 1.591442678790467e-06, |
| "loss": 0.0494, |
| "step": 847 |
| }, |
| { |
| "epoch": 6.784, |
| "grad_norm": 0.29639124870300293, |
| "learning_rate": 1.580696881311611e-06, |
| "loss": 0.0076, |
| "step": 848 |
| }, |
| { |
| "epoch": 6.792, |
| "grad_norm": 0.812698483467102, |
| "learning_rate": 1.5699806703965787e-06, |
| "loss": 0.0288, |
| "step": 849 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 0.47915413975715637, |
| "learning_rate": 1.5592941387706562e-06, |
| "loss": 0.0143, |
| "step": 850 |
| }, |
| { |
| "epoch": 6.808, |
| "grad_norm": 0.4331102669239044, |
| "learning_rate": 1.5486373789023206e-06, |
| "loss": 0.0149, |
| "step": 851 |
| }, |
| { |
| "epoch": 6.816, |
| "grad_norm": 0.44031015038490295, |
| "learning_rate": 1.538010483002435e-06, |
| "loss": 0.0183, |
| "step": 852 |
| }, |
| { |
| "epoch": 6.824, |
| "grad_norm": 0.4399348795413971, |
| "learning_rate": 1.5274135430234654e-06, |
| "loss": 0.0181, |
| "step": 853 |
| }, |
| { |
| "epoch": 6.832, |
| "grad_norm": 0.4304785430431366, |
| "learning_rate": 1.5168466506586654e-06, |
| "loss": 0.0173, |
| "step": 854 |
| }, |
| { |
| "epoch": 6.84, |
| "grad_norm": 0.5548763871192932, |
| "learning_rate": 1.506309897341297e-06, |
| "loss": 0.0295, |
| "step": 855 |
| }, |
| { |
| "epoch": 6.848, |
| "grad_norm": 0.4924432933330536, |
| "learning_rate": 1.4958033742438348e-06, |
| "loss": 0.0096, |
| "step": 856 |
| }, |
| { |
| "epoch": 6.856, |
| "grad_norm": 0.4524632394313812, |
| "learning_rate": 1.4853271722771772e-06, |
| "loss": 0.0211, |
| "step": 857 |
| }, |
| { |
| "epoch": 6.864, |
| "grad_norm": 0.4187138080596924, |
| "learning_rate": 1.4748813820898554e-06, |
| "loss": 0.0179, |
| "step": 858 |
| }, |
| { |
| "epoch": 6.872, |
| "grad_norm": 0.7639076113700867, |
| "learning_rate": 1.4644660940672628e-06, |
| "loss": 0.0138, |
| "step": 859 |
| }, |
| { |
| "epoch": 6.88, |
| "grad_norm": 0.7690819501876831, |
| "learning_rate": 1.454081398330855e-06, |
| "loss": 0.0202, |
| "step": 860 |
| }, |
| { |
| "epoch": 6.888, |
| "grad_norm": 0.7614538073539734, |
| "learning_rate": 1.4437273847373778e-06, |
| "loss": 0.0132, |
| "step": 861 |
| }, |
| { |
| "epoch": 6.896, |
| "grad_norm": 0.49523505568504333, |
| "learning_rate": 1.4334041428781003e-06, |
| "loss": 0.0196, |
| "step": 862 |
| }, |
| { |
| "epoch": 6.904, |
| "grad_norm": 0.9732420444488525, |
| "learning_rate": 1.4231117620780188e-06, |
| "loss": 0.019, |
| "step": 863 |
| }, |
| { |
| "epoch": 6.912, |
| "grad_norm": 0.42634353041648865, |
| "learning_rate": 1.4128503313951008e-06, |
| "loss": 0.0231, |
| "step": 864 |
| }, |
| { |
| "epoch": 6.92, |
| "grad_norm": 0.5485000610351562, |
| "learning_rate": 1.4026199396195078e-06, |
| "loss": 0.0207, |
| "step": 865 |
| }, |
| { |
| "epoch": 6.928, |
| "grad_norm": 0.3883606791496277, |
| "learning_rate": 1.3924206752728282e-06, |
| "loss": 0.0157, |
| "step": 866 |
| }, |
| { |
| "epoch": 6.936, |
| "grad_norm": 2.5304391384124756, |
| "learning_rate": 1.3822526266073044e-06, |
| "loss": 0.0119, |
| "step": 867 |
| }, |
| { |
| "epoch": 6.944, |
| "grad_norm": 0.4002678692340851, |
| "learning_rate": 1.3721158816050872e-06, |
| "loss": 0.0158, |
| "step": 868 |
| }, |
| { |
| "epoch": 6.952, |
| "grad_norm": 0.45000985264778137, |
| "learning_rate": 1.3620105279774532e-06, |
| "loss": 0.0135, |
| "step": 869 |
| }, |
| { |
| "epoch": 6.96, |
| "grad_norm": 2.6394693851470947, |
| "learning_rate": 1.3519366531640589e-06, |
| "loss": 0.0196, |
| "step": 870 |
| }, |
| { |
| "epoch": 6.968, |
| "grad_norm": 0.5044559240341187, |
| "learning_rate": 1.3418943443321807e-06, |
| "loss": 0.0145, |
| "step": 871 |
| }, |
| { |
| "epoch": 6.976, |
| "grad_norm": 0.39515820145606995, |
| "learning_rate": 1.3318836883759634e-06, |
| "loss": 0.0137, |
| "step": 872 |
| }, |
| { |
| "epoch": 6.984, |
| "grad_norm": 0.3949025273323059, |
| "learning_rate": 1.3219047719156575e-06, |
| "loss": 0.0164, |
| "step": 873 |
| }, |
| { |
| "epoch": 6.992, |
| "grad_norm": 0.4184512495994568, |
| "learning_rate": 1.3119576812968893e-06, |
| "loss": 0.0185, |
| "step": 874 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.36798545718193054, |
| "learning_rate": 1.3020425025898926e-06, |
| "loss": 0.0108, |
| "step": 875 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1125, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.045822625779548e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|