| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 375, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008, |
| "grad_norm": 3.784468173980713, |
| "learning_rate": 0.0, |
| "loss": 0.8231, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 3.725552797317505, |
| "learning_rate": 1.7543859649122808e-07, |
| "loss": 1.0003, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 3.5788533687591553, |
| "learning_rate": 3.5087719298245616e-07, |
| "loss": 0.8452, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 3.4994688034057617, |
| "learning_rate": 5.263157894736843e-07, |
| "loss": 0.9015, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 3.340575695037842, |
| "learning_rate": 7.017543859649123e-07, |
| "loss": 0.91, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 3.4169387817382812, |
| "learning_rate": 8.771929824561404e-07, |
| "loss": 0.8981, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 3.199388027191162, |
| "learning_rate": 1.0526315789473685e-06, |
| "loss": 0.8082, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 3.5631041526794434, |
| "learning_rate": 1.2280701754385965e-06, |
| "loss": 1.0333, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 2.9914121627807617, |
| "learning_rate": 1.4035087719298246e-06, |
| "loss": 0.9455, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 3.418433904647827, |
| "learning_rate": 1.5789473684210526e-06, |
| "loss": 0.7966, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 2.4177350997924805, |
| "learning_rate": 1.7543859649122807e-06, |
| "loss": 0.9202, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.7621887922286987, |
| "learning_rate": 1.929824561403509e-06, |
| "loss": 0.8102, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 2.0125510692596436, |
| "learning_rate": 2.105263157894737e-06, |
| "loss": 0.8969, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 1.7342252731323242, |
| "learning_rate": 2.280701754385965e-06, |
| "loss": 0.8587, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.712204098701477, |
| "learning_rate": 2.456140350877193e-06, |
| "loss": 0.8708, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 1.412792682647705, |
| "learning_rate": 2.631578947368421e-06, |
| "loss": 0.8413, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 1.5133228302001953, |
| "learning_rate": 2.8070175438596493e-06, |
| "loss": 0.7527, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 1.6985952854156494, |
| "learning_rate": 2.9824561403508774e-06, |
| "loss": 0.9278, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 1.118333101272583, |
| "learning_rate": 3.157894736842105e-06, |
| "loss": 0.7364, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.1672853231430054, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.8443, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 1.0605567693710327, |
| "learning_rate": 3.5087719298245615e-06, |
| "loss": 0.714, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 1.2411671876907349, |
| "learning_rate": 3.6842105263157896e-06, |
| "loss": 0.8215, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 1.144175410270691, |
| "learning_rate": 3.859649122807018e-06, |
| "loss": 0.6955, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 1.3835996389389038, |
| "learning_rate": 4.035087719298246e-06, |
| "loss": 0.7968, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.1685224771499634, |
| "learning_rate": 4.210526315789474e-06, |
| "loss": 0.7425, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 1.1630430221557617, |
| "learning_rate": 4.385964912280702e-06, |
| "loss": 0.7122, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 1.054180383682251, |
| "learning_rate": 4.56140350877193e-06, |
| "loss": 0.7967, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 1.0310646295547485, |
| "learning_rate": 4.736842105263158e-06, |
| "loss": 0.6822, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 1.1047680377960205, |
| "learning_rate": 4.912280701754386e-06, |
| "loss": 0.739, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.712615966796875, |
| "learning_rate": 5.087719298245615e-06, |
| "loss": 0.7789, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 1.1467419862747192, |
| "learning_rate": 5.263157894736842e-06, |
| "loss": 0.7517, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.9748108386993408, |
| "learning_rate": 5.438596491228071e-06, |
| "loss": 0.7997, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 1.1908164024353027, |
| "learning_rate": 5.6140350877192985e-06, |
| "loss": 0.7893, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 1.160500168800354, |
| "learning_rate": 5.789473684210527e-06, |
| "loss": 0.7667, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.0798025131225586, |
| "learning_rate": 5.964912280701755e-06, |
| "loss": 0.6695, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 1.128517746925354, |
| "learning_rate": 6.140350877192983e-06, |
| "loss": 0.7281, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 1.1588135957717896, |
| "learning_rate": 6.31578947368421e-06, |
| "loss": 0.6856, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.9977500438690186, |
| "learning_rate": 6.491228070175439e-06, |
| "loss": 0.7085, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 1.0899662971496582, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.7503, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.0250076055526733, |
| "learning_rate": 6.842105263157896e-06, |
| "loss": 0.7469, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 1.0345467329025269, |
| "learning_rate": 7.017543859649123e-06, |
| "loss": 0.7034, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.9953707456588745, |
| "learning_rate": 7.192982456140352e-06, |
| "loss": 0.8141, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 1.167507290840149, |
| "learning_rate": 7.368421052631579e-06, |
| "loss": 0.7411, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.9854204058647156, |
| "learning_rate": 7.5438596491228074e-06, |
| "loss": 0.7219, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.1585136651992798, |
| "learning_rate": 7.719298245614036e-06, |
| "loss": 0.7666, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.9028245210647583, |
| "learning_rate": 7.894736842105265e-06, |
| "loss": 0.677, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 1.0796241760253906, |
| "learning_rate": 8.070175438596492e-06, |
| "loss": 0.7579, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.8502395153045654, |
| "learning_rate": 8.24561403508772e-06, |
| "loss": 0.6178, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 1.0221744775772095, |
| "learning_rate": 8.421052631578948e-06, |
| "loss": 0.7143, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.1040499210357666, |
| "learning_rate": 8.596491228070176e-06, |
| "loss": 0.739, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 1.3160624504089355, |
| "learning_rate": 8.771929824561405e-06, |
| "loss": 0.7673, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.9868547916412354, |
| "learning_rate": 8.947368421052632e-06, |
| "loss": 0.6158, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.97400963306427, |
| "learning_rate": 9.12280701754386e-06, |
| "loss": 0.7602, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.9297606945037842, |
| "learning_rate": 9.298245614035088e-06, |
| "loss": 0.6972, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.9819108247756958, |
| "learning_rate": 9.473684210526315e-06, |
| "loss": 0.6661, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.9263789057731628, |
| "learning_rate": 9.649122807017545e-06, |
| "loss": 0.6548, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 1.1375892162322998, |
| "learning_rate": 9.824561403508772e-06, |
| "loss": 0.7532, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.9523381590843201, |
| "learning_rate": 1e-05, |
| "loss": 0.7023, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 1.0329796075820923, |
| "learning_rate": 9.999978367986988e-06, |
| "loss": 0.6951, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.9789192080497742, |
| "learning_rate": 9.999913472135126e-06, |
| "loss": 0.7022, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 1.0567715167999268, |
| "learning_rate": 9.999805313005946e-06, |
| "loss": 0.7192, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.9230039715766907, |
| "learning_rate": 9.99965389153533e-06, |
| "loss": 0.6258, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 1.0457018613815308, |
| "learning_rate": 9.999459209033495e-06, |
| "loss": 0.7205, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.8816724419593811, |
| "learning_rate": 9.999221267184993e-06, |
| "loss": 0.6797, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.7928981184959412, |
| "learning_rate": 9.998940068048688e-06, |
| "loss": 0.6074, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.911626398563385, |
| "learning_rate": 9.998615614057743e-06, |
| "loss": 0.5725, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 1.0122913122177124, |
| "learning_rate": 9.998247908019594e-06, |
| "loss": 0.7628, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.9864280819892883, |
| "learning_rate": 9.997836953115927e-06, |
| "loss": 0.6184, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 1.4457077980041504, |
| "learning_rate": 9.997382752902658e-06, |
| "loss": 0.7471, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.9342664480209351, |
| "learning_rate": 9.996885311309892e-06, |
| "loss": 0.7304, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 1.1148232221603394, |
| "learning_rate": 9.996344632641895e-06, |
| "loss": 0.8129, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 1.0656917095184326, |
| "learning_rate": 9.995760721577053e-06, |
| "loss": 0.7355, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 1.0649811029434204, |
| "learning_rate": 9.995133583167833e-06, |
| "loss": 0.7288, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 1.1274362802505493, |
| "learning_rate": 9.994463222840748e-06, |
| "loss": 0.7243, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.8427581191062927, |
| "learning_rate": 9.993749646396286e-06, |
| "loss": 0.5633, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.98853600025177, |
| "learning_rate": 9.992992860008893e-06, |
| "loss": 0.7045, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.995302677154541, |
| "learning_rate": 9.99219287022689e-06, |
| "loss": 0.6268, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.9867312908172607, |
| "learning_rate": 9.991349683972435e-06, |
| "loss": 0.6828, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.9813309907913208, |
| "learning_rate": 9.990463308541452e-06, |
| "loss": 0.6601, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.9621861577033997, |
| "learning_rate": 9.989533751603578e-06, |
| "loss": 0.6984, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.988609790802002, |
| "learning_rate": 9.988561021202083e-06, |
| "loss": 0.6877, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.9378901720046997, |
| "learning_rate": 9.987545125753818e-06, |
| "loss": 0.6282, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 0.932298481464386, |
| "learning_rate": 9.986486074049131e-06, |
| "loss": 0.7014, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.9098712205886841, |
| "learning_rate": 9.985383875251783e-06, |
| "loss": 0.6461, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.9377619624137878, |
| "learning_rate": 9.98423853889889e-06, |
| "loss": 0.6283, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 1.2829806804656982, |
| "learning_rate": 9.983050074900824e-06, |
| "loss": 0.6713, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.981778085231781, |
| "learning_rate": 9.98181849354113e-06, |
| "loss": 0.643, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 1.2414823770523071, |
| "learning_rate": 9.980543805476447e-06, |
| "loss": 0.7538, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 1.1841117143630981, |
| "learning_rate": 9.979226021736396e-06, |
| "loss": 0.8119, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.5399696826934814, |
| "learning_rate": 9.977865153723508e-06, |
| "loss": 0.726, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 0.8497399687767029, |
| "learning_rate": 9.976461213213104e-06, |
| "loss": 0.6109, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.9964956045150757, |
| "learning_rate": 9.975014212353212e-06, |
| "loss": 0.6957, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.9907211065292358, |
| "learning_rate": 9.973524163664447e-06, |
| "loss": 0.7035, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 1.0158443450927734, |
| "learning_rate": 9.971991080039912e-06, |
| "loss": 0.764, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.0125267505645752, |
| "learning_rate": 9.970414974745077e-06, |
| "loss": 0.6104, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 1.0134661197662354, |
| "learning_rate": 9.968795861417676e-06, |
| "loss": 0.6558, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 1.1835541725158691, |
| "learning_rate": 9.967133754067581e-06, |
| "loss": 0.7853, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.9032031893730164, |
| "learning_rate": 9.965428667076687e-06, |
| "loss": 0.7235, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 1.2750600576400757, |
| "learning_rate": 9.963680615198774e-06, |
| "loss": 0.6768, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.0533162355422974, |
| "learning_rate": 9.961889613559396e-06, |
| "loss": 0.6036, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.9049885869026184, |
| "learning_rate": 9.960055677655743e-06, |
| "loss": 0.5954, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 1.2661690711975098, |
| "learning_rate": 9.958178823356503e-06, |
| "loss": 0.6608, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.957478940486908, |
| "learning_rate": 9.956259066901733e-06, |
| "loss": 0.6722, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 1.0017534494400024, |
| "learning_rate": 9.954296424902709e-06, |
| "loss": 0.699, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.9536829590797424, |
| "learning_rate": 9.95229091434179e-06, |
| "loss": 0.7978, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.9466567635536194, |
| "learning_rate": 9.950242552572272e-06, |
| "loss": 0.6377, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.8663733601570129, |
| "learning_rate": 9.948151357318228e-06, |
| "loss": 0.6943, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.9885082840919495, |
| "learning_rate": 9.946017346674362e-06, |
| "loss": 0.8259, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.9989089369773865, |
| "learning_rate": 9.943840539105853e-06, |
| "loss": 0.7219, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.9034218192100525, |
| "learning_rate": 9.941620953448195e-06, |
| "loss": 0.6568, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.9170519709587097, |
| "learning_rate": 9.939358608907026e-06, |
| "loss": 0.6996, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 1.0670710802078247, |
| "learning_rate": 9.937053525057977e-06, |
| "loss": 0.6088, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 1.0475540161132812, |
| "learning_rate": 9.934705721846487e-06, |
| "loss": 0.7299, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.8856591582298279, |
| "learning_rate": 9.932315219587641e-06, |
| "loss": 0.6026, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.9878740906715393, |
| "learning_rate": 9.92988203896599e-06, |
| "loss": 0.5906, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.9897602796554565, |
| "learning_rate": 9.927406201035368e-06, |
| "loss": 0.6622, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 1.0537142753601074, |
| "learning_rate": 9.924887727218724e-06, |
| "loss": 0.696, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.9410306811332703, |
| "learning_rate": 9.922326639307918e-06, |
| "loss": 0.6053, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.9250759482383728, |
| "learning_rate": 9.919722959463545e-06, |
| "loss": 0.702, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.9259200692176819, |
| "learning_rate": 9.917076710214739e-06, |
| "loss": 0.6032, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 1.0778374671936035, |
| "learning_rate": 9.914387914458983e-06, |
| "loss": 0.6911, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.8513089418411255, |
| "learning_rate": 9.911656595461899e-06, |
| "loss": 0.6422, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 1.1916236877441406, |
| "learning_rate": 9.908882776857057e-06, |
| "loss": 0.7315, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.9031611680984497, |
| "learning_rate": 9.906066482645774e-06, |
| "loss": 0.6034, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8987178802490234, |
| "learning_rate": 9.903207737196892e-06, |
| "loss": 0.7292, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 1.241353988647461, |
| "learning_rate": 9.900306565246579e-06, |
| "loss": 0.7679, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.016, |
| "grad_norm": 1.0967661142349243, |
| "learning_rate": 9.89736299189811e-06, |
| "loss": 0.6499, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 0.8532967567443848, |
| "learning_rate": 9.894377042621654e-06, |
| "loss": 0.6416, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.032, |
| "grad_norm": 0.951801061630249, |
| "learning_rate": 9.891348743254046e-06, |
| "loss": 0.6654, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 1.057618498802185, |
| "learning_rate": 9.888278119998573e-06, |
| "loss": 0.623, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.048, |
| "grad_norm": 0.8845769166946411, |
| "learning_rate": 9.885165199424738e-06, |
| "loss": 0.5969, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 0.8258888721466064, |
| "learning_rate": 9.882010008468038e-06, |
| "loss": 0.4811, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 0.9165515899658203, |
| "learning_rate": 9.878812574429722e-06, |
| "loss": 0.6168, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 0.8288542032241821, |
| "learning_rate": 9.875572924976568e-06, |
| "loss": 0.5462, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 1.180208444595337, |
| "learning_rate": 9.87229108814063e-06, |
| "loss": 0.5903, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 0.9072532653808594, |
| "learning_rate": 9.868967092319003e-06, |
| "loss": 0.5509, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.096, |
| "grad_norm": 1.2131155729293823, |
| "learning_rate": 9.865600966273576e-06, |
| "loss": 0.5167, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 1.149257779121399, |
| "learning_rate": 9.86219273913078e-06, |
| "loss": 0.5793, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.112, |
| "grad_norm": 0.8809230923652649, |
| "learning_rate": 9.858742440381343e-06, |
| "loss": 0.5457, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.8877855539321899, |
| "learning_rate": 9.855250099880026e-06, |
| "loss": 0.5863, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.1280000000000001, |
| "grad_norm": 1.0398492813110352, |
| "learning_rate": 9.851715747845372e-06, |
| "loss": 0.6525, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 0.9238712191581726, |
| "learning_rate": 9.848139414859441e-06, |
| "loss": 0.586, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.144, |
| "grad_norm": 0.9197869896888733, |
| "learning_rate": 9.844521131867546e-06, |
| "loss": 0.4541, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 0.9942979216575623, |
| "learning_rate": 9.840860930177984e-06, |
| "loss": 0.655, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 1.1098957061767578, |
| "learning_rate": 9.837158841461767e-06, |
| "loss": 0.603, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 0.9269440770149231, |
| "learning_rate": 9.833414897752346e-06, |
| "loss": 0.5672, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.176, |
| "grad_norm": 1.0234994888305664, |
| "learning_rate": 9.829629131445342e-06, |
| "loss": 0.5307, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 0.9524175524711609, |
| "learning_rate": 9.825801575298248e-06, |
| "loss": 0.5078, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.192, |
| "grad_norm": 0.9059958457946777, |
| "learning_rate": 9.821932262430164e-06, |
| "loss": 0.5077, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.9674916863441467, |
| "learning_rate": 9.818021226321502e-06, |
| "loss": 0.554, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.208, |
| "grad_norm": 0.9286289215087891, |
| "learning_rate": 9.814068500813692e-06, |
| "loss": 0.5633, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 0.9656635522842407, |
| "learning_rate": 9.8100741201089e-06, |
| "loss": 0.5439, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.224, |
| "grad_norm": 0.9533044099807739, |
| "learning_rate": 9.806038118769724e-06, |
| "loss": 0.5286, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.232, |
| "grad_norm": 0.9019532203674316, |
| "learning_rate": 9.801960531718898e-06, |
| "loss": 0.477, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.9925954341888428, |
| "learning_rate": 9.797841394238987e-06, |
| "loss": 0.6048, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.248, |
| "grad_norm": 1.058506965637207, |
| "learning_rate": 9.793680741972084e-06, |
| "loss": 0.5777, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.256, |
| "grad_norm": 0.9570199847221375, |
| "learning_rate": 9.789478610919508e-06, |
| "loss": 0.5514, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.264, |
| "grad_norm": 0.9736652970314026, |
| "learning_rate": 9.785235037441473e-06, |
| "loss": 0.5932, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.272, |
| "grad_norm": 0.9769409894943237, |
| "learning_rate": 9.780950058256802e-06, |
| "loss": 0.519, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.854789137840271, |
| "learning_rate": 9.77662371044258e-06, |
| "loss": 0.5516, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.288, |
| "grad_norm": 1.084175944328308, |
| "learning_rate": 9.77225603143385e-06, |
| "loss": 0.7489, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.296, |
| "grad_norm": 0.9743218421936035, |
| "learning_rate": 9.767847059023292e-06, |
| "loss": 0.6241, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.304, |
| "grad_norm": 0.7581115365028381, |
| "learning_rate": 9.763396831360884e-06, |
| "loss": 0.586, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.312, |
| "grad_norm": 0.9065977931022644, |
| "learning_rate": 9.75890538695358e-06, |
| "loss": 0.592, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.892532467842102, |
| "learning_rate": 9.75437276466497e-06, |
| "loss": 0.6491, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.328, |
| "grad_norm": 1.1423097848892212, |
| "learning_rate": 9.749799003714954e-06, |
| "loss": 0.5284, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.336, |
| "grad_norm": 0.9437663555145264, |
| "learning_rate": 9.745184143679398e-06, |
| "loss": 0.5462, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 1.9607871770858765, |
| "learning_rate": 9.74052822448978e-06, |
| "loss": 0.6264, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.3519999999999999, |
| "grad_norm": 0.952648937702179, |
| "learning_rate": 9.735831286432869e-06, |
| "loss": 0.6131, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.9836501479148865, |
| "learning_rate": 9.731093370150349e-06, |
| "loss": 0.5708, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.3679999999999999, |
| "grad_norm": 0.9113344550132751, |
| "learning_rate": 9.72631451663849e-06, |
| "loss": 0.441, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.376, |
| "grad_norm": 0.7625031471252441, |
| "learning_rate": 9.721494767247779e-06, |
| "loss": 0.5014, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.384, |
| "grad_norm": 0.841316819190979, |
| "learning_rate": 9.71663416368257e-06, |
| "loss": 0.5227, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.392, |
| "grad_norm": 0.9969580173492432, |
| "learning_rate": 9.71173274800072e-06, |
| "loss": 0.5641, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.746874988079071, |
| "learning_rate": 9.70679056261322e-06, |
| "loss": 0.5006, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 1.0710760354995728, |
| "learning_rate": 9.70180765028384e-06, |
| "loss": 0.5583, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.416, |
| "grad_norm": 0.9600836038589478, |
| "learning_rate": 9.696784054128749e-06, |
| "loss": 0.6547, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.424, |
| "grad_norm": 0.9146630764007568, |
| "learning_rate": 9.691719817616148e-06, |
| "loss": 0.6432, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.432, |
| "grad_norm": 1.3010084629058838, |
| "learning_rate": 9.686614984565888e-06, |
| "loss": 0.6419, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.8703446388244629, |
| "learning_rate": 9.681469599149093e-06, |
| "loss": 0.6048, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.448, |
| "grad_norm": 0.9326883554458618, |
| "learning_rate": 9.676283705887783e-06, |
| "loss": 0.5797, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.456, |
| "grad_norm": 0.8628938794136047, |
| "learning_rate": 9.671057349654481e-06, |
| "loss": 0.5471, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.464, |
| "grad_norm": 0.8387150168418884, |
| "learning_rate": 9.66579057567183e-06, |
| "loss": 0.4629, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.472, |
| "grad_norm": 0.9188330769538879, |
| "learning_rate": 9.660483429512198e-06, |
| "loss": 0.5265, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.8853181600570679, |
| "learning_rate": 9.65513595709729e-06, |
| "loss": 0.5754, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.488, |
| "grad_norm": 0.8418251872062683, |
| "learning_rate": 9.649748204697741e-06, |
| "loss": 0.5653, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.496, |
| "grad_norm": 1.0010942220687866, |
| "learning_rate": 9.644320218932723e-06, |
| "loss": 0.6375, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.504, |
| "grad_norm": 0.9441695213317871, |
| "learning_rate": 9.63885204676954e-06, |
| "loss": 0.5651, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.512, |
| "grad_norm": 0.774837076663971, |
| "learning_rate": 9.63334373552322e-06, |
| "loss": 0.4978, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 1.0337467193603516, |
| "learning_rate": 9.627795332856107e-06, |
| "loss": 0.7022, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.528, |
| "grad_norm": 1.0459073781967163, |
| "learning_rate": 9.622206886777448e-06, |
| "loss": 0.6576, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 0.8906083106994629, |
| "learning_rate": 9.616578445642982e-06, |
| "loss": 0.5239, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.544, |
| "grad_norm": 0.7993283271789551, |
| "learning_rate": 9.61091005815451e-06, |
| "loss": 0.5833, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.552, |
| "grad_norm": 0.9584801197052002, |
| "learning_rate": 9.605201773359485e-06, |
| "loss": 0.5593, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 1.0783772468566895, |
| "learning_rate": 9.599453640650585e-06, |
| "loss": 0.5807, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.568, |
| "grad_norm": 0.9267758727073669, |
| "learning_rate": 9.59366570976528e-06, |
| "loss": 0.5253, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.576, |
| "grad_norm": 0.879656195640564, |
| "learning_rate": 9.587838030785413e-06, |
| "loss": 0.5637, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.584, |
| "grad_norm": 0.7291843891143799, |
| "learning_rate": 9.581970654136752e-06, |
| "loss": 0.4746, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.592, |
| "grad_norm": 1.1042965650558472, |
| "learning_rate": 9.576063630588563e-06, |
| "loss": 0.5335, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.89472496509552, |
| "learning_rate": 9.570117011253173e-06, |
| "loss": 0.5244, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.608, |
| "grad_norm": 0.899229109287262, |
| "learning_rate": 9.56413084758552e-06, |
| "loss": 0.5962, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.616, |
| "grad_norm": 0.9164184331893921, |
| "learning_rate": 9.55810519138271e-06, |
| "loss": 0.5423, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.624, |
| "grad_norm": 0.949131429195404, |
| "learning_rate": 9.552040094783575e-06, |
| "loss": 0.6698, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.6320000000000001, |
| "grad_norm": 1.300114393234253, |
| "learning_rate": 9.545935610268213e-06, |
| "loss": 0.7455, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 0.9564195871353149, |
| "learning_rate": 9.53979179065754e-06, |
| "loss": 0.6041, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.6480000000000001, |
| "grad_norm": 0.8021354675292969, |
| "learning_rate": 9.533608689112827e-06, |
| "loss": 0.5842, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.6560000000000001, |
| "grad_norm": 0.8193942904472351, |
| "learning_rate": 9.527386359135254e-06, |
| "loss": 0.4656, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 0.9571919441223145, |
| "learning_rate": 9.521124854565425e-06, |
| "loss": 0.6065, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.6720000000000002, |
| "grad_norm": 0.6706253886222839, |
| "learning_rate": 9.514824229582922e-06, |
| "loss": 0.5117, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 0.896847665309906, |
| "learning_rate": 9.508484538705823e-06, |
| "loss": 0.4671, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.688, |
| "grad_norm": 1.0854939222335815, |
| "learning_rate": 9.50210583679024e-06, |
| "loss": 0.5669, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.696, |
| "grad_norm": 0.83414626121521, |
| "learning_rate": 9.495688179029838e-06, |
| "loss": 0.5015, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 0.8416348695755005, |
| "learning_rate": 9.48923162095536e-06, |
| "loss": 0.5467, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.712, |
| "grad_norm": 0.8415360450744629, |
| "learning_rate": 9.482736218434144e-06, |
| "loss": 0.5002, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.8762681484222412, |
| "learning_rate": 9.476202027669644e-06, |
| "loss": 0.5915, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.728, |
| "grad_norm": 0.9399668574333191, |
| "learning_rate": 9.469629105200937e-06, |
| "loss": 0.6083, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.736, |
| "grad_norm": 0.8347932696342468, |
| "learning_rate": 9.463017507902245e-06, |
| "loss": 0.5588, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.744, |
| "grad_norm": 0.8872931003570557, |
| "learning_rate": 9.45636729298243e-06, |
| "loss": 0.5585, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.752, |
| "grad_norm": 0.9400362968444824, |
| "learning_rate": 9.449678517984503e-06, |
| "loss": 0.4949, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.9362907409667969, |
| "learning_rate": 9.442951240785135e-06, |
| "loss": 0.4697, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.768, |
| "grad_norm": 0.9293828010559082, |
| "learning_rate": 9.436185519594145e-06, |
| "loss": 0.5465, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.776, |
| "grad_norm": 0.9569793939590454, |
| "learning_rate": 9.429381412954e-06, |
| "loss": 0.5107, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.784, |
| "grad_norm": 0.8464429974555969, |
| "learning_rate": 9.422538979739307e-06, |
| "loss": 0.5828, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 0.9483685493469238, |
| "learning_rate": 9.415658279156312e-06, |
| "loss": 0.5515, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.8740932941436768, |
| "learning_rate": 9.408739370742372e-06, |
| "loss": 0.5731, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.808, |
| "grad_norm": 0.8768803477287292, |
| "learning_rate": 9.401782314365458e-06, |
| "loss": 0.5504, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.8159999999999998, |
| "grad_norm": 0.9519500732421875, |
| "learning_rate": 9.39478717022362e-06, |
| "loss": 0.5498, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.8239999999999998, |
| "grad_norm": 1.1657662391662598, |
| "learning_rate": 9.387753998844482e-06, |
| "loss": 0.7251, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.8319999999999999, |
| "grad_norm": 0.9255201816558838, |
| "learning_rate": 9.380682861084703e-06, |
| "loss": 0.5701, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 0.8490859270095825, |
| "learning_rate": 9.37357381812946e-06, |
| "loss": 0.5303, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.8479999999999999, |
| "grad_norm": 0.9158905148506165, |
| "learning_rate": 9.366426931491917e-06, |
| "loss": 0.6711, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.8559999999999999, |
| "grad_norm": 0.8813873529434204, |
| "learning_rate": 9.359242263012693e-06, |
| "loss": 0.5088, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.8639999999999999, |
| "grad_norm": 0.9613426327705383, |
| "learning_rate": 9.352019874859326e-06, |
| "loss": 0.5749, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.8719999999999999, |
| "grad_norm": 0.7892547845840454, |
| "learning_rate": 9.344759829525734e-06, |
| "loss": 0.4288, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.9048332571983337, |
| "learning_rate": 9.33746218983167e-06, |
| "loss": 0.5524, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.888, |
| "grad_norm": 1.0441476106643677, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.5697, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.896, |
| "grad_norm": 0.8699061274528503, |
| "learning_rate": 9.32275438026711e-06, |
| "loss": 0.5447, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.904, |
| "grad_norm": 0.8975421786308289, |
| "learning_rate": 9.315344337660422e-06, |
| "loss": 0.4788, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.912, |
| "grad_norm": 0.8610514402389526, |
| "learning_rate": 9.307896955219787e-06, |
| "loss": 0.5311, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.9291175007820129, |
| "learning_rate": 9.300412297385954e-06, |
| "loss": 0.4566, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.928, |
| "grad_norm": 0.8736976981163025, |
| "learning_rate": 9.29289042892221e-06, |
| "loss": 0.5223, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.936, |
| "grad_norm": 0.9177389144897461, |
| "learning_rate": 9.285331414913816e-06, |
| "loss": 0.5391, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.944, |
| "grad_norm": 0.8681254386901855, |
| "learning_rate": 9.277735320767449e-06, |
| "loss": 0.4491, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.952, |
| "grad_norm": 0.9195399284362793, |
| "learning_rate": 9.270102212210632e-06, |
| "loss": 0.5366, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 1.0078879594802856, |
| "learning_rate": 9.262432155291167e-06, |
| "loss": 0.5684, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.968, |
| "grad_norm": 0.9095841646194458, |
| "learning_rate": 9.254725216376562e-06, |
| "loss": 0.559, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.976, |
| "grad_norm": 0.8708402514457703, |
| "learning_rate": 9.246981462153456e-06, |
| "loss": 0.5294, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.984, |
| "grad_norm": 0.8832100033760071, |
| "learning_rate": 9.239200959627048e-06, |
| "loss": 0.652, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.992, |
| "grad_norm": 0.8539401292800903, |
| "learning_rate": 9.231383776120512e-06, |
| "loss": 0.5354, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8361132144927979, |
| "learning_rate": 9.223529979274411e-06, |
| "loss": 0.5124, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.008, |
| "grad_norm": 1.3811569213867188, |
| "learning_rate": 9.215639637046121e-06, |
| "loss": 0.5122, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.016, |
| "grad_norm": 1.0794579982757568, |
| "learning_rate": 9.207712817709237e-06, |
| "loss": 0.4453, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.024, |
| "grad_norm": 0.8161504864692688, |
| "learning_rate": 9.19974958985298e-06, |
| "loss": 0.4507, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.032, |
| "grad_norm": 0.934089183807373, |
| "learning_rate": 9.191750022381613e-06, |
| "loss": 0.3668, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 0.9390188455581665, |
| "learning_rate": 9.183714184513832e-06, |
| "loss": 0.4384, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.048, |
| "grad_norm": 1.3397053480148315, |
| "learning_rate": 9.175642145782179e-06, |
| "loss": 0.4417, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.056, |
| "grad_norm": 0.9079061150550842, |
| "learning_rate": 9.16753397603243e-06, |
| "loss": 0.4005, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.064, |
| "grad_norm": 1.0313668251037598, |
| "learning_rate": 9.159389745423003e-06, |
| "loss": 0.4249, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.072, |
| "grad_norm": 0.6768733859062195, |
| "learning_rate": 9.151209524424333e-06, |
| "loss": 0.3698, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 0.8785944581031799, |
| "learning_rate": 9.142993383818284e-06, |
| "loss": 0.35, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.088, |
| "grad_norm": 1.2951953411102295, |
| "learning_rate": 9.134741394697517e-06, |
| "loss": 0.5264, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.096, |
| "grad_norm": 0.9182500839233398, |
| "learning_rate": 9.126453628464889e-06, |
| "loss": 0.3614, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.104, |
| "grad_norm": 1.2565897703170776, |
| "learning_rate": 9.118130156832823e-06, |
| "loss": 0.4656, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.112, |
| "grad_norm": 0.857447624206543, |
| "learning_rate": 9.109771051822702e-06, |
| "loss": 0.388, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 0.8584538102149963, |
| "learning_rate": 9.10137638576423e-06, |
| "loss": 0.3833, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 1.0366853475570679, |
| "learning_rate": 9.09294623129482e-06, |
| "loss": 0.3688, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.136, |
| "grad_norm": 0.8696836233139038, |
| "learning_rate": 9.084480661358954e-06, |
| "loss": 0.4356, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.144, |
| "grad_norm": 0.8424058556556702, |
| "learning_rate": 9.07597974920756e-06, |
| "loss": 0.4523, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.152, |
| "grad_norm": 0.8936377763748169, |
| "learning_rate": 9.067443568397378e-06, |
| "loss": 0.4195, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 0.7706946730613708, |
| "learning_rate": 9.058872192790314e-06, |
| "loss": 0.4114, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.168, |
| "grad_norm": 0.8530108332633972, |
| "learning_rate": 9.05026569655281e-06, |
| "loss": 0.4485, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.176, |
| "grad_norm": 0.9020074009895325, |
| "learning_rate": 9.041624154155208e-06, |
| "loss": 0.4264, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.184, |
| "grad_norm": 0.6583293676376343, |
| "learning_rate": 9.032947640371086e-06, |
| "loss": 0.3326, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 0.8414630889892578, |
| "learning_rate": 9.02423623027663e-06, |
| "loss": 0.3968, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.9140536785125732, |
| "learning_rate": 9.01548999924997e-06, |
| "loss": 0.3917, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.208, |
| "grad_norm": 0.8281865119934082, |
| "learning_rate": 9.006709022970547e-06, |
| "loss": 0.4069, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.216, |
| "grad_norm": 0.7317336201667786, |
| "learning_rate": 8.997893377418432e-06, |
| "loss": 0.4185, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.224, |
| "grad_norm": 1.2639456987380981, |
| "learning_rate": 8.98904313887369e-06, |
| "loss": 0.3394, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.232, |
| "grad_norm": 0.8909697532653809, |
| "learning_rate": 8.980158383915714e-06, |
| "loss": 0.4178, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 0.8912489414215088, |
| "learning_rate": 8.971239189422555e-06, |
| "loss": 0.4751, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.248, |
| "grad_norm": 0.8743604421615601, |
| "learning_rate": 8.962285632570266e-06, |
| "loss": 0.438, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.2560000000000002, |
| "grad_norm": 0.9688166379928589, |
| "learning_rate": 8.953297790832231e-06, |
| "loss": 0.4437, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.2640000000000002, |
| "grad_norm": 0.9198762774467468, |
| "learning_rate": 8.944275741978495e-06, |
| "loss": 0.4303, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.2720000000000002, |
| "grad_norm": 0.8178912997245789, |
| "learning_rate": 8.935219564075087e-06, |
| "loss": 0.4714, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 1.30088472366333, |
| "learning_rate": 8.92612933548335e-06, |
| "loss": 0.3948, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.288, |
| "grad_norm": 1.0883452892303467, |
| "learning_rate": 8.917005134859263e-06, |
| "loss": 0.3697, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.296, |
| "grad_norm": 1.3085649013519287, |
| "learning_rate": 8.907847041152757e-06, |
| "loss": 0.4761, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.304, |
| "grad_norm": 0.8386606574058533, |
| "learning_rate": 8.89865513360703e-06, |
| "loss": 0.3663, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.312, |
| "grad_norm": 0.8883110284805298, |
| "learning_rate": 8.889429491757872e-06, |
| "loss": 0.4755, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.8336955308914185, |
| "learning_rate": 8.88017019543296e-06, |
| "loss": 0.3742, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.328, |
| "grad_norm": 0.7842932939529419, |
| "learning_rate": 8.870877324751186e-06, |
| "loss": 0.3338, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.336, |
| "grad_norm": 0.8498310446739197, |
| "learning_rate": 8.861550960121946e-06, |
| "loss": 0.4814, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.344, |
| "grad_norm": 0.785335123538971, |
| "learning_rate": 8.852191182244456e-06, |
| "loss": 0.3093, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.352, |
| "grad_norm": 0.758841872215271, |
| "learning_rate": 8.842798072107055e-06, |
| "loss": 0.4555, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 0.8311530947685242, |
| "learning_rate": 8.833371710986493e-06, |
| "loss": 0.3642, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.368, |
| "grad_norm": 0.8695397973060608, |
| "learning_rate": 8.823912180447237e-06, |
| "loss": 0.3611, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.376, |
| "grad_norm": 0.9586389660835266, |
| "learning_rate": 8.81441956234076e-06, |
| "loss": 0.4375, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.384, |
| "grad_norm": 1.3389581441879272, |
| "learning_rate": 8.804893938804839e-06, |
| "loss": 0.4707, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.392, |
| "grad_norm": 0.8372800946235657, |
| "learning_rate": 8.795335392262841e-06, |
| "loss": 0.4676, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.9432985186576843, |
| "learning_rate": 8.785744005423003e-06, |
| "loss": 0.5151, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.408, |
| "grad_norm": 0.7734491229057312, |
| "learning_rate": 8.77611986127773e-06, |
| "loss": 0.4515, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.416, |
| "grad_norm": 0.7202035784721375, |
| "learning_rate": 8.766463043102864e-06, |
| "loss": 0.3825, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.424, |
| "grad_norm": 0.9535182118415833, |
| "learning_rate": 8.756773634456975e-06, |
| "loss": 0.5219, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.432, |
| "grad_norm": 0.7026752233505249, |
| "learning_rate": 8.747051719180626e-06, |
| "loss": 0.36, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 0.7914669513702393, |
| "learning_rate": 8.737297381395657e-06, |
| "loss": 0.4205, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 0.7313674688339233, |
| "learning_rate": 8.727510705504453e-06, |
| "loss": 0.4484, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.456, |
| "grad_norm": 0.8241586089134216, |
| "learning_rate": 8.717691776189214e-06, |
| "loss": 0.3453, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.464, |
| "grad_norm": 0.9311320185661316, |
| "learning_rate": 8.707840678411223e-06, |
| "loss": 0.3825, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.472, |
| "grad_norm": 0.8270311951637268, |
| "learning_rate": 8.69795749741011e-06, |
| "loss": 0.4327, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 0.8111954927444458, |
| "learning_rate": 8.688042318703111e-06, |
| "loss": 0.3797, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.488, |
| "grad_norm": 0.8930568695068359, |
| "learning_rate": 8.678095228084343e-06, |
| "loss": 0.3948, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.496, |
| "grad_norm": 1.5294451713562012, |
| "learning_rate": 8.66811631162404e-06, |
| "loss": 0.4458, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.504, |
| "grad_norm": 1.0527986288070679, |
| "learning_rate": 8.65810565566782e-06, |
| "loss": 0.37, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 0.79234379529953, |
| "learning_rate": 8.648063346835943e-06, |
| "loss": 0.4103, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 0.8093475103378296, |
| "learning_rate": 8.637989472022548e-06, |
| "loss": 0.3947, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.528, |
| "grad_norm": 0.9096853733062744, |
| "learning_rate": 8.627884118394913e-06, |
| "loss": 0.3167, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.536, |
| "grad_norm": 0.7235214114189148, |
| "learning_rate": 8.617747373392697e-06, |
| "loss": 0.405, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.544, |
| "grad_norm": 0.8731074929237366, |
| "learning_rate": 8.607579324727175e-06, |
| "loss": 0.4647, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.552, |
| "grad_norm": 0.8666344881057739, |
| "learning_rate": 8.597380060380493e-06, |
| "loss": 0.3891, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 1.1291053295135498, |
| "learning_rate": 8.5871496686049e-06, |
| "loss": 0.461, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.568, |
| "grad_norm": 0.9848377108573914, |
| "learning_rate": 8.576888237921983e-06, |
| "loss": 0.4388, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.576, |
| "grad_norm": 0.8755123019218445, |
| "learning_rate": 8.566595857121902e-06, |
| "loss": 0.4386, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.584, |
| "grad_norm": 1.2679446935653687, |
| "learning_rate": 8.556272615262623e-06, |
| "loss": 0.3827, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.592, |
| "grad_norm": 1.340916395187378, |
| "learning_rate": 8.545918601669147e-06, |
| "loss": 0.4543, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.7855986952781677, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 0.3923, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.608, |
| "grad_norm": 0.829623818397522, |
| "learning_rate": 8.525118617910144e-06, |
| "loss": 0.4529, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.616, |
| "grad_norm": 0.932386040687561, |
| "learning_rate": 8.514672827722824e-06, |
| "loss": 0.4716, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.624, |
| "grad_norm": 0.864211916923523, |
| "learning_rate": 8.504196625756166e-06, |
| "loss": 0.3825, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.632, |
| "grad_norm": 0.7904481291770935, |
| "learning_rate": 8.493690102658703e-06, |
| "loss": 0.3582, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 0.6928600072860718, |
| "learning_rate": 8.483153349341336e-06, |
| "loss": 0.3512, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.648, |
| "grad_norm": 0.7888721227645874, |
| "learning_rate": 8.472586456976534e-06, |
| "loss": 0.4814, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.656, |
| "grad_norm": 0.8113727569580078, |
| "learning_rate": 8.461989516997565e-06, |
| "loss": 0.4022, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.664, |
| "grad_norm": 0.9936776757240295, |
| "learning_rate": 8.45136262109768e-06, |
| "loss": 0.6013, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.672, |
| "grad_norm": 0.6121838688850403, |
| "learning_rate": 8.440705861229344e-06, |
| "loss": 0.3144, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 0.9799290299415588, |
| "learning_rate": 8.430019329603423e-06, |
| "loss": 0.4724, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.6879999999999997, |
| "grad_norm": 0.8076691627502441, |
| "learning_rate": 8.41930311868839e-06, |
| "loss": 0.404, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.6959999999999997, |
| "grad_norm": 0.9041216373443604, |
| "learning_rate": 8.408557321209534e-06, |
| "loss": 0.4434, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.7039999999999997, |
| "grad_norm": 0.8356102108955383, |
| "learning_rate": 8.397782030148147e-06, |
| "loss": 0.4153, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.7119999999999997, |
| "grad_norm": 1.0063518285751343, |
| "learning_rate": 8.386977338740724e-06, |
| "loss": 0.421, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 0.741091787815094, |
| "learning_rate": 8.376143340478153e-06, |
| "loss": 0.3746, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.7279999999999998, |
| "grad_norm": 0.992077112197876, |
| "learning_rate": 8.365280129104912e-06, |
| "loss": 0.4036, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.7359999999999998, |
| "grad_norm": 0.8580658435821533, |
| "learning_rate": 8.354387798618254e-06, |
| "loss": 0.3937, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.7439999999999998, |
| "grad_norm": 0.9947491884231567, |
| "learning_rate": 8.34346644326739e-06, |
| "loss": 0.3879, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.752, |
| "grad_norm": 0.8819236159324646, |
| "learning_rate": 8.332516157552684e-06, |
| "loss": 0.4296, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 0.7174811363220215, |
| "learning_rate": 8.321537036224822e-06, |
| "loss": 0.3896, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 0.7424646019935608, |
| "learning_rate": 8.310529174284004e-06, |
| "loss": 0.3654, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.776, |
| "grad_norm": 0.9448553323745728, |
| "learning_rate": 8.299492666979114e-06, |
| "loss": 0.3425, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.784, |
| "grad_norm": 0.7875136137008667, |
| "learning_rate": 8.288427609806899e-06, |
| "loss": 0.3703, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.792, |
| "grad_norm": 0.9348728060722351, |
| "learning_rate": 8.277334098511147e-06, |
| "loss": 0.4224, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.7805924415588379, |
| "learning_rate": 8.266212229081846e-06, |
| "loss": 0.3679, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.808, |
| "grad_norm": 0.8468799591064453, |
| "learning_rate": 8.255062097754371e-06, |
| "loss": 0.4477, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.816, |
| "grad_norm": 0.7618088126182556, |
| "learning_rate": 8.243883801008632e-06, |
| "loss": 0.4523, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.824, |
| "grad_norm": 0.7418726086616516, |
| "learning_rate": 8.232677435568252e-06, |
| "loss": 0.4286, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 0.7962421774864197, |
| "learning_rate": 8.221443098399733e-06, |
| "loss": 0.3583, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 0.9815137982368469, |
| "learning_rate": 8.210180886711603e-06, |
| "loss": 0.4421, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.848, |
| "grad_norm": 0.8846978545188904, |
| "learning_rate": 8.198890897953586e-06, |
| "loss": 0.4625, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.856, |
| "grad_norm": 0.7743475437164307, |
| "learning_rate": 8.187573229815757e-06, |
| "loss": 0.3382, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.864, |
| "grad_norm": 0.9756518602371216, |
| "learning_rate": 8.176227980227693e-06, |
| "loss": 0.4967, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.872, |
| "grad_norm": 0.919818103313446, |
| "learning_rate": 8.164855247357628e-06, |
| "loss": 0.4667, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 0.7409064173698425, |
| "learning_rate": 8.153455129611605e-06, |
| "loss": 0.3253, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.888, |
| "grad_norm": 0.8657407760620117, |
| "learning_rate": 8.142027725632622e-06, |
| "loss": 0.4967, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.896, |
| "grad_norm": 0.8034350275993347, |
| "learning_rate": 8.130573134299782e-06, |
| "loss": 0.4267, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.904, |
| "grad_norm": 0.8715494871139526, |
| "learning_rate": 8.119091454727427e-06, |
| "loss": 0.4406, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.912, |
| "grad_norm": 0.6950966715812683, |
| "learning_rate": 8.107582786264299e-06, |
| "loss": 0.3652, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 0.8034759163856506, |
| "learning_rate": 8.09604722849266e-06, |
| "loss": 0.4174, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.928, |
| "grad_norm": 0.9261465072631836, |
| "learning_rate": 8.084484881227449e-06, |
| "loss": 0.348, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.936, |
| "grad_norm": 0.804601788520813, |
| "learning_rate": 8.072895844515398e-06, |
| "loss": 0.4119, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.944, |
| "grad_norm": 0.9597411751747131, |
| "learning_rate": 8.061280218634192e-06, |
| "loss": 0.4271, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.952, |
| "grad_norm": 0.9602273106575012, |
| "learning_rate": 8.049638104091575e-06, |
| "loss": 0.4448, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 0.8276174664497375, |
| "learning_rate": 8.037969601624495e-06, |
| "loss": 0.3886, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.968, |
| "grad_norm": 0.9530600309371948, |
| "learning_rate": 8.026274812198235e-06, |
| "loss": 0.4353, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.976, |
| "grad_norm": 0.8712935447692871, |
| "learning_rate": 8.014553837005527e-06, |
| "loss": 0.4336, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.984, |
| "grad_norm": 0.7948423624038696, |
| "learning_rate": 8.002806777465685e-06, |
| "loss": 0.4402, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.992, |
| "grad_norm": 0.8660122156143188, |
| "learning_rate": 7.99103373522373e-06, |
| "loss": 0.3639, |
| "step": 374 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.9771859049797058, |
| "learning_rate": 7.9792348121495e-06, |
| "loss": 0.4096, |
| "step": 375 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1125, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.019638269101998e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |