| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 873, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 3.7037037037037036e-07, |
| "loss": 1.6596, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 7.407407407407407e-07, |
| "loss": 1.7984, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 1.7185, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.4814814814814815e-06, |
| "loss": 1.7882, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.8518518518518519e-06, |
| "loss": 1.7184, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 1.7167, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.5925925925925925e-06, |
| "loss": 1.6061, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.962962962962963e-06, |
| "loss": 1.7068, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.4754, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.7037037037037037e-06, |
| "loss": 1.6545, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.074074074074074e-06, |
| "loss": 1.6856, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 1.6343, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.814814814814815e-06, |
| "loss": 1.5836, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.185185185185185e-06, |
| "loss": 1.6395, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 1.7376, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.925925925925926e-06, |
| "loss": 1.4839, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 6.296296296296297e-06, |
| "loss": 1.7036, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.7867, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.0370370370370375e-06, |
| "loss": 1.6244, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.4074074074074075e-06, |
| "loss": 1.6133, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 1.5338, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 8.148148148148148e-06, |
| "loss": 1.4873, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 8.518518518518519e-06, |
| "loss": 1.566, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 1.5234, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.25925925925926e-06, |
| "loss": 1.5517, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.62962962962963e-06, |
| "loss": 1.6403, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1e-05, |
| "loss": 1.5553, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.99996552545612e-06, |
| "loss": 1.4303, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.999862102299874e-06, |
| "loss": 1.5491, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.99968973195745e-06, |
| "loss": 1.5947, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.999448416805802e-06, |
| "loss": 1.6149, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.999138160172624e-06, |
| "loss": 1.6031, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.998758966336296e-06, |
| "loss": 1.4953, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.998310840525835e-06, |
| "loss": 1.3904, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.99779378892081e-06, |
| "loss": 1.5396, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.997207818651273e-06, |
| "loss": 1.5207, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.996552937797646e-06, |
| "loss": 1.4792, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.995829155390613e-06, |
| "loss": 1.549, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.995036481411005e-06, |
| "loss": 1.5933, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.994174926789648e-06, |
| "loss": 1.5199, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.993244503407227e-06, |
| "loss": 1.4438, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.99224522409411e-06, |
| "loss": 1.5248, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.991177102630173e-06, |
| "loss": 1.4997, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.99004015374462e-06, |
| "loss": 1.4953, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.988834393115768e-06, |
| "loss": 1.5326, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.987559837370832e-06, |
| "loss": 1.4969, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.986216504085709e-06, |
| "loss": 1.4415, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.984804411784717e-06, |
| "loss": 1.5773, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.983323579940351e-06, |
| "loss": 1.4887, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.981774028973013e-06, |
| "loss": 1.4998, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.980155780250728e-06, |
| "loss": 1.5108, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.97846885608885e-06, |
| "loss": 1.5011, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.976713279749754e-06, |
| "loss": 1.4729, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.97488907544252e-06, |
| "loss": 1.4965, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.972996268322594e-06, |
| "loss": 1.5399, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.971034884491436e-06, |
| "loss": 1.4602, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.969004950996175e-06, |
| "loss": 1.4677, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.96690649582922e-06, |
| "loss": 1.4103, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.964739547927892e-06, |
| "loss": 1.4356, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.962504137173997e-06, |
| "loss": 1.502, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.96020029439345e-06, |
| "loss": 1.5595, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.957828051355817e-06, |
| "loss": 1.4218, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.955387440773902e-06, |
| "loss": 1.4533, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.952878496303274e-06, |
| "loss": 1.4632, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.950301252541824e-06, |
| "loss": 1.3781, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.94765574502927e-06, |
| "loss": 1.4568, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.944942010246681e-06, |
| "loss": 1.4398, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.942160085615963e-06, |
| "loss": 1.4723, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.939310009499348e-06, |
| "loss": 1.4004, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.936391821198868e-06, |
| "loss": 1.3648, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.933405560955805e-06, |
| "loss": 1.4415, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.930351269950144e-06, |
| "loss": 1.4612, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.9272289903e-06, |
| "loss": 1.4325, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.924038765061042e-06, |
| "loss": 1.5181, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.92078063822589e-06, |
| "loss": 1.3426, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.917454654723522e-06, |
| "loss": 1.3409, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.914060860418644e-06, |
| "loss": 1.3939, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.910599302111057e-06, |
| "loss": 1.4446, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.907070027535022e-06, |
| "loss": 1.4905, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.903473085358589e-06, |
| "loss": 1.3647, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.899808525182935e-06, |
| "loss": 1.4727, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.896076397541676e-06, |
| "loss": 1.4646, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.892276753900173e-06, |
| "loss": 1.3373, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.888409646654818e-06, |
| "loss": 1.5058, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.884475129132312e-06, |
| "loss": 1.4937, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.880473255588937e-06, |
| "loss": 1.4754, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.876404081209796e-06, |
| "loss": 1.4366, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.872267662108064e-06, |
| "loss": 1.4724, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.868064055324204e-06, |
| "loss": 1.4958, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.863793318825186e-06, |
| "loss": 1.4369, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.859455511503691e-06, |
| "loss": 1.4184, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.855050693177286e-06, |
| "loss": 1.3802, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.850578924587614e-06, |
| "loss": 1.4848, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.846040267399548e-06, |
| "loss": 1.5789, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.841434784200341e-06, |
| "loss": 1.3857, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.83676253849877e-06, |
| "loss": 1.418, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.832023594724248e-06, |
| "loss": 1.4117, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.827218018225944e-06, |
| "loss": 1.5229, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.822345875271884e-06, |
| "loss": 1.4827, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.817407233048028e-06, |
| "loss": 1.3705, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.812402159657352e-06, |
| "loss": 1.4836, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.807330724118906e-06, |
| "loss": 1.4261, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.802192996366859e-06, |
| "loss": 1.3848, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.796989047249539e-06, |
| "loss": 1.3516, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.791718948528457e-06, |
| "loss": 1.4449, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.786382772877312e-06, |
| "loss": 1.4151, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.780980593880993e-06, |
| "loss": 1.4627, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.775512486034564e-06, |
| "loss": 1.3104, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.76997852474223e-06, |
| "loss": 1.3921, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.76437878631631e-06, |
| "loss": 1.4407, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.758713347976179e-06, |
| "loss": 1.5017, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.752982287847193e-06, |
| "loss": 1.4565, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.747185684959626e-06, |
| "loss": 1.4413, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.741323619247575e-06, |
| "loss": 1.3756, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.735396171547859e-06, |
| "loss": 1.3519, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.7294034235989e-06, |
| "loss": 1.4479, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.723345458039595e-06, |
| "loss": 1.4406, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.717222358408188e-06, |
| "loss": 1.4624, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.711034209141102e-06, |
| "loss": 1.4615, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.704781095571788e-06, |
| "loss": 1.4321, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 1.4256, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.692080321338317e-06, |
| "loss": 1.4684, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.685632835815519e-06, |
| "loss": 1.3634, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.679120736270796e-06, |
| "loss": 1.5006, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.672544112504813e-06, |
| "loss": 1.3559, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.665903055208013e-06, |
| "loss": 1.3583, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.659197655959364e-06, |
| "loss": 1.3658, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.6524280072251e-06, |
| "loss": 1.3334, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.645594202357438e-06, |
| "loss": 1.408, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.638696335593304e-06, |
| "loss": 1.4738, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.63173450205302e-06, |
| "loss": 1.4019, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.624708797739002e-06, |
| "loss": 1.3439, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.617619319534427e-06, |
| "loss": 1.4233, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.610466165201912e-06, |
| "loss": 1.329, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.603249433382145e-06, |
| "loss": 1.3863, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.595969223592544e-06, |
| "loss": 1.3281, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.588625636225871e-06, |
| "loss": 1.443, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.58121877254886e-06, |
| "loss": 1.3968, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.573748734700806e-06, |
| "loss": 1.3448, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.566215625692168e-06, |
| "loss": 1.4587, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.558619549403148e-06, |
| "loss": 1.4847, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.550960610582251e-06, |
| "loss": 1.3366, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.543238914844844e-06, |
| "loss": 1.4567, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.535454568671705e-06, |
| "loss": 1.3963, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.527607679407545e-06, |
| "loss": 1.3651, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.519698355259537e-06, |
| "loss": 1.3715, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.51172670529582e-06, |
| "loss": 1.4307, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.503692839443988e-06, |
| "loss": 1.4342, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.495596868489588e-06, |
| "loss": 1.3459, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.487438904074581e-06, |
| "loss": 1.3055, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.47921905869581e-06, |
| "loss": 1.3297, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.47093744570344e-06, |
| "loss": 1.3391, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.462594179299408e-06, |
| "loss": 1.3502, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.45418937453583e-06, |
| "loss": 1.4598, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.445723147313434e-06, |
| "loss": 1.3428, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.437195614379947e-06, |
| "loss": 1.3898, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.428606893328493e-06, |
| "loss": 1.352, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.41995710259597e-06, |
| "loss": 1.4388, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.41124636146141e-06, |
| "loss": 1.3507, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.402474790044348e-06, |
| "loss": 1.3915, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.39364250930315e-06, |
| "loss": 1.4424, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.384749641033358e-06, |
| "loss": 1.3962, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.375796307866003e-06, |
| "loss": 1.2982, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.366782633265917e-06, |
| "loss": 1.3802, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.357708741530025e-06, |
| "loss": 1.4136, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.348574757785642e-06, |
| "loss": 1.3946, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.339380807988734e-06, |
| "loss": 1.2952, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 1.3998, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.320813518194084e-06, |
| "loss": 1.3156, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.311440434235879e-06, |
| "loss": 1.4532, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.302007896300697e-06, |
| "loss": 1.3771, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.292516034461517e-06, |
| "loss": 1.4247, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.28296497960938e-06, |
| "loss": 1.3896, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.273354863451589e-06, |
| "loss": 1.4133, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.263685818509895e-06, |
| "loss": 1.3593, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.253957978118664e-06, |
| "loss": 1.3283, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.244171476423037e-06, |
| "loss": 1.3792, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.234326448377089e-06, |
| "loss": 1.3244, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.22442302974196e-06, |
| "loss": 1.3083, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.214461357083986e-06, |
| "loss": 1.3197, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.204441567772817e-06, |
| "loss": 1.4067, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.194363799979517e-06, |
| "loss": 1.3608, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.184228192674667e-06, |
| "loss": 1.3958, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.17403488562644e-06, |
| "loss": 1.3592, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.163784019398686e-06, |
| "loss": 1.362, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.153475735348973e-06, |
| "loss": 1.303, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.143110175626662e-06, |
| "loss": 1.3781, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.13268748317093e-06, |
| "loss": 1.4138, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.122207801708802e-06, |
| "loss": 1.438, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.111671275753175e-06, |
| "loss": 1.4004, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.101078050600823e-06, |
| "loss": 1.3989, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.090428272330381e-06, |
| "loss": 1.4085, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.079722087800353e-06, |
| "loss": 1.3661, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.06895964464707e-06, |
| "loss": 1.3699, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.058141091282656e-06, |
| "loss": 1.3042, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.047266576892993e-06, |
| "loss": 1.2713, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 9.036336251435647e-06, |
| "loss": 1.3376, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 9.025350265637816e-06, |
| "loss": 1.3499, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 9.014308770994235e-06, |
| "loss": 1.3658, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 9.003211919765102e-06, |
| "loss": 1.331, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 8.992059864973972e-06, |
| "loss": 1.2886, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 8.980852760405645e-06, |
| "loss": 1.3809, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 8.96959076060405e-06, |
| "loss": 1.3197, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 8.958274020870107e-06, |
| "loss": 1.4306, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 8.946902697259593e-06, |
| "loss": 1.3622, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 8.935476946580988e-06, |
| "loss": 1.3956, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 8.923996926393306e-06, |
| "loss": 1.3504, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 8.912462795003932e-06, |
| "loss": 1.3969, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 8.900874711466436e-06, |
| "loss": 1.4044, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 8.889232835578372e-06, |
| "loss": 1.3154, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 8.877537327879087e-06, |
| "loss": 1.3014, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 8.865788349647496e-06, |
| "loss": 1.3628, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 8.853986062899869e-06, |
| "loss": 1.3848, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.842130630387583e-06, |
| "loss": 1.382, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 1.3952, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.818260982736662e-06, |
| "loss": 1.3529, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.80624709675611e-06, |
| "loss": 1.2393, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.794180723322537e-06, |
| "loss": 1.4167, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.782062028829028e-06, |
| "loss": 1.3627, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.769891180390168e-06, |
| "loss": 1.374, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.757668345839739e-06, |
| "loss": 1.385, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.745393693728395e-06, |
| "loss": 1.3289, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.733067393321354e-06, |
| "loss": 1.3307, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.72068961459605e-06, |
| "loss": 1.3902, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.708260528239788e-06, |
| "loss": 1.4119, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.695780305647405e-06, |
| "loss": 1.3628, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.683249118918895e-06, |
| "loss": 1.2731, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.670667140857034e-06, |
| "loss": 1.3873, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.658034544965003e-06, |
| "loss": 1.3426, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.645351505443997e-06, |
| "loss": 1.3858, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.632618197190817e-06, |
| "loss": 1.4124, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.619834795795458e-06, |
| "loss": 1.2791, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.607001477538697e-06, |
| "loss": 1.3503, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.594118419389648e-06, |
| "loss": 1.4009, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.581185799003334e-06, |
| "loss": 1.3875, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.568203794718228e-06, |
| "loss": 1.212, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.555172585553804e-06, |
| "loss": 1.4082, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.542092351208058e-06, |
| "loss": 1.3676, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.528963272055036e-06, |
| "loss": 1.3153, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.515785529142339e-06, |
| "loss": 1.3265, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.502559304188644e-06, |
| "loss": 1.2758, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.489284779581179e-06, |
| "loss": 1.365, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.475962138373212e-06, |
| "loss": 1.3663, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.46259156428154e-06, |
| "loss": 1.3429, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.449173241683934e-06, |
| "loss": 1.3457, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.43570735561662e-06, |
| "loss": 1.3705, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.422194091771709e-06, |
| "loss": 1.2759, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.408633636494643e-06, |
| "loss": 1.2924, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.395026176781627e-06, |
| "loss": 1.3508, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.381371900277045e-06, |
| "loss": 1.3227, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.367670995270883e-06, |
| "loss": 1.3489, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.353923650696119e-06, |
| "loss": 1.4203, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.340130056126126e-06, |
| "loss": 1.3484, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.326290401772057e-06, |
| "loss": 1.2958, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.312404878480222e-06, |
| "loss": 1.3201, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.298473677729453e-06, |
| "loss": 1.2972, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 8.284496991628465e-06, |
| "loss": 1.3441, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.270475012913212e-06, |
| "loss": 1.2578, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.25640793494422e-06, |
| "loss": 1.2723, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.24229595170393e-06, |
| "loss": 1.3463, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.228139257794012e-06, |
| "loss": 1.2866, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.213938048432697e-06, |
| "loss": 1.2462, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.19969251945207e-06, |
| "loss": 1.2395, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.185402867295373e-06, |
| "loss": 1.3298, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.171069289014307e-06, |
| "loss": 1.3464, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.156691982266299e-06, |
| "loss": 1.3382, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.142271145311784e-06, |
| "loss": 1.3392, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.127806977011476e-06, |
| "loss": 1.3726, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.113299676823614e-06, |
| "loss": 1.3787, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.098749444801226e-06, |
| "loss": 1.3239, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.08415648158935e-06, |
| "loss": 1.2717, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.069520988422292e-06, |
| "loss": 1.2725, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.054843167120827e-06, |
| "loss": 1.2724, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.040123220089437e-06, |
| "loss": 1.2731, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 8.025361350313506e-06, |
| "loss": 1.3407, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 8.010557761356523e-06, |
| "loss": 1.3206, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.99571265735728e-06, |
| "loss": 1.3384, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.980826243027052e-06, |
| "loss": 1.3022, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.965898723646777e-06, |
| "loss": 1.4177, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.950930305064224e-06, |
| "loss": 1.2906, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.935921193691153e-06, |
| "loss": 1.3392, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.920871596500473e-06, |
| "loss": 1.3754, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.905781721023384e-06, |
| "loss": 1.3237, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.890651775346512e-06, |
| "loss": 1.3333, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.875481968109052e-06, |
| "loss": 1.329, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.860272508499877e-06, |
| "loss": 1.3569, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.845023606254658e-06, |
| "loss": 1.3997, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.829735471652978e-06, |
| "loss": 1.2569, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.814408315515419e-06, |
| "loss": 1.3157, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.799042349200672e-06, |
| "loss": 1.3385, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.783637784602608e-06, |
| "loss": 1.3519, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.768194834147362e-06, |
| "loss": 1.3092, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 7.752713710790405e-06, |
| "loss": 1.4118, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 7.7371946280136e-06, |
| "loss": 1.4384, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 7.721637799822269e-06, |
| "loss": 1.319, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 7.706043440742235e-06, |
| "loss": 1.4091, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 7.690411765816864e-06, |
| "loss": 1.3586, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 7.674742990604101e-06, |
| "loss": 1.3887, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 7.659037331173498e-06, |
| "loss": 1.4584, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 7.643295004103232e-06, |
| "loss": 1.3274, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 7.627516226477123e-06, |
| "loss": 1.3145, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 7.611701215881635e-06, |
| "loss": 1.3378, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 7.595850190402877e-06, |
| "loss": 1.3808, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 7.579963368623602e-06, |
| "loss": 1.2816, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 7.564040969620179e-06, |
| "loss": 1.3451, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 7.548083212959588e-06, |
| "loss": 1.3767, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 7.532090318696382e-06, |
| "loss": 1.2972, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 7.516062507369655e-06, |
| "loss": 1.3871, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 1.2995, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 7.483903018086466e-06, |
| "loss": 1.3784, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 7.467771783603492e-06, |
| "loss": 1.2722, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 7.4516065189978625e-06, |
| "loss": 1.2584, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 7.435407447185623e-06, |
| "loss": 1.3558, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 7.419174791549023e-06, |
| "loss": 1.3191, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 7.402908775933419e-06, |
| "loss": 1.3213, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 7.386609624644201e-06, |
| "loss": 1.2918, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 7.370277562443689e-06, |
| "loss": 1.2747, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 7.353912814548042e-06, |
| "loss": 1.3291, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.337515606624148e-06, |
| "loss": 1.3367, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.321086164786513e-06, |
| "loss": 1.3425, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.30462471559414e-06, |
| "loss": 1.2055, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.288131486047414e-06, |
| "loss": 1.2522, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.2716067035849595e-06, |
| "loss": 1.3425, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.25505059608051e-06, |
| "loss": 1.3494, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.23846339183977e-06, |
| "loss": 1.3205, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.221845319597258e-06, |
| "loss": 1.3643, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.2051966085131584e-06, |
| "loss": 1.2947, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.18851748817016e-06, |
| "loss": 1.2807, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.1718081885702905e-06, |
| "loss": 1.2995, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.155068940131741e-06, |
| "loss": 1.2976, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.138299973685694e-06, |
| "loss": 1.3224, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.121501520473137e-06, |
| "loss": 1.3198, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.104673812141676e-06, |
| "loss": 1.2902, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.087817080742337e-06, |
| "loss": 1.2266, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.070931558726373e-06, |
| "loss": 1.1909, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 7.054017478942048e-06, |
| "loss": 1.2823, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 7.037075074631441e-06, |
| "loss": 1.3414, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 7.0201045794272135e-06, |
| "loss": 1.2356, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 7.003106227349399e-06, |
| "loss": 1.3991, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.9860802528021705e-06, |
| "loss": 1.3584, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.969026890570612e-06, |
| "loss": 1.391, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.9519463758174745e-06, |
| "loss": 1.2975, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.934838944079944e-06, |
| "loss": 1.2221, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.917704831266381e-06, |
| "loss": 1.266, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.9005442736530745e-06, |
| "loss": 1.2379, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.883357507880985e-06, |
| "loss": 1.2859, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.866144770952474e-06, |
| "loss": 1.3748, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.848906300228047e-06, |
| "loss": 1.2665, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.831642333423068e-06, |
| "loss": 1.3045, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.814353108604488e-06, |
| "loss": 1.3242, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.797038864187564e-06, |
| "loss": 1.3638, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.77969983893257e-06, |
| "loss": 1.3131, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.762336271941499e-06, |
| "loss": 1.265, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.7449484026547705e-06, |
| "loss": 1.3479, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.7275364708479316e-06, |
| "loss": 1.3901, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.710100716628345e-06, |
| "loss": 1.3811, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.692641380431879e-06, |
| "loss": 1.303, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.675158703019594e-06, |
| "loss": 1.2914, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.657652925474424e-06, |
| "loss": 1.2605, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.640124289197845e-06, |
| "loss": 1.2949, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.622573035906557e-06, |
| "loss": 1.2792, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.604999407629137e-06, |
| "loss": 1.2529, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.5874036467027135e-06, |
| "loss": 1.2953, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.5697859957696195e-06, |
| "loss": 1.3389, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.552146697774049e-06, |
| "loss": 1.3, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.534485995958699e-06, |
| "loss": 1.3147, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.51680413386143e-06, |
| "loss": 1.2693, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.499101355311891e-06, |
| "loss": 1.2744, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.481377904428171e-06, |
| "loss": 1.3277, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.4636340256134224e-06, |
| "loss": 1.3469, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.445869963552496e-06, |
| "loss": 1.3045, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 6.428085963208567e-06, |
| "loss": 1.2607, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 6.410282269819756e-06, |
| "loss": 1.2707, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 6.392459128895747e-06, |
| "loss": 1.2619, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 6.374616786214402e-06, |
| "loss": 1.2985, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 6.356755487818371e-06, |
| "loss": 1.3691, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 6.338875480011698e-06, |
| "loss": 1.3009, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 6.3209770093564315e-06, |
| "loss": 1.3129, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 6.303060322669214e-06, |
| "loss": 1.2962, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.285125667017886e-06, |
| "loss": 1.3026, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.267173289718079e-06, |
| "loss": 1.2494, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.249203438329799e-06, |
| "loss": 1.2837, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.23121636065402e-06, |
| "loss": 1.2899, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.213212304729259e-06, |
| "loss": 1.2559, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.195191518828163e-06, |
| "loss": 1.2479, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.177154251454082e-06, |
| "loss": 1.3153, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.1591007513376425e-06, |
| "loss": 1.3329, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.141031267433316e-06, |
| "loss": 1.2713, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 6.122946048915991e-06, |
| "loss": 1.3049, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 6.1048453451775305e-06, |
| "loss": 1.3078, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 6.086729405823335e-06, |
| "loss": 1.2158, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 6.0685984806689055e-06, |
| "loss": 1.2278, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 6.05045281973639e-06, |
| "loss": 1.2554, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 6.032292673251143e-06, |
| "loss": 1.257, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 6.014118291638272e-06, |
| "loss": 1.2632, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 5.995929925519181e-06, |
| "loss": 1.2637, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 5.977727825708123e-06, |
| "loss": 1.4115, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5.959512243208732e-06, |
| "loss": 1.3101, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5.941283429210568e-06, |
| "loss": 1.2819, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5.9230416350856505e-06, |
| "loss": 1.1701, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5.904787112384991e-06, |
| "loss": 1.3451, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5.886520112835128e-06, |
| "loss": 1.294, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 1.289, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5.849949690950736e-06, |
| "loss": 1.3007, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5.831646772915651e-06, |
| "loss": 1.2988, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.8133323866233005e-06, |
| "loss": 1.3402, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.795006784625728e-06, |
| "loss": 1.2917, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.776670219629643e-06, |
| "loss": 1.2742, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.75832294449293e-06, |
| "loss": 1.1589, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.739965212221168e-06, |
| "loss": 1.2647, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.7215972759641335e-06, |
| "loss": 1.2089, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.703219389012317e-06, |
| "loss": 1.2267, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.684831804793427e-06, |
| "loss": 1.2323, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.666434776868895e-06, |
| "loss": 1.2575, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.64802855893038e-06, |
| "loss": 1.2609, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.629613404796267e-06, |
| "loss": 1.2414, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.611189568408173e-06, |
| "loss": 1.2782, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.592757303827441e-06, |
| "loss": 1.2637, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.574316865231637e-06, |
| "loss": 1.2786, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.5558685069110444e-06, |
| "loss": 1.2729, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.537412483265156e-06, |
| "loss": 1.2916, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.518949048799176e-06, |
| "loss": 1.2674, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 5.500478458120493e-06, |
| "loss": 1.2308, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.482000965935182e-06, |
| "loss": 1.3994, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.463516827044492e-06, |
| "loss": 1.2365, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.445026296341325e-06, |
| "loss": 1.3897, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.4265296288067235e-06, |
| "loss": 1.3042, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.408027079506362e-06, |
| "loss": 1.2559, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.389518903587016e-06, |
| "loss": 1.2159, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.371005356273058e-06, |
| "loss": 1.3298, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.352486692862926e-06, |
| "loss": 1.2856, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.3339631687256085e-06, |
| "loss": 1.3069, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 5.3154350392971245e-06, |
| "loss": 1.2573, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 5.296902560077e-06, |
| "loss": 1.322, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 5.278365986624743e-06, |
| "loss": 1.3452, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 5.259825574556315e-06, |
| "loss": 1.2812, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 5.241281579540619e-06, |
| "loss": 1.2764, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 5.222734257295963e-06, |
| "loss": 1.2183, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 5.2041838635865336e-06, |
| "loss": 1.4043, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 5.1856306542188805e-06, |
| "loss": 1.3147, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5.1670748850383734e-06, |
| "loss": 1.2799, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5.148516811925684e-06, |
| "loss": 1.2555, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5.129956690793255e-06, |
| "loss": 1.2698, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5.111394777581769e-06, |
| "loss": 1.3233, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5.0928313282566255e-06, |
| "loss": 1.2816, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5.074266598804402e-06, |
| "loss": 1.2227, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5.0557008452293275e-06, |
| "loss": 1.2858, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5.037134323549763e-06, |
| "loss": 1.2475, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 5.0185672897946515e-06, |
| "loss": 1.2544, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 5e-06, |
| "loss": 1.3282, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.981432710205351e-06, |
| "loss": 1.2523, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.962865676450239e-06, |
| "loss": 1.2541, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.944299154770673e-06, |
| "loss": 1.3198, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.925733401195601e-06, |
| "loss": 1.3546, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.907168671743377e-06, |
| "loss": 1.2943, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.888605222418232e-06, |
| "loss": 1.2895, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.8700433092067474e-06, |
| "loss": 1.3898, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.8514831880743175e-06, |
| "loss": 1.2536, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.832925114961629e-06, |
| "loss": 1.2977, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.814369345781121e-06, |
| "loss": 1.2635, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.795816136413467e-06, |
| "loss": 1.442, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.777265742704039e-06, |
| "loss": 1.2602, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.758718420459383e-06, |
| "loss": 1.3203, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.740174425443687e-06, |
| "loss": 1.2639, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.7216340133752604e-06, |
| "loss": 1.2657, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.703097439923e-06, |
| "loss": 1.2655, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.684564960702877e-06, |
| "loss": 1.287, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.666036831274392e-06, |
| "loss": 1.2951, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.647513307137076e-06, |
| "loss": 1.2133, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.628994643726942e-06, |
| "loss": 1.3232, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.610481096412985e-06, |
| "loss": 1.2329, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.591972920493638e-06, |
| "loss": 1.259, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.573470371193277e-06, |
| "loss": 1.2908, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.554973703658676e-06, |
| "loss": 1.3604, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.53648317295551e-06, |
| "loss": 1.296, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.517999034064819e-06, |
| "loss": 1.3323, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.499521541879508e-06, |
| "loss": 1.2711, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.4810509512008245e-06, |
| "loss": 1.1972, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.462587516734845e-06, |
| "loss": 1.276, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.444131493088956e-06, |
| "loss": 1.3355, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.425683134768365e-06, |
| "loss": 1.3096, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.40724269617256e-06, |
| "loss": 1.2206, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.388810431591829e-06, |
| "loss": 1.2797, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.3703865952037354e-06, |
| "loss": 1.3047, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.351971441069622e-06, |
| "loss": 1.3317, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.333565223131107e-06, |
| "loss": 1.1875, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.315168195206574e-06, |
| "loss": 1.2847, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.296780610987685e-06, |
| "loss": 1.1669, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.278402724035868e-06, |
| "loss": 1.2493, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.260034787778833e-06, |
| "loss": 1.2311, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.241677055507071e-06, |
| "loss": 1.2648, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.223329780370359e-06, |
| "loss": 1.2404, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.2049932153742726e-06, |
| "loss": 1.2094, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.186667613376702e-06, |
| "loss": 1.303, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.1683532270843505e-06, |
| "loss": 1.2948, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.150050309049267e-06, |
| "loss": 1.2527, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.131759111665349e-06, |
| "loss": 1.3017, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.113479887164873e-06, |
| "loss": 1.2143, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.09521288761501e-06, |
| "loss": 1.3148, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.076958364914352e-06, |
| "loss": 1.3612, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.0587165707894326e-06, |
| "loss": 1.1973, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.04048775679127e-06, |
| "loss": 1.3197, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.022272174291878e-06, |
| "loss": 1.2792, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.004070074480821e-06, |
| "loss": 1.2688, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.985881708361729e-06, |
| "loss": 1.2597, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.967707326748857e-06, |
| "loss": 1.2096, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.94954718026361e-06, |
| "loss": 1.3257, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.931401519331095e-06, |
| "loss": 1.2747, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.913270594176665e-06, |
| "loss": 1.2471, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.895154654822471e-06, |
| "loss": 1.1345, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.87705395108401e-06, |
| "loss": 1.2536, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.858968732566685e-06, |
| "loss": 1.219, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.840899248662358e-06, |
| "loss": 1.3678, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.822845748545919e-06, |
| "loss": 1.2971, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.8048084811718377e-06, |
| "loss": 1.2638, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.786787695270743e-06, |
| "loss": 1.254, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.7687836393459828e-06, |
| "loss": 1.2319, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.7507965616702015e-06, |
| "loss": 1.2633, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.732826710281923e-06, |
| "loss": 1.2397, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.7148743329821146e-06, |
| "loss": 1.2457, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.6969396773307888e-06, |
| "loss": 1.1932, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.6790229906435706e-06, |
| "loss": 1.232, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.6611245199883037e-06, |
| "loss": 1.2936, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.6432445121816308e-06, |
| "loss": 1.2693, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.6253832137856e-06, |
| "loss": 1.3185, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.6075408711042536e-06, |
| "loss": 1.2861, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.5897177301802455e-06, |
| "loss": 1.3201, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.571914036791435e-06, |
| "loss": 1.2737, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.5541300364475067e-06, |
| "loss": 1.2632, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.5363659743865797e-06, |
| "loss": 1.2531, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.518622095571831e-06, |
| "loss": 1.2531, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.5008986446881088e-06, |
| "loss": 1.2296, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.4831958661385716e-06, |
| "loss": 1.2491, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.465514004041301e-06, |
| "loss": 1.242, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.4478533022259527e-06, |
| "loss": 1.2202, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.4302140042303813e-06, |
| "loss": 1.2563, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.4125963532972878e-06, |
| "loss": 1.1436, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.395000592370864e-06, |
| "loss": 1.3096, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.3774269640934447e-06, |
| "loss": 1.3111, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.3598757108021546e-06, |
| "loss": 1.237, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.342347074525578e-06, |
| "loss": 1.2538, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.3248412969804065e-06, |
| "loss": 1.1838, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.307358619568123e-06, |
| "loss": 1.2605, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.289899283371657e-06, |
| "loss": 1.2921, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.2724635291520697e-06, |
| "loss": 1.2745, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.2550515973452295e-06, |
| "loss": 1.2481, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.2376637280585025e-06, |
| "loss": 1.2874, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.2203001610674322e-06, |
| "loss": 1.2855, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.202961135812437e-06, |
| "loss": 1.2453, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.185646891395514e-06, |
| "loss": 1.2974, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.1683576665769344e-06, |
| "loss": 1.2085, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.1510936997719557e-06, |
| "loss": 1.192, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.1338552290475265e-06, |
| "loss": 1.2529, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.1166424921190174e-06, |
| "loss": 1.3066, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.0994557263469267e-06, |
| "loss": 1.3012, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.0822951687336215e-06, |
| "loss": 1.2765, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.065161055920057e-06, |
| "loss": 1.1885, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.0480536241825263e-06, |
| "loss": 1.2846, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.03097310942939e-06, |
| "loss": 1.2697, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.013919747197832e-06, |
| "loss": 1.2531, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.996893772650602e-06, |
| "loss": 1.2579, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.9798954205727886e-06, |
| "loss": 1.2048, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.96292492536856e-06, |
| "loss": 1.1587, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.9459825210579534e-06, |
| "loss": 1.2425, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.929068441273629e-06, |
| "loss": 1.2004, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.9121829192576647e-06, |
| "loss": 1.2269, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.8953261878583263e-06, |
| "loss": 1.2296, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.8784984795268644e-06, |
| "loss": 1.2511, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.861700026314308e-06, |
| "loss": 1.2154, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.844931059868261e-06, |
| "loss": 1.2472, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.828191811429709e-06, |
| "loss": 1.2231, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.811482511829842e-06, |
| "loss": 1.2338, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.7948033914868415e-06, |
| "loss": 1.1874, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.778154680402745e-06, |
| "loss": 1.1784, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.7615366081602306e-06, |
| "loss": 1.2601, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.74494940391949e-06, |
| "loss": 1.2372, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.7283932964150417e-06, |
| "loss": 1.2243, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.711868513952587e-06, |
| "loss": 1.2287, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.69537528440586e-06, |
| "loss": 1.2571, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.6789138352134885e-06, |
| "loss": 1.3302, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.6624843933758547e-06, |
| "loss": 1.2508, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.6460871854519594e-06, |
| "loss": 1.3273, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.6297224375563126e-06, |
| "loss": 1.2187, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.613390375355801e-06, |
| "loss": 1.2563, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.5970912240665815e-06, |
| "loss": 1.2393, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.5808252084509784e-06, |
| "loss": 1.2318, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.5645925528143778e-06, |
| "loss": 1.2727, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.54839348100214e-06, |
| "loss": 1.2413, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.5322282163965096e-06, |
| "loss": 1.2198, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.5160969819135368e-06, |
| "loss": 1.2257, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 1.1988, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.483937492630345e-06, |
| "loss": 1.2751, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.4679096813036202e-06, |
| "loss": 1.2558, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.4519167870404126e-06, |
| "loss": 1.2104, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.4359590303798243e-06, |
| "loss": 1.263, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.4200366313764e-06, |
| "loss": 1.3031, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.4041498095971253e-06, |
| "loss": 1.3141, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.388298784118366e-06, |
| "loss": 1.2409, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.3724837735228773e-06, |
| "loss": 1.2706, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.356704995896768e-06, |
| "loss": 1.3215, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.340962668826503e-06, |
| "loss": 1.248, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.3252570093959e-06, |
| "loss": 1.2072, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.309588234183137e-06, |
| "loss": 1.2875, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.293956559257766e-06, |
| "loss": 1.2191, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.2783622001777322e-06, |
| "loss": 1.1606, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.262805371986402e-06, |
| "loss": 1.3171, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.247286289209597e-06, |
| "loss": 1.2794, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.231805165852637e-06, |
| "loss": 1.2513, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.216362215397393e-06, |
| "loss": 1.2643, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.2009576507993273e-06, |
| "loss": 1.2275, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.1855916844845827e-06, |
| "loss": 1.2788, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.1702645283470238e-06, |
| "loss": 1.2536, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.1549763937453445e-06, |
| "loss": 1.2581, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.1397274915001254e-06, |
| "loss": 1.2904, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.1245180318909482e-06, |
| "loss": 1.2637, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.1093482246534896e-06, |
| "loss": 1.2643, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.0942182789766174e-06, |
| "loss": 1.2461, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.0791284034995296e-06, |
| "loss": 1.2346, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.064078806308848e-06, |
| "loss": 1.2702, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.0490696949357774e-06, |
| "loss": 1.2606, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.0341012763532243e-06, |
| "loss": 1.2199, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.0191737569729492e-06, |
| "loss": 1.2485, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.004287342642721e-06, |
| "loss": 1.2254, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 1.989442238643478e-06, |
| "loss": 1.1819, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.974638649686495e-06, |
| "loss": 1.3182, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.959876779910564e-06, |
| "loss": 1.2954, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.945156832879174e-06, |
| "loss": 1.2334, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.930479011577711e-06, |
| "loss": 1.1988, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.91584351841065e-06, |
| "loss": 1.236, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.9012505551987764e-06, |
| "loss": 1.3011, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.8867003231763847e-06, |
| "loss": 1.2476, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.872193022988526e-06, |
| "loss": 1.2689, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.8577288546882167e-06, |
| "loss": 1.1812, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.8433080177337043e-06, |
| "loss": 1.2817, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.8289307109856941e-06, |
| "loss": 1.2886, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.8145971327046274e-06, |
| "loss": 1.2686, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.8003074805479314e-06, |
| "loss": 1.1913, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7860619515673034e-06, |
| "loss": 1.3015, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.771860742205988e-06, |
| "loss": 1.3085, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7577040482960723e-06, |
| "loss": 1.2569, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7435920650557808e-06, |
| "loss": 1.3083, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.7295249870867898e-06, |
| "loss": 1.2116, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.7155030083715362e-06, |
| "loss": 1.1131, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.7015263222705492e-06, |
| "loss": 1.212, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.6875951215197779e-06, |
| "loss": 1.256, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.6737095982279444e-06, |
| "loss": 1.291, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.6598699438738764e-06, |
| "loss": 1.2236, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.646076349303884e-06, |
| "loss": 1.157, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.6323290047291196e-06, |
| "loss": 1.314, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.618628099722957e-06, |
| "loss": 1.2609, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.604973823218376e-06, |
| "loss": 1.2084, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.5913663635053578e-06, |
| "loss": 1.2334, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.5778059082282932e-06, |
| "loss": 1.2386, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.56429264438338e-06, |
| "loss": 1.3237, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.550826758316068e-06, |
| "loss": 1.2669, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.5374084357184621e-06, |
| "loss": 1.1954, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.5240378616267887e-06, |
| "loss": 1.2389, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.510715220418823e-06, |
| "loss": 1.2534, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.4974406958113557e-06, |
| "loss": 1.1756, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.4842144708576606e-06, |
| "loss": 1.1699, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.4710367279449662e-06, |
| "loss": 1.2823, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.457907648791943e-06, |
| "loss": 1.2844, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.4448274144461965e-06, |
| "loss": 1.2512, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.431796205281773e-06, |
| "loss": 1.2727, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.4188142009966689e-06, |
| "loss": 1.2276, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.4058815806103542e-06, |
| "loss": 1.2762, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.3929985224613051e-06, |
| "loss": 1.3201, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.3801652042045416e-06, |
| "loss": 1.1758, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.367381802809185e-06, |
| "loss": 1.2246, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.3546484945560029e-06, |
| "loss": 1.1833, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.3419654550349987e-06, |
| "loss": 1.1716, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.329332859142967e-06, |
| "loss": 1.2747, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.3167508810811058e-06, |
| "loss": 1.2786, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.3042196943525942e-06, |
| "loss": 1.2917, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.2917394717602123e-06, |
| "loss": 1.2502, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.2793103854039518e-06, |
| "loss": 1.2029, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.2669326066786458e-06, |
| "loss": 1.2311, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.2546063062716069e-06, |
| "loss": 1.1958, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.242331654160263e-06, |
| "loss": 1.313, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.2301088196098332e-06, |
| "loss": 1.2361, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.2179379711709738e-06, |
| "loss": 1.2073, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.205819276677464e-06, |
| "loss": 1.2211, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.1937529032438905e-06, |
| "loss": 1.2087, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.1817390172633402e-06, |
| "loss": 1.2407, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.1697777844051105e-06, |
| "loss": 1.201, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.1578693696124193e-06, |
| "loss": 1.2321, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.1460139371001339e-06, |
| "loss": 1.2191, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.1342116503525059e-06, |
| "loss": 1.2591, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.1224626721209141e-06, |
| "loss": 1.188, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.1107671644216305e-06, |
| "loss": 1.2702, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.0991252885335651e-06, |
| "loss": 1.2203, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.0875372049960697e-06, |
| "loss": 1.235, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.0760030736066952e-06, |
| "loss": 1.2553, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.064523053419015e-06, |
| "loss": 1.1923, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.0530973027404073e-06, |
| "loss": 1.2705, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.041725979129894e-06, |
| "loss": 1.2714, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.0304092393959513e-06, |
| "loss": 1.2139, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.0191472395943552e-06, |
| "loss": 1.2619, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.0079401350260288e-06, |
| "loss": 1.1787, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.967880802348989e-07, |
| "loss": 1.19, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.856912290057668e-07, |
| "loss": 1.2921, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.746497343621857e-07, |
| "loss": 1.1421, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.63663748564353e-07, |
| "loss": 1.24, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.527334231070084e-07, |
| "loss": 1.2213, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.418589087173441e-07, |
| "loss": 1.2802, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.310403553529335e-07, |
| "loss": 1.1891, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.20277912199648e-07, |
| "loss": 1.218, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.095717276696214e-07, |
| "loss": 1.2472, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 8.989219493991791e-07, |
| "loss": 1.1592, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 8.883287242468242e-07, |
| "loss": 1.2343, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 8.777921982911996e-07, |
| "loss": 1.2461, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 8.673125168290713e-07, |
| "loss": 1.226, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 8.568898243733398e-07, |
| "loss": 1.2083, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 8.46524264651028e-07, |
| "loss": 1.2606, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 8.362159806013176e-07, |
| "loss": 1.2723, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 8.259651143735603e-07, |
| "loss": 1.1759, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 8.157718073253351e-07, |
| "loss": 1.3235, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 8.056362000204848e-07, |
| "loss": 1.1881, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 7.955584322271853e-07, |
| "loss": 1.2148, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 7.85538642916015e-07, |
| "loss": 1.2402, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.755769702580412e-07, |
| "loss": 1.2338, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.656735516229125e-07, |
| "loss": 1.2621, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.558285235769647e-07, |
| "loss": 1.1569, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.46042021881338e-07, |
| "loss": 1.2019, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.363141814901054e-07, |
| "loss": 1.2361, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.266451365484106e-07, |
| "loss": 1.2144, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.170350203906218e-07, |
| "loss": 1.2816, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.074839655384835e-07, |
| "loss": 1.1859, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.979921036993042e-07, |
| "loss": 1.304, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.885595657641214e-07, |
| "loss": 1.201, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.791864818059179e-07, |
| "loss": 1.1713, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.698729810778065e-07, |
| "loss": 1.2106, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.606191920112664e-07, |
| "loss": 1.2678, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.514252422143591e-07, |
| "loss": 1.3211, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.422912584699753e-07, |
| "loss": 1.2193, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.332173667340841e-07, |
| "loss": 1.204, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.242036921339973e-07, |
| "loss": 1.2639, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 6.152503589666426e-07, |
| "loss": 1.2609, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 6.063574906968511e-07, |
| "loss": 1.2043, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 5.975252099556544e-07, |
| "loss": 1.2423, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 5.887536385385917e-07, |
| "loss": 1.2067, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 5.800428974040311e-07, |
| "loss": 1.2295, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 5.713931066715078e-07, |
| "loss": 1.1883, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 5.628043856200543e-07, |
| "loss": 1.196, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 5.542768526865678e-07, |
| "loss": 1.2536, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 5.458106254641715e-07, |
| "loss": 1.2407, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 5.374058207005945e-07, |
| "loss": 1.2345, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 5.290625542965611e-07, |
| "loss": 1.1855, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 5.207809413041914e-07, |
| "loss": 1.2697, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 5.125610959254213e-07, |
| "loss": 1.2528, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 5.044031315104136e-07, |
| "loss": 1.2428, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.963071605560144e-07, |
| "loss": 1.2263, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.882732947041818e-07, |
| "loss": 1.2617, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.803016447404629e-07, |
| "loss": 1.1969, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.723923205924558e-07, |
| "loss": 1.1646, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.6454543132829653e-07, |
| "loss": 1.2267, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.5676108515515684e-07, |
| "loss": 1.2072, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.4903938941775084e-07, |
| "loss": 1.1877, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.413804505968533e-07, |
| "loss": 1.2038, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.3378437430783294e-07, |
| "loss": 1.2549, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.262512652991968e-07, |
| "loss": 1.268, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.187812274511427e-07, |
| "loss": 1.2731, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.113743637741296e-07, |
| "loss": 1.3252, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 4.040307764074586e-07, |
| "loss": 1.1899, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.9675056661785563e-07, |
| "loss": 1.2422, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.895338347980898e-07, |
| "loss": 1.2417, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.8238068046557276e-07, |
| "loss": 1.1714, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.752912022610006e-07, |
| "loss": 1.243, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.6826549794698074e-07, |
| "loss": 1.2715, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.6130366440669693e-07, |
| "loss": 1.2184, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.544057976425619e-07, |
| "loss": 1.1844, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.4757199277490106e-07, |
| "loss": 1.1547, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.408023440406355e-07, |
| "loss": 1.211, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.340969447919873e-07, |
| "loss": 1.1662, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.2745588749518775e-07, |
| "loss": 1.3596, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.2087926372920577e-07, |
| "loss": 1.1916, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.143671641844831e-07, |
| "loss": 1.2438, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.0791967866168394e-07, |
| "loss": 1.2936, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.015368960704584e-07, |
| "loss": 1.2453, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2.9521890442821276e-07, |
| "loss": 1.2201, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2.889657908589e-07, |
| "loss": 1.2094, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.8277764159181484e-07, |
| "loss": 1.1386, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.7665454196040665e-07, |
| "loss": 1.1688, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.7059657640110204e-07, |
| "loss": 1.2446, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.6460382845214125e-07, |
| "loss": 1.2187, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.5867638075242454e-07, |
| "loss": 1.2678, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.5281431504037555e-07, |
| "loss": 1.2246, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.470177121528089e-07, |
| "loss": 1.268, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.4128665202382327e-07, |
| "loss": 1.1303, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.356212136836894e-07, |
| "loss": 1.1405, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.3002147525777118e-07, |
| "loss": 1.2348, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.2448751396543788e-07, |
| "loss": 1.2552, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.1901940611900707e-07, |
| "loss": 1.2936, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.1361722712268772e-07, |
| "loss": 1.289, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.0828105147154275e-07, |
| "loss": 1.2411, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.0301095275046145e-07, |
| "loss": 1.2261, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.9780700363314255e-07, |
| "loss": 1.1747, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.926692758810955e-07, |
| "loss": 1.2051, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.8759784034264927e-07, |
| "loss": 1.2246, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.825927669519728e-07, |
| "loss": 1.2018, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.776541247281177e-07, |
| "loss": 1.1812, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.7278198177405614e-07, |
| "loss": 1.2621, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.679764052757532e-07, |
| "loss": 1.2021, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.6323746150123e-07, |
| "loss": 1.2577, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.5856521579965866e-07, |
| "loss": 1.2777, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.5395973260045273e-07, |
| "loss": 1.274, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.4942107541238705e-07, |
| "loss": 1.2606, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.449493068227159e-07, |
| "loss": 1.1909, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.4054448849631087e-07, |
| "loss": 1.1955, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.3620668117481471e-07, |
| "loss": 1.2731, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.319359446757973e-07, |
| "loss": 1.3342, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.2773233789193816e-07, |
| "loss": 1.1588, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.2359591879020528e-07, |
| "loss": 1.2291, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.1952674441106483e-07, |
| "loss": 1.1973, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.1552487086768871e-07, |
| "loss": 1.2551, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.1159035334518343e-07, |
| "loss": 1.2953, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.0772324609982787e-07, |
| "loss": 1.2416, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.03923602458324e-07, |
| "loss": 1.2722, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.0019147481706626e-07, |
| "loss": 1.1502, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.652691464141273e-08, |
| "loss": 1.2267, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.292997246497959e-08, |
| "loss": 1.2338, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.940069788894389e-08, |
| "loss": 1.3175, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.593913958135691e-08, |
| "loss": 1.2424, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.254534527647851e-08, |
| "loss": 1.2945, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 7.921936177411049e-08, |
| "loss": 1.1953, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 7.59612349389599e-08, |
| "loss": 1.226, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 7.277100970000062e-08, |
| "loss": 1.2159, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 6.964873004985717e-08, |
| "loss": 1.1667, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 6.659443904419638e-08, |
| "loss": 1.2288, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 6.360817880113335e-08, |
| "loss": 1.2918, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 6.06899905006525e-08, |
| "loss": 1.2764, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 5.783991438403802e-08, |
| "loss": 1.3111, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 5.505798975331933e-08, |
| "loss": 1.2598, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 5.234425497072981e-08, |
| "loss": 1.2023, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.9698747458176714e-08, |
| "loss": 1.2719, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.712150369672652e-08, |
| "loss": 1.198, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.461255922609986e-08, |
| "loss": 1.2503, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.217194864418295e-08, |
| "loss": 1.2348, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.979970560655133e-08, |
| "loss": 1.3072, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.749586282600359e-08, |
| "loss": 1.1894, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.526045207211059e-08, |
| "loss": 1.233, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.309350417077972e-08, |
| "loss": 1.209, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.0995049003826325e-08, |
| "loss": 1.1935, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.8965115508564622e-08, |
| "loss": 1.3144, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.700373167740744e-08, |
| "loss": 1.2205, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.511092455747932e-08, |
| "loss": 1.1731, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.3286720250246255e-08, |
| "loss": 1.087, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.153114391115152e-08, |
| "loss": 1.2086, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.984421974927375e-08, |
| "loss": 1.1868, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.8225971026987755e-08, |
| "loss": 1.2852, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.6676420059649756e-08, |
| "loss": 1.289, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5195588215283773e-08, |
| "loss": 1.2034, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.3783495914291844e-08, |
| "loss": 1.1606, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.244016262916814e-08, |
| "loss": 1.1817, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.1165606884234182e-08, |
| "loss": 1.1963, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.959846255381267e-09, |
| "loss": 1.23, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 8.822897369827333e-09, |
| "loss": 1.1343, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 7.754775905891576e-09, |
| "loss": 1.2412, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 6.755496592773525e-09, |
| "loss": 1.3113, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 5.825073210352084e-09, |
| "loss": 1.2007, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 4.9635185889967966e-09, |
| "loss": 1.172, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 4.170844609387992e-09, |
| "loss": 1.2584, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.4470622023558e-09, |
| "loss": 1.2319, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.792181348726941e-09, |
| "loss": 1.2648, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.20621107918928e-09, |
| "loss": 1.2215, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.6891594741663686e-09, |
| "loss": 1.3185, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.2410336637047604e-09, |
| "loss": 1.2158, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 8.618398273779749e-10, |
| "loss": 1.2343, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 5.515831941993455e-10, |
| "loss": 1.2476, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.1026804255207544e-10, |
| "loss": 1.2134, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.378977001276205e-10, |
| "loss": 1.22, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.447454388127991e-11, |
| "loss": 1.2135, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0, |
| "loss": 1.1774, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 873, |
| "total_flos": 6.557120667907523e+18, |
| "train_loss": 1.3115756642777485, |
| "train_runtime": 2453.1326, |
| "train_samples_per_second": 91.074, |
| "train_steps_per_second": 0.356 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 873, |
| "num_train_epochs": 1, |
| "save_steps": 5000, |
| "total_flos": 6.557120667907523e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|