| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 3125, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 7.186999998384225, |
| "learning_rate": 1.0638297872340426e-07, |
| "loss": 0.8419, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 15.195501156241782, |
| "learning_rate": 2.1276595744680852e-07, |
| "loss": 0.8607, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 6.471859790144152, |
| "learning_rate": 3.1914893617021275e-07, |
| "loss": 0.8707, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 8.663960370878844, |
| "learning_rate": 4.2553191489361704e-07, |
| "loss": 1.0322, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 3.846806967544908, |
| "learning_rate": 5.319148936170213e-07, |
| "loss": 0.293, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 3.649076371491653, |
| "learning_rate": 6.382978723404255e-07, |
| "loss": 0.3746, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 3.7565091564146607, |
| "learning_rate": 7.446808510638298e-07, |
| "loss": 0.3011, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 3.7390758749183277, |
| "learning_rate": 8.510638297872341e-07, |
| "loss": 0.2792, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 2.9837584928681395, |
| "learning_rate": 9.574468085106384e-07, |
| "loss": 0.3738, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 5.947697119780937, |
| "learning_rate": 1.0638297872340427e-06, |
| "loss": 0.7324, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 8.260074298895535, |
| "learning_rate": 1.170212765957447e-06, |
| "loss": 0.7568, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 5.570877454874293, |
| "learning_rate": 1.276595744680851e-06, |
| "loss": 0.9866, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 6.179108386871164, |
| "learning_rate": 1.3829787234042555e-06, |
| "loss": 0.7508, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 6.33706086250621, |
| "learning_rate": 1.4893617021276596e-06, |
| "loss": 0.6779, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 5.348047232724415, |
| "learning_rate": 1.595744680851064e-06, |
| "loss": 0.7782, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.3373731647637666, |
| "learning_rate": 1.7021276595744682e-06, |
| "loss": 0.2998, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.074878326998715, |
| "learning_rate": 1.8085106382978727e-06, |
| "loss": 0.817, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 5.919283016168947, |
| "learning_rate": 1.9148936170212767e-06, |
| "loss": 0.8892, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 4.5315206387377005, |
| "learning_rate": 2.021276595744681e-06, |
| "loss": 0.5379, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.4658963420960665, |
| "learning_rate": 2.1276595744680853e-06, |
| "loss": 0.3507, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.5168553994661753, |
| "learning_rate": 2.2340425531914894e-06, |
| "loss": 0.2616, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.926878667097406, |
| "learning_rate": 2.340425531914894e-06, |
| "loss": 0.8075, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.190515637234529, |
| "learning_rate": 2.446808510638298e-06, |
| "loss": 0.2471, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.1011555987879365, |
| "learning_rate": 2.553191489361702e-06, |
| "loss": 0.2433, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 5.466686958576261, |
| "learning_rate": 2.6595744680851065e-06, |
| "loss": 0.778, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.1202865982496992, |
| "learning_rate": 2.765957446808511e-06, |
| "loss": 0.2767, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.1714962642460858, |
| "learning_rate": 2.8723404255319155e-06, |
| "loss": 0.2637, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.713848871868974, |
| "learning_rate": 2.978723404255319e-06, |
| "loss": 0.8652, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.115806755756005, |
| "learning_rate": 3.0851063829787237e-06, |
| "loss": 0.2327, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 7.6527791089253006, |
| "learning_rate": 3.191489361702128e-06, |
| "loss": 0.8428, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 5.952188134702268, |
| "learning_rate": 3.297872340425532e-06, |
| "loss": 0.8583, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 19.692620114754746, |
| "learning_rate": 3.4042553191489363e-06, |
| "loss": 0.7858, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.0759321339191934, |
| "learning_rate": 3.510638297872341e-06, |
| "loss": 0.2241, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 5.5301026342716595, |
| "learning_rate": 3.6170212765957453e-06, |
| "loss": 0.663, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 5.585744509955249, |
| "learning_rate": 3.723404255319149e-06, |
| "loss": 0.6562, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.265222587262519, |
| "learning_rate": 3.8297872340425535e-06, |
| "loss": 0.5355, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.4701842123897486, |
| "learning_rate": 3.936170212765958e-06, |
| "loss": 0.2582, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.122181502870864, |
| "learning_rate": 4.042553191489362e-06, |
| "loss": 0.2856, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.9282484979982195, |
| "learning_rate": 4.148936170212766e-06, |
| "loss": 0.6174, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 5.975838840358755, |
| "learning_rate": 4.255319148936171e-06, |
| "loss": 0.7906, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.0345349032020126, |
| "learning_rate": 4.361702127659575e-06, |
| "loss": 0.2424, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.0615934625099346, |
| "learning_rate": 4.468085106382979e-06, |
| "loss": 0.2116, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 5.705896830279454, |
| "learning_rate": 4.574468085106383e-06, |
| "loss": 0.6891, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 13.158667169865367, |
| "learning_rate": 4.680851063829788e-06, |
| "loss": 0.5684, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 8.208102208041616, |
| "learning_rate": 4.787234042553192e-06, |
| "loss": 0.7109, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 13.41232565070186, |
| "learning_rate": 4.893617021276596e-06, |
| "loss": 0.5345, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 4.382535756710318, |
| "learning_rate": 5e-06, |
| "loss": 0.5668, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.9642261227014204, |
| "learning_rate": 5.106382978723404e-06, |
| "loss": 0.2235, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.355165416214269, |
| "learning_rate": 5.212765957446809e-06, |
| "loss": 0.2517, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.251290417809313, |
| "learning_rate": 5.319148936170213e-06, |
| "loss": 0.2282, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.1719053672114463, |
| "learning_rate": 5.425531914893617e-06, |
| "loss": 0.2927, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.9890876845802554, |
| "learning_rate": 5.531914893617022e-06, |
| "loss": 0.2073, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.3250034678509506, |
| "learning_rate": 5.638297872340426e-06, |
| "loss": 0.2757, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 6.374850230264314, |
| "learning_rate": 5.744680851063831e-06, |
| "loss": 0.6418, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.0771023570501375, |
| "learning_rate": 5.851063829787235e-06, |
| "loss": 0.2194, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.110659928743081, |
| "learning_rate": 5.957446808510638e-06, |
| "loss": 0.2423, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.0592390004569676, |
| "learning_rate": 6.063829787234044e-06, |
| "loss": 0.2304, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.057093727460016, |
| "learning_rate": 6.170212765957447e-06, |
| "loss": 0.2304, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 5.045560870554051, |
| "learning_rate": 6.276595744680851e-06, |
| "loss": 0.553, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 8.28752932319891, |
| "learning_rate": 6.382978723404256e-06, |
| "loss": 0.5713, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.7885632031927219, |
| "learning_rate": 6.48936170212766e-06, |
| "loss": 0.224, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 5.192017917650936, |
| "learning_rate": 6.595744680851064e-06, |
| "loss": 0.521, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.3351268569331904, |
| "learning_rate": 6.702127659574469e-06, |
| "loss": 0.2633, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.1532968301258983, |
| "learning_rate": 6.808510638297873e-06, |
| "loss": 0.2165, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.9511934221645288, |
| "learning_rate": 6.914893617021278e-06, |
| "loss": 0.2079, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.949857025522768, |
| "learning_rate": 7.021276595744682e-06, |
| "loss": 0.247, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.8713256319822902, |
| "learning_rate": 7.127659574468085e-06, |
| "loss": 0.1753, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.2299096915752994, |
| "learning_rate": 7.234042553191491e-06, |
| "loss": 0.2232, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.9291440073894126, |
| "learning_rate": 7.340425531914894e-06, |
| "loss": 0.2033, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 5.717848884350137, |
| "learning_rate": 7.446808510638298e-06, |
| "loss": 0.5015, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.3133611736855175, |
| "learning_rate": 7.553191489361703e-06, |
| "loss": 0.2118, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.0854358236952564, |
| "learning_rate": 7.659574468085107e-06, |
| "loss": 0.2394, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.224655649542742, |
| "learning_rate": 7.765957446808511e-06, |
| "loss": 0.265, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 5.929271235180811, |
| "learning_rate": 7.872340425531916e-06, |
| "loss": 0.769, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 6.3708936816392265, |
| "learning_rate": 7.97872340425532e-06, |
| "loss": 0.7683, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.2604552953768673, |
| "learning_rate": 8.085106382978723e-06, |
| "loss": 0.2831, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 6.126149805744268, |
| "learning_rate": 8.191489361702128e-06, |
| "loss": 0.6417, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 5.373342833190464, |
| "learning_rate": 8.297872340425532e-06, |
| "loss": 0.6714, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.4780087686567374, |
| "learning_rate": 8.404255319148937e-06, |
| "loss": 0.274, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 6.0560530873284275, |
| "learning_rate": 8.510638297872341e-06, |
| "loss": 0.6787, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 5.52228143292844, |
| "learning_rate": 8.617021276595746e-06, |
| "loss": 0.5285, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 7.424573867850644, |
| "learning_rate": 8.72340425531915e-06, |
| "loss": 0.8279, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.0566553331232797, |
| "learning_rate": 8.829787234042555e-06, |
| "loss": 0.1796, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.7736431844288432, |
| "learning_rate": 8.936170212765958e-06, |
| "loss": 0.1994, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 4.983461904065465, |
| "learning_rate": 9.042553191489362e-06, |
| "loss": 0.5259, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 8.163488446770579, |
| "learning_rate": 9.148936170212767e-06, |
| "loss": 0.886, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 5.988575327788609, |
| "learning_rate": 9.255319148936171e-06, |
| "loss": 0.7793, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.8929495581316096, |
| "learning_rate": 9.361702127659576e-06, |
| "loss": 0.2037, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.2085110026193715, |
| "learning_rate": 9.46808510638298e-06, |
| "loss": 0.2481, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 9.762190689935485, |
| "learning_rate": 9.574468085106385e-06, |
| "loss": 0.6555, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 4.402889827649181, |
| "learning_rate": 9.680851063829787e-06, |
| "loss": 0.5985, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.0189402746940495, |
| "learning_rate": 9.787234042553192e-06, |
| "loss": 0.2526, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 8.113680435659742, |
| "learning_rate": 9.893617021276596e-06, |
| "loss": 0.725, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 4.8822328335458245, |
| "learning_rate": 1e-05, |
| "loss": 0.482, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 7.462243307627707, |
| "learning_rate": 9.999997314236036e-06, |
| "loss": 0.5765, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.368939909600803, |
| "learning_rate": 9.999989256947029e-06, |
| "loss": 0.2621, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 6.4184710762944235, |
| "learning_rate": 9.999975828141635e-06, |
| "loss": 0.5185, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.9004648689011145, |
| "learning_rate": 9.999957027834282e-06, |
| "loss": 0.244, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.01583191932197, |
| "learning_rate": 9.999932856045164e-06, |
| "loss": 0.2724, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.0809206317338558, |
| "learning_rate": 9.99990331280025e-06, |
| "loss": 0.2221, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.0466622912745494, |
| "learning_rate": 9.999868398131282e-06, |
| "loss": 0.2625, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 7.446390750033542, |
| "learning_rate": 9.999828112075764e-06, |
| "loss": 0.6449, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 6.7602889610343215, |
| "learning_rate": 9.99978245467698e-06, |
| "loss": 0.7039, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.9500980506545675, |
| "learning_rate": 9.999731425983975e-06, |
| "loss": 0.2355, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 7.2513016036019335, |
| "learning_rate": 9.999675026051576e-06, |
| "loss": 0.6968, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 43.33082783120513, |
| "learning_rate": 9.999613254940368e-06, |
| "loss": 0.4654, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.9004552677187696, |
| "learning_rate": 9.999546112716715e-06, |
| "loss": 0.2029, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.194236317843529, |
| "learning_rate": 9.999473599452746e-06, |
| "loss": 0.2813, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 8.035356250059664, |
| "learning_rate": 9.999395715226365e-06, |
| "loss": 0.667, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.9145140818009003, |
| "learning_rate": 9.999312460121242e-06, |
| "loss": 0.2297, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.7982756322993592, |
| "learning_rate": 9.999223834226817e-06, |
| "loss": 0.2475, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.7314876163084512, |
| "learning_rate": 9.999129837638303e-06, |
| "loss": 0.1724, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 7.323150351960898, |
| "learning_rate": 9.999030470456684e-06, |
| "loss": 0.6735, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.1080102277359214, |
| "learning_rate": 9.998925732788706e-06, |
| "loss": 0.2153, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 5.903214475636129, |
| "learning_rate": 9.99881562474689e-06, |
| "loss": 0.6093, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.9446574201365474, |
| "learning_rate": 9.998700146449528e-06, |
| "loss": 0.2293, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 9.402474227627586, |
| "learning_rate": 9.998579298020676e-06, |
| "loss": 0.4865, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 5.997676640949038, |
| "learning_rate": 9.998453079590167e-06, |
| "loss": 0.4849, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.05414612783157, |
| "learning_rate": 9.998321491293592e-06, |
| "loss": 0.3184, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 6.6314295483116545, |
| "learning_rate": 9.998184533272321e-06, |
| "loss": 0.7459, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.9723909371748105, |
| "learning_rate": 9.998042205673489e-06, |
| "loss": 0.2924, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.1580999203943905, |
| "learning_rate": 9.997894508649995e-06, |
| "loss": 0.272, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.8013373131993664, |
| "learning_rate": 9.997741442360515e-06, |
| "loss": 0.2739, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 8.06345215974651, |
| "learning_rate": 9.99758300696949e-06, |
| "loss": 0.6538, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.9726083487811696, |
| "learning_rate": 9.997419202647124e-06, |
| "loss": 0.245, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.9678765556915165, |
| "learning_rate": 9.997250029569395e-06, |
| "loss": 0.2684, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 10.237118233392872, |
| "learning_rate": 9.997075487918047e-06, |
| "loss": 0.619, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.9105611501823063, |
| "learning_rate": 9.99689557788059e-06, |
| "loss": 0.2739, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.0096267154815597, |
| "learning_rate": 9.996710299650302e-06, |
| "loss": 0.3259, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 4.445495901455712, |
| "learning_rate": 9.996519653426229e-06, |
| "loss": 0.5219, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 5.605850026827202, |
| "learning_rate": 9.996323639413185e-06, |
| "loss": 0.6143, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.8916002019145026, |
| "learning_rate": 9.996122257821746e-06, |
| "loss": 0.224, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 15.579924374242733, |
| "learning_rate": 9.99591550886826e-06, |
| "loss": 0.7796, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 7.221135134011073, |
| "learning_rate": 9.995703392774836e-06, |
| "loss": 0.4544, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 16.49546367183523, |
| "learning_rate": 9.995485909769354e-06, |
| "loss": 0.6682, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 13.874669468561029, |
| "learning_rate": 9.995263060085456e-06, |
| "loss": 0.7609, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 6.303393045385302, |
| "learning_rate": 9.99503484396255e-06, |
| "loss": 0.4596, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 9.058577656777699, |
| "learning_rate": 9.99480126164581e-06, |
| "loss": 0.6164, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 6.637101781956161, |
| "learning_rate": 9.994562313386177e-06, |
| "loss": 0.7215, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.765877060254261, |
| "learning_rate": 9.994317999440351e-06, |
| "loss": 0.2007, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 12.923758398693709, |
| "learning_rate": 9.994068320070805e-06, |
| "loss": 0.5322, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.00990811412376, |
| "learning_rate": 9.993813275545764e-06, |
| "loss": 0.2872, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 8.332512786467698, |
| "learning_rate": 9.99355286613923e-06, |
| "loss": 0.5918, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 11.049783636211211, |
| "learning_rate": 9.993287092130956e-06, |
| "loss": 0.6115, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.0191315641950207, |
| "learning_rate": 9.993015953806472e-06, |
| "loss": 0.2369, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 7.267256545843167, |
| "learning_rate": 9.992739451457058e-06, |
| "loss": 0.6323, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.0704901493165035, |
| "learning_rate": 9.992457585379764e-06, |
| "loss": 0.2656, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 7.429213454553269, |
| "learning_rate": 9.992170355877398e-06, |
| "loss": 0.6515, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 5.525353846641136, |
| "learning_rate": 9.991877763258538e-06, |
| "loss": 0.6956, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 6.99048709636458, |
| "learning_rate": 9.991579807837511e-06, |
| "loss": 0.6963, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.8319076937894148, |
| "learning_rate": 9.991276489934416e-06, |
| "loss": 0.278, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 8.30888476109113, |
| "learning_rate": 9.990967809875107e-06, |
| "loss": 0.5689, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 10.121949999243235, |
| "learning_rate": 9.990653767991203e-06, |
| "loss": 0.6782, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 6.949448664548724, |
| "learning_rate": 9.99033436462008e-06, |
| "loss": 0.8458, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.7631365862407675, |
| "learning_rate": 9.990009600104875e-06, |
| "loss": 0.2387, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.95646929645247, |
| "learning_rate": 9.989679474794484e-06, |
| "loss": 0.2584, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.5968211493704039, |
| "learning_rate": 9.989343989043563e-06, |
| "loss": 0.2326, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 5.43077862961733, |
| "learning_rate": 9.989003143212526e-06, |
| "loss": 0.5847, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.7014016514942623, |
| "learning_rate": 9.988656937667544e-06, |
| "loss": 0.2557, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 5.4041752599024, |
| "learning_rate": 9.98830537278055e-06, |
| "loss": 0.5055, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.112904610234277, |
| "learning_rate": 9.987948448929232e-06, |
| "loss": 0.2591, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.8032816303999848, |
| "learning_rate": 9.987586166497032e-06, |
| "loss": 0.2757, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.772808892651813, |
| "learning_rate": 9.987218525873155e-06, |
| "loss": 0.2461, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.911641867802362, |
| "learning_rate": 9.98684552745256e-06, |
| "loss": 0.2403, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.7493725607696422, |
| "learning_rate": 9.98646717163596e-06, |
| "loss": 0.2433, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 7.7961677705380605, |
| "learning_rate": 9.986083458829824e-06, |
| "loss": 0.5065, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.177300666490729, |
| "learning_rate": 9.985694389446378e-06, |
| "loss": 0.2436, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 4.530179647043451, |
| "learning_rate": 9.9852999639036e-06, |
| "loss": 0.5275, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.090913867645795, |
| "learning_rate": 9.984900182625226e-06, |
| "loss": 0.3134, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 6.731588330774811, |
| "learning_rate": 9.98449504604074e-06, |
| "loss": 0.7066, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 14.448000839451272, |
| "learning_rate": 9.984084554585387e-06, |
| "loss": 0.7049, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 5.742724641735612, |
| "learning_rate": 9.983668708700156e-06, |
| "loss": 0.5357, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.8344366677326172, |
| "learning_rate": 9.983247508831795e-06, |
| "loss": 0.2459, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.9099309329965592, |
| "learning_rate": 9.9828209554328e-06, |
| "loss": 0.2658, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.6321601388074647, |
| "learning_rate": 9.982389048961421e-06, |
| "loss": 0.2604, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 7.549083750784864, |
| "learning_rate": 9.981951789881657e-06, |
| "loss": 0.7438, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 4.950187800141307, |
| "learning_rate": 9.981509178663256e-06, |
| "loss": 0.5696, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 7.226166260751547, |
| "learning_rate": 9.98106121578172e-06, |
| "loss": 0.7509, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 9.549776784209499, |
| "learning_rate": 9.980607901718297e-06, |
| "loss": 0.7863, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.2974781122942716, |
| "learning_rate": 9.980149236959986e-06, |
| "loss": 0.2952, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 6.5401223182948245, |
| "learning_rate": 9.979685221999532e-06, |
| "loss": 0.5763, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 5.752458813451822, |
| "learning_rate": 9.97921585733543e-06, |
| "loss": 0.618, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 5.5480082188645525, |
| "learning_rate": 9.97874114347192e-06, |
| "loss": 0.622, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.808710962103662, |
| "learning_rate": 9.978261080918988e-06, |
| "loss": 0.2828, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.799105004739947, |
| "learning_rate": 9.977775670192373e-06, |
| "loss": 0.2528, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 6.243875053588291, |
| "learning_rate": 9.977284911813549e-06, |
| "loss": 0.5032, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 4.880295190945132, |
| "learning_rate": 9.976788806309742e-06, |
| "loss": 0.547, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.6908031523481455, |
| "learning_rate": 9.976287354213924e-06, |
| "loss": 0.1809, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 11.874030701403033, |
| "learning_rate": 9.975780556064806e-06, |
| "loss": 0.6395, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 9.639996712082842, |
| "learning_rate": 9.975268412406842e-06, |
| "loss": 0.6557, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.1448436503091655, |
| "learning_rate": 9.974750923790234e-06, |
| "loss": 0.2842, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 4.108059139362723, |
| "learning_rate": 9.97422809077092e-06, |
| "loss": 0.6994, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 5.188802830624759, |
| "learning_rate": 9.973699913910584e-06, |
| "loss": 0.5398, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.7631244750636252, |
| "learning_rate": 9.97316639377665e-06, |
| "loss": 0.2767, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 5.776454382682188, |
| "learning_rate": 9.97262753094228e-06, |
| "loss": 0.5958, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.0539095988416336, |
| "learning_rate": 9.972083325986377e-06, |
| "loss": 0.2507, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 7.71647708129, |
| "learning_rate": 9.971533779493586e-06, |
| "loss": 0.6313, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 9.885032841099774, |
| "learning_rate": 9.970978892054286e-06, |
| "loss": 0.5581, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.78503609944141, |
| "learning_rate": 9.970418664264596e-06, |
| "loss": 0.2364, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 5.183970320492835, |
| "learning_rate": 9.969853096726372e-06, |
| "loss": 0.7529, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 4.590290754655386, |
| "learning_rate": 9.969282190047207e-06, |
| "loss": 0.523, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 4.57222467490719, |
| "learning_rate": 9.968705944840428e-06, |
| "loss": 0.4612, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 6.707984670342173, |
| "learning_rate": 9.968124361725098e-06, |
| "loss": 0.6136, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 10.496256043657745, |
| "learning_rate": 9.967537441326018e-06, |
| "loss": 0.6126, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 9.868715415306239, |
| "learning_rate": 9.966945184273716e-06, |
| "loss": 0.6428, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 9.668861159469753, |
| "learning_rate": 9.966347591204459e-06, |
| "loss": 0.7014, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.7954523281162194, |
| "learning_rate": 9.965744662760246e-06, |
| "loss": 0.2307, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.9575524083025957, |
| "learning_rate": 9.965136399588803e-06, |
| "loss": 0.2551, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.0487271777285883, |
| "learning_rate": 9.964522802343593e-06, |
| "loss": 0.3312, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 5.406807318098485, |
| "learning_rate": 9.963903871683806e-06, |
| "loss": 0.6646, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 5.369438968167279, |
| "learning_rate": 9.963279608274364e-06, |
| "loss": 0.5249, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.0976265634507163, |
| "learning_rate": 9.962650012785917e-06, |
| "loss": 0.3104, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.0455693449228236, |
| "learning_rate": 9.962015085894838e-06, |
| "loss": 0.2619, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 5.460373040624954, |
| "learning_rate": 9.961374828283239e-06, |
| "loss": 0.6343, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.6036581876392395, |
| "learning_rate": 9.960729240638947e-06, |
| "loss": 0.2809, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.6539767213635388, |
| "learning_rate": 9.960078323655524e-06, |
| "loss": 0.2592, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 5.2639118853712885, |
| "learning_rate": 9.959422078032253e-06, |
| "loss": 0.6271, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 6.183046858090365, |
| "learning_rate": 9.958760504474144e-06, |
| "loss": 0.6513, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.8640131168963845, |
| "learning_rate": 9.958093603691923e-06, |
| "loss": 0.2528, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.8894317015528737, |
| "learning_rate": 9.957421376402053e-06, |
| "loss": 0.2938, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.8225244919346641, |
| "learning_rate": 9.956743823326704e-06, |
| "loss": 0.2652, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.016628969006215, |
| "learning_rate": 9.956060945193781e-06, |
| "loss": 0.2655, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.871720299399146, |
| "learning_rate": 9.955372742736903e-06, |
| "loss": 0.2282, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 5.251804505199205, |
| "learning_rate": 9.954679216695406e-06, |
| "loss": 0.7544, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.6841352163321124, |
| "learning_rate": 9.953980367814354e-06, |
| "loss": 0.2324, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.9551569313943153, |
| "learning_rate": 9.953276196844519e-06, |
| "loss": 0.2607, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.279701863291198, |
| "learning_rate": 9.9525667045424e-06, |
| "loss": 0.2336, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.0942832584637263, |
| "learning_rate": 9.951851891670206e-06, |
| "loss": 0.2428, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.03070287347475, |
| "learning_rate": 9.951131758995866e-06, |
| "loss": 0.2874, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.6708016558581609, |
| "learning_rate": 9.950406307293023e-06, |
| "loss": 0.2417, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 6.63843141297905, |
| "learning_rate": 9.949675537341031e-06, |
| "loss": 0.6903, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 11.998445255287676, |
| "learning_rate": 9.948939449924964e-06, |
| "loss": 0.594, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 1.9657571867354962, |
| "learning_rate": 9.948198045835601e-06, |
| "loss": 0.2757, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 5.238820252838469, |
| "learning_rate": 9.94745132586944e-06, |
| "loss": 0.591, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 4.199219289945727, |
| "learning_rate": 9.946699290828683e-06, |
| "loss": 0.6224, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.286185413567256, |
| "learning_rate": 9.94594194152125e-06, |
| "loss": 0.2635, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 5.155721769180376, |
| "learning_rate": 9.945179278760759e-06, |
| "loss": 0.652, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.7670381153982928, |
| "learning_rate": 9.94441130336655e-06, |
| "loss": 0.2414, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.7930068540247865, |
| "learning_rate": 9.943638016163658e-06, |
| "loss": 0.2304, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 5.175473506021528, |
| "learning_rate": 9.942859417982833e-06, |
| "loss": 0.6327, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 6.483158980004239, |
| "learning_rate": 9.942075509660527e-06, |
| "loss": 0.7988, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.890761915446787, |
| "learning_rate": 9.941286292038894e-06, |
| "loss": 0.2275, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.2858130028155097, |
| "learning_rate": 9.940491765965798e-06, |
| "loss": 0.3103, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.677858006478938, |
| "learning_rate": 9.939691932294804e-06, |
| "loss": 0.195, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 9.548415559641747, |
| "learning_rate": 9.938886791885172e-06, |
| "loss": 0.8219, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.7070562876457582, |
| "learning_rate": 9.938076345601875e-06, |
| "loss": 0.2794, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 5.186033973337935, |
| "learning_rate": 9.937260594315578e-06, |
| "loss": 0.5882, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 7.0285474198747036, |
| "learning_rate": 9.936439538902644e-06, |
| "loss": 0.8776, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 6.120945551228446, |
| "learning_rate": 9.935613180245143e-06, |
| "loss": 0.5905, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.197689977984544, |
| "learning_rate": 9.934781519230832e-06, |
| "loss": 0.2693, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.2876710297000424, |
| "learning_rate": 9.933944556753173e-06, |
| "loss": 0.2444, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.839881700013773, |
| "learning_rate": 9.933102293711314e-06, |
| "loss": 0.2687, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 6.663100438730311, |
| "learning_rate": 9.932254731010108e-06, |
| "loss": 0.5255, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 6.266933677264044, |
| "learning_rate": 9.931401869560096e-06, |
| "loss": 0.5345, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.8303981871835013, |
| "learning_rate": 9.93054371027751e-06, |
| "loss": 0.2347, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 6.26129546605341, |
| "learning_rate": 9.929680254084273e-06, |
| "loss": 0.6773, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 7.267941666701096, |
| "learning_rate": 9.928811501908006e-06, |
| "loss": 0.704, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.6836439908992122, |
| "learning_rate": 9.92793745468201e-06, |
| "loss": 0.2223, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.9495656535116324, |
| "learning_rate": 9.927058113345282e-06, |
| "loss": 0.2466, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.8209131439186006, |
| "learning_rate": 9.926173478842502e-06, |
| "loss": 0.2687, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.931823503029787, |
| "learning_rate": 9.925283552124039e-06, |
| "loss": 0.2572, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.2229504002443874, |
| "learning_rate": 9.924388334145943e-06, |
| "loss": 0.2779, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 7.114886827530793, |
| "learning_rate": 9.923487825869955e-06, |
| "loss": 0.7203, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.6552448432832247, |
| "learning_rate": 9.922582028263495e-06, |
| "loss": 0.2394, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 6.512736012940505, |
| "learning_rate": 9.921670942299664e-06, |
| "loss": 0.7148, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.619176270047411, |
| "learning_rate": 9.92075456895725e-06, |
| "loss": 0.2278, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.7636787308085922, |
| "learning_rate": 9.919832909220717e-06, |
| "loss": 0.2654, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 6.366453218825804, |
| "learning_rate": 9.91890596408021e-06, |
| "loss": 0.5214, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.7926927092351945, |
| "learning_rate": 9.917973734531549e-06, |
| "loss": 0.3013, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 9.27917675911004, |
| "learning_rate": 9.917036221576235e-06, |
| "loss": 0.4644, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 6.5166207880778675, |
| "learning_rate": 9.916093426221445e-06, |
| "loss": 0.7791, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.7803593703574099, |
| "learning_rate": 9.915145349480027e-06, |
| "loss": 0.2063, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.7164340087067527, |
| "learning_rate": 9.914191992370504e-06, |
| "loss": 0.2404, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.7053855130147761, |
| "learning_rate": 9.913233355917075e-06, |
| "loss": 0.2496, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 8.170343927786698, |
| "learning_rate": 9.91226944114961e-06, |
| "loss": 0.4809, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 11.373476584361782, |
| "learning_rate": 9.911300249103646e-06, |
| "loss": 0.4187, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.956910542155799, |
| "learning_rate": 9.910325780820391e-06, |
| "loss": 0.279, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 5.541510463993858, |
| "learning_rate": 9.90934603734672e-06, |
| "loss": 0.5838, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 7.9733412880119925, |
| "learning_rate": 9.908361019735181e-06, |
| "loss": 0.5904, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.4831004582918137, |
| "learning_rate": 9.907370729043984e-06, |
| "loss": 0.2193, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.984365249719393, |
| "learning_rate": 9.906375166336998e-06, |
| "loss": 0.2606, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 5.067254711450738, |
| "learning_rate": 9.905374332683768e-06, |
| "loss": 0.6655, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 13.994611511088726, |
| "learning_rate": 9.904368229159494e-06, |
| "loss": 0.7717, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.925005418994528, |
| "learning_rate": 9.903356856845035e-06, |
| "loss": 0.2474, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 5.749146638259409, |
| "learning_rate": 9.902340216826915e-06, |
| "loss": 0.559, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 5.16502544359464, |
| "learning_rate": 9.90131831019732e-06, |
| "loss": 0.6786, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.871063685934545, |
| "learning_rate": 9.900291138054086e-06, |
| "loss": 0.2044, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.8115443702306113, |
| "learning_rate": 9.899258701500712e-06, |
| "loss": 0.2443, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.762357615478928, |
| "learning_rate": 9.89822100164635e-06, |
| "loss": 0.2604, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.581180552288332, |
| "learning_rate": 9.897178039605803e-06, |
| "loss": 0.2554, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.7046379154630582, |
| "learning_rate": 9.896129816499535e-06, |
| "loss": 0.2242, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.6587541886565693, |
| "learning_rate": 9.89507633345366e-06, |
| "loss": 0.2934, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.767186610249366, |
| "learning_rate": 9.894017591599934e-06, |
| "loss": 0.2572, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 8.622410807088071, |
| "learning_rate": 9.892953592075776e-06, |
| "loss": 0.8176, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.463559656044879, |
| "learning_rate": 9.891884336024242e-06, |
| "loss": 0.2255, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 6.224974673190329, |
| "learning_rate": 9.890809824594041e-06, |
| "loss": 0.6921, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 6.275651838369934, |
| "learning_rate": 9.889730058939529e-06, |
| "loss": 0.4799, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.6281851231133666, |
| "learning_rate": 9.8886450402207e-06, |
| "loss": 0.2179, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.619527212818428, |
| "learning_rate": 9.8875547696032e-06, |
| "loss": 0.2594, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 5.934279358354166, |
| "learning_rate": 9.88645924825831e-06, |
| "loss": 0.6144, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.815838597766129, |
| "learning_rate": 9.885358477362956e-06, |
| "loss": 0.2479, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.7555660967992508, |
| "learning_rate": 9.8842524580997e-06, |
| "loss": 0.2647, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.094040601120562, |
| "learning_rate": 9.883141191656748e-06, |
| "loss": 0.2699, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.7027153956456136, |
| "learning_rate": 9.88202467922794e-06, |
| "loss": 0.2925, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 9.715297506105168, |
| "learning_rate": 9.880902922012747e-06, |
| "loss": 0.7013, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.8421367831514053, |
| "learning_rate": 9.879775921216284e-06, |
| "loss": 0.2537, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.6479818584304384, |
| "learning_rate": 9.87864367804929e-06, |
| "loss": 0.2513, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.8001522637106266, |
| "learning_rate": 9.877506193728144e-06, |
| "loss": 0.2984, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.4737787840102685, |
| "learning_rate": 9.876363469474848e-06, |
| "loss": 0.2117, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.606172703146015, |
| "learning_rate": 9.87521550651704e-06, |
| "loss": 0.2092, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 8.202056619133858, |
| "learning_rate": 9.874062306087983e-06, |
| "loss": 0.636, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 7.108320148950683, |
| "learning_rate": 9.872903869426564e-06, |
| "loss": 0.7243, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 5.506551610092641, |
| "learning_rate": 9.8717401977773e-06, |
| "loss": 0.5324, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 7.394711036441917, |
| "learning_rate": 9.870571292390331e-06, |
| "loss": 0.6106, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.7168080888601716, |
| "learning_rate": 9.869397154521418e-06, |
| "loss": 0.243, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.630605514144201, |
| "learning_rate": 9.868217785431942e-06, |
| "loss": 0.2204, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.0363393202892546, |
| "learning_rate": 9.867033186388906e-06, |
| "loss": 0.2953, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 5.788957640221821, |
| "learning_rate": 9.865843358664933e-06, |
| "loss": 0.5283, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 9.925878268253182, |
| "learning_rate": 9.86464830353826e-06, |
| "loss": 0.5873, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 6.488725346331065, |
| "learning_rate": 9.863448022292742e-06, |
| "loss": 0.6456, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.787596878035153, |
| "learning_rate": 9.86224251621785e-06, |
| "loss": 0.2382, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.6260525664288765, |
| "learning_rate": 9.861031786608663e-06, |
| "loss": 0.2009, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.785192574131053, |
| "learning_rate": 9.859815834765875e-06, |
| "loss": 0.2502, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.6494386800023098, |
| "learning_rate": 9.858594661995792e-06, |
| "loss": 0.2729, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.5171719328800632, |
| "learning_rate": 9.857368269610325e-06, |
| "loss": 0.1935, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.6019978694116501, |
| "learning_rate": 9.856136658926993e-06, |
| "loss": 0.2303, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 6.077455820696771, |
| "learning_rate": 9.854899831268926e-06, |
| "loss": 0.6023, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.7670319271592085, |
| "learning_rate": 9.85365778796485e-06, |
| "loss": 0.2914, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 9.537528884281995, |
| "learning_rate": 9.852410530349102e-06, |
| "loss": 0.643, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.862552947917582, |
| "learning_rate": 9.851158059761617e-06, |
| "loss": 0.245, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 6.338295160744264, |
| "learning_rate": 9.849900377547933e-06, |
| "loss": 0.6725, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.5749237201985713, |
| "learning_rate": 9.848637485059183e-06, |
| "loss": 0.2203, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.7798119281452047, |
| "learning_rate": 9.8473693836521e-06, |
| "loss": 0.2694, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 5.306498575393919, |
| "learning_rate": 9.846096074689012e-06, |
| "loss": 0.6167, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.9986387257054445, |
| "learning_rate": 9.844817559537841e-06, |
| "loss": 0.3073, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.7917036615218112, |
| "learning_rate": 9.843533839572105e-06, |
| "loss": 0.1986, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.9444855554710954, |
| "learning_rate": 9.842244916170913e-06, |
| "loss": 0.2507, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 5.515741606750657, |
| "learning_rate": 9.840950790718959e-06, |
| "loss": 0.7681, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.739345255762653, |
| "learning_rate": 9.83965146460653e-06, |
| "loss": 0.2939, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 15.818366926020826, |
| "learning_rate": 9.838346939229501e-06, |
| "loss": 0.566, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 9.475580321325761, |
| "learning_rate": 9.83703721598933e-06, |
| "loss": 0.6702, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.5561445661668403, |
| "learning_rate": 9.835722296293058e-06, |
| "loss": 0.2575, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.6374928593645883, |
| "learning_rate": 9.834402181553314e-06, |
| "loss": 0.2574, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.7720785739929117, |
| "learning_rate": 9.833076873188303e-06, |
| "loss": 0.2415, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 20.445412723262997, |
| "learning_rate": 9.831746372621811e-06, |
| "loss": 0.5882, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.6000166556546866, |
| "learning_rate": 9.830410681283203e-06, |
| "loss": 0.2209, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.6465319703747732, |
| "learning_rate": 9.829069800607418e-06, |
| "loss": 0.2412, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 4.8046461402805605, |
| "learning_rate": 9.827723732034972e-06, |
| "loss": 0.6327, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.870520515355423, |
| "learning_rate": 9.826372477011956e-06, |
| "loss": 0.2609, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.7548998761142005, |
| "learning_rate": 9.825016036990029e-06, |
| "loss": 0.2656, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.7931254599659536, |
| "learning_rate": 9.823654413426424e-06, |
| "loss": 0.3034, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.5247697719663527, |
| "learning_rate": 9.822287607783938e-06, |
| "loss": 0.2237, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.798493899979202, |
| "learning_rate": 9.820915621530939e-06, |
| "loss": 0.2746, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 5.203687613711387, |
| "learning_rate": 9.81953845614136e-06, |
| "loss": 0.7089, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 6.1928739384010685, |
| "learning_rate": 9.818156113094699e-06, |
| "loss": 0.6123, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.616422274885141, |
| "learning_rate": 9.816768593876012e-06, |
| "loss": 0.2769, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 6.601813272912302, |
| "learning_rate": 9.81537589997592e-06, |
| "loss": 0.6817, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.8732789501841371, |
| "learning_rate": 9.8139780328906e-06, |
| "loss": 0.24, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 5.817718745140282, |
| "learning_rate": 9.812574994121791e-06, |
| "loss": 0.5641, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.7035610866186042, |
| "learning_rate": 9.811166785176785e-06, |
| "loss": 0.2141, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 6.059233018531428, |
| "learning_rate": 9.809753407568427e-06, |
| "loss": 0.5628, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.9798596247591356, |
| "learning_rate": 9.80833486281512e-06, |
| "loss": 0.2539, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 11.649935725007392, |
| "learning_rate": 9.80691115244081e-06, |
| "loss": 0.7371, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.6194679707686632, |
| "learning_rate": 9.805482277974999e-06, |
| "loss": 0.2111, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.312164201949697, |
| "learning_rate": 9.804048240952736e-06, |
| "loss": 0.2856, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.8007353724474517, |
| "learning_rate": 9.802609042914614e-06, |
| "loss": 0.2582, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 24.776689688816656, |
| "learning_rate": 9.80116468540677e-06, |
| "loss": 0.5147, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 7.163689734496585, |
| "learning_rate": 9.79971516998089e-06, |
| "loss": 0.6445, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 7.864571925331906, |
| "learning_rate": 9.79826049819419e-06, |
| "loss": 0.6159, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.7450807359970308, |
| "learning_rate": 9.796800671609436e-06, |
| "loss": 0.2518, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 8.727857811594829, |
| "learning_rate": 9.795335691794929e-06, |
| "loss": 0.5013, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 8.919913818750866, |
| "learning_rate": 9.793865560324503e-06, |
| "loss": 0.6285, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 8.429917857657687, |
| "learning_rate": 9.792390278777527e-06, |
| "loss": 0.6394, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 12.476180607787782, |
| "learning_rate": 9.790909848738907e-06, |
| "loss": 0.6466, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 7.098836577863875, |
| "learning_rate": 9.789424271799075e-06, |
| "loss": 0.709, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 5.801376908749421, |
| "learning_rate": 9.787933549553996e-06, |
| "loss": 0.6269, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 9.496884685048222, |
| "learning_rate": 9.786437683605161e-06, |
| "loss": 0.6664, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.6319948879086812, |
| "learning_rate": 9.78493667555959e-06, |
| "loss": 0.2707, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.5708516466390696, |
| "learning_rate": 9.783430527029818e-06, |
| "loss": 0.1913, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 8.023793238995243, |
| "learning_rate": 9.781919239633912e-06, |
| "loss": 0.5994, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.8126674527164652, |
| "learning_rate": 9.780402814995458e-06, |
| "loss": 0.2318, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 14.426470287475814, |
| "learning_rate": 9.77888125474356e-06, |
| "loss": 0.6649, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 10.71094575036116, |
| "learning_rate": 9.777354560512835e-06, |
| "loss": 0.494, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 7.713425379700125, |
| "learning_rate": 9.77582273394342e-06, |
| "loss": 0.7045, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 8.943228477771676, |
| "learning_rate": 9.774285776680967e-06, |
| "loss": 0.5535, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.7267654150299514, |
| "learning_rate": 9.772743690376636e-06, |
| "loss": 0.2491, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.8191540559886012, |
| "learning_rate": 9.7711964766871e-06, |
| "loss": 0.2703, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 6.544574630134056, |
| "learning_rate": 9.76964413727454e-06, |
| "loss": 0.5892, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 5.71619739119168, |
| "learning_rate": 9.768086673806638e-06, |
| "loss": 0.5279, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.7386028680970294, |
| "learning_rate": 9.766524087956592e-06, |
| "loss": 0.2672, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 10.798882919692216, |
| "learning_rate": 9.764956381403095e-06, |
| "loss": 0.6545, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 12.465881210964136, |
| "learning_rate": 9.76338355583034e-06, |
| "loss": 0.6039, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 5.494982234088186, |
| "learning_rate": 9.761805612928025e-06, |
| "loss": 0.519, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 5.053272331920926, |
| "learning_rate": 9.760222554391343e-06, |
| "loss": 0.5176, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 9.891972560729801, |
| "learning_rate": 9.758634381920982e-06, |
| "loss": 0.5177, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 4.2656155278201675, |
| "learning_rate": 9.757041097223123e-06, |
| "loss": 0.6046, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.6395442627324692, |
| "learning_rate": 9.755442702009443e-06, |
| "loss": 0.2523, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 5.571416657542466, |
| "learning_rate": 9.753839197997105e-06, |
| "loss": 0.5219, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 4.3700690088783025, |
| "learning_rate": 9.752230586908767e-06, |
| "loss": 0.2403, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 7.3442548066250275, |
| "learning_rate": 9.75061687047256e-06, |
| "loss": 0.4425, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 5.102192276870105, |
| "learning_rate": 9.748998050422117e-06, |
| "loss": 0.6099, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 3.596595451608656, |
| "learning_rate": 9.747374128496541e-06, |
| "loss": 0.2457, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 4.638267201907921, |
| "learning_rate": 9.745745106440422e-06, |
| "loss": 0.4629, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 5.457131519006896, |
| "learning_rate": 9.744110986003826e-06, |
| "loss": 0.5778, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 3.6000720224378533, |
| "learning_rate": 9.742471768942299e-06, |
| "loss": 0.2596, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.7493100911873254, |
| "learning_rate": 9.740827457016863e-06, |
| "loss": 0.2691, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 6.068632023860864, |
| "learning_rate": 9.739178051994008e-06, |
| "loss": 0.6243, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.034333215512261, |
| "learning_rate": 9.7375235556457e-06, |
| "loss": 0.2646, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 108.67978047726994, |
| "learning_rate": 9.735863969749373e-06, |
| "loss": 0.5287, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 18.032116388026967, |
| "learning_rate": 9.734199296087932e-06, |
| "loss": 0.277, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 6.247397291623058, |
| "learning_rate": 9.732529536449741e-06, |
| "loss": 0.8102, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.8587537608039923, |
| "learning_rate": 9.730854692628637e-06, |
| "loss": 0.2621, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.719599060155659, |
| "learning_rate": 9.729174766423912e-06, |
| "loss": 0.2645, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.5511281705551219, |
| "learning_rate": 9.72748975964032e-06, |
| "loss": 0.2389, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.657070469633183, |
| "learning_rate": 9.725799674088072e-06, |
| "loss": 0.2275, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 10.362288274296773, |
| "learning_rate": 9.724104511582838e-06, |
| "loss": 0.5781, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 5.574234794631768, |
| "learning_rate": 9.72240427394574e-06, |
| "loss": 0.5624, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 7.4448490908876135, |
| "learning_rate": 9.720698963003351e-06, |
| "loss": 0.6852, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.5433086783424876, |
| "learning_rate": 9.7189885805877e-06, |
| "loss": 0.2052, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 5.282788060395431, |
| "learning_rate": 9.717273128536259e-06, |
| "loss": 0.753, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.9816401253282105, |
| "learning_rate": 9.715552608691944e-06, |
| "loss": 0.2751, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.861973572972449, |
| "learning_rate": 9.713827022903124e-06, |
| "loss": 0.2489, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.8784674952919553, |
| "learning_rate": 9.712096373023603e-06, |
| "loss": 0.2807, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.0645563466318584, |
| "learning_rate": 9.710360660912629e-06, |
| "loss": 0.3016, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 6.682479363719368, |
| "learning_rate": 9.708619888434887e-06, |
| "loss": 0.5129, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.7400890155991255, |
| "learning_rate": 9.706874057460497e-06, |
| "loss": 0.2594, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 8.86234579968045, |
| "learning_rate": 9.705123169865016e-06, |
| "loss": 0.6763, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 14.882292145776598, |
| "learning_rate": 9.703367227529432e-06, |
| "loss": 0.488, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 7.657954403197554, |
| "learning_rate": 9.701606232340165e-06, |
| "loss": 0.4734, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.8928143901726409, |
| "learning_rate": 9.699840186189061e-06, |
| "loss": 0.2786, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 6.017547091352742, |
| "learning_rate": 9.698069090973391e-06, |
| "loss": 0.6868, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.5864654805452103, |
| "learning_rate": 9.696292948595857e-06, |
| "loss": 0.2131, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.761612831154044, |
| "learning_rate": 9.694511760964578e-06, |
| "loss": 0.2532, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.7403966486679432, |
| "learning_rate": 9.69272552999309e-06, |
| "loss": 0.2561, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.7323797944976795, |
| "learning_rate": 9.690934257600353e-06, |
| "loss": 0.2465, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.516914235462595, |
| "learning_rate": 9.689137945710742e-06, |
| "loss": 0.263, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 7.682260069657834, |
| "learning_rate": 9.687336596254045e-06, |
| "loss": 0.513, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 5.417370136409828, |
| "learning_rate": 9.685530211165459e-06, |
| "loss": 0.5077, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.7532214718484167, |
| "learning_rate": 9.683718792385595e-06, |
| "loss": 0.2618, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 7.559056297443667, |
| "learning_rate": 9.681902341860471e-06, |
| "loss": 0.7226, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.783196373519197, |
| "learning_rate": 9.680080861541511e-06, |
| "loss": 0.2088, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 4.952472408705992, |
| "learning_rate": 9.678254353385538e-06, |
| "loss": 0.5452, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 5.609645206584396, |
| "learning_rate": 9.676422819354785e-06, |
| "loss": 0.5814, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.8253918239886093, |
| "learning_rate": 9.674586261416874e-06, |
| "loss": 0.2326, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.9423878166580468, |
| "learning_rate": 9.672744681544834e-06, |
| "loss": 0.3192, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.7183487922754226, |
| "learning_rate": 9.670898081717079e-06, |
| "loss": 0.261, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.583231612184501, |
| "learning_rate": 9.669046463917427e-06, |
| "loss": 0.1921, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.699976665331866, |
| "learning_rate": 9.667189830135078e-06, |
| "loss": 0.2459, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 4.830208654876044, |
| "learning_rate": 9.665328182364627e-06, |
| "loss": 0.5133, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 8.704400793715795, |
| "learning_rate": 9.663461522606049e-06, |
| "loss": 0.6686, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 6.937812975673912, |
| "learning_rate": 9.66158985286471e-06, |
| "loss": 0.388, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 9.162898390854838, |
| "learning_rate": 9.659713175151352e-06, |
| "loss": 0.5718, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.5464286975229098, |
| "learning_rate": 9.657831491482103e-06, |
| "loss": 0.2102, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 9.004355033796678, |
| "learning_rate": 9.655944803878467e-06, |
| "loss": 0.5886, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.7914194566193655, |
| "learning_rate": 9.654053114367321e-06, |
| "loss": 0.2858, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.7496161140767366, |
| "learning_rate": 9.65215642498092e-06, |
| "loss": 0.2231, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 5.660911158180701, |
| "learning_rate": 9.650254737756883e-06, |
| "loss": 0.4718, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.602415790855819, |
| "learning_rate": 9.648348054738208e-06, |
| "loss": 0.2206, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 8.297344269887379, |
| "learning_rate": 9.646436377973253e-06, |
| "loss": 0.695, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 4.562823331457611, |
| "learning_rate": 9.644519709515746e-06, |
| "loss": 0.3869, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 6.510693383446655, |
| "learning_rate": 9.642598051424772e-06, |
| "loss": 0.6238, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 5.338732275568637, |
| "learning_rate": 9.640671405764777e-06, |
| "loss": 0.463, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.726729233128839, |
| "learning_rate": 9.638739774605572e-06, |
| "loss": 0.2686, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 7.508232814292006, |
| "learning_rate": 9.636803160022314e-06, |
| "loss": 0.7177, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.7462604260074626, |
| "learning_rate": 9.634861564095525e-06, |
| "loss": 0.2734, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.547103753887767, |
| "learning_rate": 9.632914988911066e-06, |
| "loss": 0.2215, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.65680886983581, |
| "learning_rate": 9.63096343656016e-06, |
| "loss": 0.2663, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 6.9542028644509815, |
| "learning_rate": 9.629006909139363e-06, |
| "loss": 0.7216, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 16.065217703971534, |
| "learning_rate": 9.62704540875059e-06, |
| "loss": 0.5666, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.8217908058149368, |
| "learning_rate": 9.625078937501089e-06, |
| "loss": 0.2915, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 5.675952292457328, |
| "learning_rate": 9.62310749750345e-06, |
| "loss": 0.54, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 7.354932909323429, |
| "learning_rate": 9.621131090875603e-06, |
| "loss": 0.4529, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 7.552225342234603, |
| "learning_rate": 9.619149719740817e-06, |
| "loss": 0.6706, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 7.003636609102059, |
| "learning_rate": 9.617163386227683e-06, |
| "loss": 0.5179, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 7.87176422191421, |
| "learning_rate": 9.615172092470134e-06, |
| "loss": 0.5432, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 8.722305589354004, |
| "learning_rate": 9.613175840607428e-06, |
| "loss": 0.6106, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.6766957441259023, |
| "learning_rate": 9.611174632784147e-06, |
| "loss": 0.222, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 7.88698944356783, |
| "learning_rate": 9.609168471150202e-06, |
| "loss": 0.5604, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 4.901710771804992, |
| "learning_rate": 9.607157357860823e-06, |
| "loss": 0.5331, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.541628104886714, |
| "learning_rate": 9.605141295076561e-06, |
| "loss": 0.2418, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.646102352925008, |
| "learning_rate": 9.603120284963284e-06, |
| "loss": 0.2368, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 22.17130760384479, |
| "learning_rate": 9.601094329692173e-06, |
| "loss": 0.5453, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.532667603750822, |
| "learning_rate": 9.599063431439721e-06, |
| "loss": 0.2178, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.4035418727441649, |
| "learning_rate": 9.597027592387739e-06, |
| "loss": 0.2121, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 4.478680004347673, |
| "learning_rate": 9.594986814723335e-06, |
| "loss": 0.4924, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 7.468955163342549, |
| "learning_rate": 9.59294110063893e-06, |
| "loss": 0.8559, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 7.517207014437163, |
| "learning_rate": 9.590890452332249e-06, |
| "loss": 0.7547, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.7386384148451286, |
| "learning_rate": 9.588834872006308e-06, |
| "loss": 0.2873, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 6.111369512347133, |
| "learning_rate": 9.586774361869436e-06, |
| "loss": 0.7991, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 5.818342367685323, |
| "learning_rate": 9.584708924135245e-06, |
| "loss": 0.6113, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.8684943149292454, |
| "learning_rate": 9.582638561022646e-06, |
| "loss": 0.2406, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.6927560874280256, |
| "learning_rate": 9.580563274755848e-06, |
| "loss": 0.2438, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 7.383883334475161, |
| "learning_rate": 9.578483067564335e-06, |
| "loss": 0.5863, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 4.77457350402351, |
| "learning_rate": 9.576397941682891e-06, |
| "loss": 0.6171, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.5213260321597142, |
| "learning_rate": 9.574307899351574e-06, |
| "loss": 0.2399, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.6725404783281153, |
| "learning_rate": 9.572212942815734e-06, |
| "loss": 0.2136, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.8319233281937581, |
| "learning_rate": 9.570113074325986e-06, |
| "loss": 0.2536, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 14.083688310977637, |
| "learning_rate": 9.568008296138238e-06, |
| "loss": 0.7214, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.5351720079812152, |
| "learning_rate": 9.565898610513661e-06, |
| "loss": 0.2357, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.846796286369406, |
| "learning_rate": 9.563784019718704e-06, |
| "loss": 0.2659, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 9.109412341380644, |
| "learning_rate": 9.561664526025082e-06, |
| "loss": 0.6468, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 4.665032230332076, |
| "learning_rate": 9.55954013170978e-06, |
| "loss": 0.5738, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 11.970552835497465, |
| "learning_rate": 9.557410839055047e-06, |
| "loss": 0.4872, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 11.403070918811682, |
| "learning_rate": 9.555276650348393e-06, |
| "loss": 0.7133, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.8223233705669901, |
| "learning_rate": 9.55313756788259e-06, |
| "loss": 0.2925, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.852613958622955, |
| "learning_rate": 9.550993593955665e-06, |
| "loss": 0.2913, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 4.498398929916578, |
| "learning_rate": 9.548844730870903e-06, |
| "loss": 0.4451, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.648219625284076, |
| "learning_rate": 9.546690980936836e-06, |
| "loss": 0.2615, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.5284412497408182, |
| "learning_rate": 9.544532346467254e-06, |
| "loss": 0.226, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 8.983584090394281, |
| "learning_rate": 9.542368829781186e-06, |
| "loss": 0.5188, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.625012319223229, |
| "learning_rate": 9.540200433202913e-06, |
| "loss": 0.2223, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 7.848716453870051, |
| "learning_rate": 9.538027159061955e-06, |
| "loss": 0.6142, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 5.619009301696381, |
| "learning_rate": 9.535849009693072e-06, |
| "loss": 0.4499, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.5799572961064898, |
| "learning_rate": 9.533665987436262e-06, |
| "loss": 0.2796, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.8129121062203717, |
| "learning_rate": 9.531478094636758e-06, |
| "loss": 0.2311, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 5.939978021754256, |
| "learning_rate": 9.529285333645027e-06, |
| "loss": 0.4902, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 4.379150393398199, |
| "learning_rate": 9.527087706816762e-06, |
| "loss": 0.4729, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.885728998582742, |
| "learning_rate": 9.524885216512887e-06, |
| "loss": 0.2484, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 4.846423738543627, |
| "learning_rate": 9.522677865099548e-06, |
| "loss": 0.4624, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.7713682514140527, |
| "learning_rate": 9.520465654948119e-06, |
| "loss": 0.2428, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.6484256990751187, |
| "learning_rate": 9.518248588435185e-06, |
| "loss": 0.2728, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.85025912629073, |
| "learning_rate": 9.516026667942557e-06, |
| "loss": 0.2968, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.472425467009718, |
| "learning_rate": 9.513799895857252e-06, |
| "loss": 0.2189, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.8324029561679585, |
| "learning_rate": 9.511568274571508e-06, |
| "loss": 0.2788, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.853724342533399, |
| "learning_rate": 9.509331806482767e-06, |
| "loss": 0.3316, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.7662162335150542, |
| "learning_rate": 9.507090493993677e-06, |
| "loss": 0.2845, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 6.857778864737697, |
| "learning_rate": 9.504844339512096e-06, |
| "loss": 0.8126, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.4154894498443327, |
| "learning_rate": 9.502593345451078e-06, |
| "loss": 0.234, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.559160109741637, |
| "learning_rate": 9.500337514228878e-06, |
| "loss": 0.2134, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.7392915493940049, |
| "learning_rate": 9.49807684826895e-06, |
| "loss": 0.2505, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 5.7068179181213345, |
| "learning_rate": 9.495811349999941e-06, |
| "loss": 0.6602, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 8.078581062271683, |
| "learning_rate": 9.493541021855685e-06, |
| "loss": 0.5826, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.5377398073960513, |
| "learning_rate": 9.49126586627521e-06, |
| "loss": 0.2403, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.1239080125298515, |
| "learning_rate": 9.488985885702728e-06, |
| "loss": 0.2966, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.6240974393234549, |
| "learning_rate": 9.486701082587635e-06, |
| "loss": 0.2331, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.6747138927073455, |
| "learning_rate": 9.484411459384508e-06, |
| "loss": 0.2135, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 7.124509975698555, |
| "learning_rate": 9.482117018553101e-06, |
| "loss": 0.4871, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 5.410259795106361, |
| "learning_rate": 9.479817762558345e-06, |
| "loss": 0.7595, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.9126348752324163, |
| "learning_rate": 9.477513693870347e-06, |
| "loss": 0.2562, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 5.776277118670486, |
| "learning_rate": 9.475204814964374e-06, |
| "loss": 0.4539, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 8.454367566571307, |
| "learning_rate": 9.472891128320874e-06, |
| "loss": 0.5973, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 5.913077225031681, |
| "learning_rate": 9.470572636425451e-06, |
| "loss": 0.6188, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 6.090466598908505, |
| "learning_rate": 9.46824934176887e-06, |
| "loss": 0.6631, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 6.198244063689312, |
| "learning_rate": 9.465921246847067e-06, |
| "loss": 0.5703, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.660589900649375, |
| "learning_rate": 9.463588354161122e-06, |
| "loss": 0.2352, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 8.707932228113194, |
| "learning_rate": 9.461250666217277e-06, |
| "loss": 0.6988, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.6480449260174401, |
| "learning_rate": 9.458908185526921e-06, |
| "loss": 0.2167, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.746732564216852, |
| "learning_rate": 9.456560914606594e-06, |
| "loss": 0.2634, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 9.349519321728842, |
| "learning_rate": 9.454208855977986e-06, |
| "loss": 0.5316, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 5.516413135371106, |
| "learning_rate": 9.451852012167924e-06, |
| "loss": 0.7709, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 17.400695990877335, |
| "learning_rate": 9.449490385708378e-06, |
| "loss": 0.5955, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.6392762842053246, |
| "learning_rate": 9.447123979136457e-06, |
| "loss": 0.2294, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 5.600355142882258, |
| "learning_rate": 9.444752794994408e-06, |
| "loss": 0.6541, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.5746993685116102, |
| "learning_rate": 9.4423768358296e-06, |
| "loss": 0.2396, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.7105327612570742, |
| "learning_rate": 9.439996104194546e-06, |
| "loss": 0.2193, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 7.441655802757599, |
| "learning_rate": 9.437610602646878e-06, |
| "loss": 0.6482, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 6.95945207101213, |
| "learning_rate": 9.43522033374935e-06, |
| "loss": 0.4506, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 5.44751626018781, |
| "learning_rate": 9.432825300069848e-06, |
| "loss": 0.5949, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 5.133345036398373, |
| "learning_rate": 9.430425504181361e-06, |
| "loss": 0.5625, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.793145741950075, |
| "learning_rate": 9.428020948662012e-06, |
| "loss": 0.251, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 5.737725174283238, |
| "learning_rate": 9.425611636095023e-06, |
| "loss": 0.5844, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.5754937662547965, |
| "learning_rate": 9.423197569068733e-06, |
| "loss": 0.2238, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 4.114959234043904, |
| "learning_rate": 9.420778750176588e-06, |
| "loss": 0.3734, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.6458700109490814, |
| "learning_rate": 9.418355182017138e-06, |
| "loss": 0.2593, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 7.9018071590278565, |
| "learning_rate": 9.41592686719404e-06, |
| "loss": 0.7318, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.5511525841874374, |
| "learning_rate": 9.413493808316038e-06, |
| "loss": 0.2632, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 4.173891491715113, |
| "learning_rate": 9.411056007996989e-06, |
| "loss": 0.6797, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.5845364864420737, |
| "learning_rate": 9.408613468855829e-06, |
| "loss": 0.241, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.686902076792496, |
| "learning_rate": 9.406166193516596e-06, |
| "loss": 0.2577, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 4.920333098863435, |
| "learning_rate": 9.403714184608411e-06, |
| "loss": 0.4498, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 5.364993266959027, |
| "learning_rate": 9.40125744476548e-06, |
| "loss": 0.5533, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.6173874199022953, |
| "learning_rate": 9.398795976627091e-06, |
| "loss": 0.276, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.6041241221350935, |
| "learning_rate": 9.396329782837614e-06, |
| "loss": 0.2537, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 5.644765129803302, |
| "learning_rate": 9.393858866046494e-06, |
| "loss": 0.6233, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 6.940660129614199, |
| "learning_rate": 9.391383228908253e-06, |
| "loss": 0.6914, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.624608021417376, |
| "learning_rate": 9.388902874082482e-06, |
| "loss": 0.2075, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.6578572047598017, |
| "learning_rate": 9.386417804233836e-06, |
| "loss": 0.2461, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.6521043358538268, |
| "learning_rate": 9.383928022032044e-06, |
| "loss": 0.2652, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 6.970230658708915, |
| "learning_rate": 9.381433530151887e-06, |
| "loss": 0.5884, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.7553736548624894, |
| "learning_rate": 9.37893433127322e-06, |
| "loss": 0.2887, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.9350946570850633, |
| "learning_rate": 9.376430428080939e-06, |
| "loss": 0.2824, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.7506675874139501, |
| "learning_rate": 9.373921823265004e-06, |
| "loss": 0.2994, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.7958765064076347, |
| "learning_rate": 9.371408519520421e-06, |
| "loss": 0.2839, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.589297384908829, |
| "learning_rate": 9.36889051954725e-06, |
| "loss": 0.2233, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.5861450409894153, |
| "learning_rate": 9.366367826050593e-06, |
| "loss": 0.2131, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.6207442015067852, |
| "learning_rate": 9.36384044174059e-06, |
| "loss": 0.2613, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.6179364940734717, |
| "learning_rate": 9.361308369332426e-06, |
| "loss": 0.2564, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.4492002384966378, |
| "learning_rate": 9.358771611546319e-06, |
| "loss": 0.2223, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 6.026513940478561, |
| "learning_rate": 9.356230171107524e-06, |
| "loss": 0.625, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 4.252908040021938, |
| "learning_rate": 9.353684050746323e-06, |
| "loss": 0.5486, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 4.585690047708393, |
| "learning_rate": 9.351133253198027e-06, |
| "loss": 0.508, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.8093757029592847, |
| "learning_rate": 9.348577781202976e-06, |
| "loss": 0.3174, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 4.8059623125324125, |
| "learning_rate": 9.346017637506523e-06, |
| "loss": 0.6029, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 4.820157002934122, |
| "learning_rate": 9.343452824859048e-06, |
| "loss": 0.6118, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 4.792292745536942, |
| "learning_rate": 9.340883346015941e-06, |
| "loss": 0.4967, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 5.778832171325122, |
| "learning_rate": 9.338309203737609e-06, |
| "loss": 0.6744, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 4.150228699187317, |
| "learning_rate": 9.335730400789466e-06, |
| "loss": 0.5699, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 7.722756882503639, |
| "learning_rate": 9.333146939941938e-06, |
| "loss": 0.6951, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 5.296551030338628, |
| "learning_rate": 9.330558823970448e-06, |
| "loss": 0.6075, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.6248021042250564, |
| "learning_rate": 9.327966055655424e-06, |
| "loss": 0.2108, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 5.8872539663544226, |
| "learning_rate": 9.325368637782292e-06, |
| "loss": 0.427, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.5516053060269306, |
| "learning_rate": 9.322766573141473e-06, |
| "loss": 0.2381, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 8.414158151979361, |
| "learning_rate": 9.320159864528378e-06, |
| "loss": 0.61, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.4150529791591346, |
| "learning_rate": 9.31754851474341e-06, |
| "loss": 0.2366, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 6.312607193376232, |
| "learning_rate": 9.314932526591956e-06, |
| "loss": 0.6776, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 9.194739133513545, |
| "learning_rate": 9.312311902884388e-06, |
| "loss": 0.4316, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 5.343246517463489, |
| "learning_rate": 9.309686646436053e-06, |
| "loss": 0.5531, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 5.920143525678418, |
| "learning_rate": 9.307056760067284e-06, |
| "loss": 0.7927, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 5.312603829868963, |
| "learning_rate": 9.30442224660338e-06, |
| "loss": 0.6748, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 7.410183273720859, |
| "learning_rate": 9.301783108874611e-06, |
| "loss": 0.4833, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 6.241546780821695, |
| "learning_rate": 9.299139349716221e-06, |
| "loss": 0.6342, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 6.614197398378457, |
| "learning_rate": 9.296490971968416e-06, |
| "loss": 0.7108, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 11.12842756885836, |
| "learning_rate": 9.293837978476359e-06, |
| "loss": 0.7183, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.5690747712727964, |
| "learning_rate": 9.291180372090178e-06, |
| "loss": 0.2025, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 7.567264761751759, |
| "learning_rate": 9.288518155664956e-06, |
| "loss": 0.5909, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.7632136067242392, |
| "learning_rate": 9.285851332060722e-06, |
| "loss": 0.2026, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 6.789591111745626, |
| "learning_rate": 9.283179904142465e-06, |
| "loss": 0.7386, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 5.91600337942705, |
| "learning_rate": 9.280503874780112e-06, |
| "loss": 0.628, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 6.981500952396759, |
| "learning_rate": 9.277823246848537e-06, |
| "loss": 0.5649, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.8397071025068248, |
| "learning_rate": 9.275138023227555e-06, |
| "loss": 0.286, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.607878832996544, |
| "learning_rate": 9.272448206801912e-06, |
| "loss": 0.2084, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 7.555829633070186, |
| "learning_rate": 9.269753800461299e-06, |
| "loss": 0.5523, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 9.27324587935331, |
| "learning_rate": 9.267054807100327e-06, |
| "loss": 0.5827, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.7081135272874568, |
| "learning_rate": 9.264351229618541e-06, |
| "loss": 0.2807, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.62276817476442, |
| "learning_rate": 9.261643070920409e-06, |
| "loss": 0.2197, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 9.382840642334658, |
| "learning_rate": 9.258930333915325e-06, |
| "loss": 0.5187, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 6.020399116685949, |
| "learning_rate": 9.256213021517593e-06, |
| "loss": 0.6994, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.551810172772469, |
| "learning_rate": 9.253491136646437e-06, |
| "loss": 0.1855, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 12.477242115295764, |
| "learning_rate": 9.250764682225997e-06, |
| "loss": 0.5945, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.023072932974826, |
| "learning_rate": 9.248033661185313e-06, |
| "loss": 0.2755, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.5946135073812442, |
| "learning_rate": 9.24529807645834e-06, |
| "loss": 0.2641, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.8137331869914235, |
| "learning_rate": 9.24255793098393e-06, |
| "loss": 0.2332, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 6.142954377653386, |
| "learning_rate": 9.23981322770584e-06, |
| "loss": 0.4164, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.6930437395931752, |
| "learning_rate": 9.237063969572713e-06, |
| "loss": 0.2722, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.6418304944887714, |
| "learning_rate": 9.2343101595381e-06, |
| "loss": 0.2373, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.4910986023588277, |
| "learning_rate": 9.23155180056043e-06, |
| "loss": 0.198, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 23.14030810895617, |
| "learning_rate": 9.228788895603024e-06, |
| "loss": 0.5749, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 5.76419133673438, |
| "learning_rate": 9.226021447634085e-06, |
| "loss": 0.5483, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 7.3039110909254825, |
| "learning_rate": 9.223249459626704e-06, |
| "loss": 0.6503, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.5353953125637876, |
| "learning_rate": 9.220472934558838e-06, |
| "loss": 0.2744, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.562708677756112, |
| "learning_rate": 9.217691875413323e-06, |
| "loss": 0.2552, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.7750167517256847, |
| "learning_rate": 9.214906285177867e-06, |
| "loss": 0.2382, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.4632002541152762, |
| "learning_rate": 9.212116166845048e-06, |
| "loss": 0.2458, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.629934994703476, |
| "learning_rate": 9.209321523412303e-06, |
| "loss": 0.2484, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.5393945631387778, |
| "learning_rate": 9.206522357881931e-06, |
| "loss": 0.2483, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 5.128491776174643, |
| "learning_rate": 9.203718673261098e-06, |
| "loss": 0.4915, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.7357749156562752, |
| "learning_rate": 9.20091047256181e-06, |
| "loss": 0.2484, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.386563520463881, |
| "learning_rate": 9.198097758800938e-06, |
| "loss": 0.2054, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.586299737267633, |
| "learning_rate": 9.195280535000196e-06, |
| "loss": 0.2761, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.607760059641086, |
| "learning_rate": 9.19245880418614e-06, |
| "loss": 0.227, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 8.145264164527298, |
| "learning_rate": 9.189632569390172e-06, |
| "loss": 0.6212, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.5998220170775523, |
| "learning_rate": 9.186801833648535e-06, |
| "loss": 0.2534, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 27.003441048008447, |
| "learning_rate": 9.183966600002301e-06, |
| "loss": 0.4521, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 18.871874110217032, |
| "learning_rate": 9.181126871497378e-06, |
| "loss": 0.7232, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 6.596082007569886, |
| "learning_rate": 9.178282651184506e-06, |
| "loss": 0.5631, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 7.388392936405897, |
| "learning_rate": 9.175433942119238e-06, |
| "loss": 0.6402, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.768985853028781, |
| "learning_rate": 9.172580747361968e-06, |
| "loss": 0.3014, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.7419466909192685, |
| "learning_rate": 9.169723069977892e-06, |
| "loss": 0.2612, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.8395910363602748, |
| "learning_rate": 9.166860913037032e-06, |
| "loss": 0.246, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.5135649476546447, |
| "learning_rate": 9.163994279614218e-06, |
| "loss": 0.2318, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 12.152057367824401, |
| "learning_rate": 9.161123172789091e-06, |
| "loss": 0.4432, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.4904021731698984, |
| "learning_rate": 9.158247595646098e-06, |
| "loss": 0.2012, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 5.699349685689834, |
| "learning_rate": 9.155367551274485e-06, |
| "loss": 0.4084, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 7.3980368624923925, |
| "learning_rate": 9.152483042768302e-06, |
| "loss": 0.5378, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.6587560737993832, |
| "learning_rate": 9.149594073226391e-06, |
| "loss": 0.2606, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.599962130861489, |
| "learning_rate": 9.14670064575239e-06, |
| "loss": 0.2274, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.7445914210601148, |
| "learning_rate": 9.143802763454723e-06, |
| "loss": 0.266, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 4.49859542583798, |
| "learning_rate": 9.140900429446601e-06, |
| "loss": 0.7648, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.5330251303876583, |
| "learning_rate": 9.137993646846018e-06, |
| "loss": 0.2794, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.5084724425102092, |
| "learning_rate": 9.135082418775746e-06, |
| "loss": 0.2433, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.6364880389220589, |
| "learning_rate": 9.132166748363335e-06, |
| "loss": 0.2534, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 8.579718620310658, |
| "learning_rate": 9.129246638741108e-06, |
| "loss": 0.6495, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.6621812549035762, |
| "learning_rate": 9.126322093046149e-06, |
| "loss": 0.2948, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 13.310540789425271, |
| "learning_rate": 9.123393114420318e-06, |
| "loss": 0.5038, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 6.525291196096151, |
| "learning_rate": 9.120459706010233e-06, |
| "loss": 0.6439, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 5.700086882795741, |
| "learning_rate": 9.11752187096727e-06, |
| "loss": 0.3714, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 9.559097162394567, |
| "learning_rate": 9.114579612447562e-06, |
| "loss": 0.7145, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 7.429212102267612, |
| "learning_rate": 9.111632933611993e-06, |
| "loss": 0.4803, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.7216462064235085, |
| "learning_rate": 9.108681837626199e-06, |
| "loss": 0.2794, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 12.791478411516074, |
| "learning_rate": 9.105726327660556e-06, |
| "loss": 0.7364, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 6.133174155539079, |
| "learning_rate": 9.102766406890185e-06, |
| "loss": 0.7558, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.6929638635543172, |
| "learning_rate": 9.099802078494947e-06, |
| "loss": 0.2172, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.648205135013034, |
| "learning_rate": 9.096833345659437e-06, |
| "loss": 0.2948, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.5931274816148164, |
| "learning_rate": 9.09386021157298e-06, |
| "loss": 0.2752, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 19.09722886984461, |
| "learning_rate": 9.09088267942963e-06, |
| "loss": 0.5787, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 7.094242815810287, |
| "learning_rate": 9.087900752428168e-06, |
| "loss": 0.6867, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 8.952583171505099, |
| "learning_rate": 9.084914433772094e-06, |
| "loss": 0.4357, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 10.097596781830811, |
| "learning_rate": 9.081923726669626e-06, |
| "loss": 0.432, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.6240538724758538, |
| "learning_rate": 9.0789286343337e-06, |
| "loss": 0.2482, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.7394940860333676, |
| "learning_rate": 9.075929159981957e-06, |
| "loss": 0.2349, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 6.247495284748309, |
| "learning_rate": 9.072925306836751e-06, |
| "loss": 0.6756, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.644825838147394, |
| "learning_rate": 9.06991707812514e-06, |
| "loss": 0.2562, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 4.9425686233157125, |
| "learning_rate": 9.066904477078875e-06, |
| "loss": 0.5812, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 8.611710175765456, |
| "learning_rate": 9.063887506934417e-06, |
| "loss": 0.541, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 9.798715952262672, |
| "learning_rate": 9.06086617093291e-06, |
| "loss": 0.6091, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 5.090455849781971, |
| "learning_rate": 9.057840472320192e-06, |
| "loss": 0.369, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 4.724373661013194, |
| "learning_rate": 9.054810414346789e-06, |
| "loss": 0.525, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 4.634085714902342, |
| "learning_rate": 9.05177600026791e-06, |
| "loss": 0.5811, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.6644096217496105, |
| "learning_rate": 9.048737233343442e-06, |
| "loss": 0.2294, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.5592779619156767, |
| "learning_rate": 9.045694116837948e-06, |
| "loss": 0.2123, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.587281520082312, |
| "learning_rate": 9.042646654020667e-06, |
| "loss": 0.2175, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.67902318285347, |
| "learning_rate": 9.039594848165507e-06, |
| "loss": 0.2544, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 6.287298171598728, |
| "learning_rate": 9.036538702551037e-06, |
| "loss": 0.5301, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 8.160902954056166, |
| "learning_rate": 9.03347822046049e-06, |
| "loss": 0.7306, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 24.09115381418598, |
| "learning_rate": 9.03041340518176e-06, |
| "loss": 0.6126, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 9.806039321186942, |
| "learning_rate": 9.027344260007401e-06, |
| "loss": 0.5585, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 7.069193941856005, |
| "learning_rate": 9.024270788234606e-06, |
| "loss": 0.499, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.5194477737410057, |
| "learning_rate": 9.021192993165224e-06, |
| "loss": 0.2322, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 6.0641664147259835, |
| "learning_rate": 9.01811087810575e-06, |
| "loss": 0.6656, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.5206543795362655, |
| "learning_rate": 9.015024446367315e-06, |
| "loss": 0.2521, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.0917356658790958, |
| "learning_rate": 9.01193370126569e-06, |
| "loss": 0.2604, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 9.499645522937357, |
| "learning_rate": 9.008838646121282e-06, |
| "loss": 0.5874, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.8038572094239258, |
| "learning_rate": 9.005739284259123e-06, |
| "loss": 0.2524, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 7.157493390955907, |
| "learning_rate": 9.002635619008877e-06, |
| "loss": 0.5076, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 6.397558022513768, |
| "learning_rate": 8.999527653704829e-06, |
| "loss": 0.547, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 4.619919620826792, |
| "learning_rate": 8.996415391685882e-06, |
| "loss": 0.4299, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 6.529113341573041, |
| "learning_rate": 8.993298836295556e-06, |
| "loss": 0.6004, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 8.48129945705957, |
| "learning_rate": 8.990177990881986e-06, |
| "loss": 0.6099, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.7333448005910954, |
| "learning_rate": 8.987052858797914e-06, |
| "loss": 0.2503, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.6374799656813561, |
| "learning_rate": 8.983923443400682e-06, |
| "loss": 0.2465, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.5156187701860444, |
| "learning_rate": 8.980789748052245e-06, |
| "loss": 0.2298, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.5643473209138472, |
| "learning_rate": 8.977651776119145e-06, |
| "loss": 0.2259, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.376360105350978, |
| "learning_rate": 8.974509530972523e-06, |
| "loss": 0.183, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.64982442464039, |
| "learning_rate": 8.971363015988115e-06, |
| "loss": 0.2379, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 7.018141561272961, |
| "learning_rate": 8.968212234546235e-06, |
| "loss": 0.544, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 6.31348366155464, |
| "learning_rate": 8.965057190031785e-06, |
| "loss": 0.502, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.5730767002449284, |
| "learning_rate": 8.961897885834247e-06, |
| "loss": 0.217, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.4574113349478706, |
| "learning_rate": 8.958734325347684e-06, |
| "loss": 0.1932, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.5853274210824488, |
| "learning_rate": 8.955566511970721e-06, |
| "loss": 0.2622, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 7.869412759007845, |
| "learning_rate": 8.95239444910656e-06, |
| "loss": 0.6187, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.493489624359773, |
| "learning_rate": 8.949218140162965e-06, |
| "loss": 0.1976, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 9.868875837133936, |
| "learning_rate": 8.946037588552266e-06, |
| "loss": 0.7038, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.5670087425538508, |
| "learning_rate": 8.94285279769134e-06, |
| "loss": 0.2122, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 7.262462716485521, |
| "learning_rate": 8.939663771001632e-06, |
| "loss": 0.5317, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.7074620966901546, |
| "learning_rate": 8.93647051190913e-06, |
| "loss": 0.2161, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.563088640604172, |
| "learning_rate": 8.93327302384437e-06, |
| "loss": 0.2535, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.7432968666284017, |
| "learning_rate": 8.930071310242429e-06, |
| "loss": 0.2418, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 5.530145271928336, |
| "learning_rate": 8.926865374542928e-06, |
| "loss": 0.6239, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.6816103340065633, |
| "learning_rate": 8.92365522019002e-06, |
| "loss": 0.2183, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.3399913301251825, |
| "learning_rate": 8.920440850632395e-06, |
| "loss": 0.2205, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 11.550609549065676, |
| "learning_rate": 8.917222269323263e-06, |
| "loss": 0.5839, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 5.165694148077282, |
| "learning_rate": 8.91399947972037e-06, |
| "loss": 0.5495, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 7.330150211344736, |
| "learning_rate": 8.91077248528597e-06, |
| "loss": 0.5782, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 13.151204937371048, |
| "learning_rate": 8.907541289486847e-06, |
| "loss": 0.6479, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.519064133046649, |
| "learning_rate": 8.904305895794292e-06, |
| "loss": 0.2459, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.6022800920512217, |
| "learning_rate": 8.901066307684102e-06, |
| "loss": 0.2458, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.7147403426955812, |
| "learning_rate": 8.89782252863659e-06, |
| "loss": 0.2569, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 5.670430578581199, |
| "learning_rate": 8.894574562136561e-06, |
| "loss": 0.5057, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 6.959529161210214, |
| "learning_rate": 8.89132241167333e-06, |
| "loss": 0.6522, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.7820823733746547, |
| "learning_rate": 8.888066080740692e-06, |
| "loss": 0.279, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 7.361769640465293, |
| "learning_rate": 8.88480557283695e-06, |
| "loss": 0.6678, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 5.163392974539672, |
| "learning_rate": 8.88154089146488e-06, |
| "loss": 0.5459, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 6.24247169542295, |
| "learning_rate": 8.878272040131748e-06, |
| "loss": 0.595, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.463345046277402, |
| "learning_rate": 8.874999022349303e-06, |
| "loss": 0.2448, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 6.242443414538638, |
| "learning_rate": 8.871721841633762e-06, |
| "loss": 0.6578, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 5.682520955037908, |
| "learning_rate": 8.868440501505822e-06, |
| "loss": 0.5833, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 6.484549304514638, |
| "learning_rate": 8.865155005490643e-06, |
| "loss": 0.7468, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 6.302754066055148, |
| "learning_rate": 8.861865357117852e-06, |
| "loss": 0.7221, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 5.857443360175216, |
| "learning_rate": 8.858571559921539e-06, |
| "loss": 0.6406, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.7623095964863735, |
| "learning_rate": 8.855273617440243e-06, |
| "loss": 0.2485, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 5.210709853076465, |
| "learning_rate": 8.851971533216968e-06, |
| "loss": 0.5855, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 8.365648509155578, |
| "learning_rate": 8.848665310799156e-06, |
| "loss": 0.5802, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.5648919156529912, |
| "learning_rate": 8.845354953738706e-06, |
| "loss": 0.2344, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 8.703297721624168, |
| "learning_rate": 8.84204046559195e-06, |
| "loss": 0.7219, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 6.170541209244984, |
| "learning_rate": 8.83872184991966e-06, |
| "loss": 0.5098, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.7385020557898248, |
| "learning_rate": 8.835399110287046e-06, |
| "loss": 0.2236, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.4661230490009658, |
| "learning_rate": 8.832072250263746e-06, |
| "loss": 0.2413, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.65953158170735, |
| "learning_rate": 8.82874127342382e-06, |
| "loss": 0.2573, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.5080627608844777, |
| "learning_rate": 8.82540618334576e-06, |
| "loss": 0.2457, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 9.832298571768307, |
| "learning_rate": 8.82206698361247e-06, |
| "loss": 0.7066, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 8.277558372822316, |
| "learning_rate": 8.818723677811269e-06, |
| "loss": 0.5355, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.4418416223623052, |
| "learning_rate": 8.815376269533893e-06, |
| "loss": 0.2066, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 9.649849845477053, |
| "learning_rate": 8.812024762376477e-06, |
| "loss": 0.6768, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 5.843745363878495, |
| "learning_rate": 8.808669159939568e-06, |
| "loss": 0.6621, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 9.414511673736458, |
| "learning_rate": 8.805309465828105e-06, |
| "loss": 0.7382, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.516642979165497, |
| "learning_rate": 8.80194568365143e-06, |
| "loss": 0.2167, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 3.990212164513675, |
| "learning_rate": 8.798577817023269e-06, |
| "loss": 0.4577, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.5896855461224302, |
| "learning_rate": 8.795205869561742e-06, |
| "loss": 0.231, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 6.974568598248131, |
| "learning_rate": 8.79182984488935e-06, |
| "loss": 0.6463, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 12.322774136067109, |
| "learning_rate": 8.788449746632976e-06, |
| "loss": 0.6105, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.669707596316299, |
| "learning_rate": 8.78506557842388e-06, |
| "loss": 0.2275, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 10.283945570112477, |
| "learning_rate": 8.781677343897687e-06, |
| "loss": 0.7113, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 9.663439942965859, |
| "learning_rate": 8.778285046694403e-06, |
| "loss": 0.5229, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.512582270088964, |
| "learning_rate": 8.77488869045839e-06, |
| "loss": 0.2626, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 6.957722562450112, |
| "learning_rate": 8.771488278838368e-06, |
| "loss": 0.5329, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 8.546833167469059, |
| "learning_rate": 8.768083815487428e-06, |
| "loss": 0.6326, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 6.569317927329583, |
| "learning_rate": 8.764675304062992e-06, |
| "loss": 0.6385, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 6.826173570363225, |
| "learning_rate": 8.76126274822685e-06, |
| "loss": 0.4427, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 6.370630972136984, |
| "learning_rate": 8.75784615164513e-06, |
| "loss": 0.4011, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 14.141899407748205, |
| "learning_rate": 8.754425517988298e-06, |
| "loss": 0.6297, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 25.04984916586043, |
| "learning_rate": 8.751000850931162e-06, |
| "loss": 0.5998, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.4086297275415713, |
| "learning_rate": 8.74757215415286e-06, |
| "loss": 0.2335, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 5.8832008847198685, |
| "learning_rate": 8.74413943133686e-06, |
| "loss": 0.6533, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 6.128009992673158, |
| "learning_rate": 8.740702686170955e-06, |
| "loss": 0.5133, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.8100722093658241, |
| "learning_rate": 8.73726192234726e-06, |
| "loss": 0.2315, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 8.121105568280397, |
| "learning_rate": 8.733817143562207e-06, |
| "loss": 0.4929, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 6.063649005918384, |
| "learning_rate": 8.73036835351654e-06, |
| "loss": 0.4996, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.4302425013841211, |
| "learning_rate": 8.726915555915317e-06, |
| "loss": 0.1863, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.6939262130664616, |
| "learning_rate": 8.723458754467893e-06, |
| "loss": 0.1969, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 7.056615528678235, |
| "learning_rate": 8.719997952887932e-06, |
| "loss": 0.5954, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.3693791069491372, |
| "learning_rate": 8.71653315489339e-06, |
| "loss": 0.1708, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.605845650568055, |
| "learning_rate": 8.71306436420652e-06, |
| "loss": 0.1999, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.6213491814636538, |
| "learning_rate": 8.709591584553865e-06, |
| "loss": 0.2564, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.5534122030041937, |
| "learning_rate": 8.706114819666249e-06, |
| "loss": 0.2171, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 24.46971085214679, |
| "learning_rate": 8.702634073278784e-06, |
| "loss": 0.5805, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 10.299870968946921, |
| "learning_rate": 8.699149349130848e-06, |
| "loss": 0.7832, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.6564460747680942, |
| "learning_rate": 8.695660650966109e-06, |
| "loss": 0.2721, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 4.500603362360334, |
| "learning_rate": 8.692167982532487e-06, |
| "loss": 0.4253, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 10.538332097952509, |
| "learning_rate": 8.688671347582178e-06, |
| "loss": 0.5227, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.464779623110201, |
| "learning_rate": 8.685170749871638e-06, |
| "loss": 0.2154, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 8.657587901907425, |
| "learning_rate": 8.681666193161578e-06, |
| "loss": 0.5902, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 9.225749603194462, |
| "learning_rate": 8.67815768121696e-06, |
| "loss": 0.6304, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.6130060216926752, |
| "learning_rate": 8.674645217807e-06, |
| "loss": 0.2261, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 7.367222456767956, |
| "learning_rate": 8.671128806705159e-06, |
| "loss": 0.6718, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 5.761031895494796, |
| "learning_rate": 8.667608451689135e-06, |
| "loss": 0.5507, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 6.8020377060599735, |
| "learning_rate": 8.664084156540864e-06, |
| "loss": 0.6018, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 7.149889953402013, |
| "learning_rate": 8.660555925046518e-06, |
| "loss": 0.535, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 9.66930716361746, |
| "learning_rate": 8.657023760996497e-06, |
| "loss": 0.7477, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.8553835758820338, |
| "learning_rate": 8.653487668185419e-06, |
| "loss": 0.2639, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.5807595552460332, |
| "learning_rate": 8.649947650412135e-06, |
| "loss": 0.2533, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 5.003858299431936, |
| "learning_rate": 8.646403711479702e-06, |
| "loss": 0.584, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 18.940436427722357, |
| "learning_rate": 8.642855855195394e-06, |
| "loss": 0.5363, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.5435972264861446, |
| "learning_rate": 8.639304085370692e-06, |
| "loss": 0.2301, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 7.591995588519707, |
| "learning_rate": 8.635748405821285e-06, |
| "loss": 0.6262, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.4897553699726007, |
| "learning_rate": 8.632188820367056e-06, |
| "loss": 0.1984, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.6942123556365838, |
| "learning_rate": 8.62862533283209e-06, |
| "loss": 0.2532, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.6469115110904957, |
| "learning_rate": 8.625057947044662e-06, |
| "loss": 0.2305, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 6.303157276945475, |
| "learning_rate": 8.62148666683723e-06, |
| "loss": 0.6848, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 20.989116059598974, |
| "learning_rate": 8.617911496046446e-06, |
| "loss": 0.7894, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 6.561870313214275, |
| "learning_rate": 8.614332438513132e-06, |
| "loss": 0.6544, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 5.14236901016574, |
| "learning_rate": 8.610749498082291e-06, |
| "loss": 0.6395, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.5235194242647812, |
| "learning_rate": 8.607162678603097e-06, |
| "loss": 0.2324, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.704837203299887, |
| "learning_rate": 8.603571983928888e-06, |
| "loss": 0.2514, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.4187248790707914, |
| "learning_rate": 8.599977417917169e-06, |
| "loss": 0.2461, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 4.701976212131134, |
| "learning_rate": 8.5963789844296e-06, |
| "loss": 0.5067, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.4414253542300597, |
| "learning_rate": 8.592776687332003e-06, |
| "loss": 0.2048, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 6.611317201824275, |
| "learning_rate": 8.58917053049434e-06, |
| "loss": 0.5289, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.4949147393495994, |
| "learning_rate": 8.58556051779073e-06, |
| "loss": 0.2724, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.6284172525696397, |
| "learning_rate": 8.581946653099427e-06, |
| "loss": 0.2875, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.6197613046378103, |
| "learning_rate": 8.578328940302827e-06, |
| "loss": 0.2145, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 4.714725732777521, |
| "learning_rate": 8.574707383287459e-06, |
| "loss": 0.6025, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 6.3927437325020255, |
| "learning_rate": 8.571081985943984e-06, |
| "loss": 0.6115, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 8.096428448758006, |
| "learning_rate": 8.567452752167183e-06, |
| "loss": 0.6769, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.5775772470528593, |
| "learning_rate": 8.563819685855963e-06, |
| "loss": 0.233, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.4985075939845311, |
| "learning_rate": 8.560182790913349e-06, |
| "loss": 0.21, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.4528883699852078, |
| "learning_rate": 8.556542071246476e-06, |
| "loss": 0.2423, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.8045829129907685, |
| "learning_rate": 8.552897530766592e-06, |
| "loss": 0.3062, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 6.363121819025957, |
| "learning_rate": 8.549249173389045e-06, |
| "loss": 0.5701, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 5.323940029256627, |
| "learning_rate": 8.545597003033286e-06, |
| "loss": 0.5023, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.027599692607025, |
| "learning_rate": 8.54194102362286e-06, |
| "loss": 0.2693, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 6.546242260539323, |
| "learning_rate": 8.538281239085411e-06, |
| "loss": 0.6131, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 7.295077814114934, |
| "learning_rate": 8.534617653352661e-06, |
| "loss": 0.5568, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 6.484864693221408, |
| "learning_rate": 8.530950270360425e-06, |
| "loss": 0.5634, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.5403880030840937, |
| "learning_rate": 8.52727909404859e-06, |
| "loss": 0.2438, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 5.07206173015186, |
| "learning_rate": 8.523604128361123e-06, |
| "loss": 0.6061, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.6685551330454524, |
| "learning_rate": 8.519925377246057e-06, |
| "loss": 0.2368, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.728742188440905, |
| "learning_rate": 8.516242844655498e-06, |
| "loss": 0.2153, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.5952897553270604, |
| "learning_rate": 8.512556534545612e-06, |
| "loss": 0.2266, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.4863335402974511, |
| "learning_rate": 8.50886645087662e-06, |
| "loss": 0.2076, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 11.340967979123214, |
| "learning_rate": 8.5051725976128e-06, |
| "loss": 0.6309, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 6.899729097187553, |
| "learning_rate": 8.50147497872248e-06, |
| "loss": 0.7037, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 6.422692032652846, |
| "learning_rate": 8.497773598178033e-06, |
| "loss": 0.6942, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.4342106968594903, |
| "learning_rate": 8.494068459955871e-06, |
| "loss": 0.2169, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 11.421594355547967, |
| "learning_rate": 8.490359568036446e-06, |
| "loss": 0.5014, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.6416992908316417, |
| "learning_rate": 8.486646926404243e-06, |
| "loss": 0.2854, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.5490326018370097, |
| "learning_rate": 8.48293053904777e-06, |
| "loss": 0.2449, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 7.257007373529935, |
| "learning_rate": 8.479210409959565e-06, |
| "loss": 0.6813, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.8945067281458927, |
| "learning_rate": 8.475486543136181e-06, |
| "loss": 0.2896, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 9.4701021587884, |
| "learning_rate": 8.471758942578193e-06, |
| "loss": 0.6194, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 10.511886956772441, |
| "learning_rate": 8.46802761229018e-06, |
| "loss": 0.6511, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 7.863057086989649, |
| "learning_rate": 8.464292556280734e-06, |
| "loss": 0.5907, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 6.167361796987621, |
| "learning_rate": 8.46055377856244e-06, |
| "loss": 0.5901, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 19.680661510408303, |
| "learning_rate": 8.456811283151896e-06, |
| "loss": 0.5894, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 9.438303117934163, |
| "learning_rate": 8.453065074069682e-06, |
| "loss": 0.5033, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.625431846534336, |
| "learning_rate": 8.449315155340369e-06, |
| "loss": 0.2799, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.7554253384961724, |
| "learning_rate": 8.44556153099252e-06, |
| "loss": 0.3268, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 6.1580376296166435, |
| "learning_rate": 8.441804205058672e-06, |
| "loss": 0.6697, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 7.051159034338603, |
| "learning_rate": 8.43804318157534e-06, |
| "loss": 0.578, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 6.7336555036387455, |
| "learning_rate": 8.434278464583018e-06, |
| "loss": 0.5324, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 8.150776766355575, |
| "learning_rate": 8.430510058126156e-06, |
| "loss": 0.636, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.704230168862192, |
| "learning_rate": 8.426737966253176e-06, |
| "loss": 0.2553, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.576742469550776, |
| "learning_rate": 8.422962193016459e-06, |
| "loss": 0.2505, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.722189642524467, |
| "learning_rate": 8.41918274247234e-06, |
| "loss": 0.2113, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 4.841587903078907, |
| "learning_rate": 8.415399618681101e-06, |
| "loss": 0.6088, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.5448985811556761, |
| "learning_rate": 8.411612825706976e-06, |
| "loss": 0.227, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 6.33255782512184, |
| "learning_rate": 8.407822367618135e-06, |
| "loss": 0.5294, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 5.7490443677960466, |
| "learning_rate": 8.40402824848669e-06, |
| "loss": 0.6894, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 8.311970930734226, |
| "learning_rate": 8.400230472388684e-06, |
| "loss": 0.6214, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 3.927420266585678, |
| "learning_rate": 8.396429043404088e-06, |
| "loss": 0.3584, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.8058260712439282, |
| "learning_rate": 8.3926239656168e-06, |
| "loss": 0.2764, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 6.524393247368285, |
| "learning_rate": 8.388815243114637e-06, |
| "loss": 0.5819, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 65.01476758786156, |
| "learning_rate": 8.385002879989328e-06, |
| "loss": 0.5696, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.6470301998126498, |
| "learning_rate": 8.381186880336518e-06, |
| "loss": 0.2538, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 7.253730717024666, |
| "learning_rate": 8.377367248255757e-06, |
| "loss": 0.5736, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 8.346427209524279, |
| "learning_rate": 8.373543987850494e-06, |
| "loss": 0.6371, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.4447458999529055, |
| "learning_rate": 8.369717103228084e-06, |
| "loss": 0.2204, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 16.640222927391598, |
| "learning_rate": 8.365886598499766e-06, |
| "loss": 0.4546, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.705159341486315, |
| "learning_rate": 8.362052477780677e-06, |
| "loss": 0.2985, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.8126938156568462, |
| "learning_rate": 8.35821474518983e-06, |
| "loss": 0.2424, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.9265518644124338, |
| "learning_rate": 8.354373404850124e-06, |
| "loss": 0.2831, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 4.312216463404106, |
| "learning_rate": 8.350528460888334e-06, |
| "loss": 0.5011, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.6362315327967591, |
| "learning_rate": 8.346679917435104e-06, |
| "loss": 0.2444, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.5451004685493321, |
| "learning_rate": 8.342827778624943e-06, |
| "loss": 0.2263, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.634051725367432, |
| "learning_rate": 8.33897204859623e-06, |
| "loss": 0.2642, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 10.006264049410033, |
| "learning_rate": 8.335112731491192e-06, |
| "loss": 0.5239, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.499144688672292, |
| "learning_rate": 8.331249831455921e-06, |
| "loss": 0.21, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 6.83080401550471, |
| "learning_rate": 8.327383352640347e-06, |
| "loss": 0.7192, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 5.445123052559625, |
| "learning_rate": 8.323513299198252e-06, |
| "loss": 0.593, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.5800330102245062, |
| "learning_rate": 8.319639675287255e-06, |
| "loss": 0.1956, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 10.985925353289685, |
| "learning_rate": 8.315762485068815e-06, |
| "loss": 0.5086, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 7.066589444646886, |
| "learning_rate": 8.311881732708213e-06, |
| "loss": 0.5215, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 7.67825901174198, |
| "learning_rate": 8.307997422374569e-06, |
| "loss": 0.8038, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 6.304539661629028, |
| "learning_rate": 8.304109558240817e-06, |
| "loss": 0.5006, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.6000598294765127, |
| "learning_rate": 8.300218144483709e-06, |
| "loss": 0.2031, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 6.873152965555242, |
| "learning_rate": 8.296323185283816e-06, |
| "loss": 0.6036, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.8715148595581925, |
| "learning_rate": 8.292424684825514e-06, |
| "loss": 0.249, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.7200882059628804, |
| "learning_rate": 8.28852264729698e-06, |
| "loss": 0.2683, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 6.2879962991369345, |
| "learning_rate": 8.284617076890199e-06, |
| "loss": 0.4912, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.6424687050291746, |
| "learning_rate": 8.280707977800944e-06, |
| "loss": 0.2321, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 8.060673556802215, |
| "learning_rate": 8.276795354228785e-06, |
| "loss": 0.6667, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.6034982772062687, |
| "learning_rate": 8.272879210377074e-06, |
| "loss": 0.2779, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 6.265308314957551, |
| "learning_rate": 8.268959550452946e-06, |
| "loss": 0.6491, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 5.224539104730754, |
| "learning_rate": 8.265036378667312e-06, |
| "loss": 0.6368, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 4.9642791771130055, |
| "learning_rate": 8.261109699234862e-06, |
| "loss": 0.6846, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 5.326075465035541, |
| "learning_rate": 8.257179516374045e-06, |
| "loss": 0.5553, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 5.123716962012845, |
| "learning_rate": 8.253245834307079e-06, |
| "loss": 0.5297, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 8.63248593059496, |
| "learning_rate": 8.249308657259943e-06, |
| "loss": 0.5384, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 8.948336121415593, |
| "learning_rate": 8.245367989462368e-06, |
| "loss": 0.4715, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.5286834573921628, |
| "learning_rate": 8.241423835147833e-06, |
| "loss": 0.2124, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 6.778579936718943, |
| "learning_rate": 8.237476198553567e-06, |
| "loss": 0.5749, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 8.735820903898144, |
| "learning_rate": 8.233525083920536e-06, |
| "loss": 0.6569, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.6071581878864913, |
| "learning_rate": 8.229570495493447e-06, |
| "loss": 0.2267, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.4098869673911527, |
| "learning_rate": 8.225612437520736e-06, |
| "loss": 0.2043, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.4849291540429495, |
| "learning_rate": 8.221650914254566e-06, |
| "loss": 0.2583, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.6908420580341865, |
| "learning_rate": 8.217685929950823e-06, |
| "loss": 0.2791, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.577991292993094, |
| "learning_rate": 8.213717488869113e-06, |
| "loss": 0.2564, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.6304667338549887, |
| "learning_rate": 8.209745595272755e-06, |
| "loss": 0.2492, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 6.12028252403479, |
| "learning_rate": 8.205770253428775e-06, |
| "loss": 0.4604, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.5198702030985125, |
| "learning_rate": 8.201791467607905e-06, |
| "loss": 0.2431, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.4871758189086686, |
| "learning_rate": 8.197809242084575e-06, |
| "loss": 0.2491, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 5.207452305123646, |
| "learning_rate": 8.193823581136919e-06, |
| "loss": 0.5642, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.7255330273398901, |
| "learning_rate": 8.189834489046746e-06, |
| "loss": 0.2537, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.343723206415828, |
| "learning_rate": 8.185841970099566e-06, |
| "loss": 0.1964, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 8.361566314702689, |
| "learning_rate": 8.181846028584563e-06, |
| "loss": 0.5342, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.4091082835237403, |
| "learning_rate": 8.177846668794598e-06, |
| "loss": 0.1915, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 5.632022424143484, |
| "learning_rate": 8.173843895026207e-06, |
| "loss": 0.5986, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.810578946218269, |
| "learning_rate": 8.169837711579591e-06, |
| "loss": 0.2462, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 5.474212152359759, |
| "learning_rate": 8.165828122758615e-06, |
| "loss": 0.6495, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 5.909343469433645, |
| "learning_rate": 8.161815132870806e-06, |
| "loss": 0.6268, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.650687508215565, |
| "learning_rate": 8.157798746227337e-06, |
| "loss": 0.1904, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.5497901471006141, |
| "learning_rate": 8.153778967143035e-06, |
| "loss": 0.2185, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.590537747184238, |
| "learning_rate": 8.149755799936377e-06, |
| "loss": 0.2029, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.8145458158756518, |
| "learning_rate": 8.145729248929466e-06, |
| "loss": 0.3129, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 6.599237686710324, |
| "learning_rate": 8.141699318448053e-06, |
| "loss": 0.6723, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 5.0690496052178835, |
| "learning_rate": 8.137666012821514e-06, |
| "loss": 0.6524, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.687080582405302, |
| "learning_rate": 8.13362933638285e-06, |
| "loss": 0.2391, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 6.38689930953936, |
| "learning_rate": 8.129589293468689e-06, |
| "loss": 0.5736, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.5226156646274227, |
| "learning_rate": 8.125545888419269e-06, |
| "loss": 0.2518, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.4502611685092015, |
| "learning_rate": 8.12149912557844e-06, |
| "loss": 0.2387, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.4924326291951637, |
| "learning_rate": 8.117449009293668e-06, |
| "loss": 0.262, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 8.88618189798143, |
| "learning_rate": 8.113395543916012e-06, |
| "loss": 0.7492, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.5439044787564637, |
| "learning_rate": 8.109338733800132e-06, |
| "loss": 0.2688, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 6.043219000084835, |
| "learning_rate": 8.10527858330428e-06, |
| "loss": 0.6305, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 6.691904839136024, |
| "learning_rate": 8.101215096790305e-06, |
| "loss": 0.6562, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 7.741322245640578, |
| "learning_rate": 8.097148278623628e-06, |
| "loss": 0.6244, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.6772790622352332, |
| "learning_rate": 8.093078133173256e-06, |
| "loss": 0.2499, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.6939554958023613, |
| "learning_rate": 8.089004664811767e-06, |
| "loss": 0.2931, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 7.300929933908393, |
| "learning_rate": 8.084927877915314e-06, |
| "loss": 0.6952, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.6200759183085958, |
| "learning_rate": 8.080847776863609e-06, |
| "loss": 0.2202, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.577818449634306, |
| "learning_rate": 8.07676436603993e-06, |
| "loss": 0.2436, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.7568781422144297, |
| "learning_rate": 8.072677649831107e-06, |
| "loss": 0.2333, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 9.024773267499588, |
| "learning_rate": 8.068587632627521e-06, |
| "loss": 0.501, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.5420024123926483, |
| "learning_rate": 8.064494318823102e-06, |
| "loss": 0.2753, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.5656359437849388, |
| "learning_rate": 8.060397712815318e-06, |
| "loss": 0.2497, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.6247670447736333, |
| "learning_rate": 8.056297819005177e-06, |
| "loss": 0.2289, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 4.756855006115204, |
| "learning_rate": 8.052194641797217e-06, |
| "loss": 0.795, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.6605381181871688, |
| "learning_rate": 8.048088185599507e-06, |
| "loss": 0.2345, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.4577986656434714, |
| "learning_rate": 8.043978454823632e-06, |
| "loss": 0.2514, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.8563748924672185, |
| "learning_rate": 8.0398654538847e-06, |
| "loss": 0.3161, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 5.014797297051163, |
| "learning_rate": 8.035749187201333e-06, |
| "loss": 0.517, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.5680786373670434, |
| "learning_rate": 8.031629659195657e-06, |
| "loss": 0.2402, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 8.196102593968495, |
| "learning_rate": 8.027506874293304e-06, |
| "loss": 0.5746, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 7.278802967337519, |
| "learning_rate": 8.023380836923404e-06, |
| "loss": 0.7167, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.4157414766476062, |
| "learning_rate": 8.019251551518585e-06, |
| "loss": 0.2333, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 18.566101779205937, |
| "learning_rate": 8.015119022514958e-06, |
| "loss": 0.5788, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.5214903643031255, |
| "learning_rate": 8.010983254352127e-06, |
| "loss": 0.6308, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.844163978244507, |
| "learning_rate": 8.006844251473165e-06, |
| "loss": 0.6814, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.4867426534826607, |
| "learning_rate": 8.002702018324629e-06, |
| "loss": 0.1987, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.5159336503630736, |
| "learning_rate": 7.998556559356543e-06, |
| "loss": 0.2561, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.4554991897170064, |
| "learning_rate": 7.994407879022397e-06, |
| "loss": 0.1772, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 13.232086482270557, |
| "learning_rate": 7.990255981779139e-06, |
| "loss": 0.5657, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 6.499740410030531, |
| "learning_rate": 7.986100872087177e-06, |
| "loss": 0.6406, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 7.167695370460036, |
| "learning_rate": 7.981942554410371e-06, |
| "loss": 0.6665, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.8426339914572896, |
| "learning_rate": 7.97778103321602e-06, |
| "loss": 0.21, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 8.557321656981843, |
| "learning_rate": 7.973616312974876e-06, |
| "loss": 0.4842, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.874621150418908, |
| "learning_rate": 7.969448398161115e-06, |
| "loss": 0.6745, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.6665195438715732, |
| "learning_rate": 7.965277293252354e-06, |
| "loss": 0.2129, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.9711904071282245, |
| "learning_rate": 7.961103002729634e-06, |
| "loss": 0.475, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.700950579625396, |
| "learning_rate": 7.956925531077417e-06, |
| "loss": 0.2788, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 6.1089608261539485, |
| "learning_rate": 7.952744882783587e-06, |
| "loss": 0.5439, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.689080045654782, |
| "learning_rate": 7.948561062339435e-06, |
| "loss": 0.2755, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.7153482062599326, |
| "learning_rate": 7.944374074239665e-06, |
| "loss": 0.2422, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.646721440739929, |
| "learning_rate": 7.940183922982381e-06, |
| "loss": 0.2675, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.372490958973571, |
| "learning_rate": 7.935990613069087e-06, |
| "loss": 0.6422, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 9.198653623452538, |
| "learning_rate": 7.931794149004675e-06, |
| "loss": 0.6502, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.52594294469927, |
| "learning_rate": 7.927594535297433e-06, |
| "loss": 0.217, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 6.280041790877249, |
| "learning_rate": 7.923391776459031e-06, |
| "loss": 0.6249, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.868426083787363, |
| "learning_rate": 7.919185877004515e-06, |
| "loss": 0.6534, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 6.731001321305892, |
| "learning_rate": 7.914976841452304e-06, |
| "loss": 0.7446, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.5875925359845395, |
| "learning_rate": 7.91076467432419e-06, |
| "loss": 0.2311, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 4.9586770613684035, |
| "learning_rate": 7.90654938014533e-06, |
| "loss": 0.6075, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 7.0514251067612115, |
| "learning_rate": 7.902330963444234e-06, |
| "loss": 0.5834, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.4457420322548138, |
| "learning_rate": 7.898109428752773e-06, |
| "loss": 0.2278, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.223756030865263, |
| "learning_rate": 7.893884780606164e-06, |
| "loss": 0.4812, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.5123115814863226, |
| "learning_rate": 7.889657023542973e-06, |
| "loss": 0.2431, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 8.1927800996665, |
| "learning_rate": 7.885426162105101e-06, |
| "loss": 0.7178, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.4361919853028358, |
| "learning_rate": 7.881192200837785e-06, |
| "loss": 0.2334, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.6821320951520453, |
| "learning_rate": 7.876955144289594e-06, |
| "loss": 0.2727, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.64615433229082, |
| "learning_rate": 7.872714997012421e-06, |
| "loss": 0.287, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.5961821260575324, |
| "learning_rate": 7.868471763561482e-06, |
| "loss": 0.2612, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 4.260052766123083, |
| "learning_rate": 7.864225448495304e-06, |
| "loss": 0.6239, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 27.705341590163066, |
| "learning_rate": 7.85997605637573e-06, |
| "loss": 0.7627, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 5.396292625469772, |
| "learning_rate": 7.855723591767903e-06, |
| "loss": 0.6497, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.5134632432860375, |
| "learning_rate": 7.85146805924027e-06, |
| "loss": 0.2252, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 8.969826699493574, |
| "learning_rate": 7.847209463364574e-06, |
| "loss": 0.5587, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 8.041023991570867, |
| "learning_rate": 7.842947808715848e-06, |
| "loss": 0.6362, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.5662874953195092, |
| "learning_rate": 7.83868309987241e-06, |
| "loss": 0.2194, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 6.299266990989718, |
| "learning_rate": 7.834415341415862e-06, |
| "loss": 0.6061, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.860859508577284, |
| "learning_rate": 7.830144537931082e-06, |
| "loss": 0.2787, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.482805550334068, |
| "learning_rate": 7.825870694006217e-06, |
| "loss": 0.2164, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 6.613164329297252, |
| "learning_rate": 7.82159381423268e-06, |
| "loss": 0.5754, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 9.863889557878194, |
| "learning_rate": 7.817313903205148e-06, |
| "loss": 0.6181, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 5.726291356356693, |
| "learning_rate": 7.813030965521554e-06, |
| "loss": 0.6551, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 6.774162233097553, |
| "learning_rate": 7.80874500578308e-06, |
| "loss": 0.6264, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 6.234214286889, |
| "learning_rate": 7.804456028594158e-06, |
| "loss": 0.6222, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.9347285684404372, |
| "learning_rate": 7.80016403856246e-06, |
| "loss": 0.2751, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.5520764220517966, |
| "learning_rate": 7.795869040298895e-06, |
| "loss": 0.2234, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 6.052441876883189, |
| "learning_rate": 7.791571038417602e-06, |
| "loss": 0.4274, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 5.895285255858338, |
| "learning_rate": 7.78727003753595e-06, |
| "loss": 0.6069, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 10.79283859744142, |
| "learning_rate": 7.782966042274529e-06, |
| "loss": 0.5398, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 5.795367350445128, |
| "learning_rate": 7.778659057257144e-06, |
| "loss": 0.5116, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 7.115590126112468, |
| "learning_rate": 7.774349087110813e-06, |
| "loss": 0.5862, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 19.186958069824044, |
| "learning_rate": 7.77003613646576e-06, |
| "loss": 0.6993, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.5596763052378617, |
| "learning_rate": 7.765720209955414e-06, |
| "loss": 0.2379, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 5.5453115203298875, |
| "learning_rate": 7.761401312216398e-06, |
| "loss": 0.7534, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.4347707338100348, |
| "learning_rate": 7.757079447888529e-06, |
| "loss": 0.2546, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.3853027058911302, |
| "learning_rate": 7.752754621614807e-06, |
| "loss": 0.1907, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 9.12407220325666, |
| "learning_rate": 7.748426838041421e-06, |
| "loss": 0.6993, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.4582603432101668, |
| "learning_rate": 7.744096101817731e-06, |
| "loss": 0.2337, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 6.544452978143089, |
| "learning_rate": 7.73976241759627e-06, |
| "loss": 0.5982, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.547444676269326, |
| "learning_rate": 7.73542579003274e-06, |
| "loss": 0.2192, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 7.899907932162064, |
| "learning_rate": 7.731086223786006e-06, |
| "loss": 0.5546, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.720644882947618, |
| "learning_rate": 7.726743723518087e-06, |
| "loss": 0.2878, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.4463293971324411, |
| "learning_rate": 7.722398293894153e-06, |
| "loss": 0.2411, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 6.399152675925439, |
| "learning_rate": 7.718049939582529e-06, |
| "loss": 0.6644, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.535805744652878, |
| "learning_rate": 7.713698665254669e-06, |
| "loss": 0.2172, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.8411273230737173, |
| "learning_rate": 7.70934447558518e-06, |
| "loss": 0.2479, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 8.476171050864176, |
| "learning_rate": 7.704987375251782e-06, |
| "loss": 0.7114, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 6.170794617861031, |
| "learning_rate": 7.70062736893534e-06, |
| "loss": 0.707, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.5428935228716163, |
| "learning_rate": 7.696264461319831e-06, |
| "loss": 0.1995, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.4706280562326233, |
| "learning_rate": 7.69189865709235e-06, |
| "loss": 0.2156, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.5182855633267356, |
| "learning_rate": 7.687529960943107e-06, |
| "loss": 0.2155, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 6.598787588558327, |
| "learning_rate": 7.683158377565415e-06, |
| "loss": 0.5596, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.4701515988054943, |
| "learning_rate": 7.678783911655691e-06, |
| "loss": 0.2387, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.7487342920858722, |
| "learning_rate": 7.674406567913447e-06, |
| "loss": 0.2849, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 6.720506302872264, |
| "learning_rate": 7.67002635104129e-06, |
| "loss": 0.6633, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.528372063446923, |
| "learning_rate": 7.66564326574491e-06, |
| "loss": 0.2445, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.4659505765883247, |
| "learning_rate": 7.661257316733078e-06, |
| "loss": 0.2436, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.5795243509696442, |
| "learning_rate": 7.656868508717648e-06, |
| "loss": 0.2671, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 9.825712492400415, |
| "learning_rate": 7.652476846413537e-06, |
| "loss": 0.7208, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.5701890378920915, |
| "learning_rate": 7.648082334538735e-06, |
| "loss": 0.262, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 6.6832118611123095, |
| "learning_rate": 7.64368497781429e-06, |
| "loss": 0.5806, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.6414381358525465, |
| "learning_rate": 7.639284780964307e-06, |
| "loss": 0.2409, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 6.37110642480901, |
| "learning_rate": 7.634881748715941e-06, |
| "loss": 0.6684, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.43825188221218, |
| "learning_rate": 7.630475885799395e-06, |
| "loss": 0.199, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 5.637745509627005, |
| "learning_rate": 7.626067196947913e-06, |
| "loss": 0.7578, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 7.540397124778485, |
| "learning_rate": 7.621655686897771e-06, |
| "loss": 0.6169, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.5383291305985427, |
| "learning_rate": 7.617241360388282e-06, |
| "loss": 0.1869, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.5769069469444448, |
| "learning_rate": 7.612824222161781e-06, |
| "loss": 0.2225, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.6398866286392093, |
| "learning_rate": 7.608404276963623e-06, |
| "loss": 0.2062, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 5.470655820078232, |
| "learning_rate": 7.60398152954218e-06, |
| "loss": 0.6265, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.6552078121199802, |
| "learning_rate": 7.599555984648836e-06, |
| "loss": 0.2076, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.6012296119356733, |
| "learning_rate": 7.595127647037976e-06, |
| "loss": 0.1988, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 6.939505698849627, |
| "learning_rate": 7.590696521466992e-06, |
| "loss": 0.5608, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 4.910673417408083, |
| "learning_rate": 7.586262612696263e-06, |
| "loss": 0.677, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.6897523696154988, |
| "learning_rate": 7.5818259254891614e-06, |
| "loss": 0.2501, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.3434317652867565, |
| "learning_rate": 7.577386464612049e-06, |
| "loss": 0.1795, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.6902559742508552, |
| "learning_rate": 7.572944234834261e-06, |
| "loss": 0.2267, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.3883211446125365, |
| "learning_rate": 7.568499240928109e-06, |
| "loss": 0.2141, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.5298055785686242, |
| "learning_rate": 7.5640514876688765e-06, |
| "loss": 0.2406, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 9.496424132872413, |
| "learning_rate": 7.559600979834809e-06, |
| "loss": 0.4984, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.6057294345638613, |
| "learning_rate": 7.555147722207111e-06, |
| "loss": 0.2431, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 5.860705570333863, |
| "learning_rate": 7.550691719569944e-06, |
| "loss": 0.5838, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.5642369137762708, |
| "learning_rate": 7.546232976710413e-06, |
| "loss": 0.2329, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.5083245316947056, |
| "learning_rate": 7.541771498418575e-06, |
| "loss": 0.2231, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 12.391806214588945, |
| "learning_rate": 7.537307289487419e-06, |
| "loss": 0.6165, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 6.900426095465898, |
| "learning_rate": 7.532840354712868e-06, |
| "loss": 0.5279, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.6678388256247385, |
| "learning_rate": 7.5283706988937765e-06, |
| "loss": 0.2434, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 7.705637433534857, |
| "learning_rate": 7.523898326831921e-06, |
| "loss": 0.516, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.405010079202137, |
| "learning_rate": 7.5194232433319955e-06, |
| "loss": 0.2058, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 6.5585545569480015, |
| "learning_rate": 7.514945453201608e-06, |
| "loss": 0.5136, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 36.403557439297025, |
| "learning_rate": 7.510464961251271e-06, |
| "loss": 0.5227, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 4.83369309587166, |
| "learning_rate": 7.505981772294404e-06, |
| "loss": 0.4469, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 11.349035410601026, |
| "learning_rate": 7.501495891147322e-06, |
| "loss": 0.7265, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.5579260386667464, |
| "learning_rate": 7.497007322629231e-06, |
| "loss": 0.2236, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 10.065921900057672, |
| "learning_rate": 7.492516071562226e-06, |
| "loss": 0.5644, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.8056548718693688, |
| "learning_rate": 7.488022142771282e-06, |
| "loss": 0.264, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.727491729848117, |
| "learning_rate": 7.483525541084253e-06, |
| "loss": 0.2353, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.6648267876413188, |
| "learning_rate": 7.479026271331864e-06, |
| "loss": 0.2699, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 5.528195596975851, |
| "learning_rate": 7.4745243383477055e-06, |
| "loss": 0.5324, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.483370410699109, |
| "learning_rate": 7.470019746968226e-06, |
| "loss": 0.2263, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 9.464428186655587, |
| "learning_rate": 7.4655125020327376e-06, |
| "loss": 0.7973, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.513103440189699, |
| "learning_rate": 7.461002608383396e-06, |
| "loss": 0.2109, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.6308949314556291, |
| "learning_rate": 7.456490070865206e-06, |
| "loss": 0.2618, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 5.951063225201904, |
| "learning_rate": 7.4519748943260126e-06, |
| "loss": 0.5295, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 12.78982777413769, |
| "learning_rate": 7.447457083616494e-06, |
| "loss": 0.644, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.517175737638629, |
| "learning_rate": 7.44293664359016e-06, |
| "loss": 0.2783, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.31606475875191, |
| "learning_rate": 7.438413579103344e-06, |
| "loss": 0.2124, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.5883162286889605, |
| "learning_rate": 7.433887895015199e-06, |
| "loss": 0.2407, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 4.557176169061043, |
| "learning_rate": 7.429359596187694e-06, |
| "loss": 0.4328, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.6339977285742284, |
| "learning_rate": 7.424828687485606e-06, |
| "loss": 0.198, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.3224105771875427, |
| "learning_rate": 7.420295173776515e-06, |
| "loss": 0.2403, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.4829810191238573, |
| "learning_rate": 7.415759059930799e-06, |
| "loss": 0.2191, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.4337356279338318, |
| "learning_rate": 7.411220350821631e-06, |
| "loss": 0.2743, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.65314659862354, |
| "learning_rate": 7.406679051324972e-06, |
| "loss": 0.2609, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.7342291144299689, |
| "learning_rate": 7.402135166319567e-06, |
| "loss": 0.2289, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.696792075138794, |
| "learning_rate": 7.397588700686933e-06, |
| "loss": 0.27, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.6044938667398625, |
| "learning_rate": 7.393039659311366e-06, |
| "loss": 0.2504, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 5.503861269795774, |
| "learning_rate": 7.388488047079927e-06, |
| "loss": 0.4895, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.587935245498577, |
| "learning_rate": 7.383933868882438e-06, |
| "loss": 0.2838, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.4158476436413507, |
| "learning_rate": 7.379377129611478e-06, |
| "loss": 0.2073, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.7373105615765783, |
| "learning_rate": 7.374817834162378e-06, |
| "loss": 0.2975, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 8.831666955944666, |
| "learning_rate": 7.3702559874332125e-06, |
| "loss": 0.5983, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 9.31665713413904, |
| "learning_rate": 7.3656915943247984e-06, |
| "loss": 0.6335, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 7.06706466947314, |
| "learning_rate": 7.3611246597406925e-06, |
| "loss": 0.4158, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.5915787530412477, |
| "learning_rate": 7.356555188587178e-06, |
| "loss": 0.1906, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.4657555028102185, |
| "learning_rate": 7.351983185773259e-06, |
| "loss": 0.1868, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 9.539314795725554, |
| "learning_rate": 7.347408656210666e-06, |
| "loss": 0.7162, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.6430391123504855, |
| "learning_rate": 7.342831604813844e-06, |
| "loss": 0.2382, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 6.29943915826192, |
| "learning_rate": 7.338252036499941e-06, |
| "loss": 0.5422, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.5739552981093123, |
| "learning_rate": 7.333669956188815e-06, |
| "loss": 0.2203, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.891490773197444, |
| "learning_rate": 7.3290853688030196e-06, |
| "loss": 0.5411, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.4076848345888082, |
| "learning_rate": 7.324498279267803e-06, |
| "loss": 0.2263, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.771801958389905, |
| "learning_rate": 7.319908692511103e-06, |
| "loss": 0.2898, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.5087526253506416, |
| "learning_rate": 7.315316613463535e-06, |
| "loss": 0.277, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.112445795077407, |
| "learning_rate": 7.310722047058396e-06, |
| "loss": 0.5377, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.482099175081986, |
| "learning_rate": 7.306124998231655e-06, |
| "loss": 0.6483, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.3755375793547229, |
| "learning_rate": 7.301525471921949e-06, |
| "loss": 0.2425, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 6.510124397597574, |
| "learning_rate": 7.296923473070571e-06, |
| "loss": 0.5505, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 3.8451392454881526, |
| "learning_rate": 7.292319006621477e-06, |
| "loss": 0.4919, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.6767803172131721, |
| "learning_rate": 7.2877120775212685e-06, |
| "loss": 0.2552, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.311592966112747, |
| "learning_rate": 7.283102690719198e-06, |
| "loss": 0.6434, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.6591025243854203, |
| "learning_rate": 7.278490851167155e-06, |
| "loss": 0.2732, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.11335338371546, |
| "learning_rate": 7.2738765638196625e-06, |
| "loss": 0.4616, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.4705208467577287, |
| "learning_rate": 7.269259833633877e-06, |
| "loss": 0.237, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.8603109418712185, |
| "learning_rate": 7.264640665569577e-06, |
| "loss": 0.7292, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.671324260946241, |
| "learning_rate": 7.26001906458916e-06, |
| "loss": 0.6222, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.542420695545935, |
| "learning_rate": 7.255395035657639e-06, |
| "loss": 0.2652, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.6422364296970795, |
| "learning_rate": 7.250768583742634e-06, |
| "loss": 0.2404, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.6714535837388254, |
| "learning_rate": 7.246139713814365e-06, |
| "loss": 0.2571, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.6316225727344515, |
| "learning_rate": 7.241508430845656e-06, |
| "loss": 0.2256, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.4787343653996288, |
| "learning_rate": 7.236874739811921e-06, |
| "loss": 0.2382, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.4072637031481081, |
| "learning_rate": 7.232238645691157e-06, |
| "loss": 0.2002, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.426067123824302, |
| "learning_rate": 7.227600153463947e-06, |
| "loss": 0.1755, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 4.655704700840468, |
| "learning_rate": 7.222959268113452e-06, |
| "loss": 0.3598, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.370522241713215, |
| "learning_rate": 7.218315994625397e-06, |
| "loss": 0.557, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.5889383930450987, |
| "learning_rate": 7.213670337988079e-06, |
| "loss": 0.2034, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.8072550518122952, |
| "learning_rate": 7.209022303192351e-06, |
| "loss": 0.2207, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 83.64482496451608, |
| "learning_rate": 7.204371895231623e-06, |
| "loss": 0.6215, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.7440266241540439, |
| "learning_rate": 7.199719119101858e-06, |
| "loss": 0.2275, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 7.512565989250296, |
| "learning_rate": 7.195063979801554e-06, |
| "loss": 0.4643, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 6.892103848259442, |
| "learning_rate": 7.190406482331757e-06, |
| "loss": 0.5403, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.4777073711805213, |
| "learning_rate": 7.18574663169604e-06, |
| "loss": 0.2513, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 7.282807661223366, |
| "learning_rate": 7.1810844329005095e-06, |
| "loss": 0.5989, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.5132827019244917, |
| "learning_rate": 7.176419890953788e-06, |
| "loss": 0.2253, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 7.717783537027684, |
| "learning_rate": 7.171753010867023e-06, |
| "loss": 0.6799, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 5.943915152849767, |
| "learning_rate": 7.167083797653866e-06, |
| "loss": 0.5902, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.4586529974257227, |
| "learning_rate": 7.162412256330481e-06, |
| "loss": 0.202, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 7.186611443861163, |
| "learning_rate": 7.157738391915531e-06, |
| "loss": 0.5302, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.6834896407533133, |
| "learning_rate": 7.153062209430174e-06, |
| "loss": 0.251, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 7.405116272777637, |
| "learning_rate": 7.148383713898058e-06, |
| "loss": 0.5743, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.522999107758133, |
| "learning_rate": 7.143702910345318e-06, |
| "loss": 0.2723, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.880200730593367, |
| "learning_rate": 7.139019803800569e-06, |
| "loss": 0.2816, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.3816806243398323, |
| "learning_rate": 7.134334399294897e-06, |
| "loss": 0.2367, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.7284420755099217, |
| "learning_rate": 7.129646701861858e-06, |
| "loss": 0.2544, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.629550120138974, |
| "learning_rate": 7.124956716537471e-06, |
| "loss": 0.2068, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.6243307078750684, |
| "learning_rate": 7.120264448360214e-06, |
| "loss": 0.213, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 10.565396265308518, |
| "learning_rate": 7.115569902371018e-06, |
| "loss": 0.5904, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 5.465892487151138, |
| "learning_rate": 7.110873083613259e-06, |
| "loss": 0.6009, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.5358502320339789, |
| "learning_rate": 7.106173997132755e-06, |
| "loss": 0.2387, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.363560919662825, |
| "learning_rate": 7.101472647977761e-06, |
| "loss": 0.2115, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 4.92920993447301, |
| "learning_rate": 7.096769041198964e-06, |
| "loss": 0.5299, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.6398438629568042, |
| "learning_rate": 7.0920631818494745e-06, |
| "loss": 0.2603, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.3740360168319345, |
| "learning_rate": 7.087355074984823e-06, |
| "loss": 0.198, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.5816758489089993, |
| "learning_rate": 7.082644725662954e-06, |
| "loss": 0.2146, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 5.105693805152416, |
| "learning_rate": 7.077932138944225e-06, |
| "loss": 0.5887, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 23.6755027232162, |
| "learning_rate": 7.073217319891391e-06, |
| "loss": 0.6281, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 8.074180552610876, |
| "learning_rate": 7.068500273569612e-06, |
| "loss": 0.6841, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.398156113046471, |
| "learning_rate": 7.063781005046433e-06, |
| "loss": 0.1879, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 11.617325223575968, |
| "learning_rate": 7.059059519391794e-06, |
| "loss": 0.7071, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 10.09826264231247, |
| "learning_rate": 7.054335821678012e-06, |
| "loss": 0.4581, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.7538931020652984, |
| "learning_rate": 7.049609916979782e-06, |
| "loss": 0.2602, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 3.735135329799002, |
| "learning_rate": 7.044881810374169e-06, |
| "loss": 0.3716, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 4.869733006281636, |
| "learning_rate": 7.040151506940605e-06, |
| "loss": 0.2934, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 7.683606301965944, |
| "learning_rate": 7.035419011760882e-06, |
| "loss": 0.697, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.6759892898397664, |
| "learning_rate": 7.0306843299191465e-06, |
| "loss": 0.2519, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 4.443344577883213, |
| "learning_rate": 7.0259474665018915e-06, |
| "loss": 0.5027, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.4516786600799045, |
| "learning_rate": 7.0212084265979575e-06, |
| "loss": 0.2009, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 5.018830872924124, |
| "learning_rate": 7.016467215298519e-06, |
| "loss": 0.702, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 13.372418015625748, |
| "learning_rate": 7.011723837697091e-06, |
| "loss": 0.7114, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 6.277932946105866, |
| "learning_rate": 7.0069782988895056e-06, |
| "loss": 0.5805, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.531528513144936, |
| "learning_rate": 7.002230603973924e-06, |
| "loss": 0.1991, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.8223707208415971, |
| "learning_rate": 6.9974807580508205e-06, |
| "loss": 0.2809, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 8.546674650105915, |
| "learning_rate": 6.992728766222982e-06, |
| "loss": 0.6171, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 9.548484544610723, |
| "learning_rate": 6.987974633595498e-06, |
| "loss": 0.5622, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.5120183426116132, |
| "learning_rate": 6.9832183652757625e-06, |
| "loss": 0.2235, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 6.71791971273723, |
| "learning_rate": 6.978459966373458e-06, |
| "loss": 0.5058, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 6.364431409221105, |
| "learning_rate": 6.973699442000561e-06, |
| "loss": 0.4824, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 5.7766999911114985, |
| "learning_rate": 6.96893679727133e-06, |
| "loss": 0.4832, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.7531010618552019, |
| "learning_rate": 6.9641720373022996e-06, |
| "loss": 0.1971, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.6570198426139906, |
| "learning_rate": 6.959405167212278e-06, |
| "loss": 0.2393, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.5456440395214717, |
| "learning_rate": 6.954636192122339e-06, |
| "loss": 0.2604, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.6624183851105219, |
| "learning_rate": 6.949865117155823e-06, |
| "loss": 0.2813, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 8.06647912493275, |
| "learning_rate": 6.94509194743832e-06, |
| "loss": 0.5583, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 7.806322784742366, |
| "learning_rate": 6.940316688097675e-06, |
| "loss": 0.3821, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.4797776369891873, |
| "learning_rate": 6.935539344263971e-06, |
| "loss": 0.2116, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.7083919322940107, |
| "learning_rate": 6.93075992106954e-06, |
| "loss": 0.2568, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 9.226085074768402, |
| "learning_rate": 6.925978423648941e-06, |
| "loss": 0.6846, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.375716705377596, |
| "learning_rate": 6.921194857138963e-06, |
| "loss": 0.197, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 5.685446489524216, |
| "learning_rate": 6.91640922667862e-06, |
| "loss": 0.4744, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.5215358669218515, |
| "learning_rate": 6.911621537409139e-06, |
| "loss": 0.2391, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 6.511798129596943, |
| "learning_rate": 6.906831794473963e-06, |
| "loss": 0.517, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 4.6931560492107725, |
| "learning_rate": 6.9020400030187394e-06, |
| "loss": 0.593, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 7.712398832752451, |
| "learning_rate": 6.897246168191317e-06, |
| "loss": 0.5824, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.613022044089724, |
| "learning_rate": 6.892450295141737e-06, |
| "loss": 0.2741, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 7.279473121046338, |
| "learning_rate": 6.887652389022236e-06, |
| "loss": 0.438, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.838290307338631, |
| "learning_rate": 6.88285245498723e-06, |
| "loss": 0.5796, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.5404039634116309, |
| "learning_rate": 6.878050498193314e-06, |
| "loss": 0.2412, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 14.262886472471997, |
| "learning_rate": 6.873246523799256e-06, |
| "loss": 0.6166, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 7.073604111640909, |
| "learning_rate": 6.868440536965997e-06, |
| "loss": 0.6247, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.5119057367294504, |
| "learning_rate": 6.863632542856632e-06, |
| "loss": 0.22, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 6.770024997584175, |
| "learning_rate": 6.858822546636417e-06, |
| "loss": 0.5132, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 4.621401190894274, |
| "learning_rate": 6.854010553472757e-06, |
| "loss": 0.5888, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.731610298277731, |
| "learning_rate": 6.849196568535201e-06, |
| "loss": 0.6065, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 14.911449842987928, |
| "learning_rate": 6.8443805969954445e-06, |
| "loss": 0.6184, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 4.60898253192161, |
| "learning_rate": 6.839562644027311e-06, |
| "loss": 0.5104, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 6.096953598140278, |
| "learning_rate": 6.834742714806754e-06, |
| "loss": 0.5489, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.5113402009838244, |
| "learning_rate": 6.8299208145118475e-06, |
| "loss": 0.2437, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.071904749982615, |
| "learning_rate": 6.825096948322791e-06, |
| "loss": 0.4732, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.92365925359732, |
| "learning_rate": 6.820271121421889e-06, |
| "loss": 0.4908, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.4776199050644825, |
| "learning_rate": 6.815443338993554e-06, |
| "loss": 0.2662, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 34.18987797681408, |
| "learning_rate": 6.810613606224299e-06, |
| "loss": 0.5637, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.5500535111980442, |
| "learning_rate": 6.805781928302732e-06, |
| "loss": 0.238, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.4603160104682158, |
| "learning_rate": 6.800948310419554e-06, |
| "loss": 0.2363, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 7.992049335749039, |
| "learning_rate": 6.796112757767547e-06, |
| "loss": 0.6291, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 6.296197488841442, |
| "learning_rate": 6.7912752755415716e-06, |
| "loss": 0.5768, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.462929177553609, |
| "learning_rate": 6.786435868938561e-06, |
| "loss": 0.6117, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.4717241337016855, |
| "learning_rate": 6.78159454315752e-06, |
| "loss": 0.2492, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 9.776704836987184, |
| "learning_rate": 6.776751303399509e-06, |
| "loss": 0.5362, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 6.608262013803274, |
| "learning_rate": 6.771906154867649e-06, |
| "loss": 0.5421, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.5844296226578645, |
| "learning_rate": 6.767059102767109e-06, |
| "loss": 0.2356, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 6.090627408845525, |
| "learning_rate": 6.7622101523051045e-06, |
| "loss": 0.6988, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.522165906490889, |
| "learning_rate": 6.757359308690889e-06, |
| "loss": 0.2305, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.751553003654809, |
| "learning_rate": 6.7525065771357546e-06, |
| "loss": 0.2702, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.55487397284913, |
| "learning_rate": 6.7476519628530145e-06, |
| "loss": 0.27, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.5713202363010224, |
| "learning_rate": 6.742795471058009e-06, |
| "loss": 0.2193, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.5682347759148718, |
| "learning_rate": 6.737937106968094e-06, |
| "loss": 0.1929, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 10.448845373480625, |
| "learning_rate": 6.7330768758026374e-06, |
| "loss": 0.6081, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.4756514362204363, |
| "learning_rate": 6.728214782783013e-06, |
| "loss": 0.2493, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 8.736394717506005, |
| "learning_rate": 6.723350833132596e-06, |
| "loss": 0.5611, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.432299091066597, |
| "learning_rate": 6.7184850320767505e-06, |
| "loss": 0.1889, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 6.2082942303491455, |
| "learning_rate": 6.7136173848428375e-06, |
| "loss": 0.5751, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 5.900510845746761, |
| "learning_rate": 6.708747896660196e-06, |
| "loss": 0.5811, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 7.232392559033582, |
| "learning_rate": 6.703876572760144e-06, |
| "loss": 0.5332, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 5.012627379636099, |
| "learning_rate": 6.6990034183759726e-06, |
| "loss": 0.421, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.6039349162455028, |
| "learning_rate": 6.694128438742939e-06, |
| "loss": 0.2281, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 7.378904891237365, |
| "learning_rate": 6.689251639098261e-06, |
| "loss": 0.5378, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.6722139873092432, |
| "learning_rate": 6.684373024681112e-06, |
| "loss": 0.2682, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.6777328407422487, |
| "learning_rate": 6.679492600732614e-06, |
| "loss": 0.2174, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 6.0752801994029415, |
| "learning_rate": 6.674610372495832e-06, |
| "loss": 0.6384, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 6.741101694208744, |
| "learning_rate": 6.669726345215776e-06, |
| "loss": 0.7203, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.6406867327439791, |
| "learning_rate": 6.66484052413938e-06, |
| "loss": 0.2416, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 7.514032353916119, |
| "learning_rate": 6.659952914515508e-06, |
| "loss": 0.485, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.5597751611658328, |
| "learning_rate": 6.65506352159495e-06, |
| "loss": 0.2443, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.5405462003001633, |
| "learning_rate": 6.650172350630406e-06, |
| "loss": 0.2741, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 6.525607991866356, |
| "learning_rate": 6.645279406876488e-06, |
| "loss": 0.5759, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 5.002358514388684, |
| "learning_rate": 6.640384695589714e-06, |
| "loss": 0.4653, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.4727288285377615, |
| "learning_rate": 6.635488222028497e-06, |
| "loss": 0.2807, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.5045382510946763, |
| "learning_rate": 6.630589991453148e-06, |
| "loss": 0.2361, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.6865073317669421, |
| "learning_rate": 6.6256900091258644e-06, |
| "loss": 0.2688, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.820786742344778, |
| "learning_rate": 6.620788280310722e-06, |
| "loss": 0.2648, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.7462808336866076, |
| "learning_rate": 6.615884810273678e-06, |
| "loss": 0.2464, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.5298051068868284, |
| "learning_rate": 6.610979604282557e-06, |
| "loss": 0.2221, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 5.872999684855236, |
| "learning_rate": 6.606072667607048e-06, |
| "loss": 0.6946, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 7.095836416798553, |
| "learning_rate": 6.601164005518702e-06, |
| "loss": 0.5355, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.694822554317753, |
| "learning_rate": 6.59625362329092e-06, |
| "loss": 0.2686, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.7911594577885972, |
| "learning_rate": 6.591341526198955e-06, |
| "loss": 0.2835, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.7186980047557423, |
| "learning_rate": 6.586427719519901e-06, |
| "loss": 0.2505, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.477694989730813, |
| "learning_rate": 6.581512208532685e-06, |
| "loss": 0.2179, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 8.594806131062327, |
| "learning_rate": 6.576594998518071e-06, |
| "loss": 0.5909, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 5.231386413737722, |
| "learning_rate": 6.5716760947586425e-06, |
| "loss": 0.4832, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.5295569495021915, |
| "learning_rate": 6.566755502538806e-06, |
| "loss": 0.246, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 8.168014180758991, |
| "learning_rate": 6.561833227144784e-06, |
| "loss": 0.5241, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.43115436817828, |
| "learning_rate": 6.556909273864601e-06, |
| "loss": 0.1902, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.4397401401153558, |
| "learning_rate": 6.551983647988089e-06, |
| "loss": 0.2052, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.4140575314568167, |
| "learning_rate": 6.547056354806874e-06, |
| "loss": 0.2099, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 13.958535014184495, |
| "learning_rate": 6.542127399614376e-06, |
| "loss": 0.5045, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 5.929928698067259, |
| "learning_rate": 6.5371967877058e-06, |
| "loss": 0.6021, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 6.4073521805396165, |
| "learning_rate": 6.532264524378128e-06, |
| "loss": 0.5263, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.4476342489402703, |
| "learning_rate": 6.52733061493012e-06, |
| "loss": 0.24, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.5255415468091642, |
| "learning_rate": 6.522395064662299e-06, |
| "loss": 0.2699, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.4021509689977685, |
| "learning_rate": 6.517457878876958e-06, |
| "loss": 0.2091, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.5747361561140838, |
| "learning_rate": 6.512519062878142e-06, |
| "loss": 0.2619, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 7.755109727547206, |
| "learning_rate": 6.507578621971646e-06, |
| "loss": 0.6403, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 5.277901904623132, |
| "learning_rate": 6.502636561465018e-06, |
| "loss": 0.5602, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 6.702260641203853, |
| "learning_rate": 6.497692886667537e-06, |
| "loss": 0.6175, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.3942715927968807, |
| "learning_rate": 6.492747602890223e-06, |
| "loss": 0.2102, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.776422204585946, |
| "learning_rate": 6.487800715445822e-06, |
| "loss": 0.2451, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.4579178378320363, |
| "learning_rate": 6.4828522296488014e-06, |
| "loss": 0.2442, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 4.907790366492386, |
| "learning_rate": 6.477902150815347e-06, |
| "loss": 0.5934, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.604508114703477, |
| "learning_rate": 6.472950484263359e-06, |
| "loss": 0.2447, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 8.215101926760493, |
| "learning_rate": 6.467997235312437e-06, |
| "loss": 0.5843, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 5.134432922013831, |
| "learning_rate": 6.463042409283885e-06, |
| "loss": 0.6326, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 11.270702143795317, |
| "learning_rate": 6.458086011500703e-06, |
| "loss": 0.5792, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.5642748476276402, |
| "learning_rate": 6.453128047287573e-06, |
| "loss": 0.2395, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 5.643908819098535, |
| "learning_rate": 6.448168521970865e-06, |
| "loss": 0.6205, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 5.88595989332353, |
| "learning_rate": 6.443207440878624e-06, |
| "loss": 0.5761, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 6.492703074635773, |
| "learning_rate": 6.438244809340568e-06, |
| "loss": 0.6278, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 5.962111127877899, |
| "learning_rate": 6.43328063268808e-06, |
| "loss": 0.6743, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.661193805884559, |
| "learning_rate": 6.428314916254203e-06, |
| "loss": 0.2352, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 4.8410545725964695, |
| "learning_rate": 6.423347665373633e-06, |
| "loss": 0.6717, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 5.857134308919506, |
| "learning_rate": 6.418378885382716e-06, |
| "loss": 0.3853, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.654657491282791, |
| "learning_rate": 6.41340858161944e-06, |
| "loss": 0.2793, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.5100294686951206, |
| "learning_rate": 6.408436759423431e-06, |
| "loss": 0.2342, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.5015655233250866, |
| "learning_rate": 6.403463424135943e-06, |
| "loss": 0.2294, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 4.342949530298343, |
| "learning_rate": 6.398488581099859e-06, |
| "loss": 0.688, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 4.847372303171137, |
| "learning_rate": 6.393512235659681e-06, |
| "loss": 0.6396, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.559018842354788, |
| "learning_rate": 6.388534393161525e-06, |
| "loss": 0.2347, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 5.652103439251986, |
| "learning_rate": 6.383555058953115e-06, |
| "loss": 0.6426, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.64044125352812, |
| "learning_rate": 6.378574238383776e-06, |
| "loss": 0.2685, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 7.090198166881005, |
| "learning_rate": 6.373591936804433e-06, |
| "loss": 0.5847, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 4.883742202613736, |
| "learning_rate": 6.3686081595676e-06, |
| "loss": 0.464, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 6.09957916816325, |
| "learning_rate": 6.3636229120273766e-06, |
| "loss": 0.5592, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 5.403859884825113, |
| "learning_rate": 6.3586361995394415e-06, |
| "loss": 0.5085, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 5.554581031595441, |
| "learning_rate": 6.353648027461048e-06, |
| "loss": 0.4029, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 16.1020169268518, |
| "learning_rate": 6.348658401151018e-06, |
| "loss": 0.6541, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.4614761809805563, |
| "learning_rate": 6.343667325969736e-06, |
| "loss": 0.2115, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 5.740622425139341, |
| "learning_rate": 6.3386748072791395e-06, |
| "loss": 0.5811, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 4.76093657013544, |
| "learning_rate": 6.33368085044272e-06, |
| "loss": 0.6175, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 4.631229554250956, |
| "learning_rate": 6.328685460825512e-06, |
| "loss": 0.506, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.648768239858883, |
| "learning_rate": 6.323688643794094e-06, |
| "loss": 0.2979, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 20.025290556974912, |
| "learning_rate": 6.318690404716572e-06, |
| "loss": 0.4681, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 14.346862070384379, |
| "learning_rate": 6.313690748962582e-06, |
| "loss": 0.4402, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.4828472003197997, |
| "learning_rate": 6.3086896819032814e-06, |
| "loss": 0.2206, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 7.0220406456740445, |
| "learning_rate": 6.303687208911348e-06, |
| "loss": 0.6517, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.5489288875091363, |
| "learning_rate": 6.298683335360962e-06, |
| "loss": 0.2078, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 6.176570408327461, |
| "learning_rate": 6.293678066627816e-06, |
| "loss": 0.4571, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 10.591530096478122, |
| "learning_rate": 6.288671408089098e-06, |
| "loss": 0.6328, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 6.31658187255168, |
| "learning_rate": 6.283663365123486e-06, |
| "loss": 0.4785, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 6.758008842878058, |
| "learning_rate": 6.278653943111152e-06, |
| "loss": 0.4837, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.4104171677419424, |
| "learning_rate": 6.273643147433743e-06, |
| "loss": 0.2289, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.6197009224422692, |
| "learning_rate": 6.268630983474388e-06, |
| "loss": 0.2829, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 6.249525275292694, |
| "learning_rate": 6.263617456617681e-06, |
| "loss": 0.6438, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.513923936350329, |
| "learning_rate": 6.258602572249683e-06, |
| "loss": 0.2671, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.7552401802120616, |
| "learning_rate": 6.2535863357579105e-06, |
| "loss": 0.2371, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.731838791910952, |
| "learning_rate": 6.248568752531337e-06, |
| "loss": 0.2664, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 4.4219639243732995, |
| "learning_rate": 6.243549827960378e-06, |
| "loss": 0.7413, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.5935077451476027, |
| "learning_rate": 6.238529567436892e-06, |
| "loss": 0.2352, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 6.377410112066094, |
| "learning_rate": 6.233507976354174e-06, |
| "loss": 0.5239, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 14.826981063929932, |
| "learning_rate": 6.228485060106948e-06, |
| "loss": 0.6102, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 3.709700351161225, |
| "learning_rate": 6.223460824091358e-06, |
| "loss": 0.3748, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 7.541911782934291, |
| "learning_rate": 6.218435273704973e-06, |
| "loss": 0.6081, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 5.034766786731644, |
| "learning_rate": 6.213408414346765e-06, |
| "loss": 0.533, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 6.106431140775753, |
| "learning_rate": 6.208380251417122e-06, |
| "loss": 0.587, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 5.62746791692136, |
| "learning_rate": 6.203350790317825e-06, |
| "loss": 0.586, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.5934871745788728, |
| "learning_rate": 6.198320036452051e-06, |
| "loss": 0.2625, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.7027648894463596, |
| "learning_rate": 6.193287995224371e-06, |
| "loss": 0.2191, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 9.100797592288487, |
| "learning_rate": 6.18825467204073e-06, |
| "loss": 0.5967, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.4300400133146662, |
| "learning_rate": 6.183220072308459e-06, |
| "loss": 0.2114, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 8.307304161283804, |
| "learning_rate": 6.178184201436256e-06, |
| "loss": 0.7205, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 8.519423645264656, |
| "learning_rate": 6.173147064834183e-06, |
| "loss": 0.5529, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 8.721720890194614, |
| "learning_rate": 6.168108667913666e-06, |
| "loss": 0.7219, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 10.656532411122027, |
| "learning_rate": 6.163069016087483e-06, |
| "loss": 0.6465, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.7054559572063817, |
| "learning_rate": 6.158028114769758e-06, |
| "loss": 0.2313, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.5034160019470488, |
| "learning_rate": 6.152985969375962e-06, |
| "loss": 0.2213, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.441204816973364, |
| "learning_rate": 6.147942585322898e-06, |
| "loss": 0.185, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 6.463243020843492, |
| "learning_rate": 6.142897968028704e-06, |
| "loss": 0.5157, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.6634596678438656, |
| "learning_rate": 6.137852122912839e-06, |
| "loss": 0.2605, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.5140618946971276, |
| "learning_rate": 6.1328050553960804e-06, |
| "loss": 0.2057, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.3827811621096124, |
| "learning_rate": 6.1277567709005245e-06, |
| "loss": 0.2351, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 12.096242813456865, |
| "learning_rate": 6.122707274849572e-06, |
| "loss": 0.691, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.5592714009568418, |
| "learning_rate": 6.117656572667921e-06, |
| "loss": 0.2206, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.2934985017598122, |
| "learning_rate": 6.112604669781572e-06, |
| "loss": 0.1876, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.5518426348580512, |
| "learning_rate": 6.107551571617813e-06, |
| "loss": 0.1925, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 5.958954702269303, |
| "learning_rate": 6.1024972836052135e-06, |
| "loss": 0.6241, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 5.442321985431483, |
| "learning_rate": 6.0974418111736235e-06, |
| "loss": 0.6867, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.551766771257588, |
| "learning_rate": 6.092385159754165e-06, |
| "loss": 0.1898, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 4.001212140548543, |
| "learning_rate": 6.0873273347792275e-06, |
| "loss": 0.5045, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.7793938294035376, |
| "learning_rate": 6.0822683416824625e-06, |
| "loss": 0.2023, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.63866212649055, |
| "learning_rate": 6.077208185898772e-06, |
| "loss": 0.2622, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 3.381632394924379, |
| "learning_rate": 6.07214687286431e-06, |
| "loss": 0.2216, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.5895154817359234, |
| "learning_rate": 6.067084408016475e-06, |
| "loss": 0.2351, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.443672079594385, |
| "learning_rate": 6.0620207967939e-06, |
| "loss": 0.1934, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 7.115039207145125, |
| "learning_rate": 6.0569560446364495e-06, |
| "loss": 0.6115, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 7.315327542828079, |
| "learning_rate": 6.051890156985217e-06, |
| "loss": 0.6131, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 5.564007578956525, |
| "learning_rate": 6.046823139282515e-06, |
| "loss": 0.5503, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 8.161419091740283, |
| "learning_rate": 6.041754996971866e-06, |
| "loss": 0.5399, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 5.94974351366777, |
| "learning_rate": 6.036685735498004e-06, |
| "loss": 0.6738, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 6.733711412578767, |
| "learning_rate": 6.031615360306867e-06, |
| "loss": 0.3184, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.7126580681320984, |
| "learning_rate": 6.026543876845586e-06, |
| "loss": 0.2007, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.7011935235958762, |
| "learning_rate": 6.021471290562484e-06, |
| "loss": 0.287, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 5.4274651443922375, |
| "learning_rate": 6.016397606907069e-06, |
| "loss": 0.4691, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 5.239967968092811, |
| "learning_rate": 6.011322831330028e-06, |
| "loss": 0.6135, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.6278847693376925, |
| "learning_rate": 6.0062469692832205e-06, |
| "loss": 0.2015, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.511697518375073, |
| "learning_rate": 6.001170026219673e-06, |
| "loss": 0.2016, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.5004007832623714, |
| "learning_rate": 5.996092007593572e-06, |
| "loss": 0.2587, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 5.52428019575851, |
| "learning_rate": 5.9910129188602665e-06, |
| "loss": 0.4986, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 6.192378356562513, |
| "learning_rate": 5.985932765476246e-06, |
| "loss": 0.5822, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 4.408229036197582, |
| "learning_rate": 5.9808515528991486e-06, |
| "loss": 0.6131, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 3.9940885634068106, |
| "learning_rate": 5.975769286587747e-06, |
| "loss": 0.5009, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.5060988705786211, |
| "learning_rate": 5.970685972001953e-06, |
| "loss": 0.1932, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.7045467912876315, |
| "learning_rate": 5.965601614602798e-06, |
| "loss": 0.2377, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.614092221012137, |
| "learning_rate": 5.960516219852433e-06, |
| "loss": 0.251, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.7816140159741642, |
| "learning_rate": 5.955429793214129e-06, |
| "loss": 0.2594, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 5.581341187828204, |
| "learning_rate": 5.950342340152261e-06, |
| "loss": 0.5742, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 7.234871787931018, |
| "learning_rate": 5.945253866132308e-06, |
| "loss": 0.5095, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.4986143435034935, |
| "learning_rate": 5.940164376620847e-06, |
| "loss": 0.294, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.562984444963245, |
| "learning_rate": 5.935073877085546e-06, |
| "loss": 0.1948, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 6.374888940526056, |
| "learning_rate": 5.9299823729951544e-06, |
| "loss": 0.5791, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 7.329003624019677, |
| "learning_rate": 5.9248898698195054e-06, |
| "loss": 0.5772, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 5.348094808840994, |
| "learning_rate": 5.919796373029504e-06, |
| "loss": 0.6713, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.5162721511146773, |
| "learning_rate": 5.914701888097121e-06, |
| "loss": 0.2235, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.5933679221185735, |
| "learning_rate": 5.90960642049539e-06, |
| "loss": 0.2001, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.5968570997261389, |
| "learning_rate": 5.904509975698399e-06, |
| "loss": 0.2059, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 6.032773887097328, |
| "learning_rate": 5.8994125591812914e-06, |
| "loss": 0.6078, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 14.010395787236638, |
| "learning_rate": 5.894314176420247e-06, |
| "loss": 0.5693, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 6.1418982593984355, |
| "learning_rate": 5.889214832892489e-06, |
| "loss": 0.4619, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.748778932171975, |
| "learning_rate": 5.8841145340762665e-06, |
| "loss": 0.2725, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.5967154667979515, |
| "learning_rate": 5.879013285450863e-06, |
| "loss": 0.2116, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 5.50949370617981, |
| "learning_rate": 5.873911092496577e-06, |
| "loss": 0.7226, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 5.423469449952084, |
| "learning_rate": 5.8688079606947226e-06, |
| "loss": 0.6379, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 8.37037831983272, |
| "learning_rate": 5.8637038955276225e-06, |
| "loss": 0.5217, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.549702917190697, |
| "learning_rate": 5.858598902478604e-06, |
| "loss": 0.2595, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.5640442062254063, |
| "learning_rate": 5.853492987031989e-06, |
| "loss": 0.2442, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 5.853289519816507, |
| "learning_rate": 5.8483861546730915e-06, |
| "loss": 0.6658, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 7.383185087894194, |
| "learning_rate": 5.843278410888208e-06, |
| "loss": 0.5993, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 6.915037700741412, |
| "learning_rate": 5.838169761164616e-06, |
| "loss": 0.6638, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.6018368980030406, |
| "learning_rate": 5.83306021099057e-06, |
| "loss": 0.2413, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 5.669865895754325, |
| "learning_rate": 5.827949765855285e-06, |
| "loss": 0.7527, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 6.946321758995887, |
| "learning_rate": 5.822838431248943e-06, |
| "loss": 0.7266, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 8.295686551610215, |
| "learning_rate": 5.817726212662678e-06, |
| "loss": 0.564, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.3597518824025032, |
| "learning_rate": 5.812613115588575e-06, |
| "loss": 0.1907, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.532874896372509, |
| "learning_rate": 5.807499145519663e-06, |
| "loss": 0.2264, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 11.40493517560385, |
| "learning_rate": 5.802384307949909e-06, |
| "loss": 0.5242, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 6.279974313265418, |
| "learning_rate": 5.79726860837421e-06, |
| "loss": 0.4193, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 7.086086409607318, |
| "learning_rate": 5.792152052288391e-06, |
| "loss": 0.4238, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 5.433958488568387, |
| "learning_rate": 5.787034645189199e-06, |
| "loss": 0.6271, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.590299256922156, |
| "learning_rate": 5.7819163925742915e-06, |
| "loss": 0.244, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 5.871403509877293, |
| "learning_rate": 5.776797299942236e-06, |
| "loss": 0.4004, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.5616498231554254, |
| "learning_rate": 5.771677372792502e-06, |
| "loss": 0.251, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 6.874638425737895, |
| "learning_rate": 5.766556616625456e-06, |
| "loss": 0.4635, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 5.844574655902825, |
| "learning_rate": 5.7614350369423555e-06, |
| "loss": 0.5394, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.5094171919456973, |
| "learning_rate": 5.7563126392453415e-06, |
| "loss": 0.2052, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 6.914812909343156, |
| "learning_rate": 5.751189429037435e-06, |
| "loss": 0.5199, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.4516814450314157, |
| "learning_rate": 5.746065411822528e-06, |
| "loss": 0.2077, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 10.75699520458192, |
| "learning_rate": 5.740940593105383e-06, |
| "loss": 0.6669, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.4071175629833979, |
| "learning_rate": 5.73581497839162e-06, |
| "loss": 0.1879, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.7856542360993077, |
| "learning_rate": 5.730688573187715e-06, |
| "loss": 0.2152, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 4.801735894205422, |
| "learning_rate": 5.725561383000994e-06, |
| "loss": 0.5234, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 8.100454032430392, |
| "learning_rate": 5.720433413339627e-06, |
| "loss": 0.6928, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 6.165357334442091, |
| "learning_rate": 5.71530466971262e-06, |
| "loss": 0.7244, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.4082644657868995, |
| "learning_rate": 5.710175157629812e-06, |
| "loss": 0.179, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 5.230557380399823, |
| "learning_rate": 5.705044882601862e-06, |
| "loss": 0.6223, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 6.311395519487412, |
| "learning_rate": 5.69991385014026e-06, |
| "loss": 0.5669, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.5884491952741175, |
| "learning_rate": 5.694782065757298e-06, |
| "loss": 0.2397, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.5967611503008956, |
| "learning_rate": 5.689649534966083e-06, |
| "loss": 0.2117, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 9.269060222334259, |
| "learning_rate": 5.684516263280519e-06, |
| "loss": 0.4528, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.5570685995516629, |
| "learning_rate": 5.679382256215311e-06, |
| "loss": 0.2377, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.5572563031572018, |
| "learning_rate": 5.674247519285951e-06, |
| "loss": 0.2151, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 5.291556244427816, |
| "learning_rate": 5.6691120580087126e-06, |
| "loss": 0.5447, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.6515665279687242, |
| "learning_rate": 5.6639758779006535e-06, |
| "loss": 0.2395, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 6.089072374992358, |
| "learning_rate": 5.6588389844796e-06, |
| "loss": 0.575, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.5221716539675059, |
| "learning_rate": 5.653701383264147e-06, |
| "loss": 0.2671, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.6032218271717784, |
| "learning_rate": 5.648563079773646e-06, |
| "loss": 0.2181, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.4587673717871135, |
| "learning_rate": 5.6434240795282045e-06, |
| "loss": 0.1982, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.812150431885437, |
| "learning_rate": 5.63828438804868e-06, |
| "loss": 0.273, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.3381332936511572, |
| "learning_rate": 5.6331440108566735e-06, |
| "loss": 0.2072, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.611767812418644, |
| "learning_rate": 5.628002953474521e-06, |
| "loss": 0.2202, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 7.9373186172470716, |
| "learning_rate": 5.622861221425286e-06, |
| "loss": 0.6506, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.4949252067454528, |
| "learning_rate": 5.617718820232762e-06, |
| "loss": 0.2792, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.694110592549428, |
| "learning_rate": 5.612575755421459e-06, |
| "loss": 0.2656, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.7161503822743196, |
| "learning_rate": 5.607432032516601e-06, |
| "loss": 0.2191, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.4554034554643032, |
| "learning_rate": 5.602287657044116e-06, |
| "loss": 0.1987, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 5.5181690890907795, |
| "learning_rate": 5.597142634530639e-06, |
| "loss": 0.4919, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.507976163980248, |
| "learning_rate": 5.5919969705034914e-06, |
| "loss": 0.2824, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 13.592867993844482, |
| "learning_rate": 5.586850670490694e-06, |
| "loss": 0.559, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 6.669820839398386, |
| "learning_rate": 5.581703740020943e-06, |
| "loss": 0.4962, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.547748409577588, |
| "learning_rate": 5.576556184623615e-06, |
| "loss": 0.2775, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.5626471518217038, |
| "learning_rate": 5.571408009828757e-06, |
| "loss": 0.1881, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.6137393051541997, |
| "learning_rate": 5.56625922116708e-06, |
| "loss": 0.2485, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 9.060923338470827, |
| "learning_rate": 5.561109824169962e-06, |
| "loss": 0.5879, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.5275029545197205, |
| "learning_rate": 5.555959824369426e-06, |
| "loss": 0.2227, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 5.890581980889761, |
| "learning_rate": 5.550809227298144e-06, |
| "loss": 0.5153, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 9.476490387947244, |
| "learning_rate": 5.545658038489433e-06, |
| "loss": 0.6009, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.5572148092298645, |
| "learning_rate": 5.540506263477243e-06, |
| "loss": 0.2361, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 8.160987697726984, |
| "learning_rate": 5.535353907796155e-06, |
| "loss": 0.6372, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.6589959079076817, |
| "learning_rate": 5.530200976981375e-06, |
| "loss": 0.2323, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.4941811975869728, |
| "learning_rate": 5.525047476568722e-06, |
| "loss": 0.209, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.4608107235234649, |
| "learning_rate": 5.519893412094631e-06, |
| "loss": 0.191, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.5807509840278664, |
| "learning_rate": 5.514738789096146e-06, |
| "loss": 0.2184, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 5.140081435868278, |
| "learning_rate": 5.509583613110904e-06, |
| "loss": 0.5034, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 5.338634886118849, |
| "learning_rate": 5.504427889677141e-06, |
| "loss": 0.5531, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 5.834479825244042, |
| "learning_rate": 5.499271624333676e-06, |
| "loss": 0.5415, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.409874549266494, |
| "learning_rate": 5.494114822619918e-06, |
| "loss": 0.2146, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 8.157767358611192, |
| "learning_rate": 5.488957490075846e-06, |
| "loss": 0.4026, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 5.29294075997288, |
| "learning_rate": 5.483799632242012e-06, |
| "loss": 0.4952, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 7.020074969303334, |
| "learning_rate": 5.478641254659528e-06, |
| "loss": 0.619, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 6.501749877133997, |
| "learning_rate": 5.473482362870073e-06, |
| "loss": 0.7001, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 9.145386640976326, |
| "learning_rate": 5.468322962415871e-06, |
| "loss": 0.8264, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.4552497346840383, |
| "learning_rate": 5.463163058839694e-06, |
| "loss": 0.1967, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 6.150379067884863, |
| "learning_rate": 5.4580026576848565e-06, |
| "loss": 0.4477, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 6.856821377217917, |
| "learning_rate": 5.452841764495203e-06, |
| "loss": 0.5338, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 5.548740046702573, |
| "learning_rate": 5.4476803848151146e-06, |
| "loss": 0.4569, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 5.2199746505908315, |
| "learning_rate": 5.442518524189489e-06, |
| "loss": 0.5058, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.569638947005932, |
| "learning_rate": 5.4373561881637405e-06, |
| "loss": 0.2068, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 6.571124488387639, |
| "learning_rate": 5.432193382283794e-06, |
| "loss": 0.59, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 11.072398210438648, |
| "learning_rate": 5.4270301120960856e-06, |
| "loss": 0.5034, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.6604978324242805, |
| "learning_rate": 5.421866383147541e-06, |
| "loss": 0.2269, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.5631886466397849, |
| "learning_rate": 5.416702200985585e-06, |
| "loss": 0.2451, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.6920966611788442, |
| "learning_rate": 5.411537571158127e-06, |
| "loss": 0.2147, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.428778094040855, |
| "learning_rate": 5.406372499213557e-06, |
| "loss": 0.1904, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 8.719729696636715, |
| "learning_rate": 5.401206990700741e-06, |
| "loss": 0.7195, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 6.322321002079452, |
| "learning_rate": 5.396041051169016e-06, |
| "loss": 0.2925, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.4600090811959099, |
| "learning_rate": 5.390874686168176e-06, |
| "loss": 0.2174, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 8.808701673971717, |
| "learning_rate": 5.385707901248478e-06, |
| "loss": 0.51, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 5.090679953288352, |
| "learning_rate": 5.380540701960627e-06, |
| "loss": 0.6264, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 7.9727189340200235, |
| "learning_rate": 5.375373093855774e-06, |
| "loss": 0.5375, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.5756568587127695, |
| "learning_rate": 5.37020508248551e-06, |
| "loss": 0.2231, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 7.208546903518983, |
| "learning_rate": 5.365036673401857e-06, |
| "loss": 0.6135, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 6.451098663402852, |
| "learning_rate": 5.359867872157267e-06, |
| "loss": 0.6545, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 5.373817665970237, |
| "learning_rate": 5.354698684304613e-06, |
| "loss": 0.5989, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 6.9457673284712955, |
| "learning_rate": 5.3495291153971806e-06, |
| "loss": 0.4982, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.9560317530021443, |
| "learning_rate": 5.344359170988668e-06, |
| "loss": 0.236, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.6454647228387338, |
| "learning_rate": 5.339188856633173e-06, |
| "loss": 0.2521, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 7.26682281445475, |
| "learning_rate": 5.3340181778851954e-06, |
| "loss": 0.6656, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.565229116455418, |
| "learning_rate": 5.328847140299624e-06, |
| "loss": 0.2452, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.4624006842320267, |
| "learning_rate": 5.323675749431732e-06, |
| "loss": 0.2404, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.3092376971382669, |
| "learning_rate": 5.318504010837175e-06, |
| "loss": 0.2074, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.3964277051842584, |
| "learning_rate": 5.313331930071981e-06, |
| "loss": 0.2302, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 5.998195505824346, |
| "learning_rate": 5.308159512692544e-06, |
| "loss": 0.5598, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.506562101070047, |
| "learning_rate": 5.302986764255621e-06, |
| "loss": 0.2524, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.6797851981172873, |
| "learning_rate": 5.297813690318325e-06, |
| "loss": 0.2354, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 7.9833282259171545, |
| "learning_rate": 5.292640296438116e-06, |
| "loss": 0.5724, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.351785654007111, |
| "learning_rate": 5.287466588172804e-06, |
| "loss": 0.1724, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 6.805419920563617, |
| "learning_rate": 5.2822925710805305e-06, |
| "loss": 0.6574, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.4500618472699565, |
| "learning_rate": 5.27711825071977e-06, |
| "loss": 0.2205, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.4666057939408428, |
| "learning_rate": 5.2719436326493255e-06, |
| "loss": 0.2131, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 9.876809659366371, |
| "learning_rate": 5.266768722428318e-06, |
| "loss": 0.4681, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.6895598053477852, |
| "learning_rate": 5.261593525616181e-06, |
| "loss": 0.2415, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 33.141230223671855, |
| "learning_rate": 5.256418047772659e-06, |
| "loss": 0.6495, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 7.222142527434707, |
| "learning_rate": 5.251242294457796e-06, |
| "loss": 0.5127, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.606258762350376, |
| "learning_rate": 5.2460662712319335e-06, |
| "loss": 0.1983, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 5.820040399203535, |
| "learning_rate": 5.240889983655701e-06, |
| "loss": 0.7071, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.5914402191905133, |
| "learning_rate": 5.235713437290012e-06, |
| "loss": 0.2751, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 6.225603346384584, |
| "learning_rate": 5.230536637696062e-06, |
| "loss": 0.5746, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.541989696517418, |
| "learning_rate": 5.225359590435312e-06, |
| "loss": 0.2241, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 8.6583511717209, |
| "learning_rate": 5.220182301069499e-06, |
| "loss": 0.5356, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.5076977854306777, |
| "learning_rate": 5.215004775160608e-06, |
| "loss": 0.2231, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.6451346099309636, |
| "learning_rate": 5.209827018270886e-06, |
| "loss": 0.2346, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.475130054029139, |
| "learning_rate": 5.204649035962825e-06, |
| "loss": 0.1982, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 8.804277991122476, |
| "learning_rate": 5.199470833799164e-06, |
| "loss": 0.5918, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 6.092780082625882, |
| "learning_rate": 5.1942924173428725e-06, |
| "loss": 0.5275, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.6915166302676385, |
| "learning_rate": 5.18911379215715e-06, |
| "loss": 0.2502, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.6615219075229888, |
| "learning_rate": 5.1839349638054245e-06, |
| "loss": 0.2571, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 5.776297222782462, |
| "learning_rate": 5.178755937851341e-06, |
| "loss": 0.6465, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.5680198666699618, |
| "learning_rate": 5.173576719858755e-06, |
| "loss": 0.2135, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.5331842990028555, |
| "learning_rate": 5.168397315391729e-06, |
| "loss": 0.2177, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.7123322876331615, |
| "learning_rate": 5.1632177300145255e-06, |
| "loss": 0.2162, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 5.657687983274009, |
| "learning_rate": 5.1580379692916025e-06, |
| "loss": 0.6432, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 6.8565715982878075, |
| "learning_rate": 5.152858038787608e-06, |
| "loss": 0.6184, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.2964004826638862, |
| "learning_rate": 5.147677944067368e-06, |
| "loss": 0.1906, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 8.914120125918338, |
| "learning_rate": 5.142497690695888e-06, |
| "loss": 0.5578, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.4918586765278812, |
| "learning_rate": 5.137317284238344e-06, |
| "loss": 0.1902, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 4.910516822516931, |
| "learning_rate": 5.1321367302600726e-06, |
| "loss": 0.5745, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.4232854829167805, |
| "learning_rate": 5.126956034326573e-06, |
| "loss": 0.2139, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.6422619704895127, |
| "learning_rate": 5.121775202003499e-06, |
| "loss": 0.2442, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 10.239078670288134, |
| "learning_rate": 5.116594238856645e-06, |
| "loss": 0.6108, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.5131236382195519, |
| "learning_rate": 5.111413150451948e-06, |
| "loss": 0.2163, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.4550396146124138, |
| "learning_rate": 5.1062319423554815e-06, |
| "loss": 0.2177, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 6.032387469093304, |
| "learning_rate": 5.101050620133447e-06, |
| "loss": 0.5713, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.5872937712989676, |
| "learning_rate": 5.095869189352166e-06, |
| "loss": 0.2172, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 8.086742056216735, |
| "learning_rate": 5.090687655578078e-06, |
| "loss": 0.539, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 7.058019487453007, |
| "learning_rate": 5.0855060243777366e-06, |
| "loss": 0.497, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.5755013536979725, |
| "learning_rate": 5.080324301317795e-06, |
| "loss": 0.2408, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 8.28028752357728, |
| "learning_rate": 5.0751424919650085e-06, |
| "loss": 0.5223, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 7.1948558854661595, |
| "learning_rate": 5.069960601886224e-06, |
| "loss": 0.4149, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.6331384794955384, |
| "learning_rate": 5.064778636648371e-06, |
| "loss": 0.2335, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.600789803430444, |
| "learning_rate": 5.05959660181847e-06, |
| "loss": 0.217, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 6.480435576720631, |
| "learning_rate": 5.054414502963605e-06, |
| "loss": 0.6898, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 6.300727891687989, |
| "learning_rate": 5.049232345650936e-06, |
| "loss": 0.524, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 7.718258211695682, |
| "learning_rate": 5.044050135447682e-06, |
| "loss": 0.6407, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 6.390117065260238, |
| "learning_rate": 5.038867877921124e-06, |
| "loss": 0.5661, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.5551064516429625, |
| "learning_rate": 5.033685578638586e-06, |
| "loss": 0.2444, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.366443591531705, |
| "learning_rate": 5.028503243167443e-06, |
| "loss": 0.2058, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.5408277425842203, |
| "learning_rate": 5.023320877075107e-06, |
| "loss": 0.2366, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 28.713074028896308, |
| "learning_rate": 5.0181384859290215e-06, |
| "loss": 0.6233, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 5.933668894742834, |
| "learning_rate": 5.01295607529666e-06, |
| "loss": 0.5112, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 6.807756503769824, |
| "learning_rate": 5.007773650745514e-06, |
| "loss": 0.5858, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.456530499385341, |
| "learning_rate": 5.0025912178430925e-06, |
| "loss": 0.178, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.2971745276954956, |
| "learning_rate": 4.997408782156909e-06, |
| "loss": 0.1656, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 4.7365125043629295, |
| "learning_rate": 4.9922263492544885e-06, |
| "loss": 0.5786, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.5329901231772882, |
| "learning_rate": 4.987043924703342e-06, |
| "loss": 0.1941, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 5.556001088065364, |
| "learning_rate": 4.981861514070979e-06, |
| "loss": 0.5425, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 6.003284521046527, |
| "learning_rate": 4.976679122924896e-06, |
| "loss": 0.5762, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.4350546509665651, |
| "learning_rate": 4.971496756832557e-06, |
| "loss": 0.1769, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 11.116719286554067, |
| "learning_rate": 4.966314421361416e-06, |
| "loss": 0.7031, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 5.856978400288504, |
| "learning_rate": 4.9611321220788775e-06, |
| "loss": 0.5082, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 6.127190486034729, |
| "learning_rate": 4.955949864552318e-06, |
| "loss": 0.436, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 7.534478150983952, |
| "learning_rate": 4.950767654349067e-06, |
| "loss": 0.7365, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.4807038799566166, |
| "learning_rate": 4.945585497036396e-06, |
| "loss": 0.1882, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.4785272497673148, |
| "learning_rate": 4.940403398181531e-06, |
| "loss": 0.1919, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.7903098995121822, |
| "learning_rate": 4.935221363351631e-06, |
| "loss": 0.2484, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 6.8985010613029925, |
| "learning_rate": 4.930039398113779e-06, |
| "loss": 0.6661, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 7.243755959377869, |
| "learning_rate": 4.924857508034994e-06, |
| "loss": 0.6885, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 7.642501092348366, |
| "learning_rate": 4.919675698682206e-06, |
| "loss": 0.6593, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.5911392117340943, |
| "learning_rate": 4.914493975622263e-06, |
| "loss": 0.2475, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.4100831001147551, |
| "learning_rate": 4.909312344421923e-06, |
| "loss": 0.2414, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 6.571236025451696, |
| "learning_rate": 4.904130810647836e-06, |
| "loss": 0.4527, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 6.669335144079173, |
| "learning_rate": 4.898949379866556e-06, |
| "loss": 0.6132, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.4163218798256578, |
| "learning_rate": 4.893768057644519e-06, |
| "loss": 0.1848, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 11.033302150040155, |
| "learning_rate": 4.888586849548053e-06, |
| "loss": 0.7577, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.511734054726008, |
| "learning_rate": 4.883405761143357e-06, |
| "loss": 0.242, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 7.732217467007269, |
| "learning_rate": 4.878224797996502e-06, |
| "loss": 0.634, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.5410664491465915, |
| "learning_rate": 4.873043965673427e-06, |
| "loss": 0.2178, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.648199921207977, |
| "learning_rate": 4.86786326973993e-06, |
| "loss": 0.1893, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.6163905000816106, |
| "learning_rate": 4.862682715761658e-06, |
| "loss": 0.2211, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 7.6480927880722005, |
| "learning_rate": 4.857502309304114e-06, |
| "loss": 0.4649, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 7.508552081177089, |
| "learning_rate": 4.852322055932633e-06, |
| "loss": 0.5303, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 3.464004710850647, |
| "learning_rate": 4.8471419612123925e-06, |
| "loss": 0.4644, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.6136388326074615, |
| "learning_rate": 4.841962030708398e-06, |
| "loss": 0.2076, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 7.585705888438502, |
| "learning_rate": 4.836782269985475e-06, |
| "loss": 0.5187, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.4571905814330208, |
| "learning_rate": 4.831602684608274e-06, |
| "loss": 0.2184, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.5722559874679716, |
| "learning_rate": 4.826423280141247e-06, |
| "loss": 0.2403, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 4.848711773204869, |
| "learning_rate": 4.82124406214866e-06, |
| "loss": 0.604, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 8.078099268633892, |
| "learning_rate": 4.816065036194576e-06, |
| "loss": 0.6213, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.4522697917992005, |
| "learning_rate": 4.810886207842852e-06, |
| "loss": 0.1996, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 9.659059380062939, |
| "learning_rate": 4.80570758265713e-06, |
| "loss": 0.5396, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.3500567745829162, |
| "learning_rate": 4.800529166200837e-06, |
| "loss": 0.1811, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.409901723972015, |
| "learning_rate": 4.795350964037174e-06, |
| "loss": 0.193, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.67728842944199, |
| "learning_rate": 4.790172981729116e-06, |
| "loss": 0.2073, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.4747163179967033, |
| "learning_rate": 4.784995224839394e-06, |
| "loss": 0.2011, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.3918472947201297, |
| "learning_rate": 4.779817698930502e-06, |
| "loss": 0.1807, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.5452694954497928, |
| "learning_rate": 4.774640409564688e-06, |
| "loss": 0.2433, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.482549087730426, |
| "learning_rate": 4.76946336230394e-06, |
| "loss": 0.2244, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.7114952280499294, |
| "learning_rate": 4.76428656270999e-06, |
| "loss": 0.1936, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.7086166774833853, |
| "learning_rate": 4.759110016344302e-06, |
| "loss": 0.2624, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 5.141583085656002, |
| "learning_rate": 4.753933728768069e-06, |
| "loss": 0.7628, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.5872873234003366, |
| "learning_rate": 4.748757705542205e-06, |
| "loss": 0.2484, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 5.550112130971094, |
| "learning_rate": 4.743581952227342e-06, |
| "loss": 0.4721, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 8.536380150480248, |
| "learning_rate": 4.73840647438382e-06, |
| "loss": 0.6757, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 4.989894442579678, |
| "learning_rate": 4.733231277571683e-06, |
| "loss": 0.6371, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.4358952778672174, |
| "learning_rate": 4.7280563673506745e-06, |
| "loss": 0.205, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 6.398696302319853, |
| "learning_rate": 4.722881749280232e-06, |
| "loss": 0.6624, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.5544463809533282, |
| "learning_rate": 4.717707428919471e-06, |
| "loss": 0.2099, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.5735953592365777, |
| "learning_rate": 4.712533411827197e-06, |
| "loss": 0.2089, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.9030007651152627, |
| "learning_rate": 4.707359703561885e-06, |
| "loss": 0.2436, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 13.291749202832973, |
| "learning_rate": 4.702186309681677e-06, |
| "loss": 0.5519, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.410983901672467, |
| "learning_rate": 4.697013235744382e-06, |
| "loss": 0.1887, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.613523732340002, |
| "learning_rate": 4.6918404873074574e-06, |
| "loss": 0.2727, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.6072885094968894, |
| "learning_rate": 4.68666806992802e-06, |
| "loss": 0.2198, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.5578545006744975, |
| "learning_rate": 4.681495989162826e-06, |
| "loss": 0.1873, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.3940148056278308, |
| "learning_rate": 4.676324250568269e-06, |
| "loss": 0.2252, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 5.288350557992271, |
| "learning_rate": 4.671152859700377e-06, |
| "loss": 0.5956, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 18.703974121950782, |
| "learning_rate": 4.665981822114805e-06, |
| "loss": 0.68, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.5045278407254707, |
| "learning_rate": 4.660811143366828e-06, |
| "loss": 0.2309, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.7567489603891941, |
| "learning_rate": 4.655640829011335e-06, |
| "loss": 0.2693, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 12.370221926729593, |
| "learning_rate": 4.65047088460282e-06, |
| "loss": 0.5591, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 6.638345947199636, |
| "learning_rate": 4.645301315695387e-06, |
| "loss": 0.6048, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 7.489636741394892, |
| "learning_rate": 4.6401321278427334e-06, |
| "loss": 0.5245, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 6.883036801328955, |
| "learning_rate": 4.634963326598143e-06, |
| "loss": 0.4518, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 6.186896493463807, |
| "learning_rate": 4.629794917514492e-06, |
| "loss": 0.4841, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.3125884874686817, |
| "learning_rate": 4.624626906144227e-06, |
| "loss": 0.2029, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 9.82696660510622, |
| "learning_rate": 4.619459298039373e-06, |
| "loss": 0.7887, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.4749125491531885, |
| "learning_rate": 4.614292098751524e-06, |
| "loss": 0.194, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.4302320856558082, |
| "learning_rate": 4.609125313831826e-06, |
| "loss": 0.2203, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.295901379646909, |
| "learning_rate": 4.603958948830985e-06, |
| "loss": 0.2134, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 7.043975863801591, |
| "learning_rate": 4.5987930092992596e-06, |
| "loss": 0.6505, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 4.988966009411801, |
| "learning_rate": 4.593627500786444e-06, |
| "loss": 0.5901, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 7.100701994793637, |
| "learning_rate": 4.588462428841875e-06, |
| "loss": 0.4935, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.5612284060847004, |
| "learning_rate": 4.5832977990144165e-06, |
| "loss": 0.2397, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 7.763162077786609, |
| "learning_rate": 4.578133616852462e-06, |
| "loss": 0.5933, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.6277933828415267, |
| "learning_rate": 4.572969887903916e-06, |
| "loss": 0.2468, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 9.051605590968945, |
| "learning_rate": 4.5678066177162065e-06, |
| "loss": 0.5205, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.5020282985899696, |
| "learning_rate": 4.562643811836263e-06, |
| "loss": 0.2084, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.5209349923279736, |
| "learning_rate": 4.557481475810512e-06, |
| "loss": 0.199, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.5830508474830205, |
| "learning_rate": 4.5523196151848846e-06, |
| "loss": 0.2403, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.5070688422079719, |
| "learning_rate": 4.547158235504797e-06, |
| "loss": 0.2077, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 5.724310924018424, |
| "learning_rate": 4.541997342315145e-06, |
| "loss": 0.6048, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.665470954850008, |
| "learning_rate": 4.536836941160308e-06, |
| "loss": 0.2369, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.437798091753683, |
| "learning_rate": 4.5316770375841315e-06, |
| "loss": 0.2101, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 5.1224393390996115, |
| "learning_rate": 4.526517637129927e-06, |
| "loss": 0.6111, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.673943747936093, |
| "learning_rate": 4.5213587453404736e-06, |
| "loss": 0.2374, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 7.690904641655284, |
| "learning_rate": 4.5162003677579905e-06, |
| "loss": 0.4557, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 14.562195036058197, |
| "learning_rate": 4.511042509924157e-06, |
| "loss": 0.6341, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 6.243810690232639, |
| "learning_rate": 4.505885177380083e-06, |
| "loss": 0.5888, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.475041588060696, |
| "learning_rate": 4.5007283756663245e-06, |
| "loss": 0.2086, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.6999297366948942, |
| "learning_rate": 4.495572110322862e-06, |
| "loss": 0.2425, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.4591341691196764, |
| "learning_rate": 4.490416386889097e-06, |
| "loss": 0.2335, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.5909566837696103, |
| "learning_rate": 4.485261210903854e-06, |
| "loss": 0.2214, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.6263574903008335, |
| "learning_rate": 4.48010658790537e-06, |
| "loss": 0.2573, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.715216576143055, |
| "learning_rate": 4.47495252343128e-06, |
| "loss": 0.2013, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 7.54061829603288, |
| "learning_rate": 4.469799023018628e-06, |
| "loss": 0.552, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 5.734174964300568, |
| "learning_rate": 4.464646092203846e-06, |
| "loss": 0.6602, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.6703194095537768, |
| "learning_rate": 4.459493736522759e-06, |
| "loss": 0.2743, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 16.523338714753198, |
| "learning_rate": 4.4543419615105685e-06, |
| "loss": 0.5262, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.470165111573854, |
| "learning_rate": 4.449190772701857e-06, |
| "loss": 0.2122, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 5.174557962848891, |
| "learning_rate": 4.444040175630577e-06, |
| "loss": 0.4696, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.473974371288623, |
| "learning_rate": 4.438890175830039e-06, |
| "loss": 0.2049, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 6.846259281417841, |
| "learning_rate": 4.433740778832919e-06, |
| "loss": 0.5117, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.618092015002244, |
| "learning_rate": 4.428591990171246e-06, |
| "loss": 0.2407, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.314876889469853, |
| "learning_rate": 4.423443815376387e-06, |
| "loss": 0.2395, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.3907786379474978, |
| "learning_rate": 4.41829625997906e-06, |
| "loss": 0.1988, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 5.320405944111289, |
| "learning_rate": 4.413149329509307e-06, |
| "loss": 0.5652, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 6.131506633508225, |
| "learning_rate": 4.4080030294965085e-06, |
| "loss": 0.6283, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 7.11126164609056, |
| "learning_rate": 4.402857365469364e-06, |
| "loss": 0.6257, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 5.173440219100077, |
| "learning_rate": 4.397712342955885e-06, |
| "loss": 0.4752, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.5879725972573628, |
| "learning_rate": 4.392567967483401e-06, |
| "loss": 0.1979, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.6411051746008234, |
| "learning_rate": 4.387424244578543e-06, |
| "loss": 0.2097, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.3874887488821213, |
| "learning_rate": 4.38228117976724e-06, |
| "loss": 0.1998, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 6.474706912612365, |
| "learning_rate": 4.377138778574716e-06, |
| "loss": 0.598, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.4060198079872865, |
| "learning_rate": 4.371997046525481e-06, |
| "loss": 0.1733, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 5.102142597037899, |
| "learning_rate": 4.366855989143326e-06, |
| "loss": 0.4742, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.6057890375213386, |
| "learning_rate": 4.3617156119513206e-06, |
| "loss": 0.2359, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 6.140387765097553, |
| "learning_rate": 4.356575920471796e-06, |
| "loss": 0.5508, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 4.3850640405697385, |
| "learning_rate": 4.351436920226357e-06, |
| "loss": 0.4268, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.5197048783350897, |
| "learning_rate": 4.346298616735855e-06, |
| "loss": 0.2046, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 20.629712192925226, |
| "learning_rate": 4.3411610155204e-06, |
| "loss": 0.4235, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.6883210321669588, |
| "learning_rate": 4.336024122099348e-06, |
| "loss": 0.1832, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.5904537055421788, |
| "learning_rate": 4.330887941991288e-06, |
| "loss": 0.2142, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 5.795803994129138, |
| "learning_rate": 4.325752480714052e-06, |
| "loss": 0.5504, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 6.523796739514021, |
| "learning_rate": 4.320617743784691e-06, |
| "loss": 0.6077, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.5539773029900492, |
| "learning_rate": 4.315483736719482e-06, |
| "loss": 0.1987, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.6922857302290626, |
| "learning_rate": 4.310350465033919e-06, |
| "loss": 0.2046, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.7881382147495553, |
| "learning_rate": 4.305217934242703e-06, |
| "loss": 0.2259, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 7.370882923128106, |
| "learning_rate": 4.30008614985974e-06, |
| "loss": 0.6432, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 6.986938487466245, |
| "learning_rate": 4.294955117398139e-06, |
| "loss": 0.5934, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.6701181700532506, |
| "learning_rate": 4.28982484237019e-06, |
| "loss": 0.2474, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.7166998278994992, |
| "learning_rate": 4.284695330287383e-06, |
| "loss": 0.2933, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.504265962284257, |
| "learning_rate": 4.279566586660375e-06, |
| "loss": 0.2139, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 4.411824646932481, |
| "learning_rate": 4.274438616999007e-06, |
| "loss": 0.5801, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.6267117477861073, |
| "learning_rate": 4.269311426812287e-06, |
| "loss": 0.1945, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.4356191284625996, |
| "learning_rate": 4.264185021608382e-06, |
| "loss": 0.2397, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.5724914920821444, |
| "learning_rate": 4.259059406894619e-06, |
| "loss": 0.2002, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.7246348319553122, |
| "learning_rate": 4.253934588177473e-06, |
| "loss": 0.2042, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 6.5861733210418905, |
| "learning_rate": 4.248810570962567e-06, |
| "loss": 0.5944, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.6896236515184517, |
| "learning_rate": 4.24368736075466e-06, |
| "loss": 0.2512, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.3707584100361558, |
| "learning_rate": 4.238564963057646e-06, |
| "loss": 0.1468, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 6.754086049900066, |
| "learning_rate": 4.233443383374545e-06, |
| "loss": 0.4956, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.7097356157742607, |
| "learning_rate": 4.228322627207499e-06, |
| "loss": 0.204, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.407742853131279, |
| "learning_rate": 4.223202700057765e-06, |
| "loss": 0.1992, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 3.8199802577515234, |
| "learning_rate": 4.21808360742571e-06, |
| "loss": 0.5266, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 4.401863554668091, |
| "learning_rate": 4.212965354810802e-06, |
| "loss": 0.4576, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 4.212513214675979, |
| "learning_rate": 4.207847947711609e-06, |
| "loss": 0.3691, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 7.627512871078331, |
| "learning_rate": 4.202731391625793e-06, |
| "loss": 0.5152, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.7060818304324066, |
| "learning_rate": 4.1976156920500935e-06, |
| "loss": 0.2203, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.5876225424702133, |
| "learning_rate": 4.19250085448034e-06, |
| "loss": 0.2687, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 6.748841244034787, |
| "learning_rate": 4.187386884411426e-06, |
| "loss": 0.6513, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.7081167230192895, |
| "learning_rate": 4.182273787337323e-06, |
| "loss": 0.2453, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 6.566783302797043, |
| "learning_rate": 4.177161568751058e-06, |
| "loss": 0.4728, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.3607097035796645, |
| "learning_rate": 4.172050234144716e-06, |
| "loss": 0.1577, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 5.071976306728582, |
| "learning_rate": 4.16693978900943e-06, |
| "loss": 0.5186, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.4231539023670694, |
| "learning_rate": 4.161830238835386e-06, |
| "loss": 0.2001, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 6.42368716816888, |
| "learning_rate": 4.156721589111794e-06, |
| "loss": 0.5327, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 7.420872560774164, |
| "learning_rate": 4.151613845326912e-06, |
| "loss": 0.595, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 8.846826652876, |
| "learning_rate": 4.146507012968013e-06, |
| "loss": 0.508, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.7543235755795679, |
| "learning_rate": 4.141401097521396e-06, |
| "loss": 0.2541, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.5510284346806966, |
| "learning_rate": 4.136296104472378e-06, |
| "loss": 0.2492, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 5.458911366718936, |
| "learning_rate": 4.131192039305278e-06, |
| "loss": 0.4002, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.4430146067488399, |
| "learning_rate": 4.1260889075034254e-06, |
| "loss": 0.1913, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 7.781277777619646, |
| "learning_rate": 4.120986714549139e-06, |
| "loss": 0.7018, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.4839708473717983, |
| "learning_rate": 4.115885465923734e-06, |
| "loss": 0.1654, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.4460528605904932, |
| "learning_rate": 4.110785167107514e-06, |
| "loss": 0.1957, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 6.773365435616381, |
| "learning_rate": 4.1056858235797545e-06, |
| "loss": 0.5647, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 12.220426218358293, |
| "learning_rate": 4.100587440818709e-06, |
| "loss": 0.5426, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 22.772817762692874, |
| "learning_rate": 4.0954900243016016e-06, |
| "loss": 0.7357, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 7.48829174411786, |
| "learning_rate": 4.090393579504612e-06, |
| "loss": 0.7169, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 4.648309740903713, |
| "learning_rate": 4.085298111902882e-06, |
| "loss": 0.5263, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.597602967806243, |
| "learning_rate": 4.080203626970498e-06, |
| "loss": 0.2189, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 6.071825457725986, |
| "learning_rate": 4.0751101301804945e-06, |
| "loss": 0.4255, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.652792052327222, |
| "learning_rate": 4.070017627004847e-06, |
| "loss": 0.2202, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.5537026369081302, |
| "learning_rate": 4.0649261229144554e-06, |
| "loss": 0.23, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 8.08258986927151, |
| "learning_rate": 4.059835623379155e-06, |
| "loss": 0.5197, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 7.320119345147985, |
| "learning_rate": 4.054746133867693e-06, |
| "loss": 0.5979, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.4035316461225102, |
| "learning_rate": 4.0496576598477396e-06, |
| "loss": 0.2177, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 4.939010069100584, |
| "learning_rate": 4.044570206785874e-06, |
| "loss": 0.5821, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 5.746077810353174, |
| "learning_rate": 4.039483780147568e-06, |
| "loss": 0.6111, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.6318997310079097, |
| "learning_rate": 4.0343983853972045e-06, |
| "loss": 0.278, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.6138834178838686, |
| "learning_rate": 4.029314027998049e-06, |
| "loss": 0.2164, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.5217006808164721, |
| "learning_rate": 4.024230713412253e-06, |
| "loss": 0.2481, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.560099534240655, |
| "learning_rate": 4.019148447100855e-06, |
| "loss": 0.1939, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 4.902827913888488, |
| "learning_rate": 4.014067234523756e-06, |
| "loss": 0.4879, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.3308791069045471, |
| "learning_rate": 4.008987081139734e-06, |
| "loss": 0.1588, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 5.461107239679109, |
| "learning_rate": 4.0039079924064285e-06, |
| "loss": 0.5968, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 5.210652903402672, |
| "learning_rate": 3.998829973780329e-06, |
| "loss": 0.497, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.4707306984844475, |
| "learning_rate": 3.993753030716783e-06, |
| "loss": 0.236, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.5171120911365463, |
| "learning_rate": 3.988677168669974e-06, |
| "loss": 0.2225, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 4.87098980089652, |
| "learning_rate": 3.983602393092931e-06, |
| "loss": 0.4864, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.4358674306353407, |
| "learning_rate": 3.978528709437518e-06, |
| "loss": 0.2057, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.3934230347776113, |
| "learning_rate": 3.973456123154415e-06, |
| "loss": 0.1959, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.477054205162155, |
| "learning_rate": 3.9683846396931345e-06, |
| "loss": 0.1982, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 8.558841450833153, |
| "learning_rate": 3.9633142645019965e-06, |
| "loss": 0.6265, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.5627936620922227, |
| "learning_rate": 3.958245003028136e-06, |
| "loss": 0.2235, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 6.318760709208686, |
| "learning_rate": 3.953176860717488e-06, |
| "loss": 0.5263, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 7.759952604054854, |
| "learning_rate": 3.948109843014784e-06, |
| "loss": 0.6218, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 5.424748184559697, |
| "learning_rate": 3.9430439553635504e-06, |
| "loss": 0.457, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.5948020264956497, |
| "learning_rate": 3.937979203206103e-06, |
| "loss": 0.1881, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.607966744580618, |
| "learning_rate": 3.932915591983526e-06, |
| "loss": 0.2084, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.4836365539928573, |
| "learning_rate": 3.927853127135692e-06, |
| "loss": 0.2011, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.5935039901049737, |
| "learning_rate": 3.92279181410123e-06, |
| "loss": 0.2311, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.478616159116555, |
| "learning_rate": 3.917731658317538e-06, |
| "loss": 0.2153, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 6.201648254842364, |
| "learning_rate": 3.912672665220773e-06, |
| "loss": 0.4949, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.707593293804673, |
| "learning_rate": 3.907614840245836e-06, |
| "loss": 0.2136, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 12.780929778792782, |
| "learning_rate": 3.90255818882638e-06, |
| "loss": 0.6331, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.5792322617919698, |
| "learning_rate": 3.897502716394789e-06, |
| "loss": 0.2206, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.580528741954114, |
| "learning_rate": 3.892448428382189e-06, |
| "loss": 0.1995, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 6.1834818314351425, |
| "learning_rate": 3.887395330218429e-06, |
| "loss": 0.6776, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 6.116530477185117, |
| "learning_rate": 3.8823434273320794e-06, |
| "loss": 0.4948, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 39.93379751672083, |
| "learning_rate": 3.877292725150429e-06, |
| "loss": 0.7192, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.4710177923811774, |
| "learning_rate": 3.872243229099476e-06, |
| "loss": 0.2086, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.4747346622685849, |
| "learning_rate": 3.86719494460392e-06, |
| "loss": 0.1929, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 9.177001305514132, |
| "learning_rate": 3.8621478770871645e-06, |
| "loss": 0.5628, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.6173533928294679, |
| "learning_rate": 3.857102031971298e-06, |
| "loss": 0.2614, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 10.674067426470812, |
| "learning_rate": 3.852057414677102e-06, |
| "loss": 0.7517, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.671545325079946, |
| "learning_rate": 3.84701403062404e-06, |
| "loss": 0.2241, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.4176956897590025, |
| "learning_rate": 3.841971885230243e-06, |
| "loss": 0.2066, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.5737701194084075, |
| "learning_rate": 3.83693098391252e-06, |
| "loss": 0.2385, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 6.775264960285367, |
| "learning_rate": 3.8318913320863355e-06, |
| "loss": 0.5762, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 5.734145522241342, |
| "learning_rate": 3.826852935165818e-06, |
| "loss": 0.453, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.5178221139864456, |
| "learning_rate": 3.8218157985637465e-06, |
| "loss": 0.2138, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 6.409268309251431, |
| "learning_rate": 3.816779927691542e-06, |
| "loss": 0.5105, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 5.486427847842529, |
| "learning_rate": 3.811745327959271e-06, |
| "loss": 0.4761, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.6145787439209798, |
| "learning_rate": 3.8067120047756313e-06, |
| "loss": 0.2377, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 7.775836928688941, |
| "learning_rate": 3.801679963547949e-06, |
| "loss": 0.7424, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.6393563920290177, |
| "learning_rate": 3.7966492096821773e-06, |
| "loss": 0.2145, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.554227386371731, |
| "learning_rate": 3.7916197485828793e-06, |
| "loss": 0.2287, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 64.51241735007792, |
| "learning_rate": 3.786591585653235e-06, |
| "loss": 0.6451, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 5.94324955915427, |
| "learning_rate": 3.7815647262950293e-06, |
| "loss": 0.5017, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.627161395834181, |
| "learning_rate": 3.7765391759086424e-06, |
| "loss": 0.2171, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 9.63365608585251, |
| "learning_rate": 3.771514939893055e-06, |
| "loss": 0.5464, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 6.662502043180906, |
| "learning_rate": 3.766492023645827e-06, |
| "loss": 0.6075, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.3520781910506796, |
| "learning_rate": 3.761470432563109e-06, |
| "loss": 0.1646, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.4609425559575497, |
| "learning_rate": 3.7564501720396242e-06, |
| "loss": 0.1679, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.5524679112039492, |
| "learning_rate": 3.7514312474686643e-06, |
| "loss": 0.1711, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.4775660733216538, |
| "learning_rate": 3.74641366424209e-06, |
| "loss": 0.183, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 9.881574715535093, |
| "learning_rate": 3.7413974277503183e-06, |
| "loss": 0.5769, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 26.213951475540537, |
| "learning_rate": 3.7363825433823187e-06, |
| "loss": 0.5921, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.4810382533508477, |
| "learning_rate": 3.7313690165256134e-06, |
| "loss": 0.2302, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 4.144681656116014, |
| "learning_rate": 3.7263568525662574e-06, |
| "loss": 0.5283, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 14.430189983347654, |
| "learning_rate": 3.7213460568888493e-06, |
| "loss": 0.6371, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 5.732369200430303, |
| "learning_rate": 3.716336634876516e-06, |
| "loss": 0.508, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 7.109457208818201, |
| "learning_rate": 3.711328591910904e-06, |
| "loss": 0.4142, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.4623094872575517, |
| "learning_rate": 3.7063219333721857e-06, |
| "loss": 0.1638, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.339292140127946, |
| "learning_rate": 3.7013166646390384e-06, |
| "loss": 0.175, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.4369749908600948, |
| "learning_rate": 3.6963127910886526e-06, |
| "loss": 0.1886, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.3876469665691422, |
| "learning_rate": 3.691310318096719e-06, |
| "loss": 0.1957, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 15.726410514421962, |
| "learning_rate": 3.6863092510374198e-06, |
| "loss": 0.5672, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.7560756066611702, |
| "learning_rate": 3.68130959528343e-06, |
| "loss": 0.2264, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 9.844124211427951, |
| "learning_rate": 3.6763113562059077e-06, |
| "loss": 0.4824, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 6.583757731411973, |
| "learning_rate": 3.6713145391744877e-06, |
| "loss": 0.5138, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.542809947764467, |
| "learning_rate": 3.6663191495572827e-06, |
| "loss": 0.2279, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.6469751073956655, |
| "learning_rate": 3.661325192720862e-06, |
| "loss": 0.2698, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.4259574440751757, |
| "learning_rate": 3.6563326740302664e-06, |
| "loss": 0.1936, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.6102929765619705, |
| "learning_rate": 3.6513415988489824e-06, |
| "loss": 0.2107, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 11.98613733766446, |
| "learning_rate": 3.6463519725389516e-06, |
| "loss": 0.5655, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 4.931491582455747, |
| "learning_rate": 3.64136380046056e-06, |
| "loss": 0.5029, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.740292176898178, |
| "learning_rate": 3.6363770879726247e-06, |
| "loss": 0.2635, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 5.246034674002168, |
| "learning_rate": 3.6313918404324e-06, |
| "loss": 0.4744, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.5416381623598616, |
| "learning_rate": 3.6264080631955683e-06, |
| "loss": 0.2057, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 6.5065831334552025, |
| "learning_rate": 3.621425761616224e-06, |
| "loss": 0.4561, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.5709150371409581, |
| "learning_rate": 3.616444941046887e-06, |
| "loss": 0.226, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.7661564422053209, |
| "learning_rate": 3.6114656068384767e-06, |
| "loss": 0.2397, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.6447190302236576, |
| "learning_rate": 3.6064877643403194e-06, |
| "loss": 0.2028, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 7.133717608567513, |
| "learning_rate": 3.601511418900143e-06, |
| "loss": 0.6751, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 8.186006171230737, |
| "learning_rate": 3.5965365758640587e-06, |
| "loss": 0.5874, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.7959754256515827, |
| "learning_rate": 3.591563240576572e-06, |
| "loss": 0.2749, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 8.066879169307333, |
| "learning_rate": 3.5865914183805606e-06, |
| "loss": 0.7024, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.561140650314446, |
| "learning_rate": 3.581621114617284e-06, |
| "loss": 0.2427, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.5876581831452745, |
| "learning_rate": 3.5766523346263682e-06, |
| "loss": 0.1859, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 4.346267893932182, |
| "learning_rate": 3.571685083745798e-06, |
| "loss": 0.4841, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 5.402297344543703, |
| "learning_rate": 3.56671936731192e-06, |
| "loss": 0.5467, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 22.42184732934803, |
| "learning_rate": 3.561755190659434e-06, |
| "loss": 0.6674, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4982944172410966, |
| "learning_rate": 3.556792559121377e-06, |
| "loss": 0.1991, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4595813614244566, |
| "learning_rate": 3.5518314780291384e-06, |
| "loss": 0.2148, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4316698683372513, |
| "learning_rate": 3.5468719527124294e-06, |
| "loss": 0.2419, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 9.347706963596861, |
| "learning_rate": 3.541913988499299e-06, |
| "loss": 0.5543, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 6.88633177109033, |
| "learning_rate": 3.5369575907161167e-06, |
| "loss": 0.6354, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4594224251034926, |
| "learning_rate": 3.5320027646875643e-06, |
| "loss": 0.2086, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 13.25718892488323, |
| "learning_rate": 3.5270495157366434e-06, |
| "loss": 0.5, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.423121637160957, |
| "learning_rate": 3.5220978491846534e-06, |
| "loss": 0.2008, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 5.130360114988962, |
| "learning_rate": 3.517147770351199e-06, |
| "loss": 0.6193, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 5.320821455316036, |
| "learning_rate": 3.5121992845541797e-06, |
| "loss": 0.4942, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.659820858403778, |
| "learning_rate": 3.507252397109777e-06, |
| "loss": 0.2019, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 2.63154526082194, |
| "learning_rate": 3.5023071133324627e-06, |
| "loss": 0.2248, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 7.022826563452933, |
| "learning_rate": 3.497363438534984e-06, |
| "loss": 0.5328, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 5.697366547300721, |
| "learning_rate": 3.4924213780283545e-06, |
| "loss": 0.5742, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.331484318545027, |
| "learning_rate": 3.4874809371218608e-06, |
| "loss": 0.1748, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 6.555156381913028, |
| "learning_rate": 3.4825421211230437e-06, |
| "loss": 0.4464, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.831238929483396, |
| "learning_rate": 3.4776049353377016e-06, |
| "loss": 0.2653, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.5539349573467844, |
| "learning_rate": 3.4726693850698824e-06, |
| "loss": 0.1831, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4921531483106225, |
| "learning_rate": 3.467735475621873e-06, |
| "loss": 0.2183, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 6.822048837955331, |
| "learning_rate": 3.4628032122942024e-06, |
| "loss": 0.6846, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4375609709992334, |
| "learning_rate": 3.4578726003856245e-06, |
| "loss": 0.1897, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 8.331973779115568, |
| "learning_rate": 3.4529436451931263e-06, |
| "loss": 0.6711, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.433757835791655, |
| "learning_rate": 3.448016352011914e-06, |
| "loss": 0.206, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4097043159816938, |
| "learning_rate": 3.4430907261354e-06, |
| "loss": 0.2271, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.4640202341252477, |
| "learning_rate": 3.438166772855218e-06, |
| "loss": 0.2438, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 7.217793275103471, |
| "learning_rate": 3.4332444974611946e-06, |
| "loss": 0.5843, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.6274207665218545, |
| "learning_rate": 3.428323905241358e-06, |
| "loss": 0.2075, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.542948114277552, |
| "learning_rate": 3.4234050014819308e-06, |
| "loss": 0.1905, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 4.881173284230082, |
| "learning_rate": 3.4184877914673155e-06, |
| "loss": 0.5326, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.6121710493528836, |
| "learning_rate": 3.4135722804801004e-06, |
| "loss": 0.2501, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 6.562998966937795, |
| "learning_rate": 3.4086584738010455e-06, |
| "loss": 0.7177, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 5.635588077680637, |
| "learning_rate": 3.4037463767090807e-06, |
| "loss": 0.7366, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 5.703916710895146, |
| "learning_rate": 3.3988359944812997e-06, |
| "loss": 0.5989, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 5.786545352497819, |
| "learning_rate": 3.3939273323929533e-06, |
| "loss": 0.5596, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.4666539704166872, |
| "learning_rate": 3.3890203957174437e-06, |
| "loss": 0.1994, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.336478320422882, |
| "learning_rate": 3.3841151897263234e-06, |
| "loss": 0.1835, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 6.148092605217953, |
| "learning_rate": 3.379211719689278e-06, |
| "loss": 0.4879, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.7210581135618828, |
| "learning_rate": 3.3743099908741385e-06, |
| "loss": 0.236, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 4.728235540536196, |
| "learning_rate": 3.3694100085468535e-06, |
| "loss": 0.5054, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 4.778817295690519, |
| "learning_rate": 3.364511777971504e-06, |
| "loss": 0.5797, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.6879178455255748, |
| "learning_rate": 3.3596153044102897e-06, |
| "loss": 0.2031, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 6.524561783199086, |
| "learning_rate": 3.354720593123514e-06, |
| "loss": 0.4807, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.4707396174392589, |
| "learning_rate": 3.349827649369596e-06, |
| "loss": 0.2166, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.4140887072173238, |
| "learning_rate": 3.3449364784050515e-06, |
| "loss": 0.1924, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.5230715057418331, |
| "learning_rate": 3.3400470854844925e-06, |
| "loss": 0.2251, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.588316726208856, |
| "learning_rate": 3.3351594758606222e-06, |
| "loss": 0.1941, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.392968028630388, |
| "learning_rate": 3.3302736547842263e-06, |
| "loss": 0.203, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 15.627180374394712, |
| "learning_rate": 3.3253896275041677e-06, |
| "loss": 0.6433, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.4727777997541183, |
| "learning_rate": 3.3205073992673885e-06, |
| "loss": 0.1627, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.3929575755280745, |
| "learning_rate": 3.3156269753188895e-06, |
| "loss": 0.2365, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.6450741699218787, |
| "learning_rate": 3.310748360901741e-06, |
| "loss": 0.2574, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.4319705292160325, |
| "learning_rate": 3.3058715612570623e-06, |
| "loss": 0.1816, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.907979158558452, |
| "learning_rate": 3.300996581624028e-06, |
| "loss": 0.2189, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.5258151364041213, |
| "learning_rate": 3.2961234272398578e-06, |
| "loss": 0.22, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.5108684651351383, |
| "learning_rate": 3.291252103339806e-06, |
| "loss": 0.2239, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 8.675314497598764, |
| "learning_rate": 3.2863826151571654e-06, |
| "loss": 0.6347, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.5687204871979519, |
| "learning_rate": 3.2815149679232507e-06, |
| "loss": 0.2125, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.5618198177694862, |
| "learning_rate": 3.276649166867406e-06, |
| "loss": 0.1645, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 21.69616175468184, |
| "learning_rate": 3.271785217216987e-06, |
| "loss": 0.5923, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.6603910334654677, |
| "learning_rate": 3.266923124197363e-06, |
| "loss": 0.2036, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 9.831953297013888, |
| "learning_rate": 3.2620628930319065e-06, |
| "loss": 0.5955, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.4383358845394665, |
| "learning_rate": 3.257204528941993e-06, |
| "loss": 0.1878, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.8937038713171377, |
| "learning_rate": 3.2523480371469863e-06, |
| "loss": 0.2566, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.6601631341773913, |
| "learning_rate": 3.2474934228642475e-06, |
| "loss": 0.2116, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.4946472491543998, |
| "learning_rate": 3.242640691309111e-06, |
| "loss": 0.2136, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.663572259417549, |
| "learning_rate": 3.2377898476948964e-06, |
| "loss": 0.2139, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.563525039515537, |
| "learning_rate": 3.2329408972328934e-06, |
| "loss": 0.2232, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 8.282674321367239, |
| "learning_rate": 3.2280938451323524e-06, |
| "loss": 0.5165, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 5.514702796745394, |
| "learning_rate": 3.223248696600493e-06, |
| "loss": 0.5625, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 7.337272723423271, |
| "learning_rate": 3.2184054568424817e-06, |
| "loss": 0.652, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 13.4565422202732, |
| "learning_rate": 3.2135641310614383e-06, |
| "loss": 0.4632, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 8.20574380936658, |
| "learning_rate": 3.20872472445843e-06, |
| "loss": 0.5901, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.6472218244461125, |
| "learning_rate": 3.203887242232455e-06, |
| "loss": 0.2726, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 5.353518645247188, |
| "learning_rate": 3.1990516895804467e-06, |
| "loss": 0.6127, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 6.213613480442003, |
| "learning_rate": 3.1942180716972698e-06, |
| "loss": 0.6286, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.7739841634082338, |
| "learning_rate": 3.189386393775703e-06, |
| "loss": 0.2519, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.6210267913735106, |
| "learning_rate": 3.1845566610064487e-06, |
| "loss": 0.1995, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.5761620412579846, |
| "learning_rate": 3.179728878578112e-06, |
| "loss": 0.2101, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.4856745471000272, |
| "learning_rate": 3.1749030516772084e-06, |
| "loss": 0.2027, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.486780761283473, |
| "learning_rate": 3.170079185488153e-06, |
| "loss": 0.2093, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 10.021905881679904, |
| "learning_rate": 3.165257285193248e-06, |
| "loss": 0.5723, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 6.322326068998638, |
| "learning_rate": 3.1604373559726915e-06, |
| "loss": 0.5558, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 9.687341983036632, |
| "learning_rate": 3.1556194030045563e-06, |
| "loss": 0.5726, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 6.1359474444950735, |
| "learning_rate": 3.1508034314647994e-06, |
| "loss": 0.5188, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 9.759980812467012, |
| "learning_rate": 3.1459894465272467e-06, |
| "loss": 0.7004, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 9.084618702056398, |
| "learning_rate": 3.1411774533635854e-06, |
| "loss": 0.5408, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 8.909269377302472, |
| "learning_rate": 3.136367457143369e-06, |
| "loss": 0.465, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 8.94043320476312, |
| "learning_rate": 3.1315594630340052e-06, |
| "loss": 0.6813, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 6.183749546562952, |
| "learning_rate": 3.1267534762007435e-06, |
| "loss": 0.4669, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.5741234273784965, |
| "learning_rate": 3.1219495018066888e-06, |
| "loss": 0.2195, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 7.276163102620715, |
| "learning_rate": 3.1171475450127717e-06, |
| "loss": 0.636, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.6501460615291372, |
| "learning_rate": 3.112347610977764e-06, |
| "loss": 0.2233, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 6.706422678256099, |
| "learning_rate": 3.1075497048582635e-06, |
| "loss": 0.5977, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 5.121825184827592, |
| "learning_rate": 3.102753831808685e-06, |
| "loss": 0.5635, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 5.944671423831112, |
| "learning_rate": 3.097959996981263e-06, |
| "loss": 0.6126, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.5199018358150311, |
| "learning_rate": 3.093168205526038e-06, |
| "loss": 0.1821, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 5.402984492584964, |
| "learning_rate": 3.0883784625908618e-06, |
| "loss": 0.5082, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 8.79597122978233, |
| "learning_rate": 3.083590773321383e-06, |
| "loss": 0.6937, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 6.055763599004475, |
| "learning_rate": 3.0788051428610377e-06, |
| "loss": 0.5702, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.539020068194516, |
| "learning_rate": 3.0740215763510617e-06, |
| "loss": 0.2656, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.4104076247022932, |
| "learning_rate": 3.069240078930461e-06, |
| "loss": 0.1914, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 6.2384152458290965, |
| "learning_rate": 3.0644606557360303e-06, |
| "loss": 0.554, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 5.780603707969804, |
| "learning_rate": 3.0596833119023283e-06, |
| "loss": 0.5852, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.4946923984893086, |
| "learning_rate": 3.054908052561681e-06, |
| "loss": 0.2216, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.5449652616854799, |
| "learning_rate": 3.0501348828441767e-06, |
| "loss": 0.2157, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 7.333546997801793, |
| "learning_rate": 3.0453638078776614e-06, |
| "loss": 0.5461, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.411111171589759, |
| "learning_rate": 3.0405948327877233e-06, |
| "loss": 0.1716, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 9.2317474522265, |
| "learning_rate": 3.0358279626977034e-06, |
| "loss": 0.6057, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.6595407476924195, |
| "learning_rate": 3.0310632027286717e-06, |
| "loss": 0.252, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 10.38123760792797, |
| "learning_rate": 3.026300557999439e-06, |
| "loss": 0.4916, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 9.01720415482311, |
| "learning_rate": 3.021540033626544e-06, |
| "loss": 0.6574, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.4450001469176612, |
| "learning_rate": 3.0167816347242396e-06, |
| "loss": 0.1954, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.549734064948244, |
| "learning_rate": 3.012025366404504e-06, |
| "loss": 0.1883, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.409288695489273, |
| "learning_rate": 3.00727123377702e-06, |
| "loss": 0.1711, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.496332161334161, |
| "learning_rate": 3.002519241949181e-06, |
| "loss": 0.1661, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.5805932084747782, |
| "learning_rate": 2.997769396026078e-06, |
| "loss": 0.2469, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.5664354665584634, |
| "learning_rate": 2.9930217011104957e-06, |
| "loss": 0.2136, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.6462334538493848, |
| "learning_rate": 2.98827616230291e-06, |
| "loss": 0.2298, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 6.35756523628157, |
| "learning_rate": 2.9835327847014816e-06, |
| "loss": 0.5649, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 6.929907060457112, |
| "learning_rate": 2.9787915734020446e-06, |
| "loss": 0.3497, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 9.218474483538316, |
| "learning_rate": 2.9740525334981105e-06, |
| "loss": 0.5576, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 8.80653089641477, |
| "learning_rate": 2.9693156700808556e-06, |
| "loss": 0.5257, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.537386140580507, |
| "learning_rate": 2.9645809882391187e-06, |
| "loss": 0.2227, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 5.894783871681709, |
| "learning_rate": 2.959848493059396e-06, |
| "loss": 0.5558, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.4413244205235274, |
| "learning_rate": 2.9551181896258317e-06, |
| "loss": 0.1972, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 8.829090357580167, |
| "learning_rate": 2.9503900830202202e-06, |
| "loss": 0.645, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.4472880455418229, |
| "learning_rate": 2.9456641783219897e-06, |
| "loss": 0.1827, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 14.388491914537449, |
| "learning_rate": 2.9409404806082077e-06, |
| "loss": 0.5649, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.49466934015538, |
| "learning_rate": 2.936218994953568e-06, |
| "loss": 0.2012, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.6108952372793497, |
| "learning_rate": 2.93149972643039e-06, |
| "loss": 0.2223, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.4561213432155709, |
| "learning_rate": 2.9267826801086103e-06, |
| "loss": 0.2059, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.3975976715050646, |
| "learning_rate": 2.9220678610557773e-06, |
| "loss": 0.188, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 5.734050672169014, |
| "learning_rate": 2.9173552743370454e-06, |
| "loss": 0.5869, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.382280356501303, |
| "learning_rate": 2.912644925015179e-06, |
| "loss": 0.1759, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.540302821087291, |
| "learning_rate": 2.9079368181505263e-06, |
| "loss": 0.1892, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 5.756507654015998, |
| "learning_rate": 2.9032309588010372e-06, |
| "loss": 0.589, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.423251323977148, |
| "learning_rate": 2.8985273520222414e-06, |
| "loss": 0.1678, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 14.511239475297987, |
| "learning_rate": 2.893826002867247e-06, |
| "loss": 0.5658, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 5.579483150865824, |
| "learning_rate": 2.889126916386744e-06, |
| "loss": 0.6073, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 6.3032834228556505, |
| "learning_rate": 2.884430097628984e-06, |
| "loss": 0.5893, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 10.049849649109069, |
| "learning_rate": 2.879735551639787e-06, |
| "loss": 0.7286, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.7248569077162244, |
| "learning_rate": 2.8750432834625312e-06, |
| "loss": 0.2495, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.4096401965762477, |
| "learning_rate": 2.8703532981381437e-06, |
| "loss": 0.1887, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 17.53524436300472, |
| "learning_rate": 2.8656656007051055e-06, |
| "loss": 0.5363, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.5426792696958962, |
| "learning_rate": 2.860980196199432e-06, |
| "loss": 0.2052, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.4129423465102582, |
| "learning_rate": 2.8562970896546815e-06, |
| "loss": 0.2227, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.3850626142230287, |
| "learning_rate": 2.8516162861019437e-06, |
| "loss": 0.1734, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.5490721017643914, |
| "learning_rate": 2.846937790569828e-06, |
| "loss": 0.2004, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.4591522754651152, |
| "learning_rate": 2.84226160808447e-06, |
| "loss": 0.1587, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 7.18258853851953, |
| "learning_rate": 2.837587743669521e-06, |
| "loss": 0.6012, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 6.390632791410251, |
| "learning_rate": 2.8329162023461355e-06, |
| "loss": 0.6074, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 4.020579988074557, |
| "learning_rate": 2.82824698913298e-06, |
| "loss": 0.3801, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 7.657631858464726, |
| "learning_rate": 2.823580109046212e-06, |
| "loss": 0.4631, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.703102975789837, |
| "learning_rate": 2.8189155670994913e-06, |
| "loss": 0.2326, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 5.848069849088212, |
| "learning_rate": 2.814253368303961e-06, |
| "loss": 0.5003, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 7.306567833977227, |
| "learning_rate": 2.809593517668243e-06, |
| "loss": 0.5175, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.56651860206685, |
| "learning_rate": 2.804936020198447e-06, |
| "loss": 0.2633, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.4504924419562817, |
| "learning_rate": 2.800280880898143e-06, |
| "loss": 0.1824, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.7417020399544938, |
| "learning_rate": 2.795628104768376e-06, |
| "loss": 0.2284, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.4453823335646736, |
| "learning_rate": 2.79097769680765e-06, |
| "loss": 0.2183, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.5264151059448143, |
| "learning_rate": 2.7863296620119217e-06, |
| "loss": 0.182, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.6659410056454296, |
| "learning_rate": 2.781684005374604e-06, |
| "loss": 0.2121, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 9.928166120867077, |
| "learning_rate": 2.777040731886549e-06, |
| "loss": 0.4689, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 5.856001861805286, |
| "learning_rate": 2.7723998465360537e-06, |
| "loss": 0.6054, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.664206349083662, |
| "learning_rate": 2.7677613543088432e-06, |
| "loss": 0.2158, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.4771185514429073, |
| "learning_rate": 2.7631252601880816e-06, |
| "loss": 0.2255, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 5.576621706196896, |
| "learning_rate": 2.7584915691543444e-06, |
| "loss": 0.4679, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.8554856852860553, |
| "learning_rate": 2.753860286185637e-06, |
| "loss": 0.2524, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.507848522901589, |
| "learning_rate": 2.7492314162573687e-06, |
| "loss": 0.2138, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 5.643669411238887, |
| "learning_rate": 2.744604964342364e-06, |
| "loss": 0.5348, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.4696225760376407, |
| "learning_rate": 2.7399809354108415e-06, |
| "loss": 0.2237, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.4569540399198748, |
| "learning_rate": 2.735359334430424e-06, |
| "loss": 0.2011, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.654621339761889, |
| "learning_rate": 2.7307401663661247e-06, |
| "loss": 0.2536, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.64194064592359, |
| "learning_rate": 2.7261234361803383e-06, |
| "loss": 0.1966, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.5042566355669824, |
| "learning_rate": 2.721509148832847e-06, |
| "loss": 0.1871, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 11.15477235002936, |
| "learning_rate": 2.7168973092808025e-06, |
| "loss": 0.6684, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.500220526937373, |
| "learning_rate": 2.7122879224787315e-06, |
| "loss": 0.2112, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.5089956192186091, |
| "learning_rate": 2.7076809933785254e-06, |
| "loss": 0.209, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.3978138088369234, |
| "learning_rate": 2.70307652692943e-06, |
| "loss": 0.232, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 5.935467932603042, |
| "learning_rate": 2.6984745280780524e-06, |
| "loss": 0.5995, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 7.033255000629101, |
| "learning_rate": 2.6938750017683457e-06, |
| "loss": 0.5448, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 5.77239610023653, |
| "learning_rate": 2.6892779529416045e-06, |
| "loss": 0.5068, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.535161586473928, |
| "learning_rate": 2.6846833865364674e-06, |
| "loss": 0.2284, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.494737736566833, |
| "learning_rate": 2.6800913074888984e-06, |
| "loss": 0.2188, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 4.584594545726581, |
| "learning_rate": 2.6755017207321964e-06, |
| "loss": 0.5806, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 6.140984993945612, |
| "learning_rate": 2.6709146311969813e-06, |
| "loss": 0.6306, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.5974617544111862, |
| "learning_rate": 2.666330043811185e-06, |
| "loss": 0.2068, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.4237024176576276, |
| "learning_rate": 2.66174796350006e-06, |
| "loss": 0.1976, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 6.873143281215649, |
| "learning_rate": 2.657168395186157e-06, |
| "loss": 0.5466, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 10.481236055001368, |
| "learning_rate": 2.6525913437893346e-06, |
| "loss": 0.5907, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 7.5131083787464314, |
| "learning_rate": 2.648016814226742e-06, |
| "loss": 0.6489, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 5.960104977478367, |
| "learning_rate": 2.6434448114128252e-06, |
| "loss": 0.5608, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 5.589418506171804, |
| "learning_rate": 2.6388753402593083e-06, |
| "loss": 0.4707, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.480444693363657, |
| "learning_rate": 2.6343084056752032e-06, |
| "loss": 0.1878, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.4776866473270172, |
| "learning_rate": 2.6297440125667904e-06, |
| "loss": 0.1888, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.526571669167614, |
| "learning_rate": 2.6251821658376265e-06, |
| "loss": 0.2001, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 7.8001761727734475, |
| "learning_rate": 2.620622870388524e-06, |
| "loss": 0.5157, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 5.482754910414564, |
| "learning_rate": 2.616066131117563e-06, |
| "loss": 0.5156, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 8.68340690123736, |
| "learning_rate": 2.6115119529200748e-06, |
| "loss": 0.748, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 8.952323657950684, |
| "learning_rate": 2.6069603406886347e-06, |
| "loss": 0.7035, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 5.986362463099499, |
| "learning_rate": 2.60241129931307e-06, |
| "loss": 0.494, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.439463408245643, |
| "learning_rate": 2.597864833680436e-06, |
| "loss": 0.2374, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 5.119700884405849, |
| "learning_rate": 2.593320948675029e-06, |
| "loss": 0.531, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.5447946877587033, |
| "learning_rate": 2.588779649178371e-06, |
| "loss": 0.1656, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 10.055606302840184, |
| "learning_rate": 2.5842409400692026e-06, |
| "loss": 0.4637, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 5.548653527200506, |
| "learning_rate": 2.579704826223488e-06, |
| "loss": 0.5466, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 9.668830579623814, |
| "learning_rate": 2.575171312514395e-06, |
| "loss": 0.5984, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 6.052766473122458, |
| "learning_rate": 2.570640403812306e-06, |
| "loss": 0.5132, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 5.124825402309653, |
| "learning_rate": 2.5661121049848026e-06, |
| "loss": 0.5369, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 4.929832561363666, |
| "learning_rate": 2.5615864208966573e-06, |
| "loss": 0.4245, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.807616728517506, |
| "learning_rate": 2.55706335640984e-06, |
| "loss": 0.232, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.4950067489323393, |
| "learning_rate": 2.552542916383507e-06, |
| "loss": 0.1892, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.5777636616102162, |
| "learning_rate": 2.5480251056739874e-06, |
| "loss": 0.1931, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.5057852293550578, |
| "learning_rate": 2.543509929134794e-06, |
| "loss": 0.2454, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 6.032793642333784, |
| "learning_rate": 2.5389973916166037e-06, |
| "loss": 0.5323, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.539488745621972, |
| "learning_rate": 2.534487497967262e-06, |
| "loss": 0.2214, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.8573540084986893, |
| "learning_rate": 2.529980253031774e-06, |
| "loss": 0.2008, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 7.887172142365945, |
| "learning_rate": 2.5254756616522953e-06, |
| "loss": 0.6405, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 12.12537771401812, |
| "learning_rate": 2.5209737286681367e-06, |
| "loss": 0.5544, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 6.755619318097899, |
| "learning_rate": 2.5164744589157488e-06, |
| "loss": 0.4186, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.293112090324478, |
| "learning_rate": 2.5119778572287195e-06, |
| "loss": 0.155, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.3807406177529438, |
| "learning_rate": 2.5074839284377774e-06, |
| "loss": 0.1717, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 9.441092800844213, |
| "learning_rate": 2.5029926773707713e-06, |
| "loss": 0.5546, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.3544900079158484, |
| "learning_rate": 2.49850410885268e-06, |
| "loss": 0.1522, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 5.412249658146296, |
| "learning_rate": 2.4940182277055987e-06, |
| "loss": 0.4664, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 9.5821734481972, |
| "learning_rate": 2.4895350387487304e-06, |
| "loss": 0.7129, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 9.044995221975011, |
| "learning_rate": 2.485054546798395e-06, |
| "loss": 0.5705, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.4767352774929565, |
| "learning_rate": 2.4805767566680057e-06, |
| "loss": 0.1969, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.5576494940386558, |
| "learning_rate": 2.4761016731680792e-06, |
| "loss": 0.1951, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.5841094849229551, |
| "learning_rate": 2.4716293011062248e-06, |
| "loss": 0.2328, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 4.941718067928212, |
| "learning_rate": 2.467159645287133e-06, |
| "loss": 0.396, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.333176738276689, |
| "learning_rate": 2.4626927105125834e-06, |
| "loss": 0.169, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.4017110709066132, |
| "learning_rate": 2.4582285015814263e-06, |
| "loss": 0.1784, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.4012363237470569, |
| "learning_rate": 2.4537670232895866e-06, |
| "loss": 0.1968, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.668346328054327, |
| "learning_rate": 2.4493082804300585e-06, |
| "loss": 0.2318, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 7.387875598852549, |
| "learning_rate": 2.4448522777928903e-06, |
| "loss": 0.6096, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 7.470739210749414, |
| "learning_rate": 2.4403990201651915e-06, |
| "loss": 0.5272, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.363422577842959, |
| "learning_rate": 2.435948512331125e-06, |
| "loss": 0.2037, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 6.057297144444324, |
| "learning_rate": 2.4315007590718913e-06, |
| "loss": 0.5972, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.5487673475235315, |
| "learning_rate": 2.427055765165741e-06, |
| "loss": 0.1995, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.4962584550323683, |
| "learning_rate": 2.4226135353879516e-06, |
| "loss": 0.1925, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.5106017838876058, |
| "learning_rate": 2.4181740745108377e-06, |
| "loss": 0.2128, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.5513108161316838, |
| "learning_rate": 2.413737387303739e-06, |
| "loss": 0.2068, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.5451192131024105, |
| "learning_rate": 2.4093034785330087e-06, |
| "loss": 0.1922, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 8.49502966881639, |
| "learning_rate": 2.4048723529620246e-06, |
| "loss": 0.5727, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 9.300165349897595, |
| "learning_rate": 2.4004440153511642e-06, |
| "loss": 0.6384, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 8.31084953343189, |
| "learning_rate": 2.396018470457821e-06, |
| "loss": 0.608, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.575142105462621, |
| "learning_rate": 2.3915957230363783e-06, |
| "loss": 0.1931, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.591976965943769, |
| "learning_rate": 2.3871757778382216e-06, |
| "loss": 0.2191, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.4538578312332406, |
| "learning_rate": 2.3827586396117207e-06, |
| "loss": 0.2013, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.4533705881801298, |
| "learning_rate": 2.378344313102231e-06, |
| "loss": 0.1762, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.8439687180760513, |
| "learning_rate": 2.373932803052089e-06, |
| "loss": 0.2151, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.5904218951550402, |
| "learning_rate": 2.369524114200607e-06, |
| "loss": 0.2218, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 10.188160335340218, |
| "learning_rate": 2.3651182512840604e-06, |
| "loss": 0.4334, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.393277693000533, |
| "learning_rate": 2.360715219035694e-06, |
| "loss": 0.239, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.6821612536370698, |
| "learning_rate": 2.356315022185712e-06, |
| "loss": 0.2136, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 6.536278091744725, |
| "learning_rate": 2.3519176654612657e-06, |
| "loss": 0.6949, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.5439508048161585, |
| "learning_rate": 2.3475231535864653e-06, |
| "loss": 0.2314, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 4.809688001029302, |
| "learning_rate": 2.3431314912823543e-06, |
| "loss": 0.458, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.5992254269863293, |
| "learning_rate": 2.338742683266923e-06, |
| "loss": 0.2103, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 5.189303577879517, |
| "learning_rate": 2.3343567342550933e-06, |
| "loss": 0.6068, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.6646510638154512, |
| "learning_rate": 2.329973648958712e-06, |
| "loss": 0.2296, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.5316039306664393, |
| "learning_rate": 2.3255934320865555e-06, |
| "loss": 0.1989, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 6.156795151941741, |
| "learning_rate": 2.3212160883443107e-06, |
| "loss": 0.6604, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.4808369477187273, |
| "learning_rate": 2.316841622434586e-06, |
| "loss": 0.235, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 8.765104106256636, |
| "learning_rate": 2.3124700390568945e-06, |
| "loss": 0.4996, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 9.551495354691971, |
| "learning_rate": 2.30810134290765e-06, |
| "loss": 0.6904, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.5267502119057248, |
| "learning_rate": 2.3037355386801683e-06, |
| "loss": 0.1824, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.520976510571733, |
| "learning_rate": 2.2993726310646603e-06, |
| "loss": 0.2111, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.6117412402555655, |
| "learning_rate": 2.2950126247482178e-06, |
| "loss": 0.2201, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 6.984267858215095, |
| "learning_rate": 2.2906555244148233e-06, |
| "loss": 0.5403, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 6.084749533351431, |
| "learning_rate": 2.2863013347453305e-06, |
| "loss": 0.6068, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.6333208376879278, |
| "learning_rate": 2.2819500604174733e-06, |
| "loss": 0.2174, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 8.002520614785373, |
| "learning_rate": 2.277601706105847e-06, |
| "loss": 0.5838, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 4.938421877014078, |
| "learning_rate": 2.2732562764819157e-06, |
| "loss": 0.4133, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 8.594016694676238, |
| "learning_rate": 2.2689137762139952e-06, |
| "loss": 0.5487, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 8.935725275208199, |
| "learning_rate": 2.264574209967262e-06, |
| "loss": 0.7306, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.6260834195103835, |
| "learning_rate": 2.260237582403732e-06, |
| "loss": 0.1896, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 11.481929304026814, |
| "learning_rate": 2.2559038981822724e-06, |
| "loss": 0.5342, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 5.7505730354611035, |
| "learning_rate": 2.2515731619585814e-06, |
| "loss": 0.3837, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 5.632461495267201, |
| "learning_rate": 2.247245378385195e-06, |
| "loss": 0.578, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 5.280938569839824, |
| "learning_rate": 2.242920552111473e-06, |
| "loss": 0.5747, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.622621353949337, |
| "learning_rate": 2.238598687783603e-06, |
| "loss": 0.2271, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.4094720336722553, |
| "learning_rate": 2.234279790044588e-06, |
| "loss": 0.2039, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.466984919198312, |
| "learning_rate": 2.229963863534241e-06, |
| "loss": 0.2309, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 11.239427326449347, |
| "learning_rate": 2.225650912889188e-06, |
| "loss": 0.4516, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.6792138363705542, |
| "learning_rate": 2.221340942742858e-06, |
| "loss": 0.2394, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 6.313849287440056, |
| "learning_rate": 2.2170339577254714e-06, |
| "loss": 0.5399, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 5.135409647455387, |
| "learning_rate": 2.212729962464051e-06, |
| "loss": 0.512, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.4052058341244174, |
| "learning_rate": 2.208428961582399e-06, |
| "loss": 0.2496, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.4710826958438774, |
| "learning_rate": 2.2041309597011057e-06, |
| "loss": 0.2335, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 25.617847624969013, |
| "learning_rate": 2.1998359614375412e-06, |
| "loss": 0.559, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.6057405096713442, |
| "learning_rate": 2.1955439714058422e-06, |
| "loss": 0.2036, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.4263831277019077, |
| "learning_rate": 2.191254994216922e-06, |
| "loss": 0.1784, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.301358164273251, |
| "learning_rate": 2.186969034478448e-06, |
| "loss": 0.1634, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 4.195040872383668, |
| "learning_rate": 2.182686096794852e-06, |
| "loss": 0.3323, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 7.276705829606348, |
| "learning_rate": 2.1784061857673217e-06, |
| "loss": 0.5848, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.609015303555973, |
| "learning_rate": 2.174129305993784e-06, |
| "loss": 0.229, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 8.030857623344524, |
| "learning_rate": 2.1698554620689178e-06, |
| "loss": 0.4022, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.636436337760029, |
| "learning_rate": 2.165584658584138e-06, |
| "loss": 0.222, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 14.087436399279088, |
| "learning_rate": 2.16131690012759e-06, |
| "loss": 0.5201, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 15.306812454070704, |
| "learning_rate": 2.157052191284154e-06, |
| "loss": 0.5923, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.6706590067478497, |
| "learning_rate": 2.1527905366354292e-06, |
| "loss": 0.2799, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.215860995659821, |
| "learning_rate": 2.1485319407597315e-06, |
| "loss": 0.1549, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 4.921574667054535, |
| "learning_rate": 2.1442764082321e-06, |
| "loss": 0.5129, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 5.732924842836198, |
| "learning_rate": 2.140023943624272e-06, |
| "loss": 0.6023, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 8.102984763000496, |
| "learning_rate": 2.135774551504698e-06, |
| "loss": 0.4917, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.6618262296117945, |
| "learning_rate": 2.1315282364385197e-06, |
| "loss": 0.2193, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.3960460419289757, |
| "learning_rate": 2.1272850029875802e-06, |
| "loss": 0.1574, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 11.603423005829917, |
| "learning_rate": 2.1230448557104087e-06, |
| "loss": 0.545, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 5.042970308045986, |
| "learning_rate": 2.1188077991622174e-06, |
| "loss": 0.3641, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.5474507435821743, |
| "learning_rate": 2.1145738378949004e-06, |
| "loss": 0.2371, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 9.950803527815106, |
| "learning_rate": 2.110342976457029e-06, |
| "loss": 0.5266, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.6618813500238472, |
| "learning_rate": 2.1061152193938355e-06, |
| "loss": 0.215, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 6.830984374276211, |
| "learning_rate": 2.1018905712472285e-06, |
| "loss": 0.5277, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.5052744627478791, |
| "learning_rate": 2.0976690365557673e-06, |
| "loss": 0.1999, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 5.6409906387431565, |
| "learning_rate": 2.093450619854671e-06, |
| "loss": 0.6337, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.657684945956667, |
| "learning_rate": 2.0892353256758107e-06, |
| "loss": 0.2178, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 5.118560263864878, |
| "learning_rate": 2.0850231585476965e-06, |
| "loss": 0.4665, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.8137236999164266, |
| "learning_rate": 2.0808141229954876e-06, |
| "loss": 0.2002, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 9.817857709861432, |
| "learning_rate": 2.0766082235409695e-06, |
| "loss": 0.4937, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.5934699742301135, |
| "learning_rate": 2.072405464702566e-06, |
| "loss": 0.2585, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 6.077520047362195, |
| "learning_rate": 2.068205850995326e-06, |
| "loss": 0.5491, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 5.861050995792146, |
| "learning_rate": 2.064009386930915e-06, |
| "loss": 0.5114, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.5038002055573227, |
| "learning_rate": 2.0598160770176208e-06, |
| "loss": 0.1906, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.6958036181030445, |
| "learning_rate": 2.0556259257603355e-06, |
| "loss": 0.2393, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.6086477208936687, |
| "learning_rate": 2.0514389376605646e-06, |
| "loss": 0.2688, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.43271043730868, |
| "learning_rate": 2.0472551172164152e-06, |
| "loss": 0.2063, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 6.381175730682259, |
| "learning_rate": 2.0430744689225833e-06, |
| "loss": 0.57, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.3943765638105143, |
| "learning_rate": 2.0388969972703688e-06, |
| "loss": 0.2283, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 8.011989023626537, |
| "learning_rate": 2.0347227067476478e-06, |
| "loss": 0.3946, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.3876916030957878, |
| "learning_rate": 2.030551601838887e-06, |
| "loss": 0.1907, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 6.369440824496463, |
| "learning_rate": 2.0263836870251277e-06, |
| "loss": 0.6874, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.392030096155692, |
| "learning_rate": 2.0222189667839805e-06, |
| "loss": 0.1721, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 7.505820135642883, |
| "learning_rate": 2.01805744558963e-06, |
| "loss": 0.5525, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 10.217081467195106, |
| "learning_rate": 2.013899127912824e-06, |
| "loss": 0.5245, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 8.771085852403852, |
| "learning_rate": 2.009744018220863e-06, |
| "loss": 0.4655, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 6.354959097655579, |
| "learning_rate": 2.005592120977606e-06, |
| "loss": 0.5604, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.7218011393895385, |
| "learning_rate": 2.0014434406434584e-06, |
| "loss": 0.1957, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.4055159786799856, |
| "learning_rate": 1.9972979816753717e-06, |
| "loss": 0.1682, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 4.084421952028699, |
| "learning_rate": 1.9931557485268365e-06, |
| "loss": 0.3722, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 5.361724567386323, |
| "learning_rate": 1.9890167456478748e-06, |
| "loss": 0.5068, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.60003653371184, |
| "learning_rate": 1.984880977485041e-06, |
| "loss": 0.211, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 7.482603126691497, |
| "learning_rate": 1.980748448481416e-06, |
| "loss": 0.4358, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 6.141325282590449, |
| "learning_rate": 1.9766191630765964e-06, |
| "loss": 0.5306, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.4516625230105962, |
| "learning_rate": 1.9724931257066988e-06, |
| "loss": 0.2317, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 9.343327315293724, |
| "learning_rate": 1.9683703408043447e-06, |
| "loss": 0.6164, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 5.24934798833513, |
| "learning_rate": 1.9642508127986676e-06, |
| "loss": 0.5279, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.4895377988466674, |
| "learning_rate": 1.9601345461153005e-06, |
| "loss": 0.2156, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.5036358630247681, |
| "learning_rate": 1.9560215451763685e-06, |
| "loss": 0.1936, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 9.300260916579875, |
| "learning_rate": 1.951911814400495e-06, |
| "loss": 0.5325, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.370160034971188, |
| "learning_rate": 1.9478053582027826e-06, |
| "loss": 0.1512, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.465307815297895, |
| "learning_rate": 1.9437021809948232e-06, |
| "loss": 0.2026, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 9.277551552673337, |
| "learning_rate": 1.9396022871846836e-06, |
| "loss": 0.6607, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 5.387263280662652, |
| "learning_rate": 1.935505681176899e-06, |
| "loss": 0.5917, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.349938726512857, |
| "learning_rate": 1.9314123673724805e-06, |
| "loss": 0.1762, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.6763593693800607, |
| "learning_rate": 1.9273223501688943e-06, |
| "loss": 0.1944, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 5.549808705544746, |
| "learning_rate": 1.9232356339600717e-06, |
| "loss": 0.6513, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 6.377562270114407, |
| "learning_rate": 1.919152223136391e-06, |
| "loss": 0.4733, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 7.852463979276551, |
| "learning_rate": 1.9150721220846884e-06, |
| "loss": 0.7424, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.687946465511382, |
| "learning_rate": 1.910995335188234e-06, |
| "loss": 0.2251, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.5891175267595996, |
| "learning_rate": 1.906921866826747e-06, |
| "loss": 0.2003, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 6.101562294379387, |
| "learning_rate": 1.9028517213763737e-06, |
| "loss": 0.5808, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 7.967450356183768, |
| "learning_rate": 1.8987849032096973e-06, |
| "loss": 0.4792, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.42460389872086, |
| "learning_rate": 1.89472141669572e-06, |
| "loss": 0.1894, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 7.633483715732261, |
| "learning_rate": 1.8906612661998698e-06, |
| "loss": 0.5442, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 8.4505173264, |
| "learning_rate": 1.8866044560839902e-06, |
| "loss": 0.5843, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 6.345391544979886, |
| "learning_rate": 1.8825509907063328e-06, |
| "loss": 0.4357, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.7058142928047633, |
| "learning_rate": 1.8785008744215606e-06, |
| "loss": 0.2384, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.4893028857490218, |
| "learning_rate": 1.874454111580733e-06, |
| "loss": 0.1768, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 6.414925517521466, |
| "learning_rate": 1.8704107065313116e-06, |
| "loss": 0.6891, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.6135157017856627, |
| "learning_rate": 1.8663706636171503e-06, |
| "loss": 0.2245, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 15.830859870957802, |
| "learning_rate": 1.8623339871784869e-06, |
| "loss": 0.571, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 9.115565072336496, |
| "learning_rate": 1.8583006815519473e-06, |
| "loss": 0.4791, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.3492396073479147, |
| "learning_rate": 1.8542707510705355e-06, |
| "loss": 0.1572, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 4.694271699344701, |
| "learning_rate": 1.8502442000636246e-06, |
| "loss": 0.3593, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.5952609772858102, |
| "learning_rate": 1.846221032856965e-06, |
| "loss": 0.2319, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.5754968134503997, |
| "learning_rate": 1.8422012537726646e-06, |
| "loss": 0.2189, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.5868406215734856, |
| "learning_rate": 1.8381848671291953e-06, |
| "loss": 0.1898, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 8.171116086303511, |
| "learning_rate": 1.8341718772413852e-06, |
| "loss": 0.5808, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.449524747276802, |
| "learning_rate": 1.8301622884204096e-06, |
| "loss": 0.1995, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 6.486439349672904, |
| "learning_rate": 1.8261561049737946e-06, |
| "loss": 0.5245, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 10.82732267235109, |
| "learning_rate": 1.8221533312054024e-06, |
| "loss": 0.6953, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.679977002278223, |
| "learning_rate": 1.818153971415439e-06, |
| "loss": 0.2156, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.58293077012093, |
| "learning_rate": 1.8141580299004342e-06, |
| "loss": 0.2454, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.4055543468615495, |
| "learning_rate": 1.8101655109532552e-06, |
| "loss": 0.2021, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 6.1193649017324026, |
| "learning_rate": 1.8061764188630831e-06, |
| "loss": 0.5663, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.3305418674019964, |
| "learning_rate": 1.8021907579154257e-06, |
| "loss": 0.176, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 10.00885650373271, |
| "learning_rate": 1.7982085323920973e-06, |
| "loss": 0.5901, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.8071652982180177, |
| "learning_rate": 1.7942297465712282e-06, |
| "loss": 0.259, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.675935122364972, |
| "learning_rate": 1.7902544047272468e-06, |
| "loss": 0.2302, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 6.776791922815715, |
| "learning_rate": 1.7862825111308873e-06, |
| "loss": 0.664, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 6.865538111907284, |
| "learning_rate": 1.7823140700491786e-06, |
| "loss": 0.5949, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 5.802272910455937, |
| "learning_rate": 1.7783490857454354e-06, |
| "loss": 0.3682, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 6.560312135377362, |
| "learning_rate": 1.7743875624792662e-06, |
| "loss": 0.5582, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.5913109833044743, |
| "learning_rate": 1.770429504506554e-06, |
| "loss": 0.2239, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 7.079366669373585, |
| "learning_rate": 1.7664749160794642e-06, |
| "loss": 0.6495, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 8.09757716625291, |
| "learning_rate": 1.7625238014464358e-06, |
| "loss": 0.4482, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 10.57674043946392, |
| "learning_rate": 1.7585761648521688e-06, |
| "loss": 0.5998, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 12.421223041564375, |
| "learning_rate": 1.7546320105376346e-06, |
| "loss": 0.6031, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 5.3650225502835776, |
| "learning_rate": 1.750691342740058e-06, |
| "loss": 0.7267, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 8.879833657416945, |
| "learning_rate": 1.746754165692921e-06, |
| "loss": 0.5464, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.5718775127322624, |
| "learning_rate": 1.742820483625957e-06, |
| "loss": 0.2074, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 8.820128082658634, |
| "learning_rate": 1.7388903007651398e-06, |
| "loss": 0.4889, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 6.283571796495923, |
| "learning_rate": 1.7349636213326876e-06, |
| "loss": 0.5186, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 7.058987036721699, |
| "learning_rate": 1.7310404495470557e-06, |
| "loss": 0.4451, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.563485148949926, |
| "learning_rate": 1.727120789622927e-06, |
| "loss": 0.179, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 6.014529966282654, |
| "learning_rate": 1.7232046457712164e-06, |
| "loss": 0.4753, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 8.167024305801668, |
| "learning_rate": 1.7192920221990566e-06, |
| "loss": 0.5438, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.4582656606915507, |
| "learning_rate": 1.7153829231098018e-06, |
| "loss": 0.1758, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 5.973447495717049, |
| "learning_rate": 1.7114773527030215e-06, |
| "loss": 0.461, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 11.599549739132604, |
| "learning_rate": 1.7075753151744885e-06, |
| "loss": 0.5915, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 6.068815615507931, |
| "learning_rate": 1.7036768147161853e-06, |
| "loss": 0.6128, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.8422198212418037, |
| "learning_rate": 1.6997818555162915e-06, |
| "loss": 0.1973, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.715902514534363, |
| "learning_rate": 1.6958904417591853e-06, |
| "loss": 0.2236, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.60276075786763, |
| "learning_rate": 1.6920025776254334e-06, |
| "loss": 0.1898, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 7.014840318131868, |
| "learning_rate": 1.6881182672917879e-06, |
| "loss": 0.478, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.5300462322055253, |
| "learning_rate": 1.6842375149311868e-06, |
| "loss": 0.1978, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.5219358965207703, |
| "learning_rate": 1.680360324712746e-06, |
| "loss": 0.1873, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.55773836258357, |
| "learning_rate": 1.6764867008017493e-06, |
| "loss": 0.1992, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.6277802076092838, |
| "learning_rate": 1.672616647359655e-06, |
| "loss": 0.1953, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.5023181181423733, |
| "learning_rate": 1.668750168544081e-06, |
| "loss": 0.2323, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.4795023942115406, |
| "learning_rate": 1.664887268508808e-06, |
| "loss": 0.178, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 6.677022427612835, |
| "learning_rate": 1.6610279514037725e-06, |
| "loss": 0.4622, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.406265038685406, |
| "learning_rate": 1.657172221375058e-06, |
| "loss": 0.1629, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.6849794458922327, |
| "learning_rate": 1.6533200825648993e-06, |
| "loss": 0.1943, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 7.780100197924541, |
| "learning_rate": 1.6494715391116671e-06, |
| "loss": 0.6186, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 5.329751180976738, |
| "learning_rate": 1.6456265951498763e-06, |
| "loss": 0.6589, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.537633057394514, |
| "learning_rate": 1.641785254810172e-06, |
| "loss": 0.1985, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.4374966867681902, |
| "learning_rate": 1.6379475222193248e-06, |
| "loss": 0.2356, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.479679886386861, |
| "learning_rate": 1.6341134015002352e-06, |
| "loss": 0.2364, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 5.8586855752619025, |
| "learning_rate": 1.6302828967719175e-06, |
| "loss": 0.5224, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.2614347887209656, |
| "learning_rate": 1.626456012149506e-06, |
| "loss": 0.1834, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 7.745335126732027, |
| "learning_rate": 1.6226327517442453e-06, |
| "loss": 0.6006, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 25.738539555946595, |
| "learning_rate": 1.6188131196634827e-06, |
| "loss": 0.5638, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.515876565578083, |
| "learning_rate": 1.6149971200106723e-06, |
| "loss": 0.2098, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 4.679477770645862, |
| "learning_rate": 1.6111847568853645e-06, |
| "loss": 0.4238, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.5000856150130015, |
| "learning_rate": 1.6073760343831996e-06, |
| "loss": 0.1832, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.6461882429471486, |
| "learning_rate": 1.603570956595913e-06, |
| "loss": 0.2228, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.5807675669285757, |
| "learning_rate": 1.5997695276113168e-06, |
| "loss": 0.2178, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 8.247096928630851, |
| "learning_rate": 1.595971751513311e-06, |
| "loss": 0.4801, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.484294044808653, |
| "learning_rate": 1.5921776323818655e-06, |
| "loss": 0.1864, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 4.369584217105914, |
| "learning_rate": 1.5883871742930257e-06, |
| "loss": 0.5279, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 7.771794175699381, |
| "learning_rate": 1.5846003813188993e-06, |
| "loss": 0.4916, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 6.767542280678469, |
| "learning_rate": 1.5808172575276615e-06, |
| "loss": 0.517, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 16.037289163882527, |
| "learning_rate": 1.5770378069835412e-06, |
| "loss": 0.598, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.6150945428755419, |
| "learning_rate": 1.5732620337468258e-06, |
| "loss": 0.2426, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 4.8715631816307265, |
| "learning_rate": 1.5694899418738462e-06, |
| "loss": 0.6858, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.6023643760317052, |
| "learning_rate": 1.5657215354169841e-06, |
| "loss": 0.185, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.4888072128943393, |
| "learning_rate": 1.561956818424661e-06, |
| "loss": 0.1841, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.6005429190663583, |
| "learning_rate": 1.5581957949413295e-06, |
| "loss": 0.2522, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.4101795444918526, |
| "learning_rate": 1.554438469007482e-06, |
| "loss": 0.1682, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.5328717521022854, |
| "learning_rate": 1.5506848446596317e-06, |
| "loss": 0.2017, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 11.364712037092591, |
| "learning_rate": 1.546934925930319e-06, |
| "loss": 0.5613, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.3647957100737897, |
| "learning_rate": 1.5431887168481051e-06, |
| "loss": 0.1412, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.4670332075404113, |
| "learning_rate": 1.5394462214375593e-06, |
| "loss": 0.1962, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 6.586589393567223, |
| "learning_rate": 1.5357074437192688e-06, |
| "loss": 0.4516, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 10.56356967244964, |
| "learning_rate": 1.5319723877098202e-06, |
| "loss": 0.5722, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 27.533933965993338, |
| "learning_rate": 1.5282410574218072e-06, |
| "loss": 0.6225, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 7.3389769770249, |
| "learning_rate": 1.5245134568638197e-06, |
| "loss": 0.6991, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 7.6747107205316025, |
| "learning_rate": 1.5207895900404363e-06, |
| "loss": 0.655, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.7133153620797101, |
| "learning_rate": 1.5170694609522306e-06, |
| "loss": 0.2325, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 5.833319223599012, |
| "learning_rate": 1.5133530735957586e-06, |
| "loss": 0.5796, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.4469803480828578, |
| "learning_rate": 1.5096404319635533e-06, |
| "loss": 0.1964, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 8.851584956787143, |
| "learning_rate": 1.50593154004413e-06, |
| "loss": 0.6571, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.6610199134694212, |
| "learning_rate": 1.502226401821968e-06, |
| "loss": 0.2382, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 4.581817614080536, |
| "learning_rate": 1.498525021277521e-06, |
| "loss": 0.482, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 7.45310859680198, |
| "learning_rate": 1.4948274023872005e-06, |
| "loss": 0.5531, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.750951578789252, |
| "learning_rate": 1.4911335491233818e-06, |
| "loss": 0.2255, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.440148079355167, |
| "learning_rate": 1.487443465454389e-06, |
| "loss": 0.1655, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.6263530608794818, |
| "learning_rate": 1.483757155344503e-06, |
| "loss": 0.2022, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 6.509908589441633, |
| "learning_rate": 1.4800746227539437e-06, |
| "loss": 0.5018, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.478431881245627, |
| "learning_rate": 1.4763958716388798e-06, |
| "loss": 0.2005, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 11.073599191026318, |
| "learning_rate": 1.4727209059514114e-06, |
| "loss": 0.6426, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 21.181615696021776, |
| "learning_rate": 1.4690497296395773e-06, |
| "loss": 0.5923, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 17.85850577782123, |
| "learning_rate": 1.46538234664734e-06, |
| "loss": 0.499, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 7.225088359123627, |
| "learning_rate": 1.4617187609145906e-06, |
| "loss": 0.4634, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 5.4330131602081595, |
| "learning_rate": 1.4580589763771413e-06, |
| "loss": 0.49, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 6.320076970953097, |
| "learning_rate": 1.4544029969667167e-06, |
| "loss": 0.6917, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.5329530470500947, |
| "learning_rate": 1.4507508266109565e-06, |
| "loss": 0.184, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.501294050131622, |
| "learning_rate": 1.4471024692334101e-06, |
| "loss": 0.2084, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 4.755780190366352, |
| "learning_rate": 1.4434579287535244e-06, |
| "loss": 0.5128, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 6.602086049509877, |
| "learning_rate": 1.439817209086653e-06, |
| "loss": 0.5971, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.6857665566710058, |
| "learning_rate": 1.4361803141440384e-06, |
| "loss": 0.2171, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 6.353716756607063, |
| "learning_rate": 1.432547247832819e-06, |
| "loss": 0.5053, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 8.005481917673409, |
| "learning_rate": 1.4289180140560189e-06, |
| "loss": 0.6893, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 7.207337731211934, |
| "learning_rate": 1.4252926167125413e-06, |
| "loss": 0.618, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.4856239702812042, |
| "learning_rate": 1.421671059697175e-06, |
| "loss": 0.2069, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 7.293121056948293, |
| "learning_rate": 1.418053346900574e-06, |
| "loss": 0.5381, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 10.743860752985295, |
| "learning_rate": 1.4144394822092712e-06, |
| "loss": 0.5178, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 7.942226089157316, |
| "learning_rate": 1.4108294695056606e-06, |
| "loss": 0.5673, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 6.01833332655479, |
| "learning_rate": 1.4072233126679985e-06, |
| "loss": 0.525, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 5.736861752132404, |
| "learning_rate": 1.4036210155703989e-06, |
| "loss": 0.3515, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.386143205504466, |
| "learning_rate": 1.4000225820828317e-06, |
| "loss": 0.1874, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 14.73927995898969, |
| "learning_rate": 1.3964280160711119e-06, |
| "loss": 0.5035, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.421278867736656, |
| "learning_rate": 1.3928373213969038e-06, |
| "loss": 0.2144, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 6.8491800401674325, |
| "learning_rate": 1.38925050191771e-06, |
| "loss": 0.493, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.4477905976408243, |
| "learning_rate": 1.3856675614868687e-06, |
| "loss": 0.1524, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.6095206288289274, |
| "learning_rate": 1.3820885039535564e-06, |
| "loss": 0.2371, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 6.888230687986665, |
| "learning_rate": 1.378513333162771e-06, |
| "loss": 0.5841, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 9.045799662925818, |
| "learning_rate": 1.3749420529553414e-06, |
| "loss": 0.5309, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 6.077391830064108, |
| "learning_rate": 1.3713746671679112e-06, |
| "loss": 0.6062, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.8349420626947697, |
| "learning_rate": 1.3678111796329446e-06, |
| "loss": 0.2625, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 9.73021569182454, |
| "learning_rate": 1.3642515941787171e-06, |
| "loss": 0.4867, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 13.698370345073796, |
| "learning_rate": 1.3606959146293086e-06, |
| "loss": 0.4307, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.6944837465065132, |
| "learning_rate": 1.3571441448046086e-06, |
| "loss": 0.2304, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.3612756215247916, |
| "learning_rate": 1.3535962885202997e-06, |
| "loss": 0.1837, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.7508574085618989, |
| "learning_rate": 1.350052349587866e-06, |
| "loss": 0.2136, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 5.945963849598769, |
| "learning_rate": 1.3465123318145817e-06, |
| "loss": 0.612, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.3563017391440364, |
| "learning_rate": 1.342976239003505e-06, |
| "loss": 0.1595, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.4513470956592684, |
| "learning_rate": 1.339444074953482e-06, |
| "loss": 0.1939, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.5405561506611012, |
| "learning_rate": 1.335915843459137e-06, |
| "loss": 0.1618, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.55313805260523, |
| "learning_rate": 1.3323915483108662e-06, |
| "loss": 0.1962, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.402394552080233, |
| "learning_rate": 1.3288711932948427e-06, |
| "loss": 0.1921, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.4552915386785408, |
| "learning_rate": 1.3253547821930002e-06, |
| "loss": 0.1888, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 10.677169419957384, |
| "learning_rate": 1.3218423187830409e-06, |
| "loss": 0.6102, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.3661472541973056, |
| "learning_rate": 1.3183338068384243e-06, |
| "loss": 0.2131, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 8.069416045414519, |
| "learning_rate": 1.3148292501283627e-06, |
| "loss": 0.5967, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.4718935332732668, |
| "learning_rate": 1.3113286524178232e-06, |
| "loss": 0.2282, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.7259519323112857, |
| "learning_rate": 1.3078320174675141e-06, |
| "loss": 0.2041, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.684452130814182, |
| "learning_rate": 1.3043393490338918e-06, |
| "loss": 0.2216, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.6210119180826756, |
| "learning_rate": 1.3008506508691516e-06, |
| "loss": 0.186, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.6543582716795695, |
| "learning_rate": 1.2973659267212173e-06, |
| "loss": 0.1759, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.7221113167648243, |
| "learning_rate": 1.2938851803337516e-06, |
| "loss": 0.2711, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.3452649138346509, |
| "learning_rate": 1.290408415446136e-06, |
| "loss": 0.1468, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 11.631055232311622, |
| "learning_rate": 1.2869356357934815e-06, |
| "loss": 0.5374, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 4.822561779391185, |
| "learning_rate": 1.2834668451066118e-06, |
| "loss": 0.397, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 5.747961116244306, |
| "learning_rate": 1.2800020471120717e-06, |
| "loss": 0.5021, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 5.616191178312223, |
| "learning_rate": 1.276541245532109e-06, |
| "loss": 0.5156, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.6064079114412415, |
| "learning_rate": 1.2730844440846862e-06, |
| "loss": 0.2126, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 8.419000170413655, |
| "learning_rate": 1.2696316464834607e-06, |
| "loss": 0.5254, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.4723558832246442, |
| "learning_rate": 1.2661828564377948e-06, |
| "loss": 0.2147, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 7.659881335092557, |
| "learning_rate": 1.2627380776527415e-06, |
| "loss": 0.6609, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.5219241063364994, |
| "learning_rate": 1.259297313829046e-06, |
| "loss": 0.2274, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 6.3997301053784135, |
| "learning_rate": 1.255860568663142e-06, |
| "loss": 0.4295, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.3787264057905275, |
| "learning_rate": 1.2524278458471411e-06, |
| "loss": 0.2019, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.6013440692023229, |
| "learning_rate": 1.248999149068838e-06, |
| "loss": 0.1796, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.728939143806417, |
| "learning_rate": 1.2455744820117028e-06, |
| "loss": 0.1958, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.464485738013023, |
| "learning_rate": 1.2421538483548706e-06, |
| "loss": 0.1655, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 7.034298389357844, |
| "learning_rate": 1.2387372517731505e-06, |
| "loss": 0.4589, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 12.37757862861587, |
| "learning_rate": 1.2353246959370086e-06, |
| "loss": 0.5021, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 6.9792442549148435, |
| "learning_rate": 1.2319161845125744e-06, |
| "loss": 0.4972, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.6712922440507088, |
| "learning_rate": 1.228511721161631e-06, |
| "loss": 0.1924, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.5713949409100816, |
| "learning_rate": 1.2251113095416113e-06, |
| "loss": 0.2149, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.46398599487867, |
| "learning_rate": 1.2217149533055976e-06, |
| "loss": 0.1773, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 5.132408324494143, |
| "learning_rate": 1.2183226561023132e-06, |
| "loss": 0.5753, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 5.857664332671467, |
| "learning_rate": 1.2149344215761216e-06, |
| "loss": 0.5602, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.5876521548650295, |
| "learning_rate": 1.2115502533670253e-06, |
| "loss": 0.2446, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.4493141431593062, |
| "learning_rate": 1.2081701551106506e-06, |
| "loss": 0.1996, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.776534678525459, |
| "learning_rate": 1.20479413043826e-06, |
| "loss": 0.2469, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.5689220844918574, |
| "learning_rate": 1.201422182976732e-06, |
| "loss": 0.1944, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 14.335545365917188, |
| "learning_rate": 1.1980543163485726e-06, |
| "loss": 0.5762, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.6300431013975771, |
| "learning_rate": 1.1946905341718951e-06, |
| "loss": 0.2157, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 5.633850187021617, |
| "learning_rate": 1.1913308400604339e-06, |
| "loss": 0.6298, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.3547056318444164, |
| "learning_rate": 1.1879752376235231e-06, |
| "loss": 0.1967, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 9.750923071373556, |
| "learning_rate": 1.1846237304661095e-06, |
| "loss": 0.545, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 6.662999758882776, |
| "learning_rate": 1.181276322188732e-06, |
| "loss": 0.6131, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.6677086643354662, |
| "learning_rate": 1.1779330163875325e-06, |
| "loss": 0.2169, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.6074696369988541, |
| "learning_rate": 1.1745938166542414e-06, |
| "loss": 0.1886, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 4.811853847327237, |
| "learning_rate": 1.1712587265761799e-06, |
| "loss": 0.5485, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 9.59561361609413, |
| "learning_rate": 1.1679277497362563e-06, |
| "loss": 0.5142, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 23.86988713960673, |
| "learning_rate": 1.1646008897129546e-06, |
| "loss": 0.4215, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 5.807472920956867, |
| "learning_rate": 1.161278150080341e-06, |
| "loss": 0.4971, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.5991223030970334, |
| "learning_rate": 1.157959534408052e-06, |
| "loss": 0.2095, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.5688822910998863, |
| "learning_rate": 1.1546450462612951e-06, |
| "loss": 0.1677, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.6127490615241922, |
| "learning_rate": 1.151334689200845e-06, |
| "loss": 0.1992, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 4.250124362846092, |
| "learning_rate": 1.1480284667830343e-06, |
| "loss": 0.6511, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 6.532244488593466, |
| "learning_rate": 1.1447263825597577e-06, |
| "loss": 0.4948, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.7963430910867482, |
| "learning_rate": 1.1414284400784643e-06, |
| "loss": 0.2555, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 6.245490741574394, |
| "learning_rate": 1.1381346428821482e-06, |
| "loss": 0.5099, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 9.041639519313504, |
| "learning_rate": 1.134844994509358e-06, |
| "loss": 0.5677, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.481988512128748, |
| "learning_rate": 1.1315594984941786e-06, |
| "loss": 0.2139, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.380496281928763, |
| "learning_rate": 1.1282781583662372e-06, |
| "loss": 0.1537, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 5.073454771621429, |
| "learning_rate": 1.1250009776506982e-06, |
| "loss": 0.5818, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.3944048376058802, |
| "learning_rate": 1.1217279598682518e-06, |
| "loss": 0.1951, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 6.083484458011643, |
| "learning_rate": 1.118459108535122e-06, |
| "loss": 0.5768, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.621953302541929, |
| "learning_rate": 1.1151944271630517e-06, |
| "loss": 0.2339, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.3666553916208857, |
| "learning_rate": 1.1119339192593077e-06, |
| "loss": 0.2036, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.5413769186843314, |
| "learning_rate": 1.1086775883266725e-06, |
| "loss": 0.2168, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.518061414783004, |
| "learning_rate": 1.1054254378634399e-06, |
| "loss": 0.1752, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.464210153980354, |
| "learning_rate": 1.102177471363412e-06, |
| "loss": 0.1809, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.5872269482408712, |
| "learning_rate": 1.0989336923158999e-06, |
| "loss": 0.1802, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.6405936373674377, |
| "learning_rate": 1.0956941042057106e-06, |
| "loss": 0.2145, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 36.82423488510403, |
| "learning_rate": 1.0924587105131546e-06, |
| "loss": 0.4966, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.6724867978947762, |
| "learning_rate": 1.0892275147140307e-06, |
| "loss": 0.2153, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.6272667557449185, |
| "learning_rate": 1.086000520279632e-06, |
| "loss": 0.2137, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.6051885665171814, |
| "learning_rate": 1.0827777306767384e-06, |
| "loss": 0.1802, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.5636775827738592, |
| "learning_rate": 1.0795591493676072e-06, |
| "loss": 0.1948, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.4944126627336913, |
| "learning_rate": 1.0763447798099813e-06, |
| "loss": 0.2312, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.5618556069148841, |
| "learning_rate": 1.0731346254570735e-06, |
| "loss": 0.1841, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.301146600753639, |
| "learning_rate": 1.0699286897575718e-06, |
| "loss": 0.1857, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 6.672526601056036, |
| "learning_rate": 1.066726976155632e-06, |
| "loss": 0.2723, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 8.854185007252035, |
| "learning_rate": 1.0635294880908702e-06, |
| "loss": 0.529, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 7.174645636796516, |
| "learning_rate": 1.0603362289983687e-06, |
| "loss": 0.5951, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.5295702173480263, |
| "learning_rate": 1.0571472023086604e-06, |
| "loss": 0.2012, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 6.908237707717649, |
| "learning_rate": 1.053962411447736e-06, |
| "loss": 0.5908, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.5778324264783983, |
| "learning_rate": 1.0507818598370355e-06, |
| "loss": 0.1894, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 6.674324706171196, |
| "learning_rate": 1.0476055508934408e-06, |
| "loss": 0.5811, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 4.897314396887511, |
| "learning_rate": 1.0444334880292794e-06, |
| "loss": 0.6365, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 7.945153171999199, |
| "learning_rate": 1.0412656746523182e-06, |
| "loss": 0.5195, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.3993321163668972, |
| "learning_rate": 1.0381021141657526e-06, |
| "loss": 0.1745, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.6648621363034495, |
| "learning_rate": 1.0349428099682173e-06, |
| "loss": 0.2262, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 8.39262549028051, |
| "learning_rate": 1.0317877654537672e-06, |
| "loss": 0.573, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 13.261737124946308, |
| "learning_rate": 1.0286369840118859e-06, |
| "loss": 0.5983, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.4379975384462143, |
| "learning_rate": 1.025490469027477e-06, |
| "loss": 0.184, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.4363675604056922, |
| "learning_rate": 1.0223482238808557e-06, |
| "loss": 0.1803, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 5.067103834534466, |
| "learning_rate": 1.0192102519477565e-06, |
| "loss": 0.5164, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.6172231052222457, |
| "learning_rate": 1.016076556599318e-06, |
| "loss": 0.2087, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.3916450698179588, |
| "learning_rate": 1.0129471412020886e-06, |
| "loss": 0.1704, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 6.379376112173397, |
| "learning_rate": 1.0098220091180145e-06, |
| "loss": 0.6694, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.414347592392862, |
| "learning_rate": 1.006701163704445e-06, |
| "loss": 0.1983, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.610670843022095, |
| "learning_rate": 1.0035846083141193e-06, |
| "loss": 0.2061, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.8901554645136118, |
| "learning_rate": 1.0004723462951732e-06, |
| "loss": 0.2325, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.4386781328899296, |
| "learning_rate": 9.973643809911238e-07, |
| "loss": 0.212, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.5778673041286801, |
| "learning_rate": 9.942607157408784e-07, |
| "loss": 0.1916, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.4616468940958067, |
| "learning_rate": 9.911613538787196e-07, |
| "loss": 0.1631, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.4308317265407906, |
| "learning_rate": 9.880662987343103e-07, |
| "loss": 0.1697, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 4.6578055875903495, |
| "learning_rate": 9.849755536326866e-07, |
| "loss": 0.5274, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.5709526897428592, |
| "learning_rate": 9.818891218942511e-07, |
| "loss": 0.2525, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.4416668419248302, |
| "learning_rate": 9.78807006834777e-07, |
| "loss": 0.1782, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.6251745102042936, |
| "learning_rate": 9.757292117653955e-07, |
| "loss": 0.2108, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 6.9719931867493505, |
| "learning_rate": 9.726557399925995e-07, |
| "loss": 0.5532, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.7711361425380343, |
| "learning_rate": 9.695865948182392e-07, |
| "loss": 0.1984, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 7.717615445109604, |
| "learning_rate": 9.66521779539511e-07, |
| "loss": 0.544, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.4683742252197753, |
| "learning_rate": 9.63461297448966e-07, |
| "loss": 0.1882, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.5808992229471432, |
| "learning_rate": 9.604051518344948e-07, |
| "loss": 0.2048, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 12.282569863775484, |
| "learning_rate": 9.57353345979332e-07, |
| "loss": 0.5281, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.6271893351123086, |
| "learning_rate": 9.543058831620528e-07, |
| "loss": 0.2557, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.6138184596302778, |
| "learning_rate": 9.512627666565588e-07, |
| "loss": 0.2418, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.6122296182738112, |
| "learning_rate": 9.482239997320903e-07, |
| "loss": 0.1874, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.4472149233201144, |
| "learning_rate": 9.451895856532117e-07, |
| "loss": 0.221, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 14.97645190084707, |
| "learning_rate": 9.421595276798084e-07, |
| "loss": 0.6133, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.4519990327298435, |
| "learning_rate": 9.39133829067092e-07, |
| "loss": 0.2109, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.4610290448462637, |
| "learning_rate": 9.361124930655841e-07, |
| "loss": 0.2018, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.498347722449172, |
| "learning_rate": 9.330955229211259e-07, |
| "loss": 0.1776, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.5958498700482844, |
| "learning_rate": 9.300829218748625e-07, |
| "loss": 0.2672, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 6.203034648831612, |
| "learning_rate": 9.270746931632501e-07, |
| "loss": 0.4314, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.3300040390686352, |
| "learning_rate": 9.240708400180437e-07, |
| "loss": 0.1795, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 14.3616155812818, |
| "learning_rate": 9.210713656663023e-07, |
| "loss": 0.5088, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 5.884538514544935, |
| "learning_rate": 9.180762733303745e-07, |
| "loss": 0.533, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 7.814694426867154, |
| "learning_rate": 9.150855662279079e-07, |
| "loss": 0.4018, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.3973635596438183, |
| "learning_rate": 9.120992475718333e-07, |
| "loss": 0.1903, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.4383665344471661, |
| "learning_rate": 9.091173205703708e-07, |
| "loss": 0.2065, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 6.294289198358693, |
| "learning_rate": 9.061397884270217e-07, |
| "loss": 0.5422, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 4.9857271791072035, |
| "learning_rate": 9.031666543405637e-07, |
| "loss": 0.5583, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 8.327374247823952, |
| "learning_rate": 9.001979215050544e-07, |
| "loss": 0.4995, |
| "step": 2537 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 4.488245645407394, |
| "learning_rate": 8.972335931098159e-07, |
| "loss": 0.466, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.4002523589411047, |
| "learning_rate": 8.942736723394458e-07, |
| "loss": 0.2085, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 4.889026022921027, |
| "learning_rate": 8.913181623738032e-07, |
| "loss": 0.6764, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 7.750492978828258, |
| "learning_rate": 8.883670663880078e-07, |
| "loss": 0.6702, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 5.127825878036379, |
| "learning_rate": 8.854203875524403e-07, |
| "loss": 0.5095, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.799711649663046, |
| "learning_rate": 8.824781290327317e-07, |
| "loss": 0.2318, |
| "step": 2543 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.6051235284074827, |
| "learning_rate": 8.795402939897679e-07, |
| "loss": 0.1928, |
| "step": 2544 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 7.044225533853197, |
| "learning_rate": 8.766068855796833e-07, |
| "loss": 0.617, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 8.052121845683034, |
| "learning_rate": 8.736779069538521e-07, |
| "loss": 0.4959, |
| "step": 2546 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 17.23300986125481, |
| "learning_rate": 8.707533612588948e-07, |
| "loss": 0.569, |
| "step": 2547 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 5.282669940799093, |
| "learning_rate": 8.67833251636665e-07, |
| "loss": 0.4632, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.4320389198263321, |
| "learning_rate": 8.649175812242532e-07, |
| "loss": 0.1857, |
| "step": 2549 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 7.580851961765514, |
| "learning_rate": 8.62006353153983e-07, |
| "loss": 0.8111, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.544662525765582, |
| "learning_rate": 8.590995705533994e-07, |
| "loss": 0.209, |
| "step": 2551 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 7.149161656017897, |
| "learning_rate": 8.561972365452775e-07, |
| "loss": 0.4482, |
| "step": 2552 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.6270817149166188, |
| "learning_rate": 8.532993542476108e-07, |
| "loss": 0.2326, |
| "step": 2553 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 7.320842607727228, |
| "learning_rate": 8.504059267736097e-07, |
| "loss": 0.5003, |
| "step": 2554 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.2733848199707007, |
| "learning_rate": 8.475169572316988e-07, |
| "loss": 0.1798, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.5418719853348621, |
| "learning_rate": 8.446324487255164e-07, |
| "loss": 0.1742, |
| "step": 2556 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 5.99967558608403, |
| "learning_rate": 8.417524043539038e-07, |
| "loss": 0.5723, |
| "step": 2557 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.3442831750304751, |
| "learning_rate": 8.388768272109105e-07, |
| "loss": 0.1884, |
| "step": 2558 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 6.470561310477854, |
| "learning_rate": 8.36005720385783e-07, |
| "loss": 0.605, |
| "step": 2559 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 10.15361411600289, |
| "learning_rate": 8.331390869629702e-07, |
| "loss": 0.7166, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.3861308641502719, |
| "learning_rate": 8.302769300221098e-07, |
| "loss": 0.1641, |
| "step": 2561 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.2012542474337997, |
| "learning_rate": 8.274192526380337e-07, |
| "loss": 0.1419, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.5051072521255415, |
| "learning_rate": 8.24566057880763e-07, |
| "loss": 0.1897, |
| "step": 2563 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.4835039914520893, |
| "learning_rate": 8.217173488154972e-07, |
| "loss": 0.2138, |
| "step": 2564 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 5.488317253938668, |
| "learning_rate": 8.188731285026219e-07, |
| "loss": 0.5416, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 12.708079568791273, |
| "learning_rate": 8.160333999977004e-07, |
| "loss": 0.5586, |
| "step": 2566 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 7.844079556825954, |
| "learning_rate": 8.131981663514665e-07, |
| "loss": 0.5967, |
| "step": 2567 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 5.514556203894869, |
| "learning_rate": 8.103674306098291e-07, |
| "loss": 0.4291, |
| "step": 2568 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 11.294018881746007, |
| "learning_rate": 8.075411958138623e-07, |
| "loss": 0.3269, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.5568098518084332, |
| "learning_rate": 8.047194649998063e-07, |
| "loss": 0.2458, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 6.6662345305717725, |
| "learning_rate": 8.019022411990634e-07, |
| "loss": 0.6224, |
| "step": 2571 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 5.594513397899825, |
| "learning_rate": 7.99089527438191e-07, |
| "loss": 0.5713, |
| "step": 2572 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.5219324026730405, |
| "learning_rate": 7.962813267389052e-07, |
| "loss": 0.2215, |
| "step": 2573 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 6.447484952604837, |
| "learning_rate": 7.93477642118069e-07, |
| "loss": 0.7177, |
| "step": 2574 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.6396772877082404, |
| "learning_rate": 7.906784765876985e-07, |
| "loss": 0.2205, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.3336659870784024, |
| "learning_rate": 7.878838331549538e-07, |
| "loss": 0.1731, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.3515039283045287, |
| "learning_rate": 7.850937148221332e-07, |
| "loss": 0.2089, |
| "step": 2577 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.4923380627301124, |
| "learning_rate": 7.823081245866776e-07, |
| "loss": 0.1906, |
| "step": 2578 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.5277699646651623, |
| "learning_rate": 7.795270654411635e-07, |
| "loss": 0.2103, |
| "step": 2579 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.3240100567701212, |
| "learning_rate": 7.767505403732961e-07, |
| "loss": 0.1629, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 6.597982018865997, |
| "learning_rate": 7.739785523659144e-07, |
| "loss": 0.3574, |
| "step": 2581 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.4229439072388805, |
| "learning_rate": 7.712111043969772e-07, |
| "loss": 0.1817, |
| "step": 2582 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 8.611152415745416, |
| "learning_rate": 7.684481994395726e-07, |
| "loss": 0.395, |
| "step": 2583 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.8485251598213055, |
| "learning_rate": 7.656898404619029e-07, |
| "loss": 0.2343, |
| "step": 2584 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 6.762929749362675, |
| "learning_rate": 7.629360304272882e-07, |
| "loss": 0.4831, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 5.0445718794221355, |
| "learning_rate": 7.601867722941642e-07, |
| "loss": 0.3216, |
| "step": 2586 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 9.592577004766241, |
| "learning_rate": 7.57442069016071e-07, |
| "loss": 0.6185, |
| "step": 2587 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 6.077894120077208, |
| "learning_rate": 7.547019235416609e-07, |
| "loss": 0.6236, |
| "step": 2588 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.5405329002127461, |
| "learning_rate": 7.519663388146886e-07, |
| "loss": 0.1953, |
| "step": 2589 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 6.577754669292901, |
| "learning_rate": 7.492353177740047e-07, |
| "loss": 0.376, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 9.408829788343258, |
| "learning_rate": 7.465088633535639e-07, |
| "loss": 0.5448, |
| "step": 2591 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.5320519268321242, |
| "learning_rate": 7.437869784824086e-07, |
| "loss": 0.2195, |
| "step": 2592 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.5231665827699195, |
| "learning_rate": 7.410696660846761e-07, |
| "loss": 0.1723, |
| "step": 2593 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.6874867821002582, |
| "learning_rate": 7.383569290795911e-07, |
| "loss": 0.2481, |
| "step": 2594 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.5968693057949326, |
| "learning_rate": 7.356487703814602e-07, |
| "loss": 0.181, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 7.840406851525264, |
| "learning_rate": 7.329451928996745e-07, |
| "loss": 0.5325, |
| "step": 2596 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.4844619258708431, |
| "learning_rate": 7.302461995387033e-07, |
| "loss": 0.1758, |
| "step": 2597 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 16.29674965582375, |
| "learning_rate": 7.275517931980886e-07, |
| "loss": 0.6096, |
| "step": 2598 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 21.19066863166103, |
| "learning_rate": 7.24861976772448e-07, |
| "loss": 0.7152, |
| "step": 2599 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 11.317114303086758, |
| "learning_rate": 7.22176753151464e-07, |
| "loss": 0.6161, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 6.986948783906964, |
| "learning_rate": 7.194961252198885e-07, |
| "loss": 0.5213, |
| "step": 2601 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 4.893252000606057, |
| "learning_rate": 7.168200958575361e-07, |
| "loss": 0.4924, |
| "step": 2602 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 6.200996314158357, |
| "learning_rate": 7.141486679392778e-07, |
| "loss": 0.5967, |
| "step": 2603 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.5078257564939408, |
| "learning_rate": 7.114818443350463e-07, |
| "loss": 0.1926, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 5.839746009856321, |
| "learning_rate": 7.088196279098225e-07, |
| "loss": 0.6757, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.2910292736416038, |
| "learning_rate": 7.061620215236415e-07, |
| "loss": 0.1497, |
| "step": 2606 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 6.45423444267534, |
| "learning_rate": 7.035090280315854e-07, |
| "loss": 0.54, |
| "step": 2607 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.5139209625655168, |
| "learning_rate": 7.008606502837784e-07, |
| "loss": 0.1677, |
| "step": 2608 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 8.843478951192482, |
| "learning_rate": 6.982168911253895e-07, |
| "loss": 0.64, |
| "step": 2609 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 9.254331185836687, |
| "learning_rate": 6.955777533966212e-07, |
| "loss": 0.4762, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 6.6073102726749795, |
| "learning_rate": 6.929432399327174e-07, |
| "loss": 0.5791, |
| "step": 2611 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.5608241342559577, |
| "learning_rate": 6.903133535639467e-07, |
| "loss": 0.2073, |
| "step": 2612 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.5981984971688308, |
| "learning_rate": 6.876880971156147e-07, |
| "loss": 0.2374, |
| "step": 2613 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.3580211791343035, |
| "learning_rate": 6.850674734080454e-07, |
| "loss": 0.1544, |
| "step": 2614 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.6692509484909754, |
| "learning_rate": 6.824514852565922e-07, |
| "loss": 0.1843, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.731129371209292, |
| "learning_rate": 6.798401354716233e-07, |
| "loss": 0.1988, |
| "step": 2616 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.584868453254953, |
| "learning_rate": 6.772334268585296e-07, |
| "loss": 0.2202, |
| "step": 2617 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.6211274381694143, |
| "learning_rate": 6.746313622177097e-07, |
| "loss": 0.2285, |
| "step": 2618 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.6356376831839594, |
| "learning_rate": 6.720339443445772e-07, |
| "loss": 0.1855, |
| "step": 2619 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 3.2565345030647532, |
| "learning_rate": 6.694411760295538e-07, |
| "loss": 0.531, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 13.194009235153622, |
| "learning_rate": 6.66853060058063e-07, |
| "loss": 0.4903, |
| "step": 2621 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.5463835963314558, |
| "learning_rate": 6.642695992105347e-07, |
| "loss": 0.2219, |
| "step": 2622 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.4566373371656223, |
| "learning_rate": 6.61690796262392e-07, |
| "loss": 0.1668, |
| "step": 2623 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.5464011596894065, |
| "learning_rate": 6.591166539840599e-07, |
| "loss": 0.218, |
| "step": 2624 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.4726230979560666, |
| "learning_rate": 6.565471751409541e-07, |
| "loss": 0.2045, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 7.53648323357261, |
| "learning_rate": 6.539823624934777e-07, |
| "loss": 0.6538, |
| "step": 2626 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 4.3811779723790885, |
| "learning_rate": 6.514222187970248e-07, |
| "loss": 0.3418, |
| "step": 2627 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 6.082964849648111, |
| "learning_rate": 6.488667468019727e-07, |
| "loss": 0.567, |
| "step": 2628 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 7.3904723461789725, |
| "learning_rate": 6.46315949253678e-07, |
| "loss": 0.606, |
| "step": 2629 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.3947875291118377, |
| "learning_rate": 6.437698288924777e-07, |
| "loss": 0.1841, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 6.009049736365674, |
| "learning_rate": 6.412283884536818e-07, |
| "loss": 0.5414, |
| "step": 2631 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.4672149857669146, |
| "learning_rate": 6.38691630667575e-07, |
| "loss": 0.2192, |
| "step": 2632 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 11.53404226265188, |
| "learning_rate": 6.36159558259411e-07, |
| "loss": 0.8374, |
| "step": 2633 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.2702099350975073, |
| "learning_rate": 6.336321739494072e-07, |
| "loss": 0.1739, |
| "step": 2634 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 8.096994483604018, |
| "learning_rate": 6.31109480452749e-07, |
| "loss": 0.5601, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.3277480034853422, |
| "learning_rate": 6.285914804795784e-07, |
| "loss": 0.1473, |
| "step": 2636 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.4812499772472136, |
| "learning_rate": 6.260781767349983e-07, |
| "loss": 0.1971, |
| "step": 2637 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 5.538018979603673, |
| "learning_rate": 6.235695719190632e-07, |
| "loss": 0.5932, |
| "step": 2638 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.5124177468422904, |
| "learning_rate": 6.210656687267835e-07, |
| "loss": 0.1811, |
| "step": 2639 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 5.738558629682813, |
| "learning_rate": 6.185664698481137e-07, |
| "loss": 0.3961, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 7.5116895308050475, |
| "learning_rate": 6.160719779679597e-07, |
| "loss": 0.5401, |
| "step": 2641 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 8.889935222961554, |
| "learning_rate": 6.135821957661658e-07, |
| "loss": 0.6612, |
| "step": 2642 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.6046307953312196, |
| "learning_rate": 6.110971259175208e-07, |
| "loss": 0.1817, |
| "step": 2643 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 6.892177321413231, |
| "learning_rate": 6.086167710917479e-07, |
| "loss": 0.5152, |
| "step": 2644 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 13.071280804593417, |
| "learning_rate": 6.061411339535062e-07, |
| "loss": 0.6484, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.3419632389583118, |
| "learning_rate": 6.036702171623876e-07, |
| "loss": 0.1383, |
| "step": 2646 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 6.157792301410421, |
| "learning_rate": 6.012040233729105e-07, |
| "loss": 0.5189, |
| "step": 2647 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 7.7104212355617925, |
| "learning_rate": 5.987425552345222e-07, |
| "loss": 0.5179, |
| "step": 2648 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.5615036983923993, |
| "learning_rate": 5.962858153915896e-07, |
| "loss": 0.1928, |
| "step": 2649 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 12.993945008932329, |
| "learning_rate": 5.938338064834037e-07, |
| "loss": 0.4566, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.4056054595363345, |
| "learning_rate": 5.913865311441714e-07, |
| "loss": 0.1978, |
| "step": 2651 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.367541351930958, |
| "learning_rate": 5.889439920030127e-07, |
| "loss": 0.1538, |
| "step": 2652 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 6.393546120631601, |
| "learning_rate": 5.865061916839615e-07, |
| "loss": 0.5159, |
| "step": 2653 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.4226337155233368, |
| "learning_rate": 5.840731328059629e-07, |
| "loss": 0.205, |
| "step": 2654 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 6.86782039259812, |
| "learning_rate": 5.816448179828616e-07, |
| "loss": 0.6033, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.6104876835275903, |
| "learning_rate": 5.792212498234134e-07, |
| "loss": 0.2062, |
| "step": 2656 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 9.049602233806667, |
| "learning_rate": 5.768024309312681e-07, |
| "loss": 0.4272, |
| "step": 2657 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 9.592778712216669, |
| "learning_rate": 5.74388363904978e-07, |
| "loss": 0.5094, |
| "step": 2658 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 5.250655127944203, |
| "learning_rate": 5.719790513379891e-07, |
| "loss": 0.5485, |
| "step": 2659 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 9.41096800686093, |
| "learning_rate": 5.695744958186383e-07, |
| "loss": 0.4675, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 10.780266583961469, |
| "learning_rate": 5.671746999301542e-07, |
| "loss": 0.5982, |
| "step": 2661 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.710761266629091, |
| "learning_rate": 5.647796662506493e-07, |
| "loss": 0.2093, |
| "step": 2662 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 4.654933300278924, |
| "learning_rate": 5.623893973531225e-07, |
| "loss": 0.4755, |
| "step": 2663 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 5.2427890587191275, |
| "learning_rate": 5.600038958054538e-07, |
| "loss": 0.5326, |
| "step": 2664 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 6.693717545797934, |
| "learning_rate": 5.576231641703994e-07, |
| "loss": 0.4351, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 4.523179412732861, |
| "learning_rate": 5.552472050055946e-07, |
| "loss": 0.568, |
| "step": 2666 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 5.287751299486752, |
| "learning_rate": 5.528760208635436e-07, |
| "loss": 0.6917, |
| "step": 2667 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.5086347552640127, |
| "learning_rate": 5.505096142916233e-07, |
| "loss": 0.1786, |
| "step": 2668 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 5.505592709604087, |
| "learning_rate": 5.481479878320784e-07, |
| "loss": 0.4923, |
| "step": 2669 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.276453692652011, |
| "learning_rate": 5.457911440220154e-07, |
| "loss": 0.1714, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 5.3060623381904115, |
| "learning_rate": 5.434390853934063e-07, |
| "loss": 0.5048, |
| "step": 2671 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 7.244293658133257, |
| "learning_rate": 5.410918144730815e-07, |
| "loss": 0.4543, |
| "step": 2672 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 6.256776796215696, |
| "learning_rate": 5.387493337827254e-07, |
| "loss": 0.6208, |
| "step": 2673 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 7.584912804732227, |
| "learning_rate": 5.364116458388802e-07, |
| "loss": 0.5999, |
| "step": 2674 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 7.699756945569905, |
| "learning_rate": 5.340787531529346e-07, |
| "loss": 0.6547, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.605575414031511, |
| "learning_rate": 5.3175065823113e-07, |
| "loss": 0.1968, |
| "step": 2676 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.4817619941410574, |
| "learning_rate": 5.294273635745517e-07, |
| "loss": 0.2291, |
| "step": 2677 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 5.90107264061013, |
| "learning_rate": 5.271088716791273e-07, |
| "loss": 0.513, |
| "step": 2678 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 6.983756070460959, |
| "learning_rate": 5.24795185035627e-07, |
| "loss": 0.4819, |
| "step": 2679 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 5.190412595121703, |
| "learning_rate": 5.224863061296553e-07, |
| "loss": 0.3798, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.6978196461390462, |
| "learning_rate": 5.201822374416549e-07, |
| "loss": 0.1984, |
| "step": 2681 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 9.240812347198206, |
| "learning_rate": 5.178829814469006e-07, |
| "loss": 0.6435, |
| "step": 2682 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.656681270068995, |
| "learning_rate": 5.155885406154937e-07, |
| "loss": 0.2336, |
| "step": 2683 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.5786867044648638, |
| "learning_rate": 5.132989174123659e-07, |
| "loss": 0.2072, |
| "step": 2684 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 5.044095479711521, |
| "learning_rate": 5.110141142972735e-07, |
| "loss": 0.4497, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.593603943847763, |
| "learning_rate": 5.087341337247914e-07, |
| "loss": 0.2106, |
| "step": 2686 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 6.618972684768107, |
| "learning_rate": 5.064589781443163e-07, |
| "loss": 0.7105, |
| "step": 2687 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.4376629646749215, |
| "learning_rate": 5.041886500000603e-07, |
| "loss": 0.1872, |
| "step": 2688 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.440392194206104, |
| "learning_rate": 5.019231517310491e-07, |
| "loss": 0.1735, |
| "step": 2689 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 6.088923357509856, |
| "learning_rate": 4.996624857711219e-07, |
| "loss": 0.5553, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.7043003296847394, |
| "learning_rate": 4.97406654548922e-07, |
| "loss": 0.1879, |
| "step": 2691 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.5775563243885704, |
| "learning_rate": 4.951556604879049e-07, |
| "loss": 0.1709, |
| "step": 2692 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.4559291771321798, |
| "learning_rate": 4.929095060063227e-07, |
| "loss": 0.1728, |
| "step": 2693 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 7.343298387449492, |
| "learning_rate": 4.906681935172342e-07, |
| "loss": 0.6308, |
| "step": 2694 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 8.254690281488546, |
| "learning_rate": 4.88431725428492e-07, |
| "loss": 0.5289, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.5805793183405417, |
| "learning_rate": 4.862001041427488e-07, |
| "loss": 0.2157, |
| "step": 2696 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.757438267597915, |
| "learning_rate": 4.839733320574457e-07, |
| "loss": 0.2086, |
| "step": 2697 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 8.33445307266451, |
| "learning_rate": 4.817514115648164e-07, |
| "loss": 0.418, |
| "step": 2698 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 7.548588114648093, |
| "learning_rate": 4.795343450518825e-07, |
| "loss": 0.4581, |
| "step": 2699 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.3352919122460238, |
| "learning_rate": 4.773221349004531e-07, |
| "loss": 0.1991, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.3387781501315952, |
| "learning_rate": 4.7511478348711447e-07, |
| "loss": 0.1672, |
| "step": 2701 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.7704078493779831, |
| "learning_rate": 4.729122931832392e-07, |
| "loss": 0.1869, |
| "step": 2702 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 6.0396203658077, |
| "learning_rate": 4.707146663549744e-07, |
| "loss": 0.5129, |
| "step": 2703 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 6.582816211876843, |
| "learning_rate": 4.685219053632423e-07, |
| "loss": 0.4932, |
| "step": 2704 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.5537253110385736, |
| "learning_rate": 4.663340125637389e-07, |
| "loss": 0.2203, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.4877347829698297, |
| "learning_rate": 4.6415099030692914e-07, |
| "loss": 0.1647, |
| "step": 2706 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 5.9450673868837525, |
| "learning_rate": 4.619728409380453e-07, |
| "loss": 0.4525, |
| "step": 2707 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 8.06939617997545, |
| "learning_rate": 4.597995667970878e-07, |
| "loss": 0.5758, |
| "step": 2708 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 12.473711205485506, |
| "learning_rate": 4.5763117021881467e-07, |
| "loss": 0.3835, |
| "step": 2709 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.3235374068538472, |
| "learning_rate": 4.5546765353274846e-07, |
| "loss": 0.1844, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 4.862552219195205, |
| "learning_rate": 4.5330901906316506e-07, |
| "loss": 0.5605, |
| "step": 2711 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.4153040869832534, |
| "learning_rate": 4.511552691290988e-07, |
| "loss": 0.1872, |
| "step": 2712 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 5.210784876496466, |
| "learning_rate": 4.490064060443361e-07, |
| "loss": 0.4642, |
| "step": 2713 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.5480203986117018, |
| "learning_rate": 4.468624321174109e-07, |
| "loss": 0.1905, |
| "step": 2714 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.43235037299231, |
| "learning_rate": 4.4472334965160736e-07, |
| "loss": 0.1997, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.5503954668745454, |
| "learning_rate": 4.4258916094495394e-07, |
| "loss": 0.1583, |
| "step": 2716 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 7.398457461161245, |
| "learning_rate": 4.4045986829022e-07, |
| "loss": 0.5537, |
| "step": 2717 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 7.218432948067189, |
| "learning_rate": 4.38335473974919e-07, |
| "loss": 0.6647, |
| "step": 2718 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.4794803957855214, |
| "learning_rate": 4.362159802812971e-07, |
| "loss": 0.1837, |
| "step": 2719 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.4278596696760588, |
| "learning_rate": 4.341013894863405e-07, |
| "loss": 0.1818, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 5.591059833775441, |
| "learning_rate": 4.3199170386176325e-07, |
| "loss": 0.6646, |
| "step": 2721 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 9.657243782582162, |
| "learning_rate": 4.2988692567401515e-07, |
| "loss": 0.4096, |
| "step": 2722 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 7.34427746688386, |
| "learning_rate": 4.2778705718426907e-07, |
| "loss": 0.5594, |
| "step": 2723 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.495038666260333, |
| "learning_rate": 4.2569210064842716e-07, |
| "loss": 0.1581, |
| "step": 2724 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.5329584343646685, |
| "learning_rate": 4.236020583171108e-07, |
| "loss": 0.1958, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 7.7896070161012645, |
| "learning_rate": 4.215169324356666e-07, |
| "loss": 0.6856, |
| "step": 2726 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 7.771467751815555, |
| "learning_rate": 4.194367252441545e-07, |
| "loss": 0.6252, |
| "step": 2727 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.6693745729825031, |
| "learning_rate": 4.1736143897735394e-07, |
| "loss": 0.2033, |
| "step": 2728 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.331737824452968, |
| "learning_rate": 4.152910758647577e-07, |
| "loss": 0.187, |
| "step": 2729 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 5.952112384295271, |
| "learning_rate": 4.1322563813056606e-07, |
| "loss": 0.7214, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.8790949725849464, |
| "learning_rate": 4.111651279936929e-07, |
| "loss": 0.2495, |
| "step": 2731 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.4096065548075443, |
| "learning_rate": 4.091095476677531e-07, |
| "loss": 0.1673, |
| "step": 2732 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.5124954739782766, |
| "learning_rate": 4.070588993610697e-07, |
| "loss": 0.154, |
| "step": 2733 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.5687885929449497, |
| "learning_rate": 4.050131852766659e-07, |
| "loss": 0.2173, |
| "step": 2734 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 6.275592701555711, |
| "learning_rate": 4.029724076122621e-07, |
| "loss": 0.4758, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4748542433756284, |
| "learning_rate": 4.009365685602795e-07, |
| "loss": 0.2158, |
| "step": 2736 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 9.414075729888554, |
| "learning_rate": 3.989056703078292e-07, |
| "loss": 0.5527, |
| "step": 2737 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.5713542490218366, |
| "learning_rate": 3.968797150367171e-07, |
| "loss": 0.2018, |
| "step": 2738 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.5449491396397508, |
| "learning_rate": 3.948587049234398e-07, |
| "loss": 0.1901, |
| "step": 2739 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.6307394390097811, |
| "learning_rate": 3.928426421391773e-07, |
| "loss": 0.1784, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 6.998713371935027, |
| "learning_rate": 3.9083152884979935e-07, |
| "loss": 0.5584, |
| "step": 2741 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4164373784508115, |
| "learning_rate": 3.8882536721585486e-07, |
| "loss": 0.1768, |
| "step": 2742 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 7.755153311756699, |
| "learning_rate": 3.868241593925742e-07, |
| "loss": 0.6357, |
| "step": 2743 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.5425594331206072, |
| "learning_rate": 3.848279075298678e-07, |
| "loss": 0.1762, |
| "step": 2744 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4271275279711206, |
| "learning_rate": 3.828366137723183e-07, |
| "loss": 0.2099, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 7.057266626679748, |
| "learning_rate": 3.80850280259184e-07, |
| "loss": 0.6185, |
| "step": 2746 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4564045410306423, |
| "learning_rate": 3.7886890912439633e-07, |
| "loss": 0.1933, |
| "step": 2747 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4734666899751858, |
| "learning_rate": 3.768925024965503e-07, |
| "loss": 0.178, |
| "step": 2748 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 5.7696830680755, |
| "learning_rate": 3.749210624989125e-07, |
| "loss": 0.4184, |
| "step": 2749 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.3766507432246797, |
| "learning_rate": 3.729545912494115e-07, |
| "loss": 0.1684, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4692231082345608, |
| "learning_rate": 3.7099309086063794e-07, |
| "loss": 0.1902, |
| "step": 2751 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.6021302055115212, |
| "learning_rate": 3.6903656343984293e-07, |
| "loss": 0.1716, |
| "step": 2752 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.5941335703704298, |
| "learning_rate": 3.670850110889346e-07, |
| "loss": 0.1941, |
| "step": 2753 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 6.651698021511934, |
| "learning_rate": 3.651384359044774e-07, |
| "loss": 0.498, |
| "step": 2754 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 5.789963956744085, |
| "learning_rate": 3.631968399776864e-07, |
| "loss": 0.5474, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 10.234126217846844, |
| "learning_rate": 3.6126022539442975e-07, |
| "loss": 0.473, |
| "step": 2756 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.8953151152540306, |
| "learning_rate": 3.593285942352237e-07, |
| "loss": 0.2352, |
| "step": 2757 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 5.586124086834919, |
| "learning_rate": 3.5740194857523e-07, |
| "loss": 0.4172, |
| "step": 2758 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 6.343090688464675, |
| "learning_rate": 3.554802904842547e-07, |
| "loss": 0.6174, |
| "step": 2759 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 5.142444498296554, |
| "learning_rate": 3.5356362202674687e-07, |
| "loss": 0.5368, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 7.738189510492381, |
| "learning_rate": 3.516519452617922e-07, |
| "loss": 0.5712, |
| "step": 2761 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.4335777220529844, |
| "learning_rate": 3.4974526224311744e-07, |
| "loss": 0.1897, |
| "step": 2762 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.5217264160894917, |
| "learning_rate": 3.478435750190817e-07, |
| "loss": 0.234, |
| "step": 2763 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 8.599142512430719, |
| "learning_rate": 3.459468856326792e-07, |
| "loss": 0.4893, |
| "step": 2764 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 5.339084031965163, |
| "learning_rate": 3.4405519612153326e-07, |
| "loss": 0.4573, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 8.37944674597712, |
| "learning_rate": 3.4216850851789663e-07, |
| "loss": 0.69, |
| "step": 2766 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 5.712275397578137, |
| "learning_rate": 3.402868248486485e-07, |
| "loss": 0.5051, |
| "step": 2767 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 9.48171144073477, |
| "learning_rate": 3.3841014713529184e-07, |
| "loss": 0.5432, |
| "step": 2768 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 19.7958685386682, |
| "learning_rate": 3.3653847739395174e-07, |
| "loss": 0.616, |
| "step": 2769 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 7.384955125014153, |
| "learning_rate": 3.346718176353747e-07, |
| "loss": 0.4825, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.546675489425708, |
| "learning_rate": 3.3281016986492165e-07, |
| "loss": 0.2247, |
| "step": 2771 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 7.383348616381246, |
| "learning_rate": 3.3095353608257385e-07, |
| "loss": 0.4587, |
| "step": 2772 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.384415863590384, |
| "learning_rate": 3.2910191828292083e-07, |
| "loss": 0.1825, |
| "step": 2773 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 12.185491676837916, |
| "learning_rate": 3.2725531845516744e-07, |
| "loss": 0.6282, |
| "step": 2774 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.4675587976043334, |
| "learning_rate": 3.254137385831263e-07, |
| "loss": 0.2427, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 7.1582183676316795, |
| "learning_rate": 3.2357718064521594e-07, |
| "loss": 0.562, |
| "step": 2776 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.6649165418380594, |
| "learning_rate": 3.217456466144614e-07, |
| "loss": 0.1921, |
| "step": 2777 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 8.367329099086248, |
| "learning_rate": 3.199191384584893e-07, |
| "loss": 0.5557, |
| "step": 2778 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 9.522871424688466, |
| "learning_rate": 3.180976581395295e-07, |
| "loss": 0.4402, |
| "step": 2779 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 6.946008058570308, |
| "learning_rate": 3.1628120761440616e-07, |
| "loss": 0.573, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 5.949544153040697, |
| "learning_rate": 3.144697888345427e-07, |
| "loss": 0.5591, |
| "step": 2781 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.563219206555618, |
| "learning_rate": 3.1266340374595693e-07, |
| "loss": 0.2009, |
| "step": 2782 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 9.840995642921778, |
| "learning_rate": 3.108620542892593e-07, |
| "loss": 0.5893, |
| "step": 2783 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 5.5277411461745105, |
| "learning_rate": 3.0906574239964795e-07, |
| "loss": 0.4025, |
| "step": 2784 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 5.9312829901751485, |
| "learning_rate": 3.072744700069119e-07, |
| "loss": 0.4705, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.4692101723238575, |
| "learning_rate": 3.054882390354241e-07, |
| "loss": 0.2353, |
| "step": 2786 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 11.367735027012158, |
| "learning_rate": 3.0370705140414293e-07, |
| "loss": 0.4232, |
| "step": 2787 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 7.789201059559417, |
| "learning_rate": 3.019309090266087e-07, |
| "loss": 0.6217, |
| "step": 2788 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.3680402034081511, |
| "learning_rate": 3.0015981381094073e-07, |
| "loss": 0.1725, |
| "step": 2789 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 8.714863976063446, |
| "learning_rate": 2.9839376765983583e-07, |
| "loss": 0.6529, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.4995256520141007, |
| "learning_rate": 2.9663277247056923e-07, |
| "loss": 0.1802, |
| "step": 2791 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.404499609913541, |
| "learning_rate": 2.9487683013498523e-07, |
| "loss": 0.2168, |
| "step": 2792 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 6.106240227015083, |
| "learning_rate": 2.93125942539505e-07, |
| "loss": 0.4867, |
| "step": 2793 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 5.292866306582783, |
| "learning_rate": 2.913801115651144e-07, |
| "loss": 0.4967, |
| "step": 2794 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 5.731707102771506, |
| "learning_rate": 2.896393390873714e-07, |
| "loss": 0.5005, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.538387808018448, |
| "learning_rate": 2.8790362697639685e-07, |
| "loss": 0.1781, |
| "step": 2796 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 6.141995741207109, |
| "learning_rate": 2.8617297709687577e-07, |
| "loss": 0.5539, |
| "step": 2797 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.766898595287886, |
| "learning_rate": 2.8444739130805587e-07, |
| "loss": 0.2714, |
| "step": 2798 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 5.086068720235225, |
| "learning_rate": 2.827268714637421e-07, |
| "loss": 0.547, |
| "step": 2799 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.5542713166653377, |
| "learning_rate": 2.810114194122998e-07, |
| "loss": 0.2125, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 6.313678448452166, |
| "learning_rate": 2.793010369966487e-07, |
| "loss": 0.5274, |
| "step": 2801 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 6.565773969501368, |
| "learning_rate": 2.7759572605426057e-07, |
| "loss": 0.6617, |
| "step": 2802 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 6.368977047106074, |
| "learning_rate": 2.7589548841716274e-07, |
| "loss": 0.3688, |
| "step": 2803 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.2415580407502018, |
| "learning_rate": 2.7420032591192856e-07, |
| "loss": 0.1499, |
| "step": 2804 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.6188798679668956, |
| "learning_rate": 2.7251024035968134e-07, |
| "loss": 0.2165, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 6.7213647514169805, |
| "learning_rate": 2.7082523357608856e-07, |
| "loss": 0.5418, |
| "step": 2806 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.3919765447813295, |
| "learning_rate": 2.6914530737136346e-07, |
| "loss": 0.1883, |
| "step": 2807 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 7.590480561051072, |
| "learning_rate": 2.674704635502584e-07, |
| "loss": 0.4917, |
| "step": 2808 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.5450975740073691, |
| "learning_rate": 2.658007039120697e-07, |
| "loss": 0.2432, |
| "step": 2809 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 6.150846024520127, |
| "learning_rate": 2.64136030250628e-07, |
| "loss": 0.4327, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 6.517313862487519, |
| "learning_rate": 2.6247644435430263e-07, |
| "loss": 0.5871, |
| "step": 2811 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 8.345085681229417, |
| "learning_rate": 2.6082194800599424e-07, |
| "loss": 0.6578, |
| "step": 2812 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.558151080338734, |
| "learning_rate": 2.591725429831382e-07, |
| "loss": 0.2097, |
| "step": 2813 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.5451625551303971, |
| "learning_rate": 2.57528231057701e-07, |
| "loss": 0.2227, |
| "step": 2814 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.890145544796304, |
| "learning_rate": 2.558890139961745e-07, |
| "loss": 0.1953, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.5366257554269054, |
| "learning_rate": 2.5425489355957956e-07, |
| "loss": 0.1981, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 5.594092602461184, |
| "learning_rate": 2.526258715034602e-07, |
| "loss": 0.5163, |
| "step": 2817 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.2872462268534564, |
| "learning_rate": 2.510019495778837e-07, |
| "loss": 0.1846, |
| "step": 2818 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.4696999230227057, |
| "learning_rate": 2.4938312952744016e-07, |
| "loss": 0.2071, |
| "step": 2819 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 5.3706244592045485, |
| "learning_rate": 2.477694130912356e-07, |
| "loss": 0.6052, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.6093203291021525, |
| "learning_rate": 2.461608020028944e-07, |
| "loss": 0.1687, |
| "step": 2821 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 6.522675308570204, |
| "learning_rate": 2.445572979905575e-07, |
| "loss": 0.5339, |
| "step": 2822 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 7.055478940763568, |
| "learning_rate": 2.4295890277687695e-07, |
| "loss": 0.5388, |
| "step": 2823 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.5084545161877478, |
| "learning_rate": 2.4136561807901916e-07, |
| "loss": 0.205, |
| "step": 2824 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.7137511742906086, |
| "learning_rate": 2.397774456086577e-07, |
| "loss": 0.2395, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.4841156326024527, |
| "learning_rate": 2.3819438707197495e-07, |
| "loss": 0.1946, |
| "step": 2826 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 6.070685735254147, |
| "learning_rate": 2.3661644416966057e-07, |
| "loss": 0.6072, |
| "step": 2827 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 8.512142589977733, |
| "learning_rate": 2.3504361859690628e-07, |
| "loss": 0.4394, |
| "step": 2828 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 7.079742604152353, |
| "learning_rate": 2.3347591204340881e-07, |
| "loss": 0.5408, |
| "step": 2829 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 11.829621193001326, |
| "learning_rate": 2.3191332619336204e-07, |
| "loss": 0.6588, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 20.39111290338053, |
| "learning_rate": 2.3035586272546207e-07, |
| "loss": 0.5723, |
| "step": 2831 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.4352689502580414, |
| "learning_rate": 2.2880352331290102e-07, |
| "loss": 0.1685, |
| "step": 2832 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 5.10892056271425, |
| "learning_rate": 2.2725630962336542e-07, |
| "loss": 0.5223, |
| "step": 2833 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 5.983565286314907, |
| "learning_rate": 2.2571422331903458e-07, |
| "loss": 0.6039, |
| "step": 2834 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 4.629975595643532, |
| "learning_rate": 2.2417726605658164e-07, |
| "loss": 0.5059, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.1957005697326524, |
| "learning_rate": 2.226454394871669e-07, |
| "loss": 0.1483, |
| "step": 2836 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.392513449302165, |
| "learning_rate": 2.2111874525644228e-07, |
| "loss": 0.1925, |
| "step": 2837 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.989833788465268, |
| "learning_rate": 2.1959718500454196e-07, |
| "loss": 0.2532, |
| "step": 2838 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 6.223678565243159, |
| "learning_rate": 2.1808076036608783e-07, |
| "loss": 0.5268, |
| "step": 2839 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 6.780954869756683, |
| "learning_rate": 2.165694729701834e-07, |
| "loss": 0.4708, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 10.461543864853596, |
| "learning_rate": 2.1506332444041212e-07, |
| "loss": 0.5094, |
| "step": 2841 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 7.972329492424538, |
| "learning_rate": 2.1356231639483917e-07, |
| "loss": 0.4595, |
| "step": 2842 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 7.202713021400867, |
| "learning_rate": 2.1206645044600404e-07, |
| "loss": 0.5811, |
| "step": 2843 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 8.93053592284642, |
| "learning_rate": 2.1057572820092576e-07, |
| "loss": 0.579, |
| "step": 2844 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.5590482278159425, |
| "learning_rate": 2.0909015126109488e-07, |
| "loss": 0.1961, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.4354444595115425, |
| "learning_rate": 2.0760972122247425e-07, |
| "loss": 0.1854, |
| "step": 2846 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.426341737969202, |
| "learning_rate": 2.061344396754994e-07, |
| "loss": 0.1916, |
| "step": 2847 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 6.850055342787105, |
| "learning_rate": 2.04664308205072e-07, |
| "loss": 0.4271, |
| "step": 2848 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 7.668832901374139, |
| "learning_rate": 2.0319932839056365e-07, |
| "loss": 0.6114, |
| "step": 2849 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 5.6236889361003, |
| "learning_rate": 2.0173950180581047e-07, |
| "loss": 0.5771, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 6.235638019635725, |
| "learning_rate": 2.002848300191118e-07, |
| "loss": 0.5185, |
| "step": 2851 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 6.138322497141576, |
| "learning_rate": 1.988353145932298e-07, |
| "loss": 0.5961, |
| "step": 2852 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.4757457669273937, |
| "learning_rate": 1.9739095708538714e-07, |
| "loss": 0.1807, |
| "step": 2853 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.7252598905388794, |
| "learning_rate": 1.9595175904726481e-07, |
| "loss": 0.2329, |
| "step": 2854 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 27.86582099478461, |
| "learning_rate": 1.9451772202500163e-07, |
| "loss": 0.5558, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.5933301652280694, |
| "learning_rate": 1.9308884755919132e-07, |
| "loss": 0.1907, |
| "step": 2856 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 7.4008487216339764, |
| "learning_rate": 1.9166513718488155e-07, |
| "loss": 0.4857, |
| "step": 2857 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 5.879695016121743, |
| "learning_rate": 1.902465924315733e-07, |
| "loss": 0.5451, |
| "step": 2858 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 8.25822028400388, |
| "learning_rate": 1.8883321482321583e-07, |
| "loss": 0.615, |
| "step": 2859 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 10.175180891593763, |
| "learning_rate": 1.8742500587820955e-07, |
| "loss": 0.567, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.5893833108605107, |
| "learning_rate": 1.86021967109401e-07, |
| "loss": 0.1981, |
| "step": 2861 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.3030970949694585, |
| "learning_rate": 1.8462410002408228e-07, |
| "loss": 0.1716, |
| "step": 2862 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.5856352844453432, |
| "learning_rate": 1.8323140612399038e-07, |
| "loss": 0.1863, |
| "step": 2863 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.5339491261603877, |
| "learning_rate": 1.8184388690530242e-07, |
| "loss": 0.1794, |
| "step": 2864 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.7042141915449476, |
| "learning_rate": 1.804615438586399e-07, |
| "loss": 0.2322, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 8.131326556983366, |
| "learning_rate": 1.7908437846906158e-07, |
| "loss": 0.6032, |
| "step": 2866 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 7.562849471737348, |
| "learning_rate": 1.7771239221606285e-07, |
| "loss": 0.6066, |
| "step": 2867 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.384289973847297, |
| "learning_rate": 1.7634558657357748e-07, |
| "loss": 0.1973, |
| "step": 2868 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 10.089983241935549, |
| "learning_rate": 1.7498396300997146e-07, |
| "loss": 0.5804, |
| "step": 2869 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.6044307892692768, |
| "learning_rate": 1.736275229880441e-07, |
| "loss": 0.2389, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 7.241276701968791, |
| "learning_rate": 1.7227626796502807e-07, |
| "loss": 0.4521, |
| "step": 2871 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.6115718489535953, |
| "learning_rate": 1.7093019939258327e-07, |
| "loss": 0.1495, |
| "step": 2872 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 5.819339971265935, |
| "learning_rate": 1.6958931871679908e-07, |
| "loss": 0.5626, |
| "step": 2873 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.4566726170315303, |
| "learning_rate": 1.6825362737818985e-07, |
| "loss": 0.2031, |
| "step": 2874 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 5.250363755381174, |
| "learning_rate": 1.6692312681169775e-07, |
| "loss": 0.7009, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 4.456766188220367, |
| "learning_rate": 1.6559781844668666e-07, |
| "loss": 0.4214, |
| "step": 2876 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 7.7821418717005235, |
| "learning_rate": 1.6427770370694208e-07, |
| "loss": 0.5192, |
| "step": 2877 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.6356190170377005, |
| "learning_rate": 1.6296278401067122e-07, |
| "loss": 0.1901, |
| "step": 2878 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.7066681565007213, |
| "learning_rate": 1.6165306077049969e-07, |
| "loss": 0.2045, |
| "step": 2879 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 6.874786697193116, |
| "learning_rate": 1.603485353934703e-07, |
| "loss": 0.5165, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 6.215627299105615, |
| "learning_rate": 1.5904920928104196e-07, |
| "loss": 0.6417, |
| "step": 2881 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 23.282797246982042, |
| "learning_rate": 1.577550838290881e-07, |
| "loss": 0.5461, |
| "step": 2882 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 10.936842811991326, |
| "learning_rate": 1.564661604278944e-07, |
| "loss": 0.5793, |
| "step": 2883 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 7.105473846049208, |
| "learning_rate": 1.5518244046215936e-07, |
| "loss": 0.5209, |
| "step": 2884 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.6291138445685007, |
| "learning_rate": 1.539039253109892e-07, |
| "loss": 0.2067, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.5330733203641582, |
| "learning_rate": 1.526306163479019e-07, |
| "loss": 0.1849, |
| "step": 2886 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 8.577477788398626, |
| "learning_rate": 1.5136251494081822e-07, |
| "loss": 0.7031, |
| "step": 2887 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 17.50367381791444, |
| "learning_rate": 1.5009962245206845e-07, |
| "loss": 0.5257, |
| "step": 2888 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 6.824661403341482, |
| "learning_rate": 1.488419402383834e-07, |
| "loss": 0.5009, |
| "step": 2889 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 7.750865837232404, |
| "learning_rate": 1.4758946965089894e-07, |
| "loss": 0.6712, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.495357227840504, |
| "learning_rate": 1.4634221203515097e-07, |
| "loss": 0.2044, |
| "step": 2891 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.6973063844464513, |
| "learning_rate": 1.4510016873107657e-07, |
| "loss": 0.1886, |
| "step": 2892 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.4555547735917695, |
| "learning_rate": 1.4386334107300727e-07, |
| "loss": 0.1951, |
| "step": 2893 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 6.284820189802007, |
| "learning_rate": 1.4263173038967627e-07, |
| "loss": 0.5042, |
| "step": 2894 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 13.227742098011161, |
| "learning_rate": 1.4140533800420853e-07, |
| "loss": 0.6508, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 88.15415177230122, |
| "learning_rate": 1.401841652341246e-07, |
| "loss": 0.6007, |
| "step": 2896 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 5.796768955970198, |
| "learning_rate": 1.389682133913378e-07, |
| "loss": 0.7432, |
| "step": 2897 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 6.879847456235132, |
| "learning_rate": 1.3775748378215047e-07, |
| "loss": 0.6708, |
| "step": 2898 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 4.851883532350419, |
| "learning_rate": 1.3655197770725826e-07, |
| "loss": 0.3758, |
| "step": 2899 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 9.977095689739897, |
| "learning_rate": 1.3535169646174073e-07, |
| "loss": 0.4572, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 6.6640551359882725, |
| "learning_rate": 1.3415664133506812e-07, |
| "loss": 0.5807, |
| "step": 2901 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 6.526859432151342, |
| "learning_rate": 1.3296681361109564e-07, |
| "loss": 0.4813, |
| "step": 2902 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.3440258772018676, |
| "learning_rate": 1.3178221456806028e-07, |
| "loss": 0.1984, |
| "step": 2903 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.4401183304907947, |
| "learning_rate": 1.3060284547858403e-07, |
| "loss": 0.208, |
| "step": 2904 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 6.329726556651034, |
| "learning_rate": 1.2942870760966952e-07, |
| "loss": 0.5821, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 5.094978910867227, |
| "learning_rate": 1.282598022226994e-07, |
| "loss": 0.5231, |
| "step": 2906 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.4060308590002704, |
| "learning_rate": 1.270961305734364e-07, |
| "loss": 0.2206, |
| "step": 2907 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.644107385283967, |
| "learning_rate": 1.2593769391201827e-07, |
| "loss": 0.2131, |
| "step": 2908 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.8235233450387875, |
| "learning_rate": 1.247844934829606e-07, |
| "loss": 0.1971, |
| "step": 2909 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 6.718301175320187, |
| "learning_rate": 1.2363653052515302e-07, |
| "loss": 0.4507, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.676197969737339, |
| "learning_rate": 1.2249380627185781e-07, |
| "loss": 0.1956, |
| "step": 2911 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 8.67306685375281, |
| "learning_rate": 1.2135632195071133e-07, |
| "loss": 0.5648, |
| "step": 2912 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.594319802332187, |
| "learning_rate": 1.202240787837178e-07, |
| "loss": 0.2167, |
| "step": 2913 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 6.7491465730094395, |
| "learning_rate": 1.1909707798725412e-07, |
| "loss": 0.6679, |
| "step": 2914 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.7309112303750012, |
| "learning_rate": 1.1797532077206187e-07, |
| "loss": 0.2096, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.3304986946071597, |
| "learning_rate": 1.1685880834325203e-07, |
| "loss": 0.16, |
| "step": 2916 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.3626329682508962, |
| "learning_rate": 1.1574754190030014e-07, |
| "loss": 0.1922, |
| "step": 2917 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 7.166258200913389, |
| "learning_rate": 1.1464152263704565e-07, |
| "loss": 0.6461, |
| "step": 2918 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.3522158691801092, |
| "learning_rate": 1.1354075174169088e-07, |
| "loss": 0.1929, |
| "step": 2919 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.6023939170404724, |
| "learning_rate": 1.12445230396801e-07, |
| "loss": 0.2059, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 6.090268642091465, |
| "learning_rate": 1.1135495977930011e-07, |
| "loss": 0.5342, |
| "step": 2921 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.5883808411565767, |
| "learning_rate": 1.1026994106047296e-07, |
| "loss": 0.1993, |
| "step": 2922 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.6860915572927744, |
| "learning_rate": 1.0919017540595933e-07, |
| "loss": 0.1826, |
| "step": 2923 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 8.721284754711782, |
| "learning_rate": 1.0811566397575912e-07, |
| "loss": 0.358, |
| "step": 2924 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 3.935675864876987, |
| "learning_rate": 1.0704640792422616e-07, |
| "loss": 0.4267, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 8.056284718040663, |
| "learning_rate": 1.0598240840006658e-07, |
| "loss": 0.5781, |
| "step": 2926 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 6.63408809117204, |
| "learning_rate": 1.0492366654634211e-07, |
| "loss": 0.4878, |
| "step": 2927 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.3656309333900636, |
| "learning_rate": 1.0387018350046519e-07, |
| "loss": 0.2072, |
| "step": 2928 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 6.951365718753954, |
| "learning_rate": 1.0282196039419823e-07, |
| "loss": 0.5279, |
| "step": 2929 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.4346216121931041, |
| "learning_rate": 1.0177899835365323e-07, |
| "loss": 0.202, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.6888835421357935, |
| "learning_rate": 1.0074129849928948e-07, |
| "loss": 0.2275, |
| "step": 2931 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.4423057325247473, |
| "learning_rate": 9.970886194591467e-08, |
| "loss": 0.1978, |
| "step": 2932 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.4267009542011637, |
| "learning_rate": 9.8681689802681e-08, |
| "loss": 0.1821, |
| "step": 2933 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 4.973516740039685, |
| "learning_rate": 9.765978317308522e-08, |
| "loss": 0.4752, |
| "step": 2934 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 4.725959405022734, |
| "learning_rate": 9.664314315496692e-08, |
| "loss": 0.503, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.4000403717757461, |
| "learning_rate": 9.5631770840508e-08, |
| "loss": 0.2098, |
| "step": 2936 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.512264392172657, |
| "learning_rate": 9.462566731623213e-08, |
| "loss": 0.1908, |
| "step": 2937 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 6.575995188879126, |
| "learning_rate": 9.36248336630019e-08, |
| "loss": 0.5007, |
| "step": 2938 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.5617906757502826, |
| "learning_rate": 9.262927095601782e-08, |
| "loss": 0.2095, |
| "step": 2939 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.6751730406524747, |
| "learning_rate": 9.163898026481876e-08, |
| "loss": 0.2249, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 9.041493149958258, |
| "learning_rate": 9.065396265327986e-08, |
| "loss": 0.4572, |
| "step": 2941 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.4750842071035337, |
| "learning_rate": 8.967421917961072e-08, |
| "loss": 0.1997, |
| "step": 2942 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 8.271970625422963, |
| "learning_rate": 8.869975089635552e-08, |
| "loss": 0.4652, |
| "step": 2943 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.3942467670470393, |
| "learning_rate": 8.773055885039072e-08, |
| "loss": 0.1652, |
| "step": 2944 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 6.763267989712541, |
| "learning_rate": 8.676664408292457e-08, |
| "loss": 0.5331, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 5.2386080492282, |
| "learning_rate": 8.580800762949704e-08, |
| "loss": 0.5473, |
| "step": 2946 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.6535396390189872, |
| "learning_rate": 8.485465051997488e-08, |
| "loss": 0.2036, |
| "step": 2947 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.5508936364876473, |
| "learning_rate": 8.39065737785566e-08, |
| "loss": 0.1836, |
| "step": 2948 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 4.588146624149047, |
| "learning_rate": 8.296377842376524e-08, |
| "loss": 0.5541, |
| "step": 2949 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.6431556861222913, |
| "learning_rate": 8.202626546845172e-08, |
| "loss": 0.2131, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 6.7805094436163085, |
| "learning_rate": 8.109403591979148e-08, |
| "loss": 0.5631, |
| "step": 2951 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 9.769084565696684, |
| "learning_rate": 8.016709077928397e-08, |
| "loss": 0.5768, |
| "step": 2952 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 8.357217432789115, |
| "learning_rate": 7.924543104275095e-08, |
| "loss": 0.4799, |
| "step": 2953 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.297962053997005, |
| "learning_rate": 7.832905770033705e-08, |
| "loss": 0.1345, |
| "step": 2954 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.5274934813053564, |
| "learning_rate": 7.7417971736507e-08, |
| "loss": 0.2046, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 13.267871007882906, |
| "learning_rate": 7.651217413004674e-08, |
| "loss": 0.5781, |
| "step": 2956 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 7.938808149163866, |
| "learning_rate": 7.561166585405789e-08, |
| "loss": 0.4829, |
| "step": 2957 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.4567831332091496, |
| "learning_rate": 7.47164478759621e-08, |
| "loss": 0.1964, |
| "step": 2958 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 5.838862693988253, |
| "learning_rate": 7.382652115749789e-08, |
| "loss": 0.5398, |
| "step": 2959 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.5771838835063452, |
| "learning_rate": 7.294188665471769e-08, |
| "loss": 0.1818, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.430463795467331, |
| "learning_rate": 7.206254531799018e-08, |
| "loss": 0.1971, |
| "step": 2961 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.4473265007418736, |
| "learning_rate": 7.118849809199524e-08, |
| "loss": 0.1747, |
| "step": 2962 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 7.001770352559234, |
| "learning_rate": 7.031974591572732e-08, |
| "loss": 0.5199, |
| "step": 2963 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.4584409439658936, |
| "learning_rate": 6.945628972249208e-08, |
| "loss": 0.1997, |
| "step": 2964 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.5603112849189318, |
| "learning_rate": 6.859813043990526e-08, |
| "loss": 0.2087, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.5989705437486081, |
| "learning_rate": 6.77452689898922e-08, |
| "loss": 0.2121, |
| "step": 2966 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.5299259420967457, |
| "learning_rate": 6.689770628868609e-08, |
| "loss": 0.2284, |
| "step": 2967 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 5.453348824888012, |
| "learning_rate": 6.605544324682855e-08, |
| "loss": 0.4395, |
| "step": 2968 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.5849018751349726, |
| "learning_rate": 6.521848076916859e-08, |
| "loss": 0.2086, |
| "step": 2969 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.5379039372042327, |
| "learning_rate": 6.438681975485805e-08, |
| "loss": 0.215, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 11.850872203494525, |
| "learning_rate": 6.356046109735614e-08, |
| "loss": 0.6073, |
| "step": 2971 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.6067848040383592, |
| "learning_rate": 6.273940568442327e-08, |
| "loss": 0.1977, |
| "step": 2972 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.399031388079973, |
| "learning_rate": 6.192365439812553e-08, |
| "loss": 0.1573, |
| "step": 2973 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 6.356556169923487, |
| "learning_rate": 6.111320811482802e-08, |
| "loss": 0.463, |
| "step": 2974 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.6608622010185468, |
| "learning_rate": 6.030806770519815e-08, |
| "loss": 0.1943, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.3672535592434387, |
| "learning_rate": 5.9508234034202364e-08, |
| "loss": 0.1661, |
| "step": 2976 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.6686974930318208, |
| "learning_rate": 5.871370796110665e-08, |
| "loss": 0.193, |
| "step": 2977 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 7.863053421390452, |
| "learning_rate": 5.7924490339474335e-08, |
| "loss": 0.5465, |
| "step": 2978 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 5.980751998759158, |
| "learning_rate": 5.7140582017167764e-08, |
| "loss": 0.5738, |
| "step": 2979 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.7375085215686261, |
| "learning_rate": 5.636198383634217e-08, |
| "loss": 0.1701, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.7604058039303363, |
| "learning_rate": 5.558869663345123e-08, |
| "loss": 0.246, |
| "step": 2981 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.5834894119079832, |
| "learning_rate": 5.482072123924098e-08, |
| "loss": 0.1968, |
| "step": 2982 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.4063289344126568, |
| "learning_rate": 5.405805847875256e-08, |
| "loss": 0.1884, |
| "step": 2983 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 5.697631443303541, |
| "learning_rate": 5.330070917131724e-08, |
| "loss": 0.527, |
| "step": 2984 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 6.388984077995515, |
| "learning_rate": 5.2548674130561974e-08, |
| "loss": 0.3975, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.5987057012740025, |
| "learning_rate": 5.1801954164399925e-08, |
| "loss": 0.2432, |
| "step": 2986 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 5.812013938920401, |
| "learning_rate": 5.106055007503774e-08, |
| "loss": 0.4925, |
| "step": 2987 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.5797247671065668, |
| "learning_rate": 5.0324462658969395e-08, |
| "loss": 0.2229, |
| "step": 2988 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.664656061602779, |
| "learning_rate": 4.959369270697789e-08, |
| "loss": 0.2654, |
| "step": 2989 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 8.105206792904527, |
| "learning_rate": 4.886824100413412e-08, |
| "loss": 0.7076, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 23.236434026650407, |
| "learning_rate": 4.814810832979411e-08, |
| "loss": 0.6483, |
| "step": 2991 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 5.308885397734057, |
| "learning_rate": 4.743329545760122e-08, |
| "loss": 0.5117, |
| "step": 2992 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 14.907848429189544, |
| "learning_rate": 4.67238031554812e-08, |
| "loss": 0.6733, |
| "step": 2993 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 6.4774314255112, |
| "learning_rate": 4.6019632185647645e-08, |
| "loss": 0.5325, |
| "step": 2994 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 6.644308814873751, |
| "learning_rate": 4.532078330459433e-08, |
| "loss": 0.575, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.6465106542055439, |
| "learning_rate": 4.4627257263098465e-08, |
| "loss": 0.175, |
| "step": 2996 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 6.993332934686698, |
| "learning_rate": 4.393905480621907e-08, |
| "loss": 0.464, |
| "step": 2997 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.4112580103582617, |
| "learning_rate": 4.3256176673295846e-08, |
| "loss": 0.1727, |
| "step": 2998 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 7.449952228864845, |
| "learning_rate": 4.2578623597949174e-08, |
| "loss": 0.5289, |
| "step": 2999 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 6.983314068003288, |
| "learning_rate": 4.1906396308077356e-08, |
| "loss": 0.4838, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.420455900288509, |
| "learning_rate": 4.123949552585826e-08, |
| "loss": 0.2371, |
| "step": 3001 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 5.960526883885027, |
| "learning_rate": 4.0577921967747126e-08, |
| "loss": 0.3775, |
| "step": 3002 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.5666246212700372, |
| "learning_rate": 3.9921676344475966e-08, |
| "loss": 0.2083, |
| "step": 3003 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.4196458859952457, |
| "learning_rate": 3.927075936105307e-08, |
| "loss": 0.1898, |
| "step": 3004 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 10.699323728753926, |
| "learning_rate": 3.8625171716762385e-08, |
| "loss": 0.5733, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 5.490490638625815, |
| "learning_rate": 3.7984914105162474e-08, |
| "loss": 0.3556, |
| "step": 3006 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.7482767819557443, |
| "learning_rate": 3.7349987214084784e-08, |
| "loss": 0.1785, |
| "step": 3007 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.4680399434949956, |
| "learning_rate": 3.672039172563646e-08, |
| "loss": 0.2078, |
| "step": 3008 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.4830412849161663, |
| "learning_rate": 3.609612831619369e-08, |
| "loss": 0.1598, |
| "step": 3009 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 14.456193489536421, |
| "learning_rate": 3.547719765640778e-08, |
| "loss": 0.5513, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 6.669946189400554, |
| "learning_rate": 3.4863600411197404e-08, |
| "loss": 0.5237, |
| "step": 3011 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.3913977980338987, |
| "learning_rate": 3.425533723975527e-08, |
| "loss": 0.1985, |
| "step": 3012 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.519397239367645, |
| "learning_rate": 3.365240879554144e-08, |
| "loss": 0.2387, |
| "step": 3013 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.5463128250860247, |
| "learning_rate": 3.3054815726285e-08, |
| "loss": 0.1895, |
| "step": 3014 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 6.35177466171103, |
| "learning_rate": 3.2462558673983516e-08, |
| "loss": 0.4928, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 7.576803306496446, |
| "learning_rate": 3.1875638274902476e-08, |
| "loss": 0.5856, |
| "step": 3016 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.4257716411064958, |
| "learning_rate": 3.129405515957307e-08, |
| "loss": 0.1759, |
| "step": 3017 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.6215858795651334, |
| "learning_rate": 3.071780995279439e-08, |
| "loss": 0.252, |
| "step": 3018 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 5.048807746845461, |
| "learning_rate": 3.014690327362846e-08, |
| "loss": 0.438, |
| "step": 3019 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 8.851945826700035, |
| "learning_rate": 2.9581335735404672e-08, |
| "loss": 0.5805, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.4454792312951033, |
| "learning_rate": 2.9021107945714777e-08, |
| "loss": 0.1805, |
| "step": 3021 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.4381068991020554, |
| "learning_rate": 2.8466220506414565e-08, |
| "loss": 0.1667, |
| "step": 3022 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.6272251766125279, |
| "learning_rate": 2.79166740136233e-08, |
| "loss": 0.2039, |
| "step": 3023 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.3805092466249094, |
| "learning_rate": 2.7372469057721506e-08, |
| "loss": 0.1887, |
| "step": 3024 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.5745020096815718, |
| "learning_rate": 2.6833606223351515e-08, |
| "loss": 0.2059, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 7.362129018288768, |
| "learning_rate": 2.6300086089416366e-08, |
| "loss": 0.6999, |
| "step": 3026 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.774494427703464, |
| "learning_rate": 2.577190922908035e-08, |
| "loss": 0.2284, |
| "step": 3027 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.5132740417702437, |
| "learning_rate": 2.5249076209767353e-08, |
| "loss": 0.2053, |
| "step": 3028 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.7297296384375203, |
| "learning_rate": 2.473158759315808e-08, |
| "loss": 0.1827, |
| "step": 3029 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 5.490226694397722, |
| "learning_rate": 2.421944393519504e-08, |
| "loss": 0.4102, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.559266414410157, |
| "learning_rate": 2.3712645786075905e-08, |
| "loss": 0.1818, |
| "step": 3031 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 10.133659475015277, |
| "learning_rate": 2.3211193690257373e-08, |
| "loss": 0.499, |
| "step": 3032 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 5.3456419932725066, |
| "learning_rate": 2.271508818645185e-08, |
| "loss": 0.3714, |
| "step": 3033 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.397147772347432, |
| "learning_rate": 2.222432980762912e-08, |
| "loss": 0.1636, |
| "step": 3034 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.526139789617029, |
| "learning_rate": 2.1738919081012446e-08, |
| "loss": 0.2318, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.5863043449471008, |
| "learning_rate": 2.1258856528081906e-08, |
| "loss": 0.1641, |
| "step": 3036 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.8684179072745595, |
| "learning_rate": 2.0784142664571626e-08, |
| "loss": 0.2071, |
| "step": 3037 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 11.788992003108227, |
| "learning_rate": 2.031477800046866e-08, |
| "loss": 0.5973, |
| "step": 3038 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 7.604835128526027, |
| "learning_rate": 1.9850763040014654e-08, |
| "loss": 0.5794, |
| "step": 3039 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 8.514897010317375, |
| "learning_rate": 1.939209828170363e-08, |
| "loss": 0.461, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 10.669757021023933, |
| "learning_rate": 1.8938784218281435e-08, |
| "loss": 0.7521, |
| "step": 3041 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.5168614554290425, |
| "learning_rate": 1.849082133674518e-08, |
| "loss": 0.1889, |
| "step": 3042 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 4.147775107296752, |
| "learning_rate": 1.80482101183449e-08, |
| "loss": 0.3864, |
| "step": 3043 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 9.244428175932155, |
| "learning_rate": 1.761095103858024e-08, |
| "loss": 0.5296, |
| "step": 3044 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 7.505218349293431, |
| "learning_rate": 1.717904456720043e-08, |
| "loss": 0.6889, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.352085160477461, |
| "learning_rate": 1.675249116820543e-08, |
| "loss": 0.1641, |
| "step": 3046 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.299654688631552, |
| "learning_rate": 1.6331291299844233e-08, |
| "loss": 0.1653, |
| "step": 3047 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.5684991611977552, |
| "learning_rate": 1.5915445414613208e-08, |
| "loss": 0.2617, |
| "step": 3048 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.4482670914575277, |
| "learning_rate": 1.550495395925944e-08, |
| "loss": 0.1941, |
| "step": 3049 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 7.256742985265447, |
| "learning_rate": 1.5099817374774615e-08, |
| "loss": 0.548, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 7.699936238621277, |
| "learning_rate": 1.4700036096400028e-08, |
| "loss": 0.5615, |
| "step": 3051 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 9.119117485970706, |
| "learning_rate": 1.4305610553623228e-08, |
| "loss": 0.6034, |
| "step": 3052 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.6421789152262496, |
| "learning_rate": 1.3916541170176934e-08, |
| "loss": 0.176, |
| "step": 3053 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.3522274306030595, |
| "learning_rate": 1.3532828364041239e-08, |
| "loss": 0.194, |
| "step": 3054 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.6474357079295912, |
| "learning_rate": 1.3154472547440289e-08, |
| "loss": 0.1873, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 5.44204046323545, |
| "learning_rate": 1.2781474126845051e-08, |
| "loss": 0.6603, |
| "step": 3056 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.6273549373967076, |
| "learning_rate": 1.241383350296832e-08, |
| "loss": 0.1915, |
| "step": 3057 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 6.286750400189176, |
| "learning_rate": 1.2051551070769719e-08, |
| "loss": 0.4309, |
| "step": 3058 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 5.135787208051596, |
| "learning_rate": 1.1694627219450694e-08, |
| "loss": 0.4062, |
| "step": 3059 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 7.165414563360992, |
| "learning_rate": 1.134306233245619e-08, |
| "loss": 0.5891, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.6275508401703538, |
| "learning_rate": 1.0996856787475197e-08, |
| "loss": 0.2313, |
| "step": 3061 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 6.526103785267677, |
| "learning_rate": 1.0656010956437979e-08, |
| "loss": 0.6737, |
| "step": 3062 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.411322668377023, |
| "learning_rate": 1.0320525205516629e-08, |
| "loss": 0.1676, |
| "step": 3063 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 9.193227665398865, |
| "learning_rate": 9.990399895125624e-09, |
| "loss": 0.434, |
| "step": 3064 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.488605324614986, |
| "learning_rate": 9.665635379920157e-09, |
| "loss": 0.1869, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 5.801227737969345, |
| "learning_rate": 9.346232008797252e-09, |
| "loss": 0.5074, |
| "step": 3066 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 6.595733425369078, |
| "learning_rate": 9.032190124893536e-09, |
| "loss": 0.3494, |
| "step": 3067 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 5.873193491313792, |
| "learning_rate": 8.723510065585806e-09, |
| "loss": 0.5724, |
| "step": 3068 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.536886334269207, |
| "learning_rate": 8.42019216249046e-09, |
| "loss": 0.2056, |
| "step": 3069 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 7.617227766345291, |
| "learning_rate": 8.122236741464618e-09, |
| "loss": 0.457, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.6439233519395178, |
| "learning_rate": 7.82964412260223e-09, |
| "loss": 0.2278, |
| "step": 3071 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 7.584597301220255, |
| "learning_rate": 7.542414620237414e-09, |
| "loss": 0.5963, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.7329227900430477, |
| "learning_rate": 7.260548542943335e-09, |
| "loss": 0.2189, |
| "step": 3073 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.7161071849637957, |
| "learning_rate": 6.984046193528881e-09, |
| "loss": 0.2063, |
| "step": 3074 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 6.313601738654207, |
| "learning_rate": 6.712907869043661e-09, |
| "loss": 0.4828, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.309216968825294, |
| "learning_rate": 6.447133860771893e-09, |
| "loss": 0.1278, |
| "step": 3076 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 5.732384056113262, |
| "learning_rate": 6.186724454236847e-09, |
| "loss": 0.6362, |
| "step": 3077 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.569061524189713, |
| "learning_rate": 5.9316799291969654e-09, |
| "loss": 0.2119, |
| "step": 3078 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.4694221335985644, |
| "learning_rate": 5.682000559649181e-09, |
| "loss": 0.2094, |
| "step": 3079 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 9.261448007385779, |
| "learning_rate": 5.437686613823934e-09, |
| "loss": 0.3961, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.6067208100390094, |
| "learning_rate": 5.198738354190158e-09, |
| "loss": 0.219, |
| "step": 3081 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 6.1009075406485005, |
| "learning_rate": 4.9651560374514015e-09, |
| "loss": 0.4553, |
| "step": 3082 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.4256870486709714, |
| "learning_rate": 4.736939914545824e-09, |
| "loss": 0.1788, |
| "step": 3083 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 5.378017075157903, |
| "learning_rate": 4.514090230647305e-09, |
| "loss": 0.4654, |
| "step": 3084 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 6.112138137560136, |
| "learning_rate": 4.296607225164895e-09, |
| "loss": 0.6969, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 7.250462609629612, |
| "learning_rate": 4.084491131741697e-09, |
| "loss": 0.7226, |
| "step": 3086 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 8.628188351336053, |
| "learning_rate": 3.877742178254873e-09, |
| "loss": 0.7685, |
| "step": 3087 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 7.162283701232727, |
| "learning_rate": 3.6763605868167516e-09, |
| "loss": 0.6627, |
| "step": 3088 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 6.397798704627545, |
| "learning_rate": 3.4803465737714983e-09, |
| "loss": 0.4927, |
| "step": 3089 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 5.4537538091253195, |
| "learning_rate": 3.289700349698999e-09, |
| "loss": 0.6303, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 5.454843813066502, |
| "learning_rate": 3.104422119411532e-09, |
| "loss": 0.3822, |
| "step": 3091 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.5629152223697327, |
| "learning_rate": 2.9245120819543226e-09, |
| "loss": 0.188, |
| "step": 3092 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 17.27712515755136, |
| "learning_rate": 2.749970430605542e-09, |
| "loss": 0.5413, |
| "step": 3093 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.5181043823476956, |
| "learning_rate": 2.5807973528768626e-09, |
| "loss": 0.1834, |
| "step": 3094 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 7.209806498981695, |
| "learning_rate": 2.416993030511239e-09, |
| "loss": 0.6215, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 5.914505942903512, |
| "learning_rate": 2.258557639484571e-09, |
| "loss": 0.5087, |
| "step": 3096 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 4.674993130466707, |
| "learning_rate": 2.1054913500051512e-09, |
| "loss": 0.4659, |
| "step": 3097 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.5402829311423731, |
| "learning_rate": 1.957794326513107e-09, |
| "loss": 0.1981, |
| "step": 3098 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.517322625685105, |
| "learning_rate": 1.8154667276798488e-09, |
| "loss": 0.1802, |
| "step": 3099 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.5378631489322545, |
| "learning_rate": 1.6785087064086213e-09, |
| "loss": 0.1637, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.4670537938773458, |
| "learning_rate": 1.546920409834507e-09, |
| "loss": 0.1683, |
| "step": 3101 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 5.2917356066256245, |
| "learning_rate": 1.4207019793238686e-09, |
| "loss": 0.5907, |
| "step": 3102 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 6.98601503007583, |
| "learning_rate": 1.299853550472685e-09, |
| "loss": 0.476, |
| "step": 3103 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 7.200946600616249, |
| "learning_rate": 1.1843752531104368e-09, |
| "loss": 0.4721, |
| "step": 3104 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.589855680885164, |
| "learning_rate": 1.0742672112951103e-09, |
| "loss": 0.2493, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 6.235917287828203, |
| "learning_rate": 9.695295433170826e-10, |
| "loss": 0.5953, |
| "step": 3106 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 5.989546147230207, |
| "learning_rate": 8.701623616963472e-10, |
| "loss": 0.5155, |
| "step": 3107 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 8.608292594450589, |
| "learning_rate": 7.761657731836236e-10, |
| "loss": 0.5289, |
| "step": 3108 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.2868548811530105, |
| "learning_rate": 6.87539878759802e-10, |
| "loss": 0.1499, |
| "step": 3109 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.5380312363926991, |
| "learning_rate": 6.042847736364987e-10, |
| "loss": 0.1935, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.746891666683149, |
| "learning_rate": 5.264005472549461e-10, |
| "loss": 0.3446, |
| "step": 3111 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 9.518489206428116, |
| "learning_rate": 4.538872832865471e-10, |
| "loss": 0.4071, |
| "step": 3112 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.7664412590705008, |
| "learning_rate": 3.867450596328759e-10, |
| "loss": 0.2381, |
| "step": 3113 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.6988190436169681, |
| "learning_rate": 3.2497394842512244e-10, |
| "loss": 0.218, |
| "step": 3114 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.4416306644380266, |
| "learning_rate": 2.685740160240924e-10, |
| "loss": 0.2128, |
| "step": 3115 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 7.825396015056772, |
| "learning_rate": 2.1754532302076247e-10, |
| "loss": 0.511, |
| "step": 3116 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.6608648888600894, |
| "learning_rate": 1.718879242357252e-10, |
| "loss": 0.2151, |
| "step": 3117 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 5.650930222088315, |
| "learning_rate": 1.316018687191889e-10, |
| "loss": 0.5032, |
| "step": 3118 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 15.307567535576162, |
| "learning_rate": 9.668719974986751e-11, |
| "loss": 0.6285, |
| "step": 3119 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 8.882364937663029, |
| "learning_rate": 6.714395483720105e-11, |
| "loss": 0.5117, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 6.942683694133464, |
| "learning_rate": 4.297216571969021e-11, |
| "loss": 0.5333, |
| "step": 3121 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.722721440266414, |
| "learning_rate": 2.417185836545155e-11, |
| "loss": 0.2157, |
| "step": 3122 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.501297152856583, |
| "learning_rate": 1.0743052971107225e-11, |
| "loss": 0.1943, |
| "step": 3123 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 7.410022358554075, |
| "learning_rate": 2.6857639640054387e-12, |
| "loss": 0.4378, |
| "step": 3124 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 6.343821428663217, |
| "learning_rate": 0.0, |
| "loss": 0.378, |
| "step": 3125 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 3125, |
| "total_flos": 887644537620480.0, |
| "train_loss": 0.39999343316555025, |
| "train_runtime": 11344.852, |
| "train_samples_per_second": 4.407, |
| "train_steps_per_second": 0.275 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 3125, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "total_flos": 887644537620480.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|