| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 822, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003658536585365854, |
| "grad_norm": 6.277445410210355, |
| "learning_rate": 0.0, |
| "loss": 0.8949, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.007317073170731708, |
| "grad_norm": 6.189966552075687, |
| "learning_rate": 1.204819277108434e-07, |
| "loss": 0.875, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01097560975609756, |
| "grad_norm": 6.164450292976063, |
| "learning_rate": 2.409638554216868e-07, |
| "loss": 0.8753, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.014634146341463415, |
| "grad_norm": 6.197109316032434, |
| "learning_rate": 3.614457831325301e-07, |
| "loss": 0.8511, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.018292682926829267, |
| "grad_norm": 6.423202590634042, |
| "learning_rate": 4.819277108433736e-07, |
| "loss": 0.8748, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02195121951219512, |
| "grad_norm": 6.25214947511438, |
| "learning_rate": 6.024096385542169e-07, |
| "loss": 0.8883, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.025609756097560974, |
| "grad_norm": 5.959365118165624, |
| "learning_rate": 7.228915662650602e-07, |
| "loss": 0.8739, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02926829268292683, |
| "grad_norm": 5.699309208501387, |
| "learning_rate": 8.433734939759036e-07, |
| "loss": 0.8641, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.032926829268292684, |
| "grad_norm": 5.896427299318614, |
| "learning_rate": 9.638554216867472e-07, |
| "loss": 0.8376, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.036585365853658534, |
| "grad_norm": 4.457488780662371, |
| "learning_rate": 1.0843373493975905e-06, |
| "loss": 0.8127, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04024390243902439, |
| "grad_norm": 4.83357688640288, |
| "learning_rate": 1.2048192771084338e-06, |
| "loss": 0.8569, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.04390243902439024, |
| "grad_norm": 4.606224868944638, |
| "learning_rate": 1.3253012048192773e-06, |
| "loss": 0.8594, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0475609756097561, |
| "grad_norm": 2.939566450225436, |
| "learning_rate": 1.4457831325301204e-06, |
| "loss": 0.7608, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.05121951219512195, |
| "grad_norm": 2.520318983948017, |
| "learning_rate": 1.566265060240964e-06, |
| "loss": 0.7226, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.054878048780487805, |
| "grad_norm": 2.5228998490908845, |
| "learning_rate": 1.6867469879518073e-06, |
| "loss": 0.7426, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05853658536585366, |
| "grad_norm": 2.3043155456582545, |
| "learning_rate": 1.8072289156626508e-06, |
| "loss": 0.7395, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.06219512195121951, |
| "grad_norm": 2.0517255621206454, |
| "learning_rate": 1.9277108433734943e-06, |
| "loss": 0.7115, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.06585365853658537, |
| "grad_norm": 2.263867308051565, |
| "learning_rate": 2.0481927710843377e-06, |
| "loss": 0.6575, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06951219512195123, |
| "grad_norm": 2.3534448895798623, |
| "learning_rate": 2.168674698795181e-06, |
| "loss": 0.6783, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.07317073170731707, |
| "grad_norm": 2.5801360082960314, |
| "learning_rate": 2.2891566265060243e-06, |
| "loss": 0.6791, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07682926829268293, |
| "grad_norm": 2.091602122954677, |
| "learning_rate": 2.4096385542168676e-06, |
| "loss": 0.6447, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.08048780487804878, |
| "grad_norm": 1.8784090908022497, |
| "learning_rate": 2.530120481927711e-06, |
| "loss": 0.6341, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.08414634146341464, |
| "grad_norm": 1.6942257363329816, |
| "learning_rate": 2.6506024096385547e-06, |
| "loss": 0.6357, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.08780487804878048, |
| "grad_norm": 1.6095157334175276, |
| "learning_rate": 2.771084337349398e-06, |
| "loss": 0.6345, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.09146341463414634, |
| "grad_norm": 1.7414308121430007, |
| "learning_rate": 2.891566265060241e-06, |
| "loss": 0.602, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0951219512195122, |
| "grad_norm": 1.8575881807291224, |
| "learning_rate": 3.012048192771085e-06, |
| "loss": 0.6362, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.09878048780487805, |
| "grad_norm": 1.7851864056717361, |
| "learning_rate": 3.132530120481928e-06, |
| "loss": 0.612, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.1024390243902439, |
| "grad_norm": 1.440836469753712, |
| "learning_rate": 3.2530120481927713e-06, |
| "loss": 0.5809, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.10609756097560975, |
| "grad_norm": 1.0609497126438352, |
| "learning_rate": 3.3734939759036146e-06, |
| "loss": 0.5171, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.10975609756097561, |
| "grad_norm": 1.14774673610937, |
| "learning_rate": 3.4939759036144583e-06, |
| "loss": 0.542, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.11341463414634147, |
| "grad_norm": 1.1996784532412303, |
| "learning_rate": 3.6144578313253016e-06, |
| "loss": 0.5745, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.11707317073170732, |
| "grad_norm": 1.1058886982759129, |
| "learning_rate": 3.7349397590361445e-06, |
| "loss": 0.535, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.12073170731707317, |
| "grad_norm": 1.1654070589558059, |
| "learning_rate": 3.855421686746989e-06, |
| "loss": 0.5394, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.12439024390243902, |
| "grad_norm": 1.0975202327930376, |
| "learning_rate": 3.975903614457832e-06, |
| "loss": 0.5235, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.12804878048780488, |
| "grad_norm": 1.0349455335423454, |
| "learning_rate": 4.096385542168675e-06, |
| "loss": 0.5248, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.13170731707317074, |
| "grad_norm": 1.0886433836793852, |
| "learning_rate": 4.216867469879519e-06, |
| "loss": 0.5217, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1353658536585366, |
| "grad_norm": 1.0243537702205467, |
| "learning_rate": 4.337349397590362e-06, |
| "loss": 0.5269, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.13902439024390245, |
| "grad_norm": 0.8706005558780466, |
| "learning_rate": 4.457831325301205e-06, |
| "loss": 0.5224, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.14268292682926828, |
| "grad_norm": 0.9201501100893733, |
| "learning_rate": 4.578313253012049e-06, |
| "loss": 0.489, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.14634146341463414, |
| "grad_norm": 0.9395976154976792, |
| "learning_rate": 4.698795180722892e-06, |
| "loss": 0.5119, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.8427641210847073, |
| "learning_rate": 4.819277108433735e-06, |
| "loss": 0.4676, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.15365853658536585, |
| "grad_norm": 0.8083286994818667, |
| "learning_rate": 4.939759036144578e-06, |
| "loss": 0.4929, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1573170731707317, |
| "grad_norm": 0.8645108522630794, |
| "learning_rate": 5.060240963855422e-06, |
| "loss": 0.4892, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.16097560975609757, |
| "grad_norm": 0.9003614919042666, |
| "learning_rate": 5.180722891566266e-06, |
| "loss": 0.4814, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.16463414634146342, |
| "grad_norm": 0.741593728453306, |
| "learning_rate": 5.301204819277109e-06, |
| "loss": 0.5015, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.16829268292682928, |
| "grad_norm": 0.7562265354454827, |
| "learning_rate": 5.421686746987952e-06, |
| "loss": 0.4696, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1719512195121951, |
| "grad_norm": 0.7656658535399147, |
| "learning_rate": 5.542168674698796e-06, |
| "loss": 0.4765, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.17560975609756097, |
| "grad_norm": 0.8167177748510339, |
| "learning_rate": 5.66265060240964e-06, |
| "loss": 0.4794, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.17926829268292682, |
| "grad_norm": 0.8458910289909082, |
| "learning_rate": 5.783132530120482e-06, |
| "loss": 0.4677, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.18292682926829268, |
| "grad_norm": 0.7346378269144168, |
| "learning_rate": 5.9036144578313255e-06, |
| "loss": 0.4733, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.18658536585365854, |
| "grad_norm": 0.740671844456031, |
| "learning_rate": 6.02409638554217e-06, |
| "loss": 0.474, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.1902439024390244, |
| "grad_norm": 0.8016621843811075, |
| "learning_rate": 6.144578313253012e-06, |
| "loss": 0.4726, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.19390243902439025, |
| "grad_norm": 0.687264969205199, |
| "learning_rate": 6.265060240963856e-06, |
| "loss": 0.4361, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1975609756097561, |
| "grad_norm": 0.6904700215176642, |
| "learning_rate": 6.385542168674699e-06, |
| "loss": 0.4585, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.20121951219512196, |
| "grad_norm": 0.7341819290585861, |
| "learning_rate": 6.5060240963855425e-06, |
| "loss": 0.4351, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2048780487804878, |
| "grad_norm": 0.7027985388306266, |
| "learning_rate": 6.626506024096386e-06, |
| "loss": 0.4514, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.20853658536585365, |
| "grad_norm": 0.7865290192260256, |
| "learning_rate": 6.746987951807229e-06, |
| "loss": 0.4329, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2121951219512195, |
| "grad_norm": 0.80407188175117, |
| "learning_rate": 6.867469879518073e-06, |
| "loss": 0.4435, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.21585365853658536, |
| "grad_norm": 0.7172401190061911, |
| "learning_rate": 6.987951807228917e-06, |
| "loss": 0.4513, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.21951219512195122, |
| "grad_norm": 0.8101533915145436, |
| "learning_rate": 7.1084337349397595e-06, |
| "loss": 0.4503, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.22317073170731708, |
| "grad_norm": 0.804368270905315, |
| "learning_rate": 7.228915662650603e-06, |
| "loss": 0.45, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.22682926829268293, |
| "grad_norm": 0.9006759847187962, |
| "learning_rate": 7.349397590361447e-06, |
| "loss": 0.4655, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2304878048780488, |
| "grad_norm": 0.7424358924212873, |
| "learning_rate": 7.469879518072289e-06, |
| "loss": 0.451, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.23414634146341465, |
| "grad_norm": 0.7737469569927911, |
| "learning_rate": 7.590361445783133e-06, |
| "loss": 0.435, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.23780487804878048, |
| "grad_norm": 0.7292890277210534, |
| "learning_rate": 7.710843373493977e-06, |
| "loss": 0.4578, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.24146341463414633, |
| "grad_norm": 0.7375239690173336, |
| "learning_rate": 7.83132530120482e-06, |
| "loss": 0.4248, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.2451219512195122, |
| "grad_norm": 0.7635364447903387, |
| "learning_rate": 7.951807228915663e-06, |
| "loss": 0.4389, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.24878048780487805, |
| "grad_norm": 0.8154765127796729, |
| "learning_rate": 8.072289156626508e-06, |
| "loss": 0.4465, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.2524390243902439, |
| "grad_norm": 0.7035246328533662, |
| "learning_rate": 8.19277108433735e-06, |
| "loss": 0.4458, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.25609756097560976, |
| "grad_norm": 0.7489537524137279, |
| "learning_rate": 8.313253012048194e-06, |
| "loss": 0.446, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2597560975609756, |
| "grad_norm": 0.737620333742491, |
| "learning_rate": 8.433734939759038e-06, |
| "loss": 0.4161, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.2634146341463415, |
| "grad_norm": 0.7850264326588713, |
| "learning_rate": 8.55421686746988e-06, |
| "loss": 0.4458, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.26707317073170733, |
| "grad_norm": 0.6816894813147583, |
| "learning_rate": 8.674698795180724e-06, |
| "loss": 0.4131, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2707317073170732, |
| "grad_norm": 0.7565446389184619, |
| "learning_rate": 8.795180722891567e-06, |
| "loss": 0.428, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.27439024390243905, |
| "grad_norm": 0.701791274439343, |
| "learning_rate": 8.91566265060241e-06, |
| "loss": 0.4178, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2780487804878049, |
| "grad_norm": 0.7340761193641352, |
| "learning_rate": 9.036144578313254e-06, |
| "loss": 0.4427, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.2817073170731707, |
| "grad_norm": 0.7618958840715552, |
| "learning_rate": 9.156626506024097e-06, |
| "loss": 0.4286, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.28536585365853656, |
| "grad_norm": 0.8304904668902089, |
| "learning_rate": 9.27710843373494e-06, |
| "loss": 0.4514, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2890243902439024, |
| "grad_norm": 0.7030300483997182, |
| "learning_rate": 9.397590361445785e-06, |
| "loss": 0.4241, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2926829268292683, |
| "grad_norm": 0.6805879055096362, |
| "learning_rate": 9.518072289156628e-06, |
| "loss": 0.4172, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.29634146341463413, |
| "grad_norm": 0.8356922060920742, |
| "learning_rate": 9.63855421686747e-06, |
| "loss": 0.4364, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.7503110247952592, |
| "learning_rate": 9.759036144578315e-06, |
| "loss": 0.4413, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.30365853658536585, |
| "grad_norm": 0.7061215452986729, |
| "learning_rate": 9.879518072289156e-06, |
| "loss": 0.4338, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.3073170731707317, |
| "grad_norm": 0.7634467193523269, |
| "learning_rate": 1e-05, |
| "loss": 0.4321, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.31097560975609756, |
| "grad_norm": 0.7210623022309584, |
| "learning_rate": 9.999954819584226e-06, |
| "loss": 0.4194, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3146341463414634, |
| "grad_norm": 0.721915503004975, |
| "learning_rate": 9.99981927915341e-06, |
| "loss": 0.4054, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.3182926829268293, |
| "grad_norm": 0.7175117036906977, |
| "learning_rate": 9.999593381157061e-06, |
| "loss": 0.4064, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.32195121951219513, |
| "grad_norm": 0.7700973325270625, |
| "learning_rate": 9.999277129677647e-06, |
| "loss": 0.4353, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.325609756097561, |
| "grad_norm": 0.6814147419837185, |
| "learning_rate": 9.998870530430517e-06, |
| "loss": 0.4194, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.32926829268292684, |
| "grad_norm": 0.7678648920525801, |
| "learning_rate": 9.998373590763798e-06, |
| "loss": 0.4162, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3329268292682927, |
| "grad_norm": 0.7311404514370706, |
| "learning_rate": 9.997786319658269e-06, |
| "loss": 0.4289, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.33658536585365856, |
| "grad_norm": 0.7611041087207226, |
| "learning_rate": 9.99710872772719e-06, |
| "loss": 0.4301, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.3402439024390244, |
| "grad_norm": 0.828204792592121, |
| "learning_rate": 9.996340827216114e-06, |
| "loss": 0.4265, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3439024390243902, |
| "grad_norm": 0.7616248679645873, |
| "learning_rate": 9.995482632002666e-06, |
| "loss": 0.4332, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.3475609756097561, |
| "grad_norm": 0.7439027924570666, |
| "learning_rate": 9.9945341575963e-06, |
| "loss": 0.4278, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.35121951219512193, |
| "grad_norm": 0.8590239356762064, |
| "learning_rate": 9.993495421137991e-06, |
| "loss": 0.4233, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3548780487804878, |
| "grad_norm": 0.6934508286067458, |
| "learning_rate": 9.992366441399968e-06, |
| "loss": 0.398, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.35853658536585364, |
| "grad_norm": 0.7663186915430809, |
| "learning_rate": 9.991147238785335e-06, |
| "loss": 0.4631, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3621951219512195, |
| "grad_norm": 0.7156930809406346, |
| "learning_rate": 9.989837835327724e-06, |
| "loss": 0.4208, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.36585365853658536, |
| "grad_norm": 0.8002982909933671, |
| "learning_rate": 9.988438254690896e-06, |
| "loss": 0.4152, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3695121951219512, |
| "grad_norm": 0.7294127443277085, |
| "learning_rate": 9.986948522168301e-06, |
| "loss": 0.413, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.37317073170731707, |
| "grad_norm": 0.8058181737161048, |
| "learning_rate": 9.985368664682636e-06, |
| "loss": 0.392, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.37682926829268293, |
| "grad_norm": 0.7274807055972744, |
| "learning_rate": 9.983698710785345e-06, |
| "loss": 0.4224, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.3804878048780488, |
| "grad_norm": 0.6993436655613549, |
| "learning_rate": 9.981938690656117e-06, |
| "loss": 0.4088, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.38414634146341464, |
| "grad_norm": 0.7940440414933521, |
| "learning_rate": 9.980088636102323e-06, |
| "loss": 0.4072, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3878048780487805, |
| "grad_norm": 0.8290354247980892, |
| "learning_rate": 9.97814858055846e-06, |
| "loss": 0.4131, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.39146341463414636, |
| "grad_norm": 0.7441200330283944, |
| "learning_rate": 9.976118559085535e-06, |
| "loss": 0.3919, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3951219512195122, |
| "grad_norm": 0.6950410289839455, |
| "learning_rate": 9.973998608370433e-06, |
| "loss": 0.3959, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.39878048780487807, |
| "grad_norm": 0.8708483538432179, |
| "learning_rate": 9.971788766725254e-06, |
| "loss": 0.3971, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.4024390243902439, |
| "grad_norm": 0.759084878650044, |
| "learning_rate": 9.969489074086626e-06, |
| "loss": 0.409, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4060975609756098, |
| "grad_norm": 0.7871272877847223, |
| "learning_rate": 9.967099572014977e-06, |
| "loss": 0.421, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.4097560975609756, |
| "grad_norm": 0.7903507778760472, |
| "learning_rate": 9.964620303693784e-06, |
| "loss": 0.4047, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.41341463414634144, |
| "grad_norm": 0.723841099163788, |
| "learning_rate": 9.962051313928796e-06, |
| "loss": 0.4092, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.4170731707317073, |
| "grad_norm": 0.6639342794876416, |
| "learning_rate": 9.959392649147226e-06, |
| "loss": 0.4027, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.42073170731707316, |
| "grad_norm": 0.807078988818577, |
| "learning_rate": 9.956644357396905e-06, |
| "loss": 0.4278, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.424390243902439, |
| "grad_norm": 0.7650258943283277, |
| "learning_rate": 9.953806488345417e-06, |
| "loss": 0.4088, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.42804878048780487, |
| "grad_norm": 0.7150263286569847, |
| "learning_rate": 9.950879093279204e-06, |
| "loss": 0.4004, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.4317073170731707, |
| "grad_norm": 0.7284032347300947, |
| "learning_rate": 9.947862225102637e-06, |
| "loss": 0.4172, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.4353658536585366, |
| "grad_norm": 0.7646864504880536, |
| "learning_rate": 9.944755938337063e-06, |
| "loss": 0.3873, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.43902439024390244, |
| "grad_norm": 0.6983159212265896, |
| "learning_rate": 9.941560289119808e-06, |
| "loss": 0.3884, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4426829268292683, |
| "grad_norm": 0.767699892100864, |
| "learning_rate": 9.938275335203176e-06, |
| "loss": 0.4419, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.44634146341463415, |
| "grad_norm": 0.7705493162673682, |
| "learning_rate": 9.934901135953402e-06, |
| "loss": 0.389, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.7664723925888519, |
| "learning_rate": 9.931437752349579e-06, |
| "loss": 0.4073, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.45365853658536587, |
| "grad_norm": 0.8462520333011967, |
| "learning_rate": 9.927885246982548e-06, |
| "loss": 0.4074, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.4573170731707317, |
| "grad_norm": 0.7775485328681504, |
| "learning_rate": 9.924243684053778e-06, |
| "loss": 0.3931, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4609756097560976, |
| "grad_norm": 0.7690878823572883, |
| "learning_rate": 9.920513129374198e-06, |
| "loss": 0.4163, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.46463414634146344, |
| "grad_norm": 0.8486196279486743, |
| "learning_rate": 9.916693650363014e-06, |
| "loss": 0.4063, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.4682926829268293, |
| "grad_norm": 0.7060741443892173, |
| "learning_rate": 9.912785316046487e-06, |
| "loss": 0.4029, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4719512195121951, |
| "grad_norm": 0.7610752199019202, |
| "learning_rate": 9.908788197056682e-06, |
| "loss": 0.4172, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.47560975609756095, |
| "grad_norm": 0.7252945736486252, |
| "learning_rate": 9.9047023656302e-06, |
| "loss": 0.3935, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4792682926829268, |
| "grad_norm": 0.767038892879291, |
| "learning_rate": 9.900527895606868e-06, |
| "loss": 0.3972, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.48292682926829267, |
| "grad_norm": 0.643692685037689, |
| "learning_rate": 9.8962648624284e-06, |
| "loss": 0.4225, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.4865853658536585, |
| "grad_norm": 0.8783566981280354, |
| "learning_rate": 9.891913343137041e-06, |
| "loss": 0.4416, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.4902439024390244, |
| "grad_norm": 0.7185000940147632, |
| "learning_rate": 9.887473416374169e-06, |
| "loss": 0.3992, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.49390243902439024, |
| "grad_norm": 0.6892638783815311, |
| "learning_rate": 9.882945162378884e-06, |
| "loss": 0.4198, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4975609756097561, |
| "grad_norm": 0.696543424071767, |
| "learning_rate": 9.87832866298654e-06, |
| "loss": 0.3896, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.501219512195122, |
| "grad_norm": 0.7371986690918538, |
| "learning_rate": 9.873624001627286e-06, |
| "loss": 0.406, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.5048780487804878, |
| "grad_norm": 0.7393120814879373, |
| "learning_rate": 9.868831263324543e-06, |
| "loss": 0.3948, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.5085365853658537, |
| "grad_norm": 0.7660439237825246, |
| "learning_rate": 9.863950534693474e-06, |
| "loss": 0.4069, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.5121951219512195, |
| "grad_norm": 0.7272706576678343, |
| "learning_rate": 9.858981903939419e-06, |
| "loss": 0.4024, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5158536585365854, |
| "grad_norm": 0.7481707493506783, |
| "learning_rate": 9.853925460856299e-06, |
| "loss": 0.418, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.5195121951219512, |
| "grad_norm": 0.7115691589222297, |
| "learning_rate": 9.848781296824994e-06, |
| "loss": 0.3632, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.5231707317073171, |
| "grad_norm": 0.7631193372021012, |
| "learning_rate": 9.843549504811695e-06, |
| "loss": 0.3966, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.526829268292683, |
| "grad_norm": 0.6756136811278868, |
| "learning_rate": 9.838230179366213e-06, |
| "loss": 0.3766, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.5304878048780488, |
| "grad_norm": 0.7161082479999907, |
| "learning_rate": 9.832823416620285e-06, |
| "loss": 0.3734, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5341463414634147, |
| "grad_norm": 0.8034199359900721, |
| "learning_rate": 9.827329314285825e-06, |
| "loss": 0.3947, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.5378048780487805, |
| "grad_norm": 0.7413237614885045, |
| "learning_rate": 9.821747971653164e-06, |
| "loss": 0.3951, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.5414634146341464, |
| "grad_norm": 0.7175495817807958, |
| "learning_rate": 9.816079489589257e-06, |
| "loss": 0.4057, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.5451219512195122, |
| "grad_norm": 0.7937438771947285, |
| "learning_rate": 9.810323970535851e-06, |
| "loss": 0.4029, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.5487804878048781, |
| "grad_norm": 0.7432618293923435, |
| "learning_rate": 9.804481518507645e-06, |
| "loss": 0.4375, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.552439024390244, |
| "grad_norm": 0.6993854930910921, |
| "learning_rate": 9.798552239090404e-06, |
| "loss": 0.3791, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.5560975609756098, |
| "grad_norm": 0.7198012758680304, |
| "learning_rate": 9.792536239439052e-06, |
| "loss": 0.401, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.5597560975609757, |
| "grad_norm": 0.6415672054488112, |
| "learning_rate": 9.786433628275735e-06, |
| "loss": 0.3783, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.5634146341463414, |
| "grad_norm": 0.6920734346987311, |
| "learning_rate": 9.780244515887856e-06, |
| "loss": 0.4043, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.5670731707317073, |
| "grad_norm": 0.7503756171018907, |
| "learning_rate": 9.773969014126084e-06, |
| "loss": 0.3724, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5707317073170731, |
| "grad_norm": 0.7117225351020292, |
| "learning_rate": 9.76760723640233e-06, |
| "loss": 0.384, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.574390243902439, |
| "grad_norm": 0.6734357880050217, |
| "learning_rate": 9.7611592976877e-06, |
| "loss": 0.3837, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5780487804878048, |
| "grad_norm": 0.7337246817368871, |
| "learning_rate": 9.754625314510416e-06, |
| "loss": 0.3997, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5817073170731707, |
| "grad_norm": 0.7045366717146077, |
| "learning_rate": 9.748005404953705e-06, |
| "loss": 0.381, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.5853658536585366, |
| "grad_norm": 0.6963782507139761, |
| "learning_rate": 9.741299688653676e-06, |
| "loss": 0.38, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5890243902439024, |
| "grad_norm": 0.7948834479120092, |
| "learning_rate": 9.734508286797148e-06, |
| "loss": 0.389, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5926829268292683, |
| "grad_norm": 0.7105686277711006, |
| "learning_rate": 9.727631322119467e-06, |
| "loss": 0.3886, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5963414634146341, |
| "grad_norm": 0.6521720402273905, |
| "learning_rate": 9.72066891890228e-06, |
| "loss": 0.3655, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.7132643040658287, |
| "learning_rate": 9.713621202971297e-06, |
| "loss": 0.385, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.6036585365853658, |
| "grad_norm": 0.7206674268681118, |
| "learning_rate": 9.706488301694013e-06, |
| "loss": 0.394, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.6073170731707317, |
| "grad_norm": 0.7241063229535367, |
| "learning_rate": 9.699270343977403e-06, |
| "loss": 0.3693, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.6109756097560975, |
| "grad_norm": 0.6824731673625768, |
| "learning_rate": 9.691967460265604e-06, |
| "loss": 0.3856, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.6146341463414634, |
| "grad_norm": 0.760082427210616, |
| "learning_rate": 9.684579782537542e-06, |
| "loss": 0.3548, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.6182926829268293, |
| "grad_norm": 0.772003373608569, |
| "learning_rate": 9.677107444304556e-06, |
| "loss": 0.3743, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.6219512195121951, |
| "grad_norm": 0.8083887172564229, |
| "learning_rate": 9.66955058060799e-06, |
| "loss": 0.3815, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.625609756097561, |
| "grad_norm": 0.8277017660896508, |
| "learning_rate": 9.661909328016739e-06, |
| "loss": 0.3837, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.6292682926829268, |
| "grad_norm": 0.7445701030987295, |
| "learning_rate": 9.654183824624789e-06, |
| "loss": 0.3909, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.6329268292682927, |
| "grad_norm": 0.7277845875454032, |
| "learning_rate": 9.646374210048723e-06, |
| "loss": 0.3739, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.6365853658536585, |
| "grad_norm": 0.9082732763858772, |
| "learning_rate": 9.638480625425197e-06, |
| "loss": 0.4024, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.6402439024390244, |
| "grad_norm": 0.6982452948788327, |
| "learning_rate": 9.630503213408383e-06, |
| "loss": 0.3744, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6439024390243903, |
| "grad_norm": 0.6744263389412143, |
| "learning_rate": 9.622442118167396e-06, |
| "loss": 0.3894, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.6475609756097561, |
| "grad_norm": 0.7729120265080925, |
| "learning_rate": 9.614297485383693e-06, |
| "loss": 0.3584, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.651219512195122, |
| "grad_norm": 0.7028666356034352, |
| "learning_rate": 9.606069462248432e-06, |
| "loss": 0.3617, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.6548780487804878, |
| "grad_norm": 0.7402052980733588, |
| "learning_rate": 9.597758197459814e-06, |
| "loss": 0.4102, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.6585365853658537, |
| "grad_norm": 0.7506506066011618, |
| "learning_rate": 9.589363841220398e-06, |
| "loss": 0.3989, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6621951219512195, |
| "grad_norm": 0.7794807064224093, |
| "learning_rate": 9.580886545234387e-06, |
| "loss": 0.3777, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.6658536585365854, |
| "grad_norm": 0.6495178330993069, |
| "learning_rate": 9.572326462704884e-06, |
| "loss": 0.3861, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.6695121951219513, |
| "grad_norm": 0.7946496357370726, |
| "learning_rate": 9.563683748331123e-06, |
| "loss": 0.3852, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.6731707317073171, |
| "grad_norm": 0.7486601013846049, |
| "learning_rate": 9.554958558305678e-06, |
| "loss": 0.397, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.676829268292683, |
| "grad_norm": 0.7088712034584526, |
| "learning_rate": 9.546151050311632e-06, |
| "loss": 0.3902, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6804878048780488, |
| "grad_norm": 0.6932437360491843, |
| "learning_rate": 9.537261383519736e-06, |
| "loss": 0.3433, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.6841463414634147, |
| "grad_norm": 0.7183303900647539, |
| "learning_rate": 9.528289718585523e-06, |
| "loss": 0.3749, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6878048780487804, |
| "grad_norm": 0.6748508130997356, |
| "learning_rate": 9.519236217646419e-06, |
| "loss": 0.3748, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.6914634146341463, |
| "grad_norm": 0.6706028292433445, |
| "learning_rate": 9.510101044318795e-06, |
| "loss": 0.3757, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6951219512195121, |
| "grad_norm": 0.6870273832569191, |
| "learning_rate": 9.500884363695025e-06, |
| "loss": 0.3638, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.698780487804878, |
| "grad_norm": 0.7321391217873353, |
| "learning_rate": 9.49158634234049e-06, |
| "loss": 0.3799, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.7024390243902439, |
| "grad_norm": 0.6003693164345862, |
| "learning_rate": 9.482207148290585e-06, |
| "loss": 0.3804, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.7060975609756097, |
| "grad_norm": 0.6583230239921988, |
| "learning_rate": 9.472746951047657e-06, |
| "loss": 0.3725, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.7097560975609756, |
| "grad_norm": 0.6742830191100991, |
| "learning_rate": 9.463205921577972e-06, |
| "loss": 0.3662, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.7134146341463414, |
| "grad_norm": 0.7010665460150319, |
| "learning_rate": 9.453584232308593e-06, |
| "loss": 0.3623, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.7170731707317073, |
| "grad_norm": 0.6448071014559433, |
| "learning_rate": 9.443882057124294e-06, |
| "loss": 0.357, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.7207317073170731, |
| "grad_norm": 0.709113132802626, |
| "learning_rate": 9.434099571364396e-06, |
| "loss": 0.385, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.724390243902439, |
| "grad_norm": 0.7570745833677422, |
| "learning_rate": 9.424236951819612e-06, |
| "loss": 0.3851, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.7280487804878049, |
| "grad_norm": 0.642564214786846, |
| "learning_rate": 9.41429437672884e-06, |
| "loss": 0.3773, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.7317073170731707, |
| "grad_norm": 0.68482160328781, |
| "learning_rate": 9.40427202577595e-06, |
| "loss": 0.3911, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7353658536585366, |
| "grad_norm": 0.690309120558691, |
| "learning_rate": 9.394170080086538e-06, |
| "loss": 0.3843, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.7390243902439024, |
| "grad_norm": 0.6769970564023863, |
| "learning_rate": 9.383988722224642e-06, |
| "loss": 0.3887, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.7426829268292683, |
| "grad_norm": 0.7074513706865134, |
| "learning_rate": 9.37372813618946e-06, |
| "loss": 0.3793, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.7463414634146341, |
| "grad_norm": 0.6777055688042114, |
| "learning_rate": 9.363388507412005e-06, |
| "loss": 0.3938, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.6720337268026889, |
| "learning_rate": 9.35297002275177e-06, |
| "loss": 0.3859, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7536585365853659, |
| "grad_norm": 0.655107581741595, |
| "learning_rate": 9.342472870493342e-06, |
| "loss": 0.386, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.7573170731707317, |
| "grad_norm": 0.6851327055789187, |
| "learning_rate": 9.331897240343001e-06, |
| "loss": 0.392, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.7609756097560976, |
| "grad_norm": 0.6556370821127617, |
| "learning_rate": 9.321243323425298e-06, |
| "loss": 0.3851, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.7646341463414634, |
| "grad_norm": 0.6401412447193511, |
| "learning_rate": 9.310511312279586e-06, |
| "loss": 0.365, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.7682926829268293, |
| "grad_norm": 0.6762861190866665, |
| "learning_rate": 9.29970140085656e-06, |
| "loss": 0.3721, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7719512195121951, |
| "grad_norm": 0.6713698619241105, |
| "learning_rate": 9.288813784514733e-06, |
| "loss": 0.3768, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.775609756097561, |
| "grad_norm": 0.6782913708342606, |
| "learning_rate": 9.277848660016921e-06, |
| "loss": 0.3826, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.7792682926829269, |
| "grad_norm": 0.7857162080237674, |
| "learning_rate": 9.266806225526677e-06, |
| "loss": 0.3876, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.7829268292682927, |
| "grad_norm": 0.7058571807350672, |
| "learning_rate": 9.255686680604712e-06, |
| "loss": 0.3775, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.7865853658536586, |
| "grad_norm": 0.6470360391059924, |
| "learning_rate": 9.244490226205294e-06, |
| "loss": 0.3676, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.7902439024390244, |
| "grad_norm": 0.693553517207013, |
| "learning_rate": 9.233217064672607e-06, |
| "loss": 0.3685, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.7939024390243903, |
| "grad_norm": 0.7721074742274187, |
| "learning_rate": 9.221867399737101e-06, |
| "loss": 0.3955, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.7975609756097561, |
| "grad_norm": 0.6937965952731138, |
| "learning_rate": 9.21044143651181e-06, |
| "loss": 0.3842, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.801219512195122, |
| "grad_norm": 0.669785255096806, |
| "learning_rate": 9.198939381488638e-06, |
| "loss": 0.3701, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.8048780487804879, |
| "grad_norm": 0.68468277903504, |
| "learning_rate": 9.187361442534641e-06, |
| "loss": 0.3744, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8085365853658537, |
| "grad_norm": 0.7085279773560048, |
| "learning_rate": 9.175707828888255e-06, |
| "loss": 0.3592, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.8121951219512196, |
| "grad_norm": 0.6347781601971861, |
| "learning_rate": 9.163978751155522e-06, |
| "loss": 0.3704, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.8158536585365853, |
| "grad_norm": 0.6200919359866742, |
| "learning_rate": 9.152174421306288e-06, |
| "loss": 0.3604, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.8195121951219512, |
| "grad_norm": 0.6154635559910653, |
| "learning_rate": 9.140295052670365e-06, |
| "loss": 0.3851, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.823170731707317, |
| "grad_norm": 0.647566035539756, |
| "learning_rate": 9.128340859933677e-06, |
| "loss": 0.364, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.8268292682926829, |
| "grad_norm": 0.6163426618840316, |
| "learning_rate": 9.116312059134386e-06, |
| "loss": 0.3703, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.8304878048780487, |
| "grad_norm": 0.6813695201145943, |
| "learning_rate": 9.104208867658977e-06, |
| "loss": 0.3801, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.8341463414634146, |
| "grad_norm": 0.6447781117989946, |
| "learning_rate": 9.092031504238343e-06, |
| "loss": 0.371, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.8378048780487805, |
| "grad_norm": 0.6488889361560148, |
| "learning_rate": 9.079780188943819e-06, |
| "loss": 0.3611, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.8414634146341463, |
| "grad_norm": 0.6892176659739193, |
| "learning_rate": 9.067455143183213e-06, |
| "loss": 0.3645, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8451219512195122, |
| "grad_norm": 0.6615908246281889, |
| "learning_rate": 9.0550565896968e-06, |
| "loss": 0.3717, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.848780487804878, |
| "grad_norm": 0.6608091922176914, |
| "learning_rate": 9.042584752553303e-06, |
| "loss": 0.3939, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.8524390243902439, |
| "grad_norm": 0.7781223334917082, |
| "learning_rate": 9.030039857145836e-06, |
| "loss": 0.3608, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.8560975609756097, |
| "grad_norm": 0.6356612947154711, |
| "learning_rate": 9.017422130187834e-06, |
| "loss": 0.3691, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.8597560975609756, |
| "grad_norm": 0.6673516460275246, |
| "learning_rate": 9.004731799708961e-06, |
| "loss": 0.3587, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.8634146341463415, |
| "grad_norm": 0.756626298213216, |
| "learning_rate": 8.991969095050976e-06, |
| "loss": 0.3814, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.8670731707317073, |
| "grad_norm": 0.7356372585319284, |
| "learning_rate": 8.979134246863598e-06, |
| "loss": 0.3651, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.8707317073170732, |
| "grad_norm": 0.7125847173556636, |
| "learning_rate": 8.966227487100346e-06, |
| "loss": 0.3758, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.874390243902439, |
| "grad_norm": 0.7382930519755999, |
| "learning_rate": 8.953249049014324e-06, |
| "loss": 0.3871, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.8780487804878049, |
| "grad_norm": 0.7666434750440383, |
| "learning_rate": 8.94019916715402e-06, |
| "loss": 0.3807, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8817073170731707, |
| "grad_norm": 0.6976662371355642, |
| "learning_rate": 8.927078077359076e-06, |
| "loss": 0.3721, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.8853658536585366, |
| "grad_norm": 0.7860831512575027, |
| "learning_rate": 8.913886016756007e-06, |
| "loss": 0.3754, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.8890243902439025, |
| "grad_norm": 0.7069750574806342, |
| "learning_rate": 8.900623223753923e-06, |
| "loss": 0.3904, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.8926829268292683, |
| "grad_norm": 0.7292537500467997, |
| "learning_rate": 8.887289938040229e-06, |
| "loss": 0.388, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.8963414634146342, |
| "grad_norm": 0.7915538577084609, |
| "learning_rate": 8.873886400576279e-06, |
| "loss": 0.3836, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.7452425515775055, |
| "learning_rate": 8.860412853593033e-06, |
| "loss": 0.3736, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.9036585365853659, |
| "grad_norm": 0.648977235676723, |
| "learning_rate": 8.846869540586671e-06, |
| "loss": 0.3678, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.9073170731707317, |
| "grad_norm": 0.6864106518210626, |
| "learning_rate": 8.8332567063142e-06, |
| "loss": 0.3823, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.9109756097560976, |
| "grad_norm": 0.7349118351330677, |
| "learning_rate": 8.819574596789025e-06, |
| "loss": 0.3761, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.9146341463414634, |
| "grad_norm": 0.7633616936437505, |
| "learning_rate": 8.805823459276501e-06, |
| "loss": 0.388, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9182926829268293, |
| "grad_norm": 0.6524916307250452, |
| "learning_rate": 8.792003542289478e-06, |
| "loss": 0.3747, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.9219512195121952, |
| "grad_norm": 0.6536188734248112, |
| "learning_rate": 8.77811509558379e-06, |
| "loss": 0.3664, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.925609756097561, |
| "grad_norm": 0.7531682065790463, |
| "learning_rate": 8.764158370153755e-06, |
| "loss": 0.3727, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.9292682926829269, |
| "grad_norm": 0.693728850312522, |
| "learning_rate": 8.75013361822764e-06, |
| "loss": 0.3674, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.9329268292682927, |
| "grad_norm": 0.657737265214606, |
| "learning_rate": 8.736041093263092e-06, |
| "loss": 0.376, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.9365853658536586, |
| "grad_norm": 0.7496080034820701, |
| "learning_rate": 8.721881049942565e-06, |
| "loss": 0.3684, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.9402439024390243, |
| "grad_norm": 0.6683122746778191, |
| "learning_rate": 8.707653744168718e-06, |
| "loss": 0.3653, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.9439024390243902, |
| "grad_norm": 0.6546707237838714, |
| "learning_rate": 8.693359433059789e-06, |
| "loss": 0.3747, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.947560975609756, |
| "grad_norm": 0.6924361208939677, |
| "learning_rate": 8.67899837494494e-06, |
| "loss": 0.3826, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.9512195121951219, |
| "grad_norm": 0.7048197646181643, |
| "learning_rate": 8.664570829359608e-06, |
| "loss": 0.3713, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9548780487804878, |
| "grad_norm": 0.6717794456103233, |
| "learning_rate": 8.650077057040794e-06, |
| "loss": 0.3863, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.9585365853658536, |
| "grad_norm": 0.6360542228266761, |
| "learning_rate": 8.635517319922359e-06, |
| "loss": 0.357, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.9621951219512195, |
| "grad_norm": 0.605097486114699, |
| "learning_rate": 8.620891881130297e-06, |
| "loss": 0.3478, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.9658536585365853, |
| "grad_norm": 0.7759870929652042, |
| "learning_rate": 8.606201004977967e-06, |
| "loss": 0.3847, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.9695121951219512, |
| "grad_norm": 0.6911181654981698, |
| "learning_rate": 8.591444956961333e-06, |
| "loss": 0.3703, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.973170731707317, |
| "grad_norm": 0.6853370502539734, |
| "learning_rate": 8.57662400375414e-06, |
| "loss": 0.3644, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.9768292682926829, |
| "grad_norm": 0.6849837602731838, |
| "learning_rate": 8.561738413203124e-06, |
| "loss": 0.3742, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.9804878048780488, |
| "grad_norm": 0.6924423604236972, |
| "learning_rate": 8.546788454323153e-06, |
| "loss": 0.3696, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.9841463414634146, |
| "grad_norm": 0.6672850829111578, |
| "learning_rate": 8.53177439729237e-06, |
| "loss": 0.3672, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.9878048780487805, |
| "grad_norm": 0.6884397982659812, |
| "learning_rate": 8.516696513447308e-06, |
| "loss": 0.3695, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9914634146341463, |
| "grad_norm": 0.6448421759053435, |
| "learning_rate": 8.501555075277997e-06, |
| "loss": 0.3525, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.9951219512195122, |
| "grad_norm": 0.643104212334238, |
| "learning_rate": 8.486350356423021e-06, |
| "loss": 0.3523, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.998780487804878, |
| "grad_norm": 0.639193169537897, |
| "learning_rate": 8.471082631664588e-06, |
| "loss": 0.3636, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.639193169537897, |
| "learning_rate": 8.455752176923561e-06, |
| "loss": 0.3744, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.0036585365853659, |
| "grad_norm": 1.1473923148744305, |
| "learning_rate": 8.440359269254468e-06, |
| "loss": 0.3204, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.0073170731707317, |
| "grad_norm": 0.6738354361167268, |
| "learning_rate": 8.424904186840495e-06, |
| "loss": 0.3185, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.0109756097560976, |
| "grad_norm": 0.569651838834323, |
| "learning_rate": 8.40938720898846e-06, |
| "loss": 0.3242, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.0146341463414634, |
| "grad_norm": 0.6512424474189245, |
| "learning_rate": 8.393808616123771e-06, |
| "loss": 0.3378, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.0182926829268293, |
| "grad_norm": 0.6936858791581263, |
| "learning_rate": 8.378168689785346e-06, |
| "loss": 0.3308, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.0219512195121951, |
| "grad_norm": 0.6563652415572625, |
| "learning_rate": 8.36246771262054e-06, |
| "loss": 0.3094, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.025609756097561, |
| "grad_norm": 0.6298667529680904, |
| "learning_rate": 8.346705968380015e-06, |
| "loss": 0.3323, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.0292682926829269, |
| "grad_norm": 0.6639245800866381, |
| "learning_rate": 8.330883741912644e-06, |
| "loss": 0.3209, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.0329268292682927, |
| "grad_norm": 0.6286428100246292, |
| "learning_rate": 8.315001319160327e-06, |
| "loss": 0.3136, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.0365853658536586, |
| "grad_norm": 0.617323964575384, |
| "learning_rate": 8.299058987152854e-06, |
| "loss": 0.292, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.0402439024390244, |
| "grad_norm": 0.6666320305150188, |
| "learning_rate": 8.283057034002699e-06, |
| "loss": 0.3275, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.0439024390243903, |
| "grad_norm": 0.6428680669935322, |
| "learning_rate": 8.26699574889982e-06, |
| "loss": 0.3153, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.0475609756097561, |
| "grad_norm": 0.637715026411299, |
| "learning_rate": 8.250875422106434e-06, |
| "loss": 0.3228, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.051219512195122, |
| "grad_norm": 0.6522273141210564, |
| "learning_rate": 8.234696344951767e-06, |
| "loss": 0.3066, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.0548780487804879, |
| "grad_norm": 0.6454285200212009, |
| "learning_rate": 8.21845880982679e-06, |
| "loss": 0.3268, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.0585365853658537, |
| "grad_norm": 0.5990451449609265, |
| "learning_rate": 8.202163110178945e-06, |
| "loss": 0.3152, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.0621951219512196, |
| "grad_norm": 0.6588671859842463, |
| "learning_rate": 8.185809540506818e-06, |
| "loss": 0.3108, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.0658536585365854, |
| "grad_norm": 0.625175506582043, |
| "learning_rate": 8.169398396354844e-06, |
| "loss": 0.2888, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.0695121951219513, |
| "grad_norm": 0.7086099623038007, |
| "learning_rate": 8.152929974307949e-06, |
| "loss": 0.3096, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.0731707317073171, |
| "grad_norm": 0.612473791527845, |
| "learning_rate": 8.136404571986194e-06, |
| "loss": 0.3147, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.076829268292683, |
| "grad_norm": 0.7424912205691075, |
| "learning_rate": 8.1198224880394e-06, |
| "loss": 0.316, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.0804878048780489, |
| "grad_norm": 0.6947218892107693, |
| "learning_rate": 8.103184022141746e-06, |
| "loss": 0.3188, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.0841463414634147, |
| "grad_norm": 0.6501089480964608, |
| "learning_rate": 8.08648947498635e-06, |
| "loss": 0.3076, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.0878048780487806, |
| "grad_norm": 0.7085015308115117, |
| "learning_rate": 8.069739148279851e-06, |
| "loss": 0.3263, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.0914634146341464, |
| "grad_norm": 0.6977632322290905, |
| "learning_rate": 8.052933344736937e-06, |
| "loss": 0.3352, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.0951219512195123, |
| "grad_norm": 0.6237640578371839, |
| "learning_rate": 8.036072368074883e-06, |
| "loss": 0.3078, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0987804878048781, |
| "grad_norm": 0.7293112733748618, |
| "learning_rate": 8.019156523008065e-06, |
| "loss": 0.3109, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.102439024390244, |
| "grad_norm": 0.6523525486092905, |
| "learning_rate": 8.002186115242447e-06, |
| "loss": 0.3204, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.1060975609756099, |
| "grad_norm": 0.6831085429924993, |
| "learning_rate": 7.985161451470061e-06, |
| "loss": 0.3188, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.1097560975609757, |
| "grad_norm": 0.6789690434687307, |
| "learning_rate": 7.968082839363462e-06, |
| "loss": 0.3266, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.1134146341463416, |
| "grad_norm": 0.6348758617481739, |
| "learning_rate": 7.95095058757017e-06, |
| "loss": 0.3121, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.1170731707317074, |
| "grad_norm": 0.6546771590816294, |
| "learning_rate": 7.933765005707085e-06, |
| "loss": 0.3207, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.120731707317073, |
| "grad_norm": 0.6642730324041832, |
| "learning_rate": 7.916526404354905e-06, |
| "loss": 0.3052, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.1243902439024391, |
| "grad_norm": 0.6578600118558376, |
| "learning_rate": 7.899235095052497e-06, |
| "loss": 0.3042, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.1280487804878048, |
| "grad_norm": 0.6315343541006101, |
| "learning_rate": 7.881891390291281e-06, |
| "loss": 0.3127, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.1317073170731708, |
| "grad_norm": 0.688043709337069, |
| "learning_rate": 7.864495603509571e-06, |
| "loss": 0.3268, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.1353658536585365, |
| "grad_norm": 0.6626118603817104, |
| "learning_rate": 7.84704804908692e-06, |
| "loss": 0.3151, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.1390243902439026, |
| "grad_norm": 0.624691719057794, |
| "learning_rate": 7.829549042338436e-06, |
| "loss": 0.3647, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.1426829268292682, |
| "grad_norm": 0.6357333460431095, |
| "learning_rate": 7.811998899509076e-06, |
| "loss": 0.2989, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.146341463414634, |
| "grad_norm": 0.627087816143451, |
| "learning_rate": 7.794397937767941e-06, |
| "loss": 0.3195, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.6715288963393866, |
| "learning_rate": 7.77674647520254e-06, |
| "loss": 0.2984, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.1536585365853658, |
| "grad_norm": 0.6767962115518564, |
| "learning_rate": 7.759044830813036e-06, |
| "loss": 0.3053, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.1573170731707316, |
| "grad_norm": 0.6207594775740026, |
| "learning_rate": 7.741293324506493e-06, |
| "loss": 0.3019, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.1609756097560975, |
| "grad_norm": 0.661920788242436, |
| "learning_rate": 7.723492277091089e-06, |
| "loss": 0.3154, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.1646341463414633, |
| "grad_norm": 0.6036319823158318, |
| "learning_rate": 7.705642010270306e-06, |
| "loss": 0.3088, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.1682926829268292, |
| "grad_norm": 0.6796660873801121, |
| "learning_rate": 7.687742846637141e-06, |
| "loss": 0.3224, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.171951219512195, |
| "grad_norm": 0.6198409436731228, |
| "learning_rate": 7.66979510966825e-06, |
| "loss": 0.3243, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.175609756097561, |
| "grad_norm": 0.6178487393901908, |
| "learning_rate": 7.651799123718126e-06, |
| "loss": 0.3315, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.1792682926829268, |
| "grad_norm": 0.6620709841506736, |
| "learning_rate": 7.63375521401322e-06, |
| "loss": 0.3286, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.1829268292682926, |
| "grad_norm": 0.6496124170177628, |
| "learning_rate": 7.615663706646063e-06, |
| "loss": 0.3096, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.1865853658536585, |
| "grad_norm": 0.5587747317986588, |
| "learning_rate": 7.597524928569391e-06, |
| "loss": 0.3208, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.1902439024390243, |
| "grad_norm": 0.6701337541172385, |
| "learning_rate": 7.579339207590216e-06, |
| "loss": 0.3142, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.1939024390243902, |
| "grad_norm": 0.6825894061442603, |
| "learning_rate": 7.561106872363911e-06, |
| "loss": 0.3081, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.197560975609756, |
| "grad_norm": 0.6201375755843246, |
| "learning_rate": 7.542828252388271e-06, |
| "loss": 0.3307, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.201219512195122, |
| "grad_norm": 0.6404653599994984, |
| "learning_rate": 7.524503677997557e-06, |
| "loss": 0.3088, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.2048780487804878, |
| "grad_norm": 0.6681252945789206, |
| "learning_rate": 7.506133480356523e-06, |
| "loss": 0.3062, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.2085365853658536, |
| "grad_norm": 0.6348173893075612, |
| "learning_rate": 7.487717991454441e-06, |
| "loss": 0.2998, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.2121951219512195, |
| "grad_norm": 0.6476835069549743, |
| "learning_rate": 7.469257544099081e-06, |
| "loss": 0.3228, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.2158536585365853, |
| "grad_norm": 0.6873002671793336, |
| "learning_rate": 7.450752471910725e-06, |
| "loss": 0.3177, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.2195121951219512, |
| "grad_norm": 0.5995570200932562, |
| "learning_rate": 7.432203109316112e-06, |
| "loss": 0.3157, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.223170731707317, |
| "grad_norm": 0.7020387116477846, |
| "learning_rate": 7.413609791542407e-06, |
| "loss": 0.3221, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.226829268292683, |
| "grad_norm": 0.6813547839022456, |
| "learning_rate": 7.394972854611142e-06, |
| "loss": 0.3105, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.2304878048780488, |
| "grad_norm": 0.6156200538885269, |
| "learning_rate": 7.376292635332142e-06, |
| "loss": 0.3019, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.2341463414634146, |
| "grad_norm": 0.6645540728481677, |
| "learning_rate": 7.3575694712974335e-06, |
| "loss": 0.3083, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.2378048780487805, |
| "grad_norm": 0.6692767414956421, |
| "learning_rate": 7.338803700875153e-06, |
| "loss": 0.3088, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.2414634146341463, |
| "grad_norm": 0.6616979629249964, |
| "learning_rate": 7.319995663203425e-06, |
| "loss": 0.3174, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.2451219512195122, |
| "grad_norm": 0.7614157176916696, |
| "learning_rate": 7.301145698184233e-06, |
| "loss": 0.3117, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.248780487804878, |
| "grad_norm": 0.5694694334732344, |
| "learning_rate": 7.282254146477281e-06, |
| "loss": 0.2977, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.252439024390244, |
| "grad_norm": 0.613501009109194, |
| "learning_rate": 7.263321349493833e-06, |
| "loss": 0.3029, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.2560975609756098, |
| "grad_norm": 0.6185975460458262, |
| "learning_rate": 7.244347649390542e-06, |
| "loss": 0.3052, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.2597560975609756, |
| "grad_norm": 0.6450583790490763, |
| "learning_rate": 7.225333389063276e-06, |
| "loss": 0.3203, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.2634146341463415, |
| "grad_norm": 0.5848954020900502, |
| "learning_rate": 7.206278912140907e-06, |
| "loss": 0.3071, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.2670731707317073, |
| "grad_norm": 0.6761598102383233, |
| "learning_rate": 7.187184562979112e-06, |
| "loss": 0.3149, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.2707317073170732, |
| "grad_norm": 0.6333122175147837, |
| "learning_rate": 7.168050686654144e-06, |
| "loss": 0.3287, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.274390243902439, |
| "grad_norm": 0.6142569877993334, |
| "learning_rate": 7.148877628956598e-06, |
| "loss": 0.3108, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.278048780487805, |
| "grad_norm": 0.5992913682315228, |
| "learning_rate": 7.1296657363851644e-06, |
| "loss": 0.3083, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.2817073170731708, |
| "grad_norm": 0.693923292682762, |
| "learning_rate": 7.110415356140357e-06, |
| "loss": 0.3241, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.2853658536585366, |
| "grad_norm": 0.6135479274958128, |
| "learning_rate": 7.091126836118249e-06, |
| "loss": 0.301, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.2890243902439025, |
| "grad_norm": 0.5671663217628563, |
| "learning_rate": 7.071800524904185e-06, |
| "loss": 0.3025, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.2926829268292683, |
| "grad_norm": 0.6340067432971223, |
| "learning_rate": 7.052436771766474e-06, |
| "loss": 0.3056, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.2963414634146342, |
| "grad_norm": 0.6749369096026892, |
| "learning_rate": 7.033035926650084e-06, |
| "loss": 0.3165, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.6601655491194676, |
| "learning_rate": 7.0135983401703125e-06, |
| "loss": 0.3082, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.303658536585366, |
| "grad_norm": 0.5790814002165854, |
| "learning_rate": 6.994124363606457e-06, |
| "loss": 0.3016, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.3073170731707318, |
| "grad_norm": 0.6462497543472036, |
| "learning_rate": 6.974614348895459e-06, |
| "loss": 0.3227, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.3109756097560976, |
| "grad_norm": 0.6818223846641225, |
| "learning_rate": 6.95506864862555e-06, |
| "loss": 0.3144, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.3146341463414635, |
| "grad_norm": 0.5953658678828784, |
| "learning_rate": 6.9354876160298764e-06, |
| "loss": 0.2803, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.3182926829268293, |
| "grad_norm": 0.6480981830399114, |
| "learning_rate": 6.915871604980115e-06, |
| "loss": 0.3359, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.3219512195121952, |
| "grad_norm": 0.7143958836163122, |
| "learning_rate": 6.89622096998008e-06, |
| "loss": 0.3112, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.325609756097561, |
| "grad_norm": 0.6281272945430706, |
| "learning_rate": 6.876536066159315e-06, |
| "loss": 0.318, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.329268292682927, |
| "grad_norm": 0.6195212204195637, |
| "learning_rate": 6.856817249266676e-06, |
| "loss": 0.3101, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.3329268292682928, |
| "grad_norm": 0.67165922544889, |
| "learning_rate": 6.837064875663901e-06, |
| "loss": 0.3055, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.3365853658536586, |
| "grad_norm": 0.6195823500964106, |
| "learning_rate": 6.817279302319171e-06, |
| "loss": 0.3302, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.3402439024390245, |
| "grad_norm": 0.6100082115147878, |
| "learning_rate": 6.797460886800658e-06, |
| "loss": 0.3242, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.34390243902439, |
| "grad_norm": 0.6440688199214216, |
| "learning_rate": 6.777609987270064e-06, |
| "loss": 0.3217, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.3475609756097562, |
| "grad_norm": 0.5621112226548872, |
| "learning_rate": 6.757726962476145e-06, |
| "loss": 0.3042, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.3512195121951218, |
| "grad_norm": 0.6592929751452373, |
| "learning_rate": 6.737812171748234e-06, |
| "loss": 0.3092, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.354878048780488, |
| "grad_norm": 0.5840319742865415, |
| "learning_rate": 6.717865974989739e-06, |
| "loss": 0.3069, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.3585365853658535, |
| "grad_norm": 0.6479111812916528, |
| "learning_rate": 6.6978887326716455e-06, |
| "loss": 0.3047, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.3621951219512196, |
| "grad_norm": 0.6405731778314568, |
| "learning_rate": 6.677880805825998e-06, |
| "loss": 0.313, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.3658536585365852, |
| "grad_norm": 0.596212161768833, |
| "learning_rate": 6.6578425560393835e-06, |
| "loss": 0.3035, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.3695121951219513, |
| "grad_norm": 0.6045424729604323, |
| "learning_rate": 6.6377743454463785e-06, |
| "loss": 0.3281, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.373170731707317, |
| "grad_norm": 0.6128263053984974, |
| "learning_rate": 6.617676536723024e-06, |
| "loss": 0.2897, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.376829268292683, |
| "grad_norm": 0.5787213510400789, |
| "learning_rate": 6.597549493080263e-06, |
| "loss": 0.2921, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.3804878048780487, |
| "grad_norm": 0.6279325443526219, |
| "learning_rate": 6.577393578257375e-06, |
| "loss": 0.3077, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.3841463414634148, |
| "grad_norm": 0.6900536831097186, |
| "learning_rate": 6.557209156515403e-06, |
| "loss": 0.3362, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.3878048780487804, |
| "grad_norm": 0.6253403111205682, |
| "learning_rate": 6.536996592630578e-06, |
| "loss": 0.2967, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.3914634146341465, |
| "grad_norm": 0.6104584449859696, |
| "learning_rate": 6.516756251887711e-06, |
| "loss": 0.3184, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.395121951219512, |
| "grad_norm": 0.6281312306081094, |
| "learning_rate": 6.496488500073608e-06, |
| "loss": 0.3126, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.3987804878048782, |
| "grad_norm": 0.5854353977761765, |
| "learning_rate": 6.476193703470454e-06, |
| "loss": 0.3034, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.4024390243902438, |
| "grad_norm": 0.5924846776205195, |
| "learning_rate": 6.455872228849182e-06, |
| "loss": 0.3122, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.40609756097561, |
| "grad_norm": 0.629553578206909, |
| "learning_rate": 6.435524443462865e-06, |
| "loss": 0.3085, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.4097560975609755, |
| "grad_norm": 0.6347794227496868, |
| "learning_rate": 6.415150715040066e-06, |
| "loss": 0.3043, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.4134146341463414, |
| "grad_norm": 0.6367266740240046, |
| "learning_rate": 6.394751411778188e-06, |
| "loss": 0.3031, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.4170731707317072, |
| "grad_norm": 0.5747360730840103, |
| "learning_rate": 6.374326902336838e-06, |
| "loss": 0.2992, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.420731707317073, |
| "grad_norm": 0.6449249430626156, |
| "learning_rate": 6.353877555831144e-06, |
| "loss": 0.302, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.424390243902439, |
| "grad_norm": 0.6542878218567583, |
| "learning_rate": 6.3334037418250975e-06, |
| "loss": 0.3015, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.4280487804878048, |
| "grad_norm": 0.6025024314363183, |
| "learning_rate": 6.312905830324871e-06, |
| "loss": 0.3028, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.4317073170731707, |
| "grad_norm": 0.6067694802806707, |
| "learning_rate": 6.292384191772128e-06, |
| "loss": 0.315, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.4353658536585365, |
| "grad_norm": 0.6232527972793704, |
| "learning_rate": 6.271839197037337e-06, |
| "loss": 0.3042, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.4390243902439024, |
| "grad_norm": 0.6135565219677418, |
| "learning_rate": 6.251271217413059e-06, |
| "loss": 0.3121, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.4426829268292682, |
| "grad_norm": 0.6028567158447554, |
| "learning_rate": 6.230680624607237e-06, |
| "loss": 0.2943, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.446341463414634, |
| "grad_norm": 0.5680751035754046, |
| "learning_rate": 6.210067790736496e-06, |
| "loss": 0.3121, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.6435335111059491, |
| "learning_rate": 6.189433088319394e-06, |
| "loss": 0.3001, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.4536585365853658, |
| "grad_norm": 0.6316933527533821, |
| "learning_rate": 6.1687768902697045e-06, |
| "loss": 0.3176, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.4573170731707317, |
| "grad_norm": 0.6257119035903327, |
| "learning_rate": 6.148099569889675e-06, |
| "loss": 0.3124, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.4609756097560975, |
| "grad_norm": 0.6504617753214657, |
| "learning_rate": 6.127401500863281e-06, |
| "loss": 0.2971, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.4646341463414634, |
| "grad_norm": 0.6085074214868041, |
| "learning_rate": 6.106683057249461e-06, |
| "loss": 0.2966, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.4682926829268292, |
| "grad_norm": 0.6654694916787747, |
| "learning_rate": 6.085944613475381e-06, |
| "loss": 0.3062, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.471951219512195, |
| "grad_norm": 0.656833640936344, |
| "learning_rate": 6.065186544329641e-06, |
| "loss": 0.3069, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.475609756097561, |
| "grad_norm": 0.639981866077395, |
| "learning_rate": 6.044409224955522e-06, |
| "loss": 0.3031, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.4792682926829268, |
| "grad_norm": 0.651818391326447, |
| "learning_rate": 6.023613030844194e-06, |
| "loss": 0.2899, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.4829268292682927, |
| "grad_norm": 0.5860131898598232, |
| "learning_rate": 6.0027983378279355e-06, |
| "loss": 0.3151, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.4865853658536585, |
| "grad_norm": 0.6329564555310796, |
| "learning_rate": 5.981965522073341e-06, |
| "loss": 0.3141, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.4902439024390244, |
| "grad_norm": 0.5343136578984965, |
| "learning_rate": 5.96111496007452e-06, |
| "loss": 0.301, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.4939024390243902, |
| "grad_norm": 0.6878892449113025, |
| "learning_rate": 5.940247028646299e-06, |
| "loss": 0.2918, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.497560975609756, |
| "grad_norm": 0.6584719645530801, |
| "learning_rate": 5.919362104917403e-06, |
| "loss": 0.3039, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.501219512195122, |
| "grad_norm": 0.6061856674952371, |
| "learning_rate": 5.898460566323649e-06, |
| "loss": 0.3071, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.5048780487804878, |
| "grad_norm": 0.6329216805871392, |
| "learning_rate": 5.877542790601116e-06, |
| "loss": 0.2989, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.5085365853658537, |
| "grad_norm": 0.5811394474667385, |
| "learning_rate": 5.856609155779327e-06, |
| "loss": 0.3084, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.5121951219512195, |
| "grad_norm": 0.6624194190479747, |
| "learning_rate": 5.835660040174413e-06, |
| "loss": 0.3199, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.5158536585365854, |
| "grad_norm": 0.5877849057929369, |
| "learning_rate": 5.814695822382274e-06, |
| "loss": 0.3062, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.5195121951219512, |
| "grad_norm": 0.6324026124729334, |
| "learning_rate": 5.793716881271742e-06, |
| "loss": 0.3069, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.523170731707317, |
| "grad_norm": 0.6541754157726875, |
| "learning_rate": 5.772723595977728e-06, |
| "loss": 0.322, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.526829268292683, |
| "grad_norm": 0.6455997368435114, |
| "learning_rate": 5.751716345894377e-06, |
| "loss": 0.3034, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.5304878048780488, |
| "grad_norm": 0.6120672144668279, |
| "learning_rate": 5.730695510668204e-06, |
| "loss": 0.3049, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.5341463414634147, |
| "grad_norm": 0.6831050808314805, |
| "learning_rate": 5.709661470191241e-06, |
| "loss": 0.3114, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.5378048780487805, |
| "grad_norm": 0.60423345440851, |
| "learning_rate": 5.688614604594165e-06, |
| "loss": 0.306, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.5414634146341464, |
| "grad_norm": 0.6579384904880561, |
| "learning_rate": 5.66755529423943e-06, |
| "loss": 0.3129, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.5451219512195122, |
| "grad_norm": 0.5655478798902573, |
| "learning_rate": 5.646483919714398e-06, |
| "loss": 0.2725, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.548780487804878, |
| "grad_norm": 0.6876809669509347, |
| "learning_rate": 5.625400861824452e-06, |
| "loss": 0.3031, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.552439024390244, |
| "grad_norm": 0.6359182181607586, |
| "learning_rate": 5.60430650158612e-06, |
| "loss": 0.3114, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.5560975609756098, |
| "grad_norm": 0.6082595644514966, |
| "learning_rate": 5.583201220220189e-06, |
| "loss": 0.325, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.5597560975609757, |
| "grad_norm": 0.6466616658146095, |
| "learning_rate": 5.562085399144815e-06, |
| "loss": 0.3052, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.5634146341463415, |
| "grad_norm": 0.5830672803700676, |
| "learning_rate": 5.5409594199686265e-06, |
| "loss": 0.2983, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.5670731707317072, |
| "grad_norm": 0.6190570589105842, |
| "learning_rate": 5.519823664483834e-06, |
| "loss": 0.2926, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.5707317073170732, |
| "grad_norm": 0.6679466145592807, |
| "learning_rate": 5.4986785146593255e-06, |
| "loss": 0.2981, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.5743902439024389, |
| "grad_norm": 0.6059070844416552, |
| "learning_rate": 5.477524352633764e-06, |
| "loss": 0.3112, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.578048780487805, |
| "grad_norm": 0.6339996136313506, |
| "learning_rate": 5.4563615607086865e-06, |
| "loss": 0.3128, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.5817073170731706, |
| "grad_norm": 0.638994673364677, |
| "learning_rate": 5.435190521341584e-06, |
| "loss": 0.3152, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.5853658536585367, |
| "grad_norm": 0.6234136145013041, |
| "learning_rate": 5.414011617139004e-06, |
| "loss": 0.2789, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.5890243902439023, |
| "grad_norm": 0.615079581586681, |
| "learning_rate": 5.392825230849626e-06, |
| "loss": 0.3015, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.5926829268292684, |
| "grad_norm": 0.6205254532976391, |
| "learning_rate": 5.371631745357344e-06, |
| "loss": 0.2921, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.596341463414634, |
| "grad_norm": 0.6076702662224442, |
| "learning_rate": 5.3504315436743545e-06, |
| "loss": 0.2941, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.6169851323637991, |
| "learning_rate": 5.329225008934228e-06, |
| "loss": 0.3107, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.6036585365853657, |
| "grad_norm": 0.5916553236301388, |
| "learning_rate": 5.308012524384986e-06, |
| "loss": 0.3044, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.6073170731707318, |
| "grad_norm": 0.6681712481979999, |
| "learning_rate": 5.286794473382178e-06, |
| "loss": 0.3116, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.6109756097560974, |
| "grad_norm": 0.5631447516908423, |
| "learning_rate": 5.2655712393819504e-06, |
| "loss": 0.3226, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.6146341463414635, |
| "grad_norm": 0.6284610548650537, |
| "learning_rate": 5.244343205934118e-06, |
| "loss": 0.2988, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.6182926829268292, |
| "grad_norm": 0.6183892400516535, |
| "learning_rate": 5.223110756675231e-06, |
| "loss": 0.3129, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.6219512195121952, |
| "grad_norm": 0.599291363344575, |
| "learning_rate": 5.201874275321642e-06, |
| "loss": 0.315, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.6256097560975609, |
| "grad_norm": 0.6001954294328656, |
| "learning_rate": 5.1806341456625785e-06, |
| "loss": 0.298, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.629268292682927, |
| "grad_norm": 0.6149268111955434, |
| "learning_rate": 5.159390751553191e-06, |
| "loss": 0.2865, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.6329268292682926, |
| "grad_norm": 0.6512843754790982, |
| "learning_rate": 5.138144476907634e-06, |
| "loss": 0.3039, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.6365853658536587, |
| "grad_norm": 0.650047956082531, |
| "learning_rate": 5.116895705692117e-06, |
| "loss": 0.3114, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.6402439024390243, |
| "grad_norm": 0.6029542336663615, |
| "learning_rate": 5.095644821917964e-06, |
| "loss": 0.3068, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.6439024390243904, |
| "grad_norm": 0.6007591221577205, |
| "learning_rate": 5.0743922096346836e-06, |
| "loss": 0.2996, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.647560975609756, |
| "grad_norm": 0.603771559433405, |
| "learning_rate": 5.053138252923019e-06, |
| "loss": 0.2952, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.651219512195122, |
| "grad_norm": 0.55661515069207, |
| "learning_rate": 5.031883335888009e-06, |
| "loss": 0.3096, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.6548780487804877, |
| "grad_norm": 0.6416454330032674, |
| "learning_rate": 5.010627842652049e-06, |
| "loss": 0.2974, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.6585365853658538, |
| "grad_norm": 0.6063267101737585, |
| "learning_rate": 4.989372157347951e-06, |
| "loss": 0.2835, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.6621951219512194, |
| "grad_norm": 0.58786831681036, |
| "learning_rate": 4.968116664111992e-06, |
| "loss": 0.3042, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.6658536585365855, |
| "grad_norm": 0.6382722947842386, |
| "learning_rate": 4.946861747076983e-06, |
| "loss": 0.2992, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.6695121951219511, |
| "grad_norm": 0.6277617762282729, |
| "learning_rate": 4.925607790365319e-06, |
| "loss": 0.3032, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.6731707317073172, |
| "grad_norm": 0.6298104723656519, |
| "learning_rate": 4.904355178082038e-06, |
| "loss": 0.314, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.6768292682926829, |
| "grad_norm": 0.6107292006492634, |
| "learning_rate": 4.883104294307884e-06, |
| "loss": 0.2942, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.680487804878049, |
| "grad_norm": 0.6176967723038016, |
| "learning_rate": 4.861855523092366e-06, |
| "loss": 0.2997, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.6841463414634146, |
| "grad_norm": 0.5913665790260473, |
| "learning_rate": 4.840609248446809e-06, |
| "loss": 0.3184, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.6878048780487804, |
| "grad_norm": 0.5953694773185858, |
| "learning_rate": 4.819365854337423e-06, |
| "loss": 0.3165, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.6914634146341463, |
| "grad_norm": 0.5707973515045525, |
| "learning_rate": 4.7981257246783595e-06, |
| "loss": 0.3104, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.6951219512195121, |
| "grad_norm": 0.6156767325990874, |
| "learning_rate": 4.776889243324772e-06, |
| "loss": 0.3151, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.698780487804878, |
| "grad_norm": 0.5877462803769106, |
| "learning_rate": 4.755656794065884e-06, |
| "loss": 0.2832, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.7024390243902439, |
| "grad_norm": 0.6205361496175391, |
| "learning_rate": 4.73442876061805e-06, |
| "loss": 0.3045, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.7060975609756097, |
| "grad_norm": 0.6228470039101209, |
| "learning_rate": 4.713205526617822e-06, |
| "loss": 0.322, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.7097560975609756, |
| "grad_norm": 0.5803220572591428, |
| "learning_rate": 4.691987475615016e-06, |
| "loss": 0.2973, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.7134146341463414, |
| "grad_norm": 0.5954242966594924, |
| "learning_rate": 4.670774991065774e-06, |
| "loss": 0.2946, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.7170731707317073, |
| "grad_norm": 0.6008261653591337, |
| "learning_rate": 4.649568456325645e-06, |
| "loss": 0.2955, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.7207317073170731, |
| "grad_norm": 0.5937214372456813, |
| "learning_rate": 4.6283682546426564e-06, |
| "loss": 0.2826, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.724390243902439, |
| "grad_norm": 0.5234628318336602, |
| "learning_rate": 4.607174769150375e-06, |
| "loss": 0.2825, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.7280487804878049, |
| "grad_norm": 0.6189630230422453, |
| "learning_rate": 4.5859883828609965e-06, |
| "loss": 0.3186, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.7317073170731707, |
| "grad_norm": 0.5793938651866555, |
| "learning_rate": 4.564809478658419e-06, |
| "loss": 0.2977, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.7353658536585366, |
| "grad_norm": 0.5736103569893777, |
| "learning_rate": 4.543638439291317e-06, |
| "loss": 0.286, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.7390243902439024, |
| "grad_norm": 0.6106146033891122, |
| "learning_rate": 4.5224756473662365e-06, |
| "loss": 0.3086, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.7426829268292683, |
| "grad_norm": 0.648329612248683, |
| "learning_rate": 4.501321485340676e-06, |
| "loss": 0.3107, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.7463414634146341, |
| "grad_norm": 0.572713588951205, |
| "learning_rate": 4.480176335516167e-06, |
| "loss": 0.3003, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.6136351450374243, |
| "learning_rate": 4.459040580031374e-06, |
| "loss": 0.2967, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.7536585365853659, |
| "grad_norm": 0.6499237376964561, |
| "learning_rate": 4.437914600855187e-06, |
| "loss": 0.3084, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.7573170731707317, |
| "grad_norm": 0.647740834520244, |
| "learning_rate": 4.41679877977981e-06, |
| "loss": 0.3137, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.7609756097560976, |
| "grad_norm": 0.6089886321475284, |
| "learning_rate": 4.3956934984138815e-06, |
| "loss": 0.307, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.7646341463414634, |
| "grad_norm": 0.5882692243451826, |
| "learning_rate": 4.374599138175551e-06, |
| "loss": 0.2998, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.7682926829268293, |
| "grad_norm": 0.6433244483480277, |
| "learning_rate": 4.353516080285603e-06, |
| "loss": 0.299, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.7719512195121951, |
| "grad_norm": 0.6514720927170986, |
| "learning_rate": 4.332444705760571e-06, |
| "loss": 0.3129, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.775609756097561, |
| "grad_norm": 0.5863962303909052, |
| "learning_rate": 4.3113853954058385e-06, |
| "loss": 0.3134, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.7792682926829269, |
| "grad_norm": 0.5733292141072323, |
| "learning_rate": 4.2903385298087595e-06, |
| "loss": 0.2879, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.7829268292682927, |
| "grad_norm": 0.5946111816840389, |
| "learning_rate": 4.269304489331797e-06, |
| "loss": 0.2974, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.7865853658536586, |
| "grad_norm": 0.609851345774362, |
| "learning_rate": 4.248283654105624e-06, |
| "loss": 0.2921, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.7902439024390244, |
| "grad_norm": 0.6321610405118764, |
| "learning_rate": 4.2272764040222724e-06, |
| "loss": 0.317, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.7939024390243903, |
| "grad_norm": 0.6372499429582906, |
| "learning_rate": 4.206283118728258e-06, |
| "loss": 0.3029, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.7975609756097561, |
| "grad_norm": 0.5820995527331286, |
| "learning_rate": 4.185304177617725e-06, |
| "loss": 0.2844, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.801219512195122, |
| "grad_norm": 0.6175292122161644, |
| "learning_rate": 4.164339959825587e-06, |
| "loss": 0.2997, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.8048780487804879, |
| "grad_norm": 0.5681456854505189, |
| "learning_rate": 4.1433908442206735e-06, |
| "loss": 0.2965, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.8085365853658537, |
| "grad_norm": 0.5924758008964693, |
| "learning_rate": 4.122457209398886e-06, |
| "loss": 0.2829, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.8121951219512196, |
| "grad_norm": 0.5996535608299406, |
| "learning_rate": 4.101539433676354e-06, |
| "loss": 0.3034, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.8158536585365854, |
| "grad_norm": 0.5829324041998495, |
| "learning_rate": 4.0806378950825996e-06, |
| "loss": 0.3024, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.819512195121951, |
| "grad_norm": 0.6081335311388287, |
| "learning_rate": 4.059752971353702e-06, |
| "loss": 0.3052, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.8231707317073171, |
| "grad_norm": 0.5616344855594736, |
| "learning_rate": 4.038885039925481e-06, |
| "loss": 0.298, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.8268292682926828, |
| "grad_norm": 0.5767878756565518, |
| "learning_rate": 4.018034477926661e-06, |
| "loss": 0.3089, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.8304878048780489, |
| "grad_norm": 0.6019923918904264, |
| "learning_rate": 3.997201662172065e-06, |
| "loss": 0.285, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.8341463414634145, |
| "grad_norm": 0.5689341408873392, |
| "learning_rate": 3.976386969155807e-06, |
| "loss": 0.301, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.8378048780487806, |
| "grad_norm": 0.6170699137265363, |
| "learning_rate": 3.9555907750444785e-06, |
| "loss": 0.3092, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.8414634146341462, |
| "grad_norm": 0.628745537543142, |
| "learning_rate": 3.934813455670359e-06, |
| "loss": 0.3016, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.8451219512195123, |
| "grad_norm": 0.6152669681872653, |
| "learning_rate": 3.914055386524621e-06, |
| "loss": 0.2917, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.848780487804878, |
| "grad_norm": 0.6118927040375713, |
| "learning_rate": 3.89331694275054e-06, |
| "loss": 0.309, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.852439024390244, |
| "grad_norm": 0.5871831149186963, |
| "learning_rate": 3.872598499136723e-06, |
| "loss": 0.3063, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.8560975609756096, |
| "grad_norm": 0.5923860193562817, |
| "learning_rate": 3.851900430110326e-06, |
| "loss": 0.3043, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.8597560975609757, |
| "grad_norm": 0.5702777294822635, |
| "learning_rate": 3.831223109730296e-06, |
| "loss": 0.276, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.8634146341463413, |
| "grad_norm": 0.5818433208728103, |
| "learning_rate": 3.810566911680607e-06, |
| "loss": 0.2936, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.8670731707317074, |
| "grad_norm": 0.6678513890057626, |
| "learning_rate": 3.789932209263506e-06, |
| "loss": 0.297, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.870731707317073, |
| "grad_norm": 0.6355121966400645, |
| "learning_rate": 3.769319375392764e-06, |
| "loss": 0.3006, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.8743902439024391, |
| "grad_norm": 0.5771949043253944, |
| "learning_rate": 3.7487287825869445e-06, |
| "loss": 0.3007, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.8780487804878048, |
| "grad_norm": 0.6045825081482409, |
| "learning_rate": 3.7281608029626636e-06, |
| "loss": 0.3039, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.8817073170731708, |
| "grad_norm": 0.5878998409142763, |
| "learning_rate": 3.707615808227872e-06, |
| "loss": 0.2949, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.8853658536585365, |
| "grad_norm": 0.6231546765696477, |
| "learning_rate": 3.6870941696751307e-06, |
| "loss": 0.295, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.8890243902439026, |
| "grad_norm": 0.5832664311621261, |
| "learning_rate": 3.6665962581749046e-06, |
| "loss": 0.3059, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.8926829268292682, |
| "grad_norm": 0.5722327245982995, |
| "learning_rate": 3.646122444168858e-06, |
| "loss": 0.3083, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.8963414634146343, |
| "grad_norm": 0.5967837161066656, |
| "learning_rate": 3.6256730976631637e-06, |
| "loss": 0.3126, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.6048351316153445, |
| "learning_rate": 3.6052485882218124e-06, |
| "loss": 0.3004, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.903658536585366, |
| "grad_norm": 0.6245134644651108, |
| "learning_rate": 3.5848492849599357e-06, |
| "loss": 0.2994, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.9073170731707316, |
| "grad_norm": 0.5619671228019864, |
| "learning_rate": 3.564475556537136e-06, |
| "loss": 0.2899, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.9109756097560977, |
| "grad_norm": 0.5927941945328951, |
| "learning_rate": 3.54412777115082e-06, |
| "loss": 0.3211, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.9146341463414633, |
| "grad_norm": 0.6136632759095979, |
| "learning_rate": 3.5238062965295493e-06, |
| "loss": 0.2929, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.9182926829268294, |
| "grad_norm": 0.6254834746812936, |
| "learning_rate": 3.5035114999263918e-06, |
| "loss": 0.2937, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.921951219512195, |
| "grad_norm": 0.6045756197387149, |
| "learning_rate": 3.48324374811229e-06, |
| "loss": 0.3078, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.9256097560975611, |
| "grad_norm": 0.6140608673385813, |
| "learning_rate": 3.463003407369424e-06, |
| "loss": 0.3162, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.9292682926829268, |
| "grad_norm": 0.5738010260945632, |
| "learning_rate": 3.442790843484598e-06, |
| "loss": 0.2968, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.9329268292682928, |
| "grad_norm": 0.6161416586651335, |
| "learning_rate": 3.4226064217426276e-06, |
| "loss": 0.2923, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.9365853658536585, |
| "grad_norm": 0.5935224400360348, |
| "learning_rate": 3.4024505069197387e-06, |
| "loss": 0.2903, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.9402439024390243, |
| "grad_norm": 0.60896816099721, |
| "learning_rate": 3.382323463276977e-06, |
| "loss": 0.307, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.9439024390243902, |
| "grad_norm": 0.639666502462028, |
| "learning_rate": 3.362225654553623e-06, |
| "loss": 0.3048, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.947560975609756, |
| "grad_norm": 0.569997113145275, |
| "learning_rate": 3.3421574439606186e-06, |
| "loss": 0.2926, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.951219512195122, |
| "grad_norm": 0.6010854819630191, |
| "learning_rate": 3.322119194174003e-06, |
| "loss": 0.2962, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.9548780487804878, |
| "grad_norm": 0.6088175492399895, |
| "learning_rate": 3.3021112673283574e-06, |
| "loss": 0.2832, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.9585365853658536, |
| "grad_norm": 0.5883233331917551, |
| "learning_rate": 3.282134025010263e-06, |
| "loss": 0.3008, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.9621951219512195, |
| "grad_norm": 0.5572804861289024, |
| "learning_rate": 3.2621878282517684e-06, |
| "loss": 0.2888, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.9658536585365853, |
| "grad_norm": 0.5796243962499318, |
| "learning_rate": 3.2422730375238566e-06, |
| "loss": 0.2803, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.9695121951219512, |
| "grad_norm": 0.5935266061720855, |
| "learning_rate": 3.222390012729938e-06, |
| "loss": 0.2994, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.973170731707317, |
| "grad_norm": 0.5662598655298823, |
| "learning_rate": 3.2025391131993443e-06, |
| "loss": 0.2883, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.976829268292683, |
| "grad_norm": 0.5753627844134035, |
| "learning_rate": 3.182720697680831e-06, |
| "loss": 0.2844, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.9804878048780488, |
| "grad_norm": 0.637210434542176, |
| "learning_rate": 3.1629351243361007e-06, |
| "loss": 0.3185, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.9841463414634146, |
| "grad_norm": 0.5627361281713845, |
| "learning_rate": 3.1431827507333257e-06, |
| "loss": 0.2941, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.9878048780487805, |
| "grad_norm": 0.5959644102363714, |
| "learning_rate": 3.1234639338406867e-06, |
| "loss": 0.3001, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.9914634146341463, |
| "grad_norm": 0.6077022077078119, |
| "learning_rate": 3.103779030019922e-06, |
| "loss": 0.308, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.9951219512195122, |
| "grad_norm": 0.6082143409326339, |
| "learning_rate": 3.0841283950198875e-06, |
| "loss": 0.2887, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.998780487804878, |
| "grad_norm": 0.5525286653885901, |
| "learning_rate": 3.064512383970124e-06, |
| "loss": 0.2702, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.5525286653885901, |
| "learning_rate": 3.044931351374451e-06, |
| "loss": 0.2863, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.0036585365853656, |
| "grad_norm": 1.1491146976563753, |
| "learning_rate": 3.025385651104542e-06, |
| "loss": 0.2299, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.0073170731707317, |
| "grad_norm": 0.5812839047916396, |
| "learning_rate": 3.0058756363935447e-06, |
| "loss": 0.2315, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.0109756097560973, |
| "grad_norm": 0.599577291742412, |
| "learning_rate": 2.9864016598296896e-06, |
| "loss": 0.2454, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.0146341463414634, |
| "grad_norm": 0.5633881485245802, |
| "learning_rate": 2.9669640733499184e-06, |
| "loss": 0.243, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.018292682926829, |
| "grad_norm": 0.5392393159339521, |
| "learning_rate": 2.9475632282335265e-06, |
| "loss": 0.2488, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.021951219512195, |
| "grad_norm": 0.6167736588501275, |
| "learning_rate": 2.928199475095816e-06, |
| "loss": 0.2482, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.0256097560975608, |
| "grad_norm": 0.6763236210901113, |
| "learning_rate": 2.908873163881752e-06, |
| "loss": 0.2512, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.029268292682927, |
| "grad_norm": 0.6732027253244217, |
| "learning_rate": 2.8895846438596462e-06, |
| "loss": 0.2346, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.0329268292682925, |
| "grad_norm": 0.5689881452286486, |
| "learning_rate": 2.870334263614838e-06, |
| "loss": 0.2458, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.0365853658536586, |
| "grad_norm": 0.6342251062567795, |
| "learning_rate": 2.8511223710434016e-06, |
| "loss": 0.2391, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.040243902439024, |
| "grad_norm": 0.5367197192306216, |
| "learning_rate": 2.8319493133458575e-06, |
| "loss": 0.2364, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.0439024390243903, |
| "grad_norm": 0.6229233192794811, |
| "learning_rate": 2.8128154370208895e-06, |
| "loss": 0.2372, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.047560975609756, |
| "grad_norm": 0.6223140169848902, |
| "learning_rate": 2.7937210878590947e-06, |
| "loss": 0.2516, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.051219512195122, |
| "grad_norm": 0.6351534138607783, |
| "learning_rate": 2.774666610936727e-06, |
| "loss": 0.24, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.0548780487804876, |
| "grad_norm": 0.6000383406556633, |
| "learning_rate": 2.755652350609459e-06, |
| "loss": 0.242, |
| "step": 563 |
| }, |
| { |
| "epoch": 2.0585365853658537, |
| "grad_norm": 0.5808977074271768, |
| "learning_rate": 2.736678650506168e-06, |
| "loss": 0.2398, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.0621951219512193, |
| "grad_norm": 0.6202232826347814, |
| "learning_rate": 2.71774585352272e-06, |
| "loss": 0.2596, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.0658536585365854, |
| "grad_norm": 0.676317002155401, |
| "learning_rate": 2.6988543018157667e-06, |
| "loss": 0.2601, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.069512195121951, |
| "grad_norm": 0.6261727462539696, |
| "learning_rate": 2.6800043367965754e-06, |
| "loss": 0.2357, |
| "step": 567 |
| }, |
| { |
| "epoch": 2.073170731707317, |
| "grad_norm": 0.5675481331948632, |
| "learning_rate": 2.6611962991248487e-06, |
| "loss": 0.2553, |
| "step": 568 |
| }, |
| { |
| "epoch": 2.0768292682926828, |
| "grad_norm": 0.5682792620688895, |
| "learning_rate": 2.642430528702568e-06, |
| "loss": 0.2423, |
| "step": 569 |
| }, |
| { |
| "epoch": 2.080487804878049, |
| "grad_norm": 0.6210900397441359, |
| "learning_rate": 2.6237073646678596e-06, |
| "loss": 0.2497, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.0841463414634145, |
| "grad_norm": 0.6185039051605767, |
| "learning_rate": 2.60502714538886e-06, |
| "loss": 0.2527, |
| "step": 571 |
| }, |
| { |
| "epoch": 2.0878048780487806, |
| "grad_norm": 0.5830870526935391, |
| "learning_rate": 2.5863902084575943e-06, |
| "loss": 0.2419, |
| "step": 572 |
| }, |
| { |
| "epoch": 2.091463414634146, |
| "grad_norm": 0.5473178750068909, |
| "learning_rate": 2.5677968906838907e-06, |
| "loss": 0.232, |
| "step": 573 |
| }, |
| { |
| "epoch": 2.0951219512195123, |
| "grad_norm": 0.5587329875528281, |
| "learning_rate": 2.5492475280892757e-06, |
| "loss": 0.2495, |
| "step": 574 |
| }, |
| { |
| "epoch": 2.098780487804878, |
| "grad_norm": 0.5596060847586221, |
| "learning_rate": 2.5307424559009196e-06, |
| "loss": 0.2379, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.102439024390244, |
| "grad_norm": 0.5447954930321499, |
| "learning_rate": 2.512282008545561e-06, |
| "loss": 0.242, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.1060975609756096, |
| "grad_norm": 0.6243081793349892, |
| "learning_rate": 2.4938665196434775e-06, |
| "loss": 0.2559, |
| "step": 577 |
| }, |
| { |
| "epoch": 2.1097560975609757, |
| "grad_norm": 0.5983554972287728, |
| "learning_rate": 2.4754963220024452e-06, |
| "loss": 0.2383, |
| "step": 578 |
| }, |
| { |
| "epoch": 2.1134146341463413, |
| "grad_norm": 0.5601175525317587, |
| "learning_rate": 2.4571717476117302e-06, |
| "loss": 0.237, |
| "step": 579 |
| }, |
| { |
| "epoch": 2.1170731707317074, |
| "grad_norm": 0.5703488707814759, |
| "learning_rate": 2.4388931276360898e-06, |
| "loss": 0.2518, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.120731707317073, |
| "grad_norm": 0.5762453114940407, |
| "learning_rate": 2.4206607924097857e-06, |
| "loss": 0.2442, |
| "step": 581 |
| }, |
| { |
| "epoch": 2.124390243902439, |
| "grad_norm": 0.5983912760451229, |
| "learning_rate": 2.4024750714306093e-06, |
| "loss": 0.2597, |
| "step": 582 |
| }, |
| { |
| "epoch": 2.1280487804878048, |
| "grad_norm": 0.5984265659585949, |
| "learning_rate": 2.384336293353938e-06, |
| "loss": 0.2311, |
| "step": 583 |
| }, |
| { |
| "epoch": 2.131707317073171, |
| "grad_norm": 0.604350894913572, |
| "learning_rate": 2.3662447859867837e-06, |
| "loss": 0.2535, |
| "step": 584 |
| }, |
| { |
| "epoch": 2.1353658536585365, |
| "grad_norm": 0.535421144423019, |
| "learning_rate": 2.3482008762818727e-06, |
| "loss": 0.2234, |
| "step": 585 |
| }, |
| { |
| "epoch": 2.1390243902439026, |
| "grad_norm": 0.5421114833925664, |
| "learning_rate": 2.3302048903317497e-06, |
| "loss": 0.2612, |
| "step": 586 |
| }, |
| { |
| "epoch": 2.142682926829268, |
| "grad_norm": 0.6046980678553843, |
| "learning_rate": 2.312257153362862e-06, |
| "loss": 0.2387, |
| "step": 587 |
| }, |
| { |
| "epoch": 2.1463414634146343, |
| "grad_norm": 0.6109825890260535, |
| "learning_rate": 2.2943579897296947e-06, |
| "loss": 0.2369, |
| "step": 588 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 0.5778782524223457, |
| "learning_rate": 2.2765077229089146e-06, |
| "loss": 0.2306, |
| "step": 589 |
| }, |
| { |
| "epoch": 2.153658536585366, |
| "grad_norm": 0.5706488907922543, |
| "learning_rate": 2.2587066754935088e-06, |
| "loss": 0.2406, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.1573170731707316, |
| "grad_norm": 0.5950872809237095, |
| "learning_rate": 2.240955169186965e-06, |
| "loss": 0.2506, |
| "step": 591 |
| }, |
| { |
| "epoch": 2.1609756097560977, |
| "grad_norm": 0.5700219691586104, |
| "learning_rate": 2.223253524797463e-06, |
| "loss": 0.2453, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.1646341463414633, |
| "grad_norm": 0.5995511680667281, |
| "learning_rate": 2.2056020622320614e-06, |
| "loss": 0.2495, |
| "step": 593 |
| }, |
| { |
| "epoch": 2.1682926829268294, |
| "grad_norm": 0.6136735169642444, |
| "learning_rate": 2.1880011004909253e-06, |
| "loss": 0.2306, |
| "step": 594 |
| }, |
| { |
| "epoch": 2.171951219512195, |
| "grad_norm": 0.6256640342184288, |
| "learning_rate": 2.170450957661566e-06, |
| "loss": 0.2329, |
| "step": 595 |
| }, |
| { |
| "epoch": 2.175609756097561, |
| "grad_norm": 0.548577114684068, |
| "learning_rate": 2.15295195091308e-06, |
| "loss": 0.2275, |
| "step": 596 |
| }, |
| { |
| "epoch": 2.1792682926829268, |
| "grad_norm": 0.5687383781384396, |
| "learning_rate": 2.135504396490429e-06, |
| "loss": 0.2321, |
| "step": 597 |
| }, |
| { |
| "epoch": 2.182926829268293, |
| "grad_norm": 0.5579917295229633, |
| "learning_rate": 2.1181086097087204e-06, |
| "loss": 0.2318, |
| "step": 598 |
| }, |
| { |
| "epoch": 2.1865853658536585, |
| "grad_norm": 0.580363538298193, |
| "learning_rate": 2.1007649049475046e-06, |
| "loss": 0.2482, |
| "step": 599 |
| }, |
| { |
| "epoch": 2.1902439024390246, |
| "grad_norm": 0.5341887911643112, |
| "learning_rate": 2.083473595645096e-06, |
| "loss": 0.2454, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.19390243902439, |
| "grad_norm": 0.6095901733738088, |
| "learning_rate": 2.066234994292916e-06, |
| "loss": 0.2525, |
| "step": 601 |
| }, |
| { |
| "epoch": 2.1975609756097563, |
| "grad_norm": 0.59396704070015, |
| "learning_rate": 2.0490494124298314e-06, |
| "loss": 0.2372, |
| "step": 602 |
| }, |
| { |
| "epoch": 2.201219512195122, |
| "grad_norm": 0.6081435264800101, |
| "learning_rate": 2.031917160636537e-06, |
| "loss": 0.2461, |
| "step": 603 |
| }, |
| { |
| "epoch": 2.204878048780488, |
| "grad_norm": 0.5609971726135861, |
| "learning_rate": 2.01483854852994e-06, |
| "loss": 0.2345, |
| "step": 604 |
| }, |
| { |
| "epoch": 2.2085365853658536, |
| "grad_norm": 0.5923610417844827, |
| "learning_rate": 1.997813884757555e-06, |
| "loss": 0.2486, |
| "step": 605 |
| }, |
| { |
| "epoch": 2.2121951219512197, |
| "grad_norm": 0.5783000329707068, |
| "learning_rate": 1.980843476991936e-06, |
| "loss": 0.2334, |
| "step": 606 |
| }, |
| { |
| "epoch": 2.2158536585365853, |
| "grad_norm": 0.5534361667872443, |
| "learning_rate": 1.9639276319251166e-06, |
| "loss": 0.2569, |
| "step": 607 |
| }, |
| { |
| "epoch": 2.2195121951219514, |
| "grad_norm": 0.5721660940023355, |
| "learning_rate": 1.947066655263064e-06, |
| "loss": 0.2516, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.223170731707317, |
| "grad_norm": 0.5824791898002271, |
| "learning_rate": 1.93026085172015e-06, |
| "loss": 0.2321, |
| "step": 609 |
| }, |
| { |
| "epoch": 2.226829268292683, |
| "grad_norm": 0.6026540847795739, |
| "learning_rate": 1.91351052501365e-06, |
| "loss": 0.2487, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.2304878048780488, |
| "grad_norm": 0.5698599518813655, |
| "learning_rate": 1.8968159778582572e-06, |
| "loss": 0.2662, |
| "step": 611 |
| }, |
| { |
| "epoch": 2.234146341463415, |
| "grad_norm": 0.6004371446298645, |
| "learning_rate": 1.8801775119606009e-06, |
| "loss": 0.2462, |
| "step": 612 |
| }, |
| { |
| "epoch": 2.2378048780487805, |
| "grad_norm": 0.5767977787961263, |
| "learning_rate": 1.8635954280138058e-06, |
| "loss": 0.25, |
| "step": 613 |
| }, |
| { |
| "epoch": 2.241463414634146, |
| "grad_norm": 0.5761299967173042, |
| "learning_rate": 1.8470700256920527e-06, |
| "loss": 0.231, |
| "step": 614 |
| }, |
| { |
| "epoch": 2.245121951219512, |
| "grad_norm": 0.6029460653486376, |
| "learning_rate": 1.8306016036451584e-06, |
| "loss": 0.2481, |
| "step": 615 |
| }, |
| { |
| "epoch": 2.2487804878048783, |
| "grad_norm": 0.603799230865757, |
| "learning_rate": 1.8141904594931836e-06, |
| "loss": 0.2491, |
| "step": 616 |
| }, |
| { |
| "epoch": 2.252439024390244, |
| "grad_norm": 0.5651935206892335, |
| "learning_rate": 1.7978368898210585e-06, |
| "loss": 0.2221, |
| "step": 617 |
| }, |
| { |
| "epoch": 2.2560975609756095, |
| "grad_norm": 0.6155871393031543, |
| "learning_rate": 1.7815411901732093e-06, |
| "loss": 0.2418, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.2597560975609756, |
| "grad_norm": 0.5822183713085968, |
| "learning_rate": 1.765303655048234e-06, |
| "loss": 0.2418, |
| "step": 619 |
| }, |
| { |
| "epoch": 2.2634146341463417, |
| "grad_norm": 0.6162680679231551, |
| "learning_rate": 1.7491245778935673e-06, |
| "loss": 0.2328, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.2670731707317073, |
| "grad_norm": 0.5734640882021516, |
| "learning_rate": 1.733004251100182e-06, |
| "loss": 0.2399, |
| "step": 621 |
| }, |
| { |
| "epoch": 2.270731707317073, |
| "grad_norm": 0.5802580047444629, |
| "learning_rate": 1.7169429659973024e-06, |
| "loss": 0.2335, |
| "step": 622 |
| }, |
| { |
| "epoch": 2.274390243902439, |
| "grad_norm": 0.5885677954780522, |
| "learning_rate": 1.7009410128471481e-06, |
| "loss": 0.2313, |
| "step": 623 |
| }, |
| { |
| "epoch": 2.278048780487805, |
| "grad_norm": 0.5805060869510791, |
| "learning_rate": 1.6849986808396746e-06, |
| "loss": 0.2489, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.2817073170731708, |
| "grad_norm": 0.5692390222760896, |
| "learning_rate": 1.6691162580873576e-06, |
| "loss": 0.2544, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.2853658536585364, |
| "grad_norm": 0.6250325114294697, |
| "learning_rate": 1.6532940316199853e-06, |
| "loss": 0.2529, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.2890243902439025, |
| "grad_norm": 0.5634074649409233, |
| "learning_rate": 1.6375322873794635e-06, |
| "loss": 0.226, |
| "step": 627 |
| }, |
| { |
| "epoch": 2.292682926829268, |
| "grad_norm": 0.5118509902258667, |
| "learning_rate": 1.6218313102146544e-06, |
| "loss": 0.2369, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.296341463414634, |
| "grad_norm": 0.5720341341375265, |
| "learning_rate": 1.60619138387623e-06, |
| "loss": 0.243, |
| "step": 629 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 0.614966015901642, |
| "learning_rate": 1.5906127910115414e-06, |
| "loss": 0.2276, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.303658536585366, |
| "grad_norm": 0.5378503989863892, |
| "learning_rate": 1.5750958131595072e-06, |
| "loss": 0.2126, |
| "step": 631 |
| }, |
| { |
| "epoch": 2.3073170731707315, |
| "grad_norm": 0.5645461452595779, |
| "learning_rate": 1.559640730745534e-06, |
| "loss": 0.2478, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.3109756097560976, |
| "grad_norm": 0.5478752076017742, |
| "learning_rate": 1.5442478230764412e-06, |
| "loss": 0.2877, |
| "step": 633 |
| }, |
| { |
| "epoch": 2.3146341463414632, |
| "grad_norm": 0.5960823599634195, |
| "learning_rate": 1.528917368335413e-06, |
| "loss": 0.2497, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.3182926829268293, |
| "grad_norm": 0.5756343891738284, |
| "learning_rate": 1.5136496435769804e-06, |
| "loss": 0.2432, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.321951219512195, |
| "grad_norm": 0.558342457601641, |
| "learning_rate": 1.4984449247220046e-06, |
| "loss": 0.2285, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.325609756097561, |
| "grad_norm": 0.5562662816415436, |
| "learning_rate": 1.4833034865526913e-06, |
| "loss": 0.2395, |
| "step": 637 |
| }, |
| { |
| "epoch": 2.3292682926829267, |
| "grad_norm": 0.5789722140208441, |
| "learning_rate": 1.4682256027076313e-06, |
| "loss": 0.2557, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.3329268292682928, |
| "grad_norm": 0.6242635332827038, |
| "learning_rate": 1.4532115456768485e-06, |
| "loss": 0.2551, |
| "step": 639 |
| }, |
| { |
| "epoch": 2.3365853658536584, |
| "grad_norm": 0.6275804931184046, |
| "learning_rate": 1.4382615867968768e-06, |
| "loss": 0.2363, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.3402439024390245, |
| "grad_norm": 0.5319917903538446, |
| "learning_rate": 1.4233759962458604e-06, |
| "loss": 0.2384, |
| "step": 641 |
| }, |
| { |
| "epoch": 2.34390243902439, |
| "grad_norm": 0.5755574311949859, |
| "learning_rate": 1.4085550430386696e-06, |
| "loss": 0.2377, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.347560975609756, |
| "grad_norm": 0.5723847122260605, |
| "learning_rate": 1.3937989950220321e-06, |
| "loss": 0.239, |
| "step": 643 |
| }, |
| { |
| "epoch": 2.351219512195122, |
| "grad_norm": 0.6309713290014296, |
| "learning_rate": 1.3791081188697047e-06, |
| "loss": 0.2441, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.354878048780488, |
| "grad_norm": 0.5355989960002594, |
| "learning_rate": 1.3644826800776434e-06, |
| "loss": 0.2412, |
| "step": 645 |
| }, |
| { |
| "epoch": 2.3585365853658535, |
| "grad_norm": 0.581694065503623, |
| "learning_rate": 1.3499229429592087e-06, |
| "loss": 0.243, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.3621951219512196, |
| "grad_norm": 0.597316677443761, |
| "learning_rate": 1.3354291706403926e-06, |
| "loss": 0.2477, |
| "step": 647 |
| }, |
| { |
| "epoch": 2.3658536585365852, |
| "grad_norm": 0.555894151254139, |
| "learning_rate": 1.3210016250550605e-06, |
| "loss": 0.228, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.3695121951219513, |
| "grad_norm": 0.5746285862835825, |
| "learning_rate": 1.3066405669402126e-06, |
| "loss": 0.2465, |
| "step": 649 |
| }, |
| { |
| "epoch": 2.373170731707317, |
| "grad_norm": 0.6008983785495297, |
| "learning_rate": 1.2923462558312827e-06, |
| "loss": 0.2502, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.376829268292683, |
| "grad_norm": 0.5750611972493124, |
| "learning_rate": 1.2781189500574354e-06, |
| "loss": 0.2622, |
| "step": 651 |
| }, |
| { |
| "epoch": 2.3804878048780487, |
| "grad_norm": 0.5678813804956473, |
| "learning_rate": 1.26395890673691e-06, |
| "loss": 0.245, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.3841463414634148, |
| "grad_norm": 0.597798261155427, |
| "learning_rate": 1.2498663817723604e-06, |
| "loss": 0.247, |
| "step": 653 |
| }, |
| { |
| "epoch": 2.3878048780487804, |
| "grad_norm": 0.5512370776781218, |
| "learning_rate": 1.2358416298462456e-06, |
| "loss": 0.235, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.3914634146341465, |
| "grad_norm": 0.5707273910330682, |
| "learning_rate": 1.2218849044162112e-06, |
| "loss": 0.2416, |
| "step": 655 |
| }, |
| { |
| "epoch": 2.395121951219512, |
| "grad_norm": 0.5839165892388363, |
| "learning_rate": 1.2079964577105241e-06, |
| "loss": 0.2536, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.398780487804878, |
| "grad_norm": 0.5839125276922039, |
| "learning_rate": 1.194176540723499e-06, |
| "loss": 0.2479, |
| "step": 657 |
| }, |
| { |
| "epoch": 2.402439024390244, |
| "grad_norm": 0.5345728457615189, |
| "learning_rate": 1.1804254032109774e-06, |
| "loss": 0.236, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.40609756097561, |
| "grad_norm": 0.5439161399421847, |
| "learning_rate": 1.1667432936858002e-06, |
| "loss": 0.2389, |
| "step": 659 |
| }, |
| { |
| "epoch": 2.4097560975609755, |
| "grad_norm": 0.5698271145521743, |
| "learning_rate": 1.1531304594133297e-06, |
| "loss": 0.2324, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.4134146341463416, |
| "grad_norm": 0.5160638872772833, |
| "learning_rate": 1.139587146406969e-06, |
| "loss": 0.2314, |
| "step": 661 |
| }, |
| { |
| "epoch": 2.4170731707317072, |
| "grad_norm": 0.5351749178167325, |
| "learning_rate": 1.1261135994237204e-06, |
| "loss": 0.2353, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.4207317073170733, |
| "grad_norm": 0.5698909345124533, |
| "learning_rate": 1.1127100619597715e-06, |
| "loss": 0.2523, |
| "step": 663 |
| }, |
| { |
| "epoch": 2.424390243902439, |
| "grad_norm": 0.5686270061728808, |
| "learning_rate": 1.0993767762460777e-06, |
| "loss": 0.2381, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.428048780487805, |
| "grad_norm": 0.6150451380411043, |
| "learning_rate": 1.0861139832439938e-06, |
| "loss": 0.2365, |
| "step": 665 |
| }, |
| { |
| "epoch": 2.4317073170731707, |
| "grad_norm": 0.5493712156080535, |
| "learning_rate": 1.0729219226409242e-06, |
| "loss": 0.2361, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.4353658536585368, |
| "grad_norm": 0.571359468750548, |
| "learning_rate": 1.0598008328459797e-06, |
| "loss": 0.2281, |
| "step": 667 |
| }, |
| { |
| "epoch": 2.4390243902439024, |
| "grad_norm": 0.567709087384941, |
| "learning_rate": 1.0467509509856772e-06, |
| "loss": 0.2234, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.4426829268292685, |
| "grad_norm": 0.5373694816069636, |
| "learning_rate": 1.0337725128996544e-06, |
| "loss": 0.2348, |
| "step": 669 |
| }, |
| { |
| "epoch": 2.446341463414634, |
| "grad_norm": 0.6077276375613963, |
| "learning_rate": 1.020865753136402e-06, |
| "loss": 0.2323, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 0.5085168375614834, |
| "learning_rate": 1.008030904949026e-06, |
| "loss": 0.2205, |
| "step": 671 |
| }, |
| { |
| "epoch": 2.453658536585366, |
| "grad_norm": 0.5897640252687923, |
| "learning_rate": 9.952682002910412e-07, |
| "loss": 0.2398, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.457317073170732, |
| "grad_norm": 0.5547114702268138, |
| "learning_rate": 9.825778698121663e-07, |
| "loss": 0.2377, |
| "step": 673 |
| }, |
| { |
| "epoch": 2.4609756097560975, |
| "grad_norm": 0.5983889476544828, |
| "learning_rate": 9.69960142854165e-07, |
| "loss": 0.2521, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.4646341463414636, |
| "grad_norm": 0.5493142204332745, |
| "learning_rate": 9.574152474466986e-07, |
| "loss": 0.2572, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.4682926829268292, |
| "grad_norm": 0.5719327457080328, |
| "learning_rate": 9.449434103032018e-07, |
| "loss": 0.2476, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.471951219512195, |
| "grad_norm": 0.5785120866063957, |
| "learning_rate": 9.325448568167888e-07, |
| "loss": 0.2397, |
| "step": 677 |
| }, |
| { |
| "epoch": 2.475609756097561, |
| "grad_norm": 0.5671550898722578, |
| "learning_rate": 9.202198110561817e-07, |
| "loss": 0.2486, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.479268292682927, |
| "grad_norm": 0.5765095536540357, |
| "learning_rate": 9.07968495761658e-07, |
| "loss": 0.2427, |
| "step": 679 |
| }, |
| { |
| "epoch": 2.4829268292682927, |
| "grad_norm": 0.5484215020688389, |
| "learning_rate": 8.957911323410229e-07, |
| "loss": 0.2417, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.4865853658536583, |
| "grad_norm": 0.6023263022216668, |
| "learning_rate": 8.836879408656157e-07, |
| "loss": 0.2429, |
| "step": 681 |
| }, |
| { |
| "epoch": 2.4902439024390244, |
| "grad_norm": 0.5797747760934718, |
| "learning_rate": 8.716591400663249e-07, |
| "loss": 0.2281, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.4939024390243905, |
| "grad_norm": 0.572555201842755, |
| "learning_rate": 8.59704947329637e-07, |
| "loss": 0.2401, |
| "step": 683 |
| }, |
| { |
| "epoch": 2.497560975609756, |
| "grad_norm": 0.5415634954670027, |
| "learning_rate": 8.478255786937129e-07, |
| "loss": 0.229, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.5012195121951217, |
| "grad_norm": 0.5113662473102487, |
| "learning_rate": 8.360212488444797e-07, |
| "loss": 0.2422, |
| "step": 685 |
| }, |
| { |
| "epoch": 2.504878048780488, |
| "grad_norm": 0.603131680716288, |
| "learning_rate": 8.242921711117469e-07, |
| "loss": 0.2449, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.508536585365854, |
| "grad_norm": 0.5711686630435312, |
| "learning_rate": 8.126385574653606e-07, |
| "loss": 0.2442, |
| "step": 687 |
| }, |
| { |
| "epoch": 2.5121951219512195, |
| "grad_norm": 0.5710312547579648, |
| "learning_rate": 8.010606185113628e-07, |
| "loss": 0.2509, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.515853658536585, |
| "grad_norm": 0.6000591180723375, |
| "learning_rate": 7.89558563488192e-07, |
| "loss": 0.2322, |
| "step": 689 |
| }, |
| { |
| "epoch": 2.5195121951219512, |
| "grad_norm": 0.6188613367700759, |
| "learning_rate": 7.781326002628991e-07, |
| "loss": 0.2375, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.5231707317073173, |
| "grad_norm": 0.6233174102298019, |
| "learning_rate": 7.667829353273943e-07, |
| "loss": 0.2379, |
| "step": 691 |
| }, |
| { |
| "epoch": 2.526829268292683, |
| "grad_norm": 0.549218628145928, |
| "learning_rate": 7.555097737947076e-07, |
| "loss": 0.2409, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.5304878048780486, |
| "grad_norm": 0.5650966641823335, |
| "learning_rate": 7.443133193952884e-07, |
| "loss": 0.2493, |
| "step": 693 |
| }, |
| { |
| "epoch": 2.5341463414634147, |
| "grad_norm": 0.5794507269900794, |
| "learning_rate": 7.331937744733248e-07, |
| "loss": 0.2436, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.5378048780487807, |
| "grad_norm": 0.6045788546182774, |
| "learning_rate": 7.221513399830798e-07, |
| "loss": 0.2614, |
| "step": 695 |
| }, |
| { |
| "epoch": 2.5414634146341464, |
| "grad_norm": 0.5938491421154302, |
| "learning_rate": 7.111862154852672e-07, |
| "loss": 0.2303, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.545121951219512, |
| "grad_norm": 0.5442083357758543, |
| "learning_rate": 7.002985991434418e-07, |
| "loss": 0.245, |
| "step": 697 |
| }, |
| { |
| "epoch": 2.548780487804878, |
| "grad_norm": 0.5712927405061993, |
| "learning_rate": 6.894886877204155e-07, |
| "loss": 0.2395, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.552439024390244, |
| "grad_norm": 0.5546735475244872, |
| "learning_rate": 6.78756676574704e-07, |
| "loss": 0.2296, |
| "step": 699 |
| }, |
| { |
| "epoch": 2.55609756097561, |
| "grad_norm": 0.5273016852140356, |
| "learning_rate": 6.681027596569988e-07, |
| "loss": 0.2305, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.5597560975609754, |
| "grad_norm": 0.5613527654490458, |
| "learning_rate": 6.575271295066593e-07, |
| "loss": 0.2318, |
| "step": 701 |
| }, |
| { |
| "epoch": 2.5634146341463415, |
| "grad_norm": 0.5617712758782253, |
| "learning_rate": 6.470299772482307e-07, |
| "loss": 0.2424, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.567073170731707, |
| "grad_norm": 0.5657700489243026, |
| "learning_rate": 6.366114925879962e-07, |
| "loss": 0.2368, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.5707317073170732, |
| "grad_norm": 0.5708454774862058, |
| "learning_rate": 6.262718638105425e-07, |
| "loss": 0.2343, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.574390243902439, |
| "grad_norm": 0.5409052342393588, |
| "learning_rate": 6.160112777753585e-07, |
| "loss": 0.2451, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.578048780487805, |
| "grad_norm": 0.5513303692706916, |
| "learning_rate": 6.058299199134637e-07, |
| "loss": 0.2414, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.5817073170731706, |
| "grad_norm": 0.5529279063327968, |
| "learning_rate": 5.957279742240507e-07, |
| "loss": 0.2272, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.5853658536585367, |
| "grad_norm": 0.5405182573819264, |
| "learning_rate": 5.857056232711611e-07, |
| "loss": 0.2251, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.5890243902439023, |
| "grad_norm": 0.5289491829969317, |
| "learning_rate": 5.757630481803889e-07, |
| "loss": 0.2375, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.5926829268292684, |
| "grad_norm": 0.5452238409528825, |
| "learning_rate": 5.659004286356045e-07, |
| "loss": 0.2319, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.596341463414634, |
| "grad_norm": 0.6172680232767701, |
| "learning_rate": 5.561179428757063e-07, |
| "loss": 0.241, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.5346562294926773, |
| "learning_rate": 5.464157676914078e-07, |
| "loss": 0.2358, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.6036585365853657, |
| "grad_norm": 0.5682283402698644, |
| "learning_rate": 5.367940784220305e-07, |
| "loss": 0.246, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.607317073170732, |
| "grad_norm": 0.5468426751424924, |
| "learning_rate": 5.272530489523425e-07, |
| "loss": 0.2589, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.6109756097560974, |
| "grad_norm": 0.5748997644311935, |
| "learning_rate": 5.177928517094166e-07, |
| "loss": 0.2341, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.6146341463414635, |
| "grad_norm": 0.5751311413606573, |
| "learning_rate": 5.0841365765951e-07, |
| "loss": 0.2486, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.618292682926829, |
| "grad_norm": 0.5490008995229373, |
| "learning_rate": 4.991156363049765e-07, |
| "loss": 0.2442, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.6219512195121952, |
| "grad_norm": 0.5750789304658456, |
| "learning_rate": 4.89898955681205e-07, |
| "loss": 0.2538, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.625609756097561, |
| "grad_norm": 0.5989073598299415, |
| "learning_rate": 4.807637823535821e-07, |
| "loss": 0.2465, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.629268292682927, |
| "grad_norm": 0.595299686305544, |
| "learning_rate": 4.7171028141447693e-07, |
| "loss": 0.2422, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.6329268292682926, |
| "grad_norm": 0.5554287718554282, |
| "learning_rate": 4.627386164802661e-07, |
| "loss": 0.2527, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.6365853658536587, |
| "grad_norm": 0.5661580070133602, |
| "learning_rate": 4.538489496883686e-07, |
| "loss": 0.2446, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.6402439024390243, |
| "grad_norm": 0.5256299309600876, |
| "learning_rate": 4.450414416943233e-07, |
| "loss": 0.2343, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.6439024390243904, |
| "grad_norm": 0.5594023249360274, |
| "learning_rate": 4.363162516688774e-07, |
| "loss": 0.2277, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.647560975609756, |
| "grad_norm": 0.547174675788788, |
| "learning_rate": 4.2767353729511796e-07, |
| "loss": 0.2401, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.651219512195122, |
| "grad_norm": 0.5698482629553446, |
| "learning_rate": 4.191134547656145e-07, |
| "loss": 0.2367, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.6548780487804877, |
| "grad_norm": 0.5245951306910515, |
| "learning_rate": 4.1063615877960427e-07, |
| "loss": 0.2594, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.658536585365854, |
| "grad_norm": 0.5429005359039983, |
| "learning_rate": 4.0224180254018807e-07, |
| "loss": 0.2364, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.6621951219512194, |
| "grad_norm": 0.5582877597327396, |
| "learning_rate": 3.9393053775156955e-07, |
| "loss": 0.2412, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.6658536585365855, |
| "grad_norm": 0.5966956037127704, |
| "learning_rate": 3.8570251461630735e-07, |
| "loss": 0.2292, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.669512195121951, |
| "grad_norm": 0.5522903020151757, |
| "learning_rate": 3.775578818326048e-07, |
| "loss": 0.2335, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.6731707317073172, |
| "grad_norm": 0.5229730045275881, |
| "learning_rate": 3.6949678659161827e-07, |
| "loss": 0.2399, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.676829268292683, |
| "grad_norm": 0.5639593382334762, |
| "learning_rate": 3.615193745748036e-07, |
| "loss": 0.2283, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.680487804878049, |
| "grad_norm": 0.5951326633000662, |
| "learning_rate": 3.536257899512768e-07, |
| "loss": 0.2441, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.6841463414634146, |
| "grad_norm": 0.5688456984898208, |
| "learning_rate": 3.458161753752126e-07, |
| "loss": 0.2319, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.68780487804878, |
| "grad_norm": 0.5290898917591977, |
| "learning_rate": 3.380906719832627e-07, |
| "loss": 0.2555, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.6914634146341463, |
| "grad_norm": 0.5418544486569097, |
| "learning_rate": 3.3044941939201104e-07, |
| "loss": 0.2251, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.6951219512195124, |
| "grad_norm": 0.5400829302955231, |
| "learning_rate": 3.228925556954443e-07, |
| "loss": 0.2469, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.698780487804878, |
| "grad_norm": 0.5613160759441016, |
| "learning_rate": 3.1542021746245934e-07, |
| "loss": 0.2445, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.7024390243902436, |
| "grad_norm": 0.5824767170530214, |
| "learning_rate": 3.080325397343969e-07, |
| "loss": 0.2408, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.7060975609756097, |
| "grad_norm": 0.5395944310522093, |
| "learning_rate": 3.007296560225975e-07, |
| "loss": 0.2526, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.709756097560976, |
| "grad_norm": 0.5242409861099263, |
| "learning_rate": 2.935116983059888e-07, |
| "loss": 0.2302, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.7134146341463414, |
| "grad_norm": 0.5913937186515237, |
| "learning_rate": 2.8637879702870407e-07, |
| "loss": 0.2344, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.717073170731707, |
| "grad_norm": 0.5906361021677703, |
| "learning_rate": 2.7933108109772066e-07, |
| "loss": 0.2351, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.720731707317073, |
| "grad_norm": 0.5949066439617051, |
| "learning_rate": 2.7236867788053343e-07, |
| "loss": 0.2373, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.7243902439024392, |
| "grad_norm": 0.5471386508447823, |
| "learning_rate": 2.6549171320285226e-07, |
| "loss": 0.2352, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.728048780487805, |
| "grad_norm": 0.5552310997986438, |
| "learning_rate": 2.5870031134632543e-07, |
| "loss": 0.2278, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.7317073170731705, |
| "grad_norm": 0.5591003186791976, |
| "learning_rate": 2.519945950462965e-07, |
| "loss": 0.236, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.7353658536585366, |
| "grad_norm": 0.6216695411274681, |
| "learning_rate": 2.453746854895861e-07, |
| "loss": 0.2592, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.7390243902439027, |
| "grad_norm": 0.5427018458395848, |
| "learning_rate": 2.388407023123007e-07, |
| "loss": 0.235, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.7426829268292683, |
| "grad_norm": 0.5646945272289545, |
| "learning_rate": 2.3239276359767025e-07, |
| "loss": 0.2542, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.746341463414634, |
| "grad_norm": 0.5571032201299158, |
| "learning_rate": 2.2603098587391737e-07, |
| "loss": 0.2386, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.544300301663097, |
| "learning_rate": 2.1975548411214577e-07, |
| "loss": 0.2376, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.753658536585366, |
| "grad_norm": 0.5797633979529553, |
| "learning_rate": 2.1356637172426697e-07, |
| "loss": 0.245, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.7573170731707317, |
| "grad_norm": 0.5332930378597391, |
| "learning_rate": 2.074637605609492e-07, |
| "loss": 0.2382, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.7609756097560973, |
| "grad_norm": 0.5798577844773429, |
| "learning_rate": 2.0144776090959718e-07, |
| "loss": 0.2428, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.7646341463414634, |
| "grad_norm": 0.6090360913976046, |
| "learning_rate": 1.9551848149235607e-07, |
| "loss": 0.245, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.7682926829268295, |
| "grad_norm": 0.5839943430547335, |
| "learning_rate": 1.8967602946415088e-07, |
| "loss": 0.2563, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.771951219512195, |
| "grad_norm": 0.5258252983191701, |
| "learning_rate": 1.8392051041074498e-07, |
| "loss": 0.2343, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.7756097560975608, |
| "grad_norm": 0.5506279521494741, |
| "learning_rate": 1.782520283468364e-07, |
| "loss": 0.2446, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.779268292682927, |
| "grad_norm": 0.5634761604767314, |
| "learning_rate": 1.7267068571417633e-07, |
| "loss": 0.2398, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.782926829268293, |
| "grad_norm": 0.5980586546172675, |
| "learning_rate": 1.671765833797162e-07, |
| "loss": 0.2429, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.7865853658536586, |
| "grad_norm": 0.5500214071241183, |
| "learning_rate": 1.6176982063378754e-07, |
| "loss": 0.2495, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.790243902439024, |
| "grad_norm": 0.575157278926917, |
| "learning_rate": 1.5645049518830614e-07, |
| "loss": 0.2462, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.7939024390243903, |
| "grad_norm": 0.5445436607053848, |
| "learning_rate": 1.512187031750062e-07, |
| "loss": 0.2324, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.7975609756097564, |
| "grad_norm": 0.5329228786605588, |
| "learning_rate": 1.4607453914370185e-07, |
| "loss": 0.2338, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.801219512195122, |
| "grad_norm": 0.5536006141829138, |
| "learning_rate": 1.410180960605817e-07, |
| "loss": 0.2276, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.8048780487804876, |
| "grad_norm": 0.5501904220602625, |
| "learning_rate": 1.3604946530652695e-07, |
| "loss": 0.2493, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.8085365853658537, |
| "grad_norm": 0.5609170603936632, |
| "learning_rate": 1.3116873667545827e-07, |
| "loss": 0.2351, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.81219512195122, |
| "grad_norm": 0.5594953404382451, |
| "learning_rate": 1.263759983727142e-07, |
| "loss": 0.2418, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.8158536585365854, |
| "grad_norm": 0.5575873120930057, |
| "learning_rate": 1.2167133701345979e-07, |
| "loss": 0.2291, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.819512195121951, |
| "grad_norm": 0.5430632979708804, |
| "learning_rate": 1.1705483762111725e-07, |
| "loss": 0.2401, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.823170731707317, |
| "grad_norm": 0.5474457581234221, |
| "learning_rate": 1.1252658362583102e-07, |
| "loss": 0.2497, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.8268292682926828, |
| "grad_norm": 0.5688470281131273, |
| "learning_rate": 1.0808665686296072e-07, |
| "loss": 0.231, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.830487804878049, |
| "grad_norm": 0.5254433758139256, |
| "learning_rate": 1.0373513757160114e-07, |
| "loss": 0.2255, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.8341463414634145, |
| "grad_norm": 0.5578309858607658, |
| "learning_rate": 9.947210439313237e-08, |
| "loss": 0.2524, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.8378048780487806, |
| "grad_norm": 0.5999128731542565, |
| "learning_rate": 9.529763436979923e-08, |
| "loss": 0.2532, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.841463414634146, |
| "grad_norm": 0.5547526000184041, |
| "learning_rate": 9.121180294331844e-08, |
| "loss": 0.246, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.8451219512195123, |
| "grad_norm": 0.6243311864358262, |
| "learning_rate": 8.721468395351428e-08, |
| "loss": 0.2481, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.848780487804878, |
| "grad_norm": 0.5407753899453527, |
| "learning_rate": 8.33063496369868e-08, |
| "loss": 0.2482, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.852439024390244, |
| "grad_norm": 0.5475952015446557, |
| "learning_rate": 7.948687062580341e-08, |
| "loss": 0.2431, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.8560975609756096, |
| "grad_norm": 0.5804952728263567, |
| "learning_rate": 7.575631594622323e-08, |
| "loss": 0.2261, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.8597560975609757, |
| "grad_norm": 0.5610293885829224, |
| "learning_rate": 7.211475301745264e-08, |
| "loss": 0.2589, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.8634146341463413, |
| "grad_norm": 0.571595545059285, |
| "learning_rate": 6.856224765042163e-08, |
| "loss": 0.2307, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.8670731707317074, |
| "grad_norm": 0.5078487348143493, |
| "learning_rate": 6.509886404659715e-08, |
| "loss": 0.2455, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.870731707317073, |
| "grad_norm": 0.5626791514208339, |
| "learning_rate": 6.172466479682449e-08, |
| "loss": 0.2458, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.874390243902439, |
| "grad_norm": 0.5917030136132226, |
| "learning_rate": 5.8439710880194287e-08, |
| "loss": 0.2314, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.8780487804878048, |
| "grad_norm": 0.5405338171059761, |
| "learning_rate": 5.5244061662937944e-08, |
| "loss": 0.2348, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.881707317073171, |
| "grad_norm": 0.5748908534192652, |
| "learning_rate": 5.213777489736227e-08, |
| "loss": 0.2552, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.8853658536585365, |
| "grad_norm": 0.5942895745694118, |
| "learning_rate": 4.91209067207965e-08, |
| "loss": 0.2433, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.8890243902439026, |
| "grad_norm": 0.5166417037795502, |
| "learning_rate": 4.6193511654584186e-08, |
| "loss": 0.2446, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.892682926829268, |
| "grad_norm": 0.5507990948723721, |
| "learning_rate": 4.335564260309677e-08, |
| "loss": 0.2297, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.8963414634146343, |
| "grad_norm": 0.5781950059218034, |
| "learning_rate": 4.06073508527749e-08, |
| "loss": 0.2421, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 0.5749833983841531, |
| "learning_rate": 3.794868607120417e-08, |
| "loss": 0.2341, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.903658536585366, |
| "grad_norm": 0.5754752915871355, |
| "learning_rate": 3.537969630621752e-08, |
| "loss": 0.2641, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.9073170731707316, |
| "grad_norm": 0.5791697991229936, |
| "learning_rate": 3.290042798502424e-08, |
| "loss": 0.2384, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.9109756097560977, |
| "grad_norm": 0.5835307370273534, |
| "learning_rate": 3.051092591337401e-08, |
| "loss": 0.229, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.9146341463414633, |
| "grad_norm": 0.5803370436780367, |
| "learning_rate": 2.8211233274745842e-08, |
| "loss": 0.2343, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.9182926829268294, |
| "grad_norm": 0.5343132160568691, |
| "learning_rate": 2.600139162956761e-08, |
| "loss": 0.2608, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.921951219512195, |
| "grad_norm": 0.5396281266368242, |
| "learning_rate": 2.388144091446498e-08, |
| "loss": 0.2433, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.925609756097561, |
| "grad_norm": 0.5372475354081403, |
| "learning_rate": 2.185141944153979e-08, |
| "loss": 0.2335, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.9292682926829268, |
| "grad_norm": 0.5591750381484034, |
| "learning_rate": 1.9911363897677228e-08, |
| "loss": 0.23, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.932926829268293, |
| "grad_norm": 0.5408723741122057, |
| "learning_rate": 1.8061309343884724e-08, |
| "loss": 0.2289, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.9365853658536585, |
| "grad_norm": 0.565184007294025, |
| "learning_rate": 1.6301289214655236e-08, |
| "loss": 0.2306, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.9402439024390246, |
| "grad_norm": 0.5403935578429421, |
| "learning_rate": 1.4631335317365492e-08, |
| "loss": 0.2377, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.94390243902439, |
| "grad_norm": 0.6324858665100218, |
| "learning_rate": 1.3051477831699798e-08, |
| "loss": 0.2416, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.947560975609756, |
| "grad_norm": 0.5737715575991524, |
| "learning_rate": 1.1561745309105476e-08, |
| "loss": 0.242, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.951219512195122, |
| "grad_norm": 0.5115828715456013, |
| "learning_rate": 1.0162164672276598e-08, |
| "loss": 0.2389, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.954878048780488, |
| "grad_norm": 0.5537564527003395, |
| "learning_rate": 8.852761214666605e-09, |
| "loss": 0.2476, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.9585365853658536, |
| "grad_norm": 0.5424642804569867, |
| "learning_rate": 7.633558600033675e-09, |
| "loss": 0.2597, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.9621951219512193, |
| "grad_norm": 0.5705724169258695, |
| "learning_rate": 6.504578862009392e-09, |
| "loss": 0.2504, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.9658536585365853, |
| "grad_norm": 0.5844841317025147, |
| "learning_rate": 5.4658424037029585e-09, |
| "loss": 0.2394, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.9695121951219514, |
| "grad_norm": 0.5612989699157095, |
| "learning_rate": 4.5173679973337105e-09, |
| "loss": 0.2462, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.973170731707317, |
| "grad_norm": 0.5745256781886607, |
| "learning_rate": 3.659172783887499e-09, |
| "loss": 0.2412, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.9768292682926827, |
| "grad_norm": 0.5532600490739356, |
| "learning_rate": 2.89127227281194e-09, |
| "loss": 0.2325, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.9804878048780488, |
| "grad_norm": 0.5558431587776496, |
| "learning_rate": 2.213680341732194e-09, |
| "loss": 0.241, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.984146341463415, |
| "grad_norm": 0.5556399642651051, |
| "learning_rate": 1.6264092362028306e-09, |
| "loss": 0.2213, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.9878048780487805, |
| "grad_norm": 0.5730682042333336, |
| "learning_rate": 1.1294695694841207e-09, |
| "loss": 0.2343, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.991463414634146, |
| "grad_norm": 0.581621235538938, |
| "learning_rate": 7.228703223532974e-10, |
| "loss": 0.2399, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.995121951219512, |
| "grad_norm": 0.5575710843072575, |
| "learning_rate": 4.0661884293913266e-10, |
| "loss": 0.2405, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.9987804878048783, |
| "grad_norm": 0.5537487533940493, |
| "learning_rate": 1.8072084659093158e-10, |
| "loss": 0.2436, |
| "step": 821 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.067961677556421, |
| "learning_rate": 4.518041577472598e-11, |
| "loss": 0.2422, |
| "step": 822 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 822, |
| "total_flos": 370118663536640.0, |
| "train_loss": 0.32970622370422903, |
| "train_runtime": 8808.919, |
| "train_samples_per_second": 8.934, |
| "train_steps_per_second": 0.093 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 822, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": -822, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 370118663536640.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |