| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 334, |
| "global_step": 334, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0029940119760479044, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1e-05, |
| "loss": 2.5197, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005988023952095809, |
| "grad_norm": 0.65625, |
| "learning_rate": 9.970059880239523e-06, |
| "loss": 2.5142, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.008982035928143712, |
| "grad_norm": 0.62890625, |
| "learning_rate": 9.940119760479042e-06, |
| "loss": 2.4901, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.011976047904191617, |
| "grad_norm": 0.62109375, |
| "learning_rate": 9.910179640718564e-06, |
| "loss": 2.4185, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.014970059880239521, |
| "grad_norm": 0.60546875, |
| "learning_rate": 9.880239520958084e-06, |
| "loss": 2.4441, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.017964071856287425, |
| "grad_norm": 0.55859375, |
| "learning_rate": 9.850299401197606e-06, |
| "loss": 2.3861, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.020958083832335328, |
| "grad_norm": 0.51953125, |
| "learning_rate": 9.820359281437127e-06, |
| "loss": 2.2858, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.023952095808383235, |
| "grad_norm": 0.54296875, |
| "learning_rate": 9.790419161676647e-06, |
| "loss": 2.3769, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02694610778443114, |
| "grad_norm": 0.50390625, |
| "learning_rate": 9.760479041916169e-06, |
| "loss": 2.2785, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.029940119760479042, |
| "grad_norm": 0.50390625, |
| "learning_rate": 9.73053892215569e-06, |
| "loss": 2.272, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03293413173652695, |
| "grad_norm": 0.45703125, |
| "learning_rate": 9.70059880239521e-06, |
| "loss": 2.1637, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03592814371257485, |
| "grad_norm": 0.47265625, |
| "learning_rate": 9.670658682634732e-06, |
| "loss": 2.2051, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.038922155688622756, |
| "grad_norm": 0.474609375, |
| "learning_rate": 9.640718562874252e-06, |
| "loss": 2.1898, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.041916167664670656, |
| "grad_norm": 0.455078125, |
| "learning_rate": 9.610778443113773e-06, |
| "loss": 2.1575, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04491017964071856, |
| "grad_norm": 0.447265625, |
| "learning_rate": 9.580838323353295e-06, |
| "loss": 2.0706, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04790419161676647, |
| "grad_norm": 0.45703125, |
| "learning_rate": 9.550898203592815e-06, |
| "loss": 2.1116, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05089820359281437, |
| "grad_norm": 0.44921875, |
| "learning_rate": 9.520958083832336e-06, |
| "loss": 2.0522, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05389221556886228, |
| "grad_norm": 0.453125, |
| "learning_rate": 9.491017964071856e-06, |
| "loss": 2.0473, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05688622754491018, |
| "grad_norm": 0.4375, |
| "learning_rate": 9.461077844311378e-06, |
| "loss": 2.0157, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.059880239520958084, |
| "grad_norm": 0.40234375, |
| "learning_rate": 9.4311377245509e-06, |
| "loss": 1.9766, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06287425149700598, |
| "grad_norm": 0.40234375, |
| "learning_rate": 9.401197604790419e-06, |
| "loss": 1.9576, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0658682634730539, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.371257485029941e-06, |
| "loss": 1.949, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0688622754491018, |
| "grad_norm": 0.384765625, |
| "learning_rate": 9.341317365269462e-06, |
| "loss": 1.8964, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0718562874251497, |
| "grad_norm": 0.373046875, |
| "learning_rate": 9.311377245508982e-06, |
| "loss": 1.9112, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0748502994011976, |
| "grad_norm": 0.3671875, |
| "learning_rate": 9.281437125748504e-06, |
| "loss": 1.8759, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07784431137724551, |
| "grad_norm": 0.369140625, |
| "learning_rate": 9.251497005988024e-06, |
| "loss": 1.8658, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.08083832335329341, |
| "grad_norm": 0.353515625, |
| "learning_rate": 9.221556886227547e-06, |
| "loss": 1.8888, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.08383233532934131, |
| "grad_norm": 0.353515625, |
| "learning_rate": 9.191616766467067e-06, |
| "loss": 1.8039, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.08682634730538923, |
| "grad_norm": 0.34375, |
| "learning_rate": 9.161676646706587e-06, |
| "loss": 1.8441, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.08982035928143713, |
| "grad_norm": 0.3203125, |
| "learning_rate": 9.13173652694611e-06, |
| "loss": 1.841, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09281437125748503, |
| "grad_norm": 0.330078125, |
| "learning_rate": 9.10179640718563e-06, |
| "loss": 1.7621, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.09580838323353294, |
| "grad_norm": 0.322265625, |
| "learning_rate": 9.07185628742515e-06, |
| "loss": 1.7873, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.09880239520958084, |
| "grad_norm": 0.3203125, |
| "learning_rate": 9.041916167664672e-06, |
| "loss": 1.7489, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.10179640718562874, |
| "grad_norm": 0.314453125, |
| "learning_rate": 9.011976047904193e-06, |
| "loss": 1.7462, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.10479041916167664, |
| "grad_norm": 0.298828125, |
| "learning_rate": 8.982035928143713e-06, |
| "loss": 1.7215, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.10778443113772455, |
| "grad_norm": 0.3046875, |
| "learning_rate": 8.952095808383234e-06, |
| "loss": 1.7299, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.11077844311377245, |
| "grad_norm": 0.291015625, |
| "learning_rate": 8.922155688622756e-06, |
| "loss": 1.6916, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.11377245508982035, |
| "grad_norm": 0.2890625, |
| "learning_rate": 8.892215568862276e-06, |
| "loss": 1.7432, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.11676646706586827, |
| "grad_norm": 0.279296875, |
| "learning_rate": 8.862275449101796e-06, |
| "loss": 1.6688, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.11976047904191617, |
| "grad_norm": 0.271484375, |
| "learning_rate": 8.832335329341319e-06, |
| "loss": 1.6834, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12275449101796407, |
| "grad_norm": 0.275390625, |
| "learning_rate": 8.802395209580839e-06, |
| "loss": 1.728, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.12574850299401197, |
| "grad_norm": 0.251953125, |
| "learning_rate": 8.77245508982036e-06, |
| "loss": 1.6077, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.12874251497005987, |
| "grad_norm": 0.259765625, |
| "learning_rate": 8.742514970059881e-06, |
| "loss": 1.6259, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1317365269461078, |
| "grad_norm": 0.259765625, |
| "learning_rate": 8.712574850299402e-06, |
| "loss": 1.6377, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.1347305389221557, |
| "grad_norm": 0.25, |
| "learning_rate": 8.682634730538922e-06, |
| "loss": 1.5849, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1377245508982036, |
| "grad_norm": 0.26171875, |
| "learning_rate": 8.652694610778444e-06, |
| "loss": 1.6071, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1407185628742515, |
| "grad_norm": 0.24609375, |
| "learning_rate": 8.622754491017965e-06, |
| "loss": 1.6225, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1437125748502994, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 8.592814371257485e-06, |
| "loss": 1.6016, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1467065868263473, |
| "grad_norm": 0.24609375, |
| "learning_rate": 8.562874251497007e-06, |
| "loss": 1.6029, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.1497005988023952, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 8.532934131736528e-06, |
| "loss": 1.5553, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.15269461077844312, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 8.50299401197605e-06, |
| "loss": 1.558, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.15568862275449102, |
| "grad_norm": 0.23828125, |
| "learning_rate": 8.473053892215568e-06, |
| "loss": 1.5795, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.15868263473053892, |
| "grad_norm": 0.2109375, |
| "learning_rate": 8.44311377245509e-06, |
| "loss": 1.5454, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.16167664670658682, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 8.413173652694611e-06, |
| "loss": 1.5217, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.16467065868263472, |
| "grad_norm": 0.212890625, |
| "learning_rate": 8.383233532934131e-06, |
| "loss": 1.5408, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.16766467065868262, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 8.353293413173653e-06, |
| "loss": 1.5544, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.17065868263473055, |
| "grad_norm": 0.19921875, |
| "learning_rate": 8.323353293413174e-06, |
| "loss": 1.4898, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.17365269461077845, |
| "grad_norm": 0.1953125, |
| "learning_rate": 8.293413173652696e-06, |
| "loss": 1.5178, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.17664670658682635, |
| "grad_norm": 0.197265625, |
| "learning_rate": 8.263473053892216e-06, |
| "loss": 1.4842, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.17964071856287425, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 8.233532934131737e-06, |
| "loss": 1.5372, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.18263473053892215, |
| "grad_norm": 0.205078125, |
| "learning_rate": 8.203592814371259e-06, |
| "loss": 1.5338, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.18562874251497005, |
| "grad_norm": 0.201171875, |
| "learning_rate": 8.17365269461078e-06, |
| "loss": 1.483, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.18862275449101795, |
| "grad_norm": 0.1875, |
| "learning_rate": 8.1437125748503e-06, |
| "loss": 1.4776, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.19161676646706588, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 8.113772455089822e-06, |
| "loss": 1.4907, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.19461077844311378, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 8.083832335329342e-06, |
| "loss": 1.4702, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.19760479041916168, |
| "grad_norm": 0.1875, |
| "learning_rate": 8.053892215568863e-06, |
| "loss": 1.4657, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.20059880239520958, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 8.023952095808385e-06, |
| "loss": 1.4704, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.20359281437125748, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 7.994011976047905e-06, |
| "loss": 1.4551, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.20658682634730538, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 7.964071856287425e-06, |
| "loss": 1.5081, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.20958083832335328, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 7.934131736526946e-06, |
| "loss": 1.501, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2125748502994012, |
| "grad_norm": 0.17578125, |
| "learning_rate": 7.904191616766468e-06, |
| "loss": 1.4431, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.2155688622754491, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 7.874251497005988e-06, |
| "loss": 1.4427, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.218562874251497, |
| "grad_norm": 0.17578125, |
| "learning_rate": 7.844311377245509e-06, |
| "loss": 1.4488, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2215568862275449, |
| "grad_norm": 0.18359375, |
| "learning_rate": 7.814371257485031e-06, |
| "loss": 1.4469, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.2245508982035928, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 7.784431137724551e-06, |
| "loss": 1.4297, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2275449101796407, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 7.754491017964072e-06, |
| "loss": 1.416, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.23053892215568864, |
| "grad_norm": 0.181640625, |
| "learning_rate": 7.724550898203594e-06, |
| "loss": 1.4095, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.23353293413173654, |
| "grad_norm": 0.177734375, |
| "learning_rate": 7.694610778443114e-06, |
| "loss": 1.4354, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.23652694610778444, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 7.664670658682636e-06, |
| "loss": 1.3849, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.23952095808383234, |
| "grad_norm": 0.166015625, |
| "learning_rate": 7.634730538922157e-06, |
| "loss": 1.3947, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.24251497005988024, |
| "grad_norm": 0.1796875, |
| "learning_rate": 7.604790419161677e-06, |
| "loss": 1.3844, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.24550898203592814, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 7.574850299401198e-06, |
| "loss": 1.3549, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.24850299401197604, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 7.544910179640719e-06, |
| "loss": 1.4183, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.25149700598802394, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 7.51497005988024e-06, |
| "loss": 1.4088, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.25449101796407186, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 7.485029940119761e-06, |
| "loss": 1.3758, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.25748502994011974, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 7.4550898203592825e-06, |
| "loss": 1.3803, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.26047904191616766, |
| "grad_norm": 0.1796875, |
| "learning_rate": 7.425149700598803e-06, |
| "loss": 1.4081, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2634730538922156, |
| "grad_norm": 0.17578125, |
| "learning_rate": 7.395209580838324e-06, |
| "loss": 1.3633, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.26646706586826346, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 7.365269461077845e-06, |
| "loss": 1.3794, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2694610778443114, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 7.335329341317366e-06, |
| "loss": 1.3754, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.27245508982035926, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 7.305389221556887e-06, |
| "loss": 1.3878, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2754491017964072, |
| "grad_norm": 0.169921875, |
| "learning_rate": 7.275449101796408e-06, |
| "loss": 1.3885, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.27844311377245506, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 7.2455089820359295e-06, |
| "loss": 1.3056, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.281437125748503, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 7.215568862275449e-06, |
| "loss": 1.4014, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2844311377245509, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 7.18562874251497e-06, |
| "loss": 1.4006, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2874251497005988, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 7.155688622754492e-06, |
| "loss": 1.3968, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.2904191616766467, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 7.125748502994012e-06, |
| "loss": 1.3471, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2934131736526946, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 7.095808383233533e-06, |
| "loss": 1.3429, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2964071856287425, |
| "grad_norm": 0.173828125, |
| "learning_rate": 7.0658682634730545e-06, |
| "loss": 1.3668, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2994011976047904, |
| "grad_norm": 0.16796875, |
| "learning_rate": 7.035928143712576e-06, |
| "loss": 1.348, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3023952095808383, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 7.005988023952096e-06, |
| "loss": 1.3409, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.30538922155688625, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 6.976047904191617e-06, |
| "loss": 1.3298, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3083832335329341, |
| "grad_norm": 0.173828125, |
| "learning_rate": 6.946107784431139e-06, |
| "loss": 1.3651, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.31137724550898205, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 6.916167664670659e-06, |
| "loss": 1.3342, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3143712574850299, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 6.88622754491018e-06, |
| "loss": 1.3343, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.31736526946107785, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 6.8562874251497016e-06, |
| "loss": 1.3526, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3203592814371258, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 6.826347305389223e-06, |
| "loss": 1.3527, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.32335329341317365, |
| "grad_norm": 0.2080078125, |
| "learning_rate": 6.796407185628743e-06, |
| "loss": 1.3931, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3263473053892216, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 6.7664670658682645e-06, |
| "loss": 1.3651, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.32934131736526945, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 6.736526946107786e-06, |
| "loss": 1.3268, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3323353293413174, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 6.706586826347305e-06, |
| "loss": 1.3366, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.33532934131736525, |
| "grad_norm": 0.173828125, |
| "learning_rate": 6.6766467065868265e-06, |
| "loss": 1.3565, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3383233532934132, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 6.646706586826348e-06, |
| "loss": 1.3127, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3413173652694611, |
| "grad_norm": 0.173828125, |
| "learning_rate": 6.616766467065869e-06, |
| "loss": 1.3152, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.344311377245509, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 6.586826347305389e-06, |
| "loss": 1.3575, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3473053892215569, |
| "grad_norm": 0.166015625, |
| "learning_rate": 6.556886227544911e-06, |
| "loss": 1.3175, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3502994011976048, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 6.526946107784432e-06, |
| "loss": 1.2844, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3532934131736527, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 6.497005988023952e-06, |
| "loss": 1.3137, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3562874251497006, |
| "grad_norm": 0.17578125, |
| "learning_rate": 6.4670658682634736e-06, |
| "loss": 1.3641, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3592814371257485, |
| "grad_norm": 0.185546875, |
| "learning_rate": 6.437125748502995e-06, |
| "loss": 1.3695, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.36227544910179643, |
| "grad_norm": 0.162109375, |
| "learning_rate": 6.407185628742516e-06, |
| "loss": 1.3174, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.3652694610778443, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 6.3772455089820365e-06, |
| "loss": 1.3148, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.36826347305389223, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 6.347305389221558e-06, |
| "loss": 1.3603, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.3712574850299401, |
| "grad_norm": 0.17578125, |
| "learning_rate": 6.317365269461079e-06, |
| "loss": 1.3012, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.37425149700598803, |
| "grad_norm": 0.18359375, |
| "learning_rate": 6.2874251497005985e-06, |
| "loss": 1.3569, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3772455089820359, |
| "grad_norm": 0.17578125, |
| "learning_rate": 6.25748502994012e-06, |
| "loss": 1.299, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.38023952095808383, |
| "grad_norm": 0.162109375, |
| "learning_rate": 6.227544910179642e-06, |
| "loss": 1.3066, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.38323353293413176, |
| "grad_norm": 0.1796875, |
| "learning_rate": 6.1976047904191614e-06, |
| "loss": 1.3144, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.38622754491017963, |
| "grad_norm": 0.1650390625, |
| "learning_rate": 6.167664670658683e-06, |
| "loss": 1.2978, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.38922155688622756, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 6.137724550898204e-06, |
| "loss": 1.2917, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.39221556886227543, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 6.107784431137725e-06, |
| "loss": 1.3107, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.39520958083832336, |
| "grad_norm": 0.1796875, |
| "learning_rate": 6.077844311377246e-06, |
| "loss": 1.33, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.39820359281437123, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 6.047904191616767e-06, |
| "loss": 1.3175, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.40119760479041916, |
| "grad_norm": 0.169921875, |
| "learning_rate": 6.017964071856288e-06, |
| "loss": 1.3118, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.4041916167664671, |
| "grad_norm": 0.177734375, |
| "learning_rate": 5.9880239520958085e-06, |
| "loss": 1.3104, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.40718562874251496, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 5.95808383233533e-06, |
| "loss": 1.2933, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.4101796407185629, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.928143712574851e-06, |
| "loss": 1.3226, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.41317365269461076, |
| "grad_norm": 0.220703125, |
| "learning_rate": 5.898203592814372e-06, |
| "loss": 1.2749, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4161676646706587, |
| "grad_norm": 0.228515625, |
| "learning_rate": 5.868263473053893e-06, |
| "loss": 1.2958, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.41916167664670656, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.838323353293414e-06, |
| "loss": 1.2889, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4221556886227545, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 5.808383233532935e-06, |
| "loss": 1.3154, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.4251497005988024, |
| "grad_norm": 0.171875, |
| "learning_rate": 5.778443113772455e-06, |
| "loss": 1.2891, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4281437125748503, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 5.748502994011976e-06, |
| "loss": 1.2421, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.4311377245508982, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 5.718562874251497e-06, |
| "loss": 1.28, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4341317365269461, |
| "grad_norm": 0.232421875, |
| "learning_rate": 5.6886227544910184e-06, |
| "loss": 1.2694, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.437125748502994, |
| "grad_norm": 0.169921875, |
| "learning_rate": 5.658682634730539e-06, |
| "loss": 1.2945, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.44011976047904194, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 5.62874251497006e-06, |
| "loss": 1.2786, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4431137724550898, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.598802395209581e-06, |
| "loss": 1.3056, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.44610778443113774, |
| "grad_norm": 0.197265625, |
| "learning_rate": 5.568862275449102e-06, |
| "loss": 1.305, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.4491017964071856, |
| "grad_norm": 0.185546875, |
| "learning_rate": 5.538922155688623e-06, |
| "loss": 1.2877, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.45209580838323354, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 5.508982035928144e-06, |
| "loss": 1.3158, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.4550898203592814, |
| "grad_norm": 0.57421875, |
| "learning_rate": 5.4790419161676655e-06, |
| "loss": 1.287, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.45808383233532934, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 5.449101796407186e-06, |
| "loss": 1.3375, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.46107784431137727, |
| "grad_norm": 0.181640625, |
| "learning_rate": 5.419161676646707e-06, |
| "loss": 1.3105, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.46407185628742514, |
| "grad_norm": 0.18359375, |
| "learning_rate": 5.389221556886228e-06, |
| "loss": 1.283, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.46706586826347307, |
| "grad_norm": 0.20703125, |
| "learning_rate": 5.359281437125749e-06, |
| "loss": 1.2507, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.47005988023952094, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 5.32934131736527e-06, |
| "loss": 1.2731, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.47305389221556887, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 5.299401197604791e-06, |
| "loss": 1.2421, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.47604790419161674, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 5.2694610778443125e-06, |
| "loss": 1.2434, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.47904191616766467, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 5.239520958083832e-06, |
| "loss": 1.2895, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4820359281437126, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 5.209580838323353e-06, |
| "loss": 1.2307, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.48502994011976047, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 5.179640718562875e-06, |
| "loss": 1.2938, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.4880239520958084, |
| "grad_norm": 0.193359375, |
| "learning_rate": 5.149700598802395e-06, |
| "loss": 1.2483, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.49101796407185627, |
| "grad_norm": 0.203125, |
| "learning_rate": 5.119760479041916e-06, |
| "loss": 1.2917, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.4940119760479042, |
| "grad_norm": 0.162109375, |
| "learning_rate": 5.0898203592814375e-06, |
| "loss": 1.2339, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.49700598802395207, |
| "grad_norm": 0.197265625, |
| "learning_rate": 5.059880239520959e-06, |
| "loss": 1.3043, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.193359375, |
| "learning_rate": 5.029940119760479e-06, |
| "loss": 1.2708, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5029940119760479, |
| "grad_norm": 0.177734375, |
| "learning_rate": 5e-06, |
| "loss": 1.2553, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5059880239520959, |
| "grad_norm": 0.185546875, |
| "learning_rate": 4.970059880239521e-06, |
| "loss": 1.2922, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5089820359281437, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 4.940119760479042e-06, |
| "loss": 1.2744, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5119760479041916, |
| "grad_norm": 0.19140625, |
| "learning_rate": 4.910179640718563e-06, |
| "loss": 1.2971, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5149700598802395, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 4.8802395209580846e-06, |
| "loss": 1.3124, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5179640718562875, |
| "grad_norm": 0.25390625, |
| "learning_rate": 4.850299401197605e-06, |
| "loss": 1.2741, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5209580838323353, |
| "grad_norm": 0.212890625, |
| "learning_rate": 4.820359281437126e-06, |
| "loss": 1.247, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5239520958083832, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 4.7904191616766475e-06, |
| "loss": 1.2915, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5269461077844312, |
| "grad_norm": 0.185546875, |
| "learning_rate": 4.760479041916168e-06, |
| "loss": 1.3011, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5299401197604791, |
| "grad_norm": 0.330078125, |
| "learning_rate": 4.730538922155689e-06, |
| "loss": 1.2419, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5329341317365269, |
| "grad_norm": 0.177734375, |
| "learning_rate": 4.7005988023952095e-06, |
| "loss": 1.2723, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5359281437125748, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 4.670658682634731e-06, |
| "loss": 1.2518, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5389221556886228, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 4.640718562874252e-06, |
| "loss": 1.3112, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5419161676646707, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 4.610778443113773e-06, |
| "loss": 1.3199, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5449101796407185, |
| "grad_norm": 0.22265625, |
| "learning_rate": 4.580838323353294e-06, |
| "loss": 1.2663, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5479041916167665, |
| "grad_norm": 0.177734375, |
| "learning_rate": 4.550898203592815e-06, |
| "loss": 1.2309, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5508982035928144, |
| "grad_norm": 0.171875, |
| "learning_rate": 4.520958083832336e-06, |
| "loss": 1.2463, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5538922155688623, |
| "grad_norm": 0.203125, |
| "learning_rate": 4.4910179640718566e-06, |
| "loss": 1.2868, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5568862275449101, |
| "grad_norm": 0.185546875, |
| "learning_rate": 4.461077844311378e-06, |
| "loss": 1.2508, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5598802395209581, |
| "grad_norm": 0.208984375, |
| "learning_rate": 4.431137724550898e-06, |
| "loss": 1.2883, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.562874251497006, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 4.4011976047904195e-06, |
| "loss": 1.2944, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5658682634730539, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 4.371257485029941e-06, |
| "loss": 1.2567, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.5688622754491018, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 4.341317365269461e-06, |
| "loss": 1.2744, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5718562874251497, |
| "grad_norm": 0.185546875, |
| "learning_rate": 4.311377245508982e-06, |
| "loss": 1.2736, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5748502994011976, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 4.281437125748504e-06, |
| "loss": 1.2635, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5778443113772455, |
| "grad_norm": 0.181640625, |
| "learning_rate": 4.251497005988025e-06, |
| "loss": 1.2835, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5808383233532934, |
| "grad_norm": 0.1953125, |
| "learning_rate": 4.221556886227545e-06, |
| "loss": 1.2348, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5838323353293413, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 4.191616766467066e-06, |
| "loss": 1.2537, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5868263473053892, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 4.161676646706587e-06, |
| "loss": 1.2686, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5898203592814372, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 4.131736526946108e-06, |
| "loss": 1.2434, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.592814371257485, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 4.1017964071856294e-06, |
| "loss": 1.2902, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5958083832335329, |
| "grad_norm": 0.18359375, |
| "learning_rate": 4.07185628742515e-06, |
| "loss": 1.2493, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5988023952095808, |
| "grad_norm": 0.19921875, |
| "learning_rate": 4.041916167664671e-06, |
| "loss": 1.2616, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6017964071856288, |
| "grad_norm": 0.1953125, |
| "learning_rate": 4.011976047904192e-06, |
| "loss": 1.256, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.6047904191616766, |
| "grad_norm": 0.173828125, |
| "learning_rate": 3.982035928143713e-06, |
| "loss": 1.2378, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6077844311377245, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 3.952095808383234e-06, |
| "loss": 1.2677, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6107784431137725, |
| "grad_norm": 0.1875, |
| "learning_rate": 3.922155688622754e-06, |
| "loss": 1.2585, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6137724550898204, |
| "grad_norm": 0.197265625, |
| "learning_rate": 3.892215568862276e-06, |
| "loss": 1.2488, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6167664670658682, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 3.862275449101797e-06, |
| "loss": 1.2561, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.6197604790419161, |
| "grad_norm": 0.205078125, |
| "learning_rate": 3.832335329341318e-06, |
| "loss": 1.2457, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.6227544910179641, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 3.8023952095808385e-06, |
| "loss": 1.2987, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.625748502994012, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 3.7724550898203594e-06, |
| "loss": 1.2546, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6287425149700598, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 3.7425149700598806e-06, |
| "loss": 1.2501, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6317365269461078, |
| "grad_norm": 0.1875, |
| "learning_rate": 3.7125748502994014e-06, |
| "loss": 1.2456, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6347305389221557, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 3.6826347305389227e-06, |
| "loss": 1.2779, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.6377245508982036, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 3.6526946107784435e-06, |
| "loss": 1.2375, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6407185628742516, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 3.6227544910179648e-06, |
| "loss": 1.2302, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6437125748502994, |
| "grad_norm": 0.189453125, |
| "learning_rate": 3.592814371257485e-06, |
| "loss": 1.2512, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6467065868263473, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 3.562874251497006e-06, |
| "loss": 1.2908, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6497005988023952, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 3.5329341317365273e-06, |
| "loss": 1.2641, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6526946107784432, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 3.502994011976048e-06, |
| "loss": 1.2724, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.655688622754491, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 3.4730538922155693e-06, |
| "loss": 1.2364, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.6586826347305389, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 3.44311377245509e-06, |
| "loss": 1.2436, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6616766467065869, |
| "grad_norm": 0.203125, |
| "learning_rate": 3.4131736526946114e-06, |
| "loss": 1.2095, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.6646706586826348, |
| "grad_norm": 0.16796875, |
| "learning_rate": 3.3832335329341322e-06, |
| "loss": 1.2171, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6676646706586826, |
| "grad_norm": 0.21875, |
| "learning_rate": 3.3532934131736526e-06, |
| "loss": 1.2786, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.6706586826347305, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 3.323353293413174e-06, |
| "loss": 1.2451, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6736526946107785, |
| "grad_norm": 0.17578125, |
| "learning_rate": 3.2934131736526947e-06, |
| "loss": 1.2391, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6766467065868264, |
| "grad_norm": 0.17578125, |
| "learning_rate": 3.263473053892216e-06, |
| "loss": 1.2446, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6796407185628742, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 3.2335329341317368e-06, |
| "loss": 1.2585, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.6826347305389222, |
| "grad_norm": 0.197265625, |
| "learning_rate": 3.203592814371258e-06, |
| "loss": 1.2718, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.6856287425149701, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 3.173652694610779e-06, |
| "loss": 1.279, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.688622754491018, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 3.1437125748502993e-06, |
| "loss": 1.2106, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6916167664670658, |
| "grad_norm": 0.18359375, |
| "learning_rate": 3.113772455089821e-06, |
| "loss": 1.2408, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.6946107784431138, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 3.0838323353293413e-06, |
| "loss": 1.2722, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.6976047904191617, |
| "grad_norm": 0.177734375, |
| "learning_rate": 3.0538922155688626e-06, |
| "loss": 1.2599, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7005988023952096, |
| "grad_norm": 0.197265625, |
| "learning_rate": 3.0239520958083834e-06, |
| "loss": 1.2703, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.7035928143712575, |
| "grad_norm": 0.20703125, |
| "learning_rate": 2.9940119760479042e-06, |
| "loss": 1.2198, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7065868263473054, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 2.9640718562874255e-06, |
| "loss": 1.2481, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7095808383233533, |
| "grad_norm": 0.19140625, |
| "learning_rate": 2.9341317365269463e-06, |
| "loss": 1.2336, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7125748502994012, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 2.9041916167664676e-06, |
| "loss": 1.2121, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7155688622754491, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 2.874251497005988e-06, |
| "loss": 1.2536, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.718562874251497, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 2.8443113772455092e-06, |
| "loss": 1.2766, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7215568862275449, |
| "grad_norm": 0.1953125, |
| "learning_rate": 2.81437125748503e-06, |
| "loss": 1.2518, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.7245508982035929, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 2.784431137724551e-06, |
| "loss": 1.2038, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.7275449101796407, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 2.754491017964072e-06, |
| "loss": 1.2446, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.7305389221556886, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 2.724550898203593e-06, |
| "loss": 1.2392, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.7335329341317365, |
| "grad_norm": 0.2158203125, |
| "learning_rate": 2.694610778443114e-06, |
| "loss": 1.2166, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7365269461077845, |
| "grad_norm": 0.17578125, |
| "learning_rate": 2.664670658682635e-06, |
| "loss": 1.2236, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7395209580838323, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.6347305389221563e-06, |
| "loss": 1.2626, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7425149700598802, |
| "grad_norm": 0.197265625, |
| "learning_rate": 2.6047904191616767e-06, |
| "loss": 1.2618, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7455089820359282, |
| "grad_norm": 0.177734375, |
| "learning_rate": 2.5748502994011975e-06, |
| "loss": 1.208, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7485029940119761, |
| "grad_norm": 0.203125, |
| "learning_rate": 2.5449101796407188e-06, |
| "loss": 1.2561, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7514970059880239, |
| "grad_norm": 0.2109375, |
| "learning_rate": 2.5149700598802396e-06, |
| "loss": 1.2851, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.7544910179640718, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 2.4850299401197604e-06, |
| "loss": 1.2475, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7574850299401198, |
| "grad_norm": 0.177734375, |
| "learning_rate": 2.4550898203592817e-06, |
| "loss": 1.2461, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.7604790419161677, |
| "grad_norm": 0.2314453125, |
| "learning_rate": 2.4251497005988025e-06, |
| "loss": 1.2954, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.7634730538922155, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 2.3952095808383237e-06, |
| "loss": 1.2431, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7664670658682635, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 2.3652694610778446e-06, |
| "loss": 1.2188, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7694610778443114, |
| "grad_norm": 0.177734375, |
| "learning_rate": 2.3353293413173654e-06, |
| "loss": 1.2155, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.7724550898203593, |
| "grad_norm": 0.185546875, |
| "learning_rate": 2.3053892215568866e-06, |
| "loss": 1.2328, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7754491017964071, |
| "grad_norm": 0.189453125, |
| "learning_rate": 2.2754491017964075e-06, |
| "loss": 1.2684, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.7784431137724551, |
| "grad_norm": 0.181640625, |
| "learning_rate": 2.2455089820359283e-06, |
| "loss": 1.2549, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.781437125748503, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 2.215568862275449e-06, |
| "loss": 1.2526, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.7844311377245509, |
| "grad_norm": 0.17578125, |
| "learning_rate": 2.1856287425149704e-06, |
| "loss": 1.2477, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.7874251497005988, |
| "grad_norm": 0.185546875, |
| "learning_rate": 2.155688622754491e-06, |
| "loss": 1.2642, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.7904191616766467, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 2.1257485029940124e-06, |
| "loss": 1.226, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.7934131736526946, |
| "grad_norm": 0.18359375, |
| "learning_rate": 2.095808383233533e-06, |
| "loss": 1.2835, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7964071856287425, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 2.065868263473054e-06, |
| "loss": 1.2559, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.7994011976047904, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 2.035928143712575e-06, |
| "loss": 1.2453, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8023952095808383, |
| "grad_norm": 0.18359375, |
| "learning_rate": 2.005988023952096e-06, |
| "loss": 1.2359, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.8053892215568862, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 1.976047904191617e-06, |
| "loss": 1.2698, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.8083832335329342, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 1.946107784431138e-06, |
| "loss": 1.2523, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.811377245508982, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 1.916167664670659e-06, |
| "loss": 1.2631, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.8143712574850299, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 1.8862275449101797e-06, |
| "loss": 1.2433, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8173652694610778, |
| "grad_norm": 0.197265625, |
| "learning_rate": 1.8562874251497007e-06, |
| "loss": 1.2283, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.8203592814371258, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 1.8263473053892218e-06, |
| "loss": 1.2446, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.8233532934131736, |
| "grad_norm": 0.1953125, |
| "learning_rate": 1.7964071856287426e-06, |
| "loss": 1.2349, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8263473053892215, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 1.7664670658682636e-06, |
| "loss": 1.2367, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8293413173652695, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 1.7365269461077847e-06, |
| "loss": 1.2107, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.8323353293413174, |
| "grad_norm": 0.185546875, |
| "learning_rate": 1.7065868263473057e-06, |
| "loss": 1.2539, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8353293413173652, |
| "grad_norm": 0.203125, |
| "learning_rate": 1.6766467065868263e-06, |
| "loss": 1.2627, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.8383233532934131, |
| "grad_norm": 0.234375, |
| "learning_rate": 1.6467065868263474e-06, |
| "loss": 1.2337, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8413173652694611, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 1.6167664670658684e-06, |
| "loss": 1.2311, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.844311377245509, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.5868263473053894e-06, |
| "loss": 1.2475, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8473053892215568, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 1.5568862275449105e-06, |
| "loss": 1.2319, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.8502994011976048, |
| "grad_norm": 0.1708984375, |
| "learning_rate": 1.5269461077844313e-06, |
| "loss": 1.2259, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8532934131736527, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 1.4970059880239521e-06, |
| "loss": 1.2358, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8562874251497006, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 1.4670658682634732e-06, |
| "loss": 1.2412, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8592814371257484, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 1.437125748502994e-06, |
| "loss": 1.2004, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.8622754491017964, |
| "grad_norm": 0.181640625, |
| "learning_rate": 1.407185628742515e-06, |
| "loss": 1.2413, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8652694610778443, |
| "grad_norm": 0.181640625, |
| "learning_rate": 1.377245508982036e-06, |
| "loss": 1.2627, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.8682634730538922, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 1.347305389221557e-06, |
| "loss": 1.2342, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8712574850299402, |
| "grad_norm": 0.1796875, |
| "learning_rate": 1.3173652694610781e-06, |
| "loss": 1.2189, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.874251497005988, |
| "grad_norm": 0.181640625, |
| "learning_rate": 1.2874251497005988e-06, |
| "loss": 1.2225, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.8772455089820359, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 1.2574850299401198e-06, |
| "loss": 1.2214, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.8802395209580839, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.2275449101796408e-06, |
| "loss": 1.2402, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.8832335329341318, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 1.1976047904191619e-06, |
| "loss": 1.2619, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.8862275449101796, |
| "grad_norm": 0.193359375, |
| "learning_rate": 1.1676646706586827e-06, |
| "loss": 1.211, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.8892215568862275, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 1.1377245508982037e-06, |
| "loss": 1.223, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.8922155688622755, |
| "grad_norm": 0.177734375, |
| "learning_rate": 1.1077844311377246e-06, |
| "loss": 1.2187, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.8952095808383234, |
| "grad_norm": 0.177734375, |
| "learning_rate": 1.0778443113772456e-06, |
| "loss": 1.2116, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.8982035928143712, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.0479041916167664e-06, |
| "loss": 1.2359, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9011976047904192, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 1.0179640718562875e-06, |
| "loss": 1.2731, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.9041916167664671, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 9.880239520958085e-07, |
| "loss": 1.2202, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.907185628742515, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 9.580838323353295e-07, |
| "loss": 1.2096, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.9101796407185628, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 9.281437125748504e-07, |
| "loss": 1.2649, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.9131736526946108, |
| "grad_norm": 0.185546875, |
| "learning_rate": 8.982035928143713e-07, |
| "loss": 1.2283, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9161676646706587, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 8.682634730538923e-07, |
| "loss": 1.2219, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.9191616766467066, |
| "grad_norm": 0.181640625, |
| "learning_rate": 8.383233532934132e-07, |
| "loss": 1.2372, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.9221556886227545, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 8.083832335329342e-07, |
| "loss": 1.2557, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.9251497005988024, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 7.784431137724552e-07, |
| "loss": 1.2252, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.9281437125748503, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 7.485029940119761e-07, |
| "loss": 1.2635, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9311377245508982, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 7.18562874251497e-07, |
| "loss": 1.23, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.9341317365269461, |
| "grad_norm": 0.193359375, |
| "learning_rate": 6.88622754491018e-07, |
| "loss": 1.2364, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.937125748502994, |
| "grad_norm": 0.17578125, |
| "learning_rate": 6.586826347305391e-07, |
| "loss": 1.2345, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.9401197604790419, |
| "grad_norm": 0.17578125, |
| "learning_rate": 6.287425149700599e-07, |
| "loss": 1.2195, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.9431137724550899, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 5.988023952095809e-07, |
| "loss": 1.2212, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.9461077844311377, |
| "grad_norm": 0.177734375, |
| "learning_rate": 5.688622754491019e-07, |
| "loss": 1.2202, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.9491017964071856, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 5.389221556886228e-07, |
| "loss": 1.23, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.9520958083832335, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 5.089820359281437e-07, |
| "loss": 1.2248, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.9550898203592815, |
| "grad_norm": 0.177734375, |
| "learning_rate": 4.790419161676648e-07, |
| "loss": 1.238, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.9580838323353293, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 4.4910179640718565e-07, |
| "loss": 1.21, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9610778443113772, |
| "grad_norm": 0.17578125, |
| "learning_rate": 4.191616766467066e-07, |
| "loss": 1.2262, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.9640718562874252, |
| "grad_norm": 0.1767578125, |
| "learning_rate": 3.892215568862276e-07, |
| "loss": 1.2146, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9670658682634731, |
| "grad_norm": 0.251953125, |
| "learning_rate": 3.592814371257485e-07, |
| "loss": 1.198, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.9700598802395209, |
| "grad_norm": 0.193359375, |
| "learning_rate": 3.2934131736526953e-07, |
| "loss": 1.2583, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9730538922155688, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 2.9940119760479047e-07, |
| "loss": 1.1982, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9760479041916168, |
| "grad_norm": 0.271484375, |
| "learning_rate": 2.694610778443114e-07, |
| "loss": 1.2834, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.9790419161676647, |
| "grad_norm": 0.19140625, |
| "learning_rate": 2.395209580838324e-07, |
| "loss": 1.2277, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.9820359281437125, |
| "grad_norm": 0.18359375, |
| "learning_rate": 2.095808383233533e-07, |
| "loss": 1.233, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.9850299401197605, |
| "grad_norm": 0.1796875, |
| "learning_rate": 1.7964071856287425e-07, |
| "loss": 1.2387, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.9880239520958084, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 1.4970059880239523e-07, |
| "loss": 1.2507, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.9910179640718563, |
| "grad_norm": 0.185546875, |
| "learning_rate": 1.197604790419162e-07, |
| "loss": 1.2354, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.9940119760479041, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 8.982035928143712e-08, |
| "loss": 1.2488, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.9970059880239521, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 5.98802395209581e-08, |
| "loss": 1.2657, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 2.994011976047905e-08, |
| "loss": 1.2338, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.2354644536972046, |
| "eval_runtime": 12.3412, |
| "eval_samples_per_second": 2.755, |
| "eval_steps_per_second": 0.405, |
| "step": 334 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 334, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.14157528414788e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|