{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 214, "global_step": 214, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004672897196261682, "grad_norm": 1.5234375, "learning_rate": 1e-05, "loss": 2.9069, "step": 1 }, { "epoch": 0.009345794392523364, "grad_norm": 1.546875, "learning_rate": 9.953271028037384e-06, "loss": 2.9462, "step": 2 }, { "epoch": 0.014018691588785047, "grad_norm": 1.4375, "learning_rate": 9.906542056074768e-06, "loss": 2.8437, "step": 3 }, { "epoch": 0.018691588785046728, "grad_norm": 1.4140625, "learning_rate": 9.859813084112151e-06, "loss": 2.8093, "step": 4 }, { "epoch": 0.02336448598130841, "grad_norm": 1.140625, "learning_rate": 9.813084112149533e-06, "loss": 2.7097, "step": 5 }, { "epoch": 0.028037383177570093, "grad_norm": 1.0703125, "learning_rate": 9.766355140186918e-06, "loss": 2.7076, "step": 6 }, { "epoch": 0.03271028037383177, "grad_norm": 0.953125, "learning_rate": 9.7196261682243e-06, "loss": 2.6449, "step": 7 }, { "epoch": 0.037383177570093455, "grad_norm": 0.8984375, "learning_rate": 9.672897196261683e-06, "loss": 2.6143, "step": 8 }, { "epoch": 0.04205607476635514, "grad_norm": 0.91015625, "learning_rate": 9.626168224299066e-06, "loss": 2.5539, "step": 9 }, { "epoch": 0.04672897196261682, "grad_norm": 0.85546875, "learning_rate": 9.57943925233645e-06, "loss": 2.4964, "step": 10 }, { "epoch": 0.0514018691588785, "grad_norm": 0.73046875, "learning_rate": 9.532710280373833e-06, "loss": 2.4061, "step": 11 }, { "epoch": 0.056074766355140186, "grad_norm": 0.6875, "learning_rate": 9.485981308411217e-06, "loss": 2.3791, "step": 12 }, { "epoch": 0.06074766355140187, "grad_norm": 0.66796875, "learning_rate": 9.439252336448598e-06, "loss": 2.3691, "step": 13 }, { "epoch": 0.06542056074766354, "grad_norm": 0.6484375, "learning_rate": 9.392523364485983e-06, "loss": 2.3177, "step": 14 }, { "epoch": 0.07009345794392523, "grad_norm": 0.59375, "learning_rate": 9.345794392523365e-06, "loss": 2.2444, "step": 15 }, { "epoch": 0.07476635514018691, "grad_norm": 0.59375, "learning_rate": 9.299065420560748e-06, "loss": 2.261, "step": 16 }, { "epoch": 0.0794392523364486, "grad_norm": 0.609375, "learning_rate": 9.252336448598132e-06, "loss": 2.2963, "step": 17 }, { "epoch": 0.08411214953271028, "grad_norm": 0.5859375, "learning_rate": 9.205607476635515e-06, "loss": 2.2225, "step": 18 }, { "epoch": 0.08878504672897196, "grad_norm": 0.54296875, "learning_rate": 9.158878504672899e-06, "loss": 2.1646, "step": 19 }, { "epoch": 0.09345794392523364, "grad_norm": 0.5625, "learning_rate": 9.112149532710282e-06, "loss": 2.1857, "step": 20 }, { "epoch": 0.09813084112149532, "grad_norm": 0.578125, "learning_rate": 9.065420560747664e-06, "loss": 2.1392, "step": 21 }, { "epoch": 0.102803738317757, "grad_norm": 0.57421875, "learning_rate": 9.018691588785047e-06, "loss": 2.1135, "step": 22 }, { "epoch": 0.10747663551401869, "grad_norm": 0.55859375, "learning_rate": 8.97196261682243e-06, "loss": 2.1053, "step": 23 }, { "epoch": 0.11214953271028037, "grad_norm": 0.546875, "learning_rate": 8.925233644859814e-06, "loss": 2.0868, "step": 24 }, { "epoch": 0.11682242990654206, "grad_norm": 0.5234375, "learning_rate": 8.878504672897197e-06, "loss": 2.0354, "step": 25 }, { "epoch": 0.12149532710280374, "grad_norm": 0.5, "learning_rate": 8.83177570093458e-06, "loss": 2.0114, "step": 26 }, { "epoch": 0.1261682242990654, "grad_norm": 0.498046875, "learning_rate": 8.785046728971963e-06, "loss": 2.0158, "step": 27 }, { "epoch": 0.1308411214953271, "grad_norm": 0.498046875, "learning_rate": 8.738317757009348e-06, "loss": 2.0091, "step": 28 }, { "epoch": 0.13551401869158877, "grad_norm": 0.47265625, "learning_rate": 8.69158878504673e-06, "loss": 1.9591, "step": 29 }, { "epoch": 0.14018691588785046, "grad_norm": 0.45703125, "learning_rate": 8.644859813084113e-06, "loss": 1.9576, "step": 30 }, { "epoch": 0.14485981308411214, "grad_norm": 0.4609375, "learning_rate": 8.598130841121496e-06, "loss": 1.9389, "step": 31 }, { "epoch": 0.14953271028037382, "grad_norm": 0.44921875, "learning_rate": 8.55140186915888e-06, "loss": 1.8883, "step": 32 }, { "epoch": 0.1542056074766355, "grad_norm": 0.44921875, "learning_rate": 8.504672897196263e-06, "loss": 1.8913, "step": 33 }, { "epoch": 0.1588785046728972, "grad_norm": 0.431640625, "learning_rate": 8.457943925233646e-06, "loss": 1.8813, "step": 34 }, { "epoch": 0.16355140186915887, "grad_norm": 0.44140625, "learning_rate": 8.411214953271028e-06, "loss": 1.8649, "step": 35 }, { "epoch": 0.16822429906542055, "grad_norm": 0.427734375, "learning_rate": 8.364485981308411e-06, "loss": 1.8555, "step": 36 }, { "epoch": 0.17289719626168223, "grad_norm": 0.4140625, "learning_rate": 8.317757009345795e-06, "loss": 1.8295, "step": 37 }, { "epoch": 0.17757009345794392, "grad_norm": 0.40234375, "learning_rate": 8.271028037383178e-06, "loss": 1.8116, "step": 38 }, { "epoch": 0.1822429906542056, "grad_norm": 0.40234375, "learning_rate": 8.224299065420562e-06, "loss": 1.7998, "step": 39 }, { "epoch": 0.18691588785046728, "grad_norm": 0.38671875, "learning_rate": 8.177570093457945e-06, "loss": 1.7818, "step": 40 }, { "epoch": 0.19158878504672897, "grad_norm": 0.38671875, "learning_rate": 8.130841121495327e-06, "loss": 1.7595, "step": 41 }, { "epoch": 0.19626168224299065, "grad_norm": 0.38671875, "learning_rate": 8.084112149532712e-06, "loss": 1.7451, "step": 42 }, { "epoch": 0.20093457943925233, "grad_norm": 0.384765625, "learning_rate": 8.037383177570094e-06, "loss": 1.7456, "step": 43 }, { "epoch": 0.205607476635514, "grad_norm": 0.37109375, "learning_rate": 7.990654205607477e-06, "loss": 1.7068, "step": 44 }, { "epoch": 0.2102803738317757, "grad_norm": 0.361328125, "learning_rate": 7.94392523364486e-06, "loss": 1.7142, "step": 45 }, { "epoch": 0.21495327102803738, "grad_norm": 0.361328125, "learning_rate": 7.897196261682244e-06, "loss": 1.6928, "step": 46 }, { "epoch": 0.21962616822429906, "grad_norm": 0.353515625, "learning_rate": 7.850467289719627e-06, "loss": 1.665, "step": 47 }, { "epoch": 0.22429906542056074, "grad_norm": 0.345703125, "learning_rate": 7.80373831775701e-06, "loss": 1.6624, "step": 48 }, { "epoch": 0.22897196261682243, "grad_norm": 0.3359375, "learning_rate": 7.757009345794392e-06, "loss": 1.6481, "step": 49 }, { "epoch": 0.2336448598130841, "grad_norm": 0.34375, "learning_rate": 7.710280373831777e-06, "loss": 1.6482, "step": 50 }, { "epoch": 0.2383177570093458, "grad_norm": 0.333984375, "learning_rate": 7.663551401869159e-06, "loss": 1.6233, "step": 51 }, { "epoch": 0.24299065420560748, "grad_norm": 0.330078125, "learning_rate": 7.616822429906543e-06, "loss": 1.6163, "step": 52 }, { "epoch": 0.24766355140186916, "grad_norm": 0.32421875, "learning_rate": 7.570093457943926e-06, "loss": 1.5861, "step": 53 }, { "epoch": 0.2523364485981308, "grad_norm": 0.326171875, "learning_rate": 7.523364485981309e-06, "loss": 1.59, "step": 54 }, { "epoch": 0.2570093457943925, "grad_norm": 0.31640625, "learning_rate": 7.476635514018692e-06, "loss": 1.582, "step": 55 }, { "epoch": 0.2616822429906542, "grad_norm": 0.318359375, "learning_rate": 7.429906542056075e-06, "loss": 1.5681, "step": 56 }, { "epoch": 0.26635514018691586, "grad_norm": 0.3125, "learning_rate": 7.383177570093458e-06, "loss": 1.5647, "step": 57 }, { "epoch": 0.27102803738317754, "grad_norm": 0.310546875, "learning_rate": 7.336448598130842e-06, "loss": 1.539, "step": 58 }, { "epoch": 0.2757009345794392, "grad_norm": 0.302734375, "learning_rate": 7.289719626168225e-06, "loss": 1.5371, "step": 59 }, { "epoch": 0.2803738317757009, "grad_norm": 0.29296875, "learning_rate": 7.242990654205608e-06, "loss": 1.5024, "step": 60 }, { "epoch": 0.2850467289719626, "grad_norm": 0.27734375, "learning_rate": 7.196261682242991e-06, "loss": 1.516, "step": 61 }, { "epoch": 0.2897196261682243, "grad_norm": 0.271484375, "learning_rate": 7.149532710280375e-06, "loss": 1.5012, "step": 62 }, { "epoch": 0.29439252336448596, "grad_norm": 0.26171875, "learning_rate": 7.1028037383177574e-06, "loss": 1.4923, "step": 63 }, { "epoch": 0.29906542056074764, "grad_norm": 0.265625, "learning_rate": 7.056074766355141e-06, "loss": 1.4793, "step": 64 }, { "epoch": 0.3037383177570093, "grad_norm": 0.25, "learning_rate": 7.009345794392523e-06, "loss": 1.486, "step": 65 }, { "epoch": 0.308411214953271, "grad_norm": 0.255859375, "learning_rate": 6.962616822429908e-06, "loss": 1.4806, "step": 66 }, { "epoch": 0.3130841121495327, "grad_norm": 0.251953125, "learning_rate": 6.91588785046729e-06, "loss": 1.4681, "step": 67 }, { "epoch": 0.3177570093457944, "grad_norm": 0.248046875, "learning_rate": 6.869158878504674e-06, "loss": 1.4514, "step": 68 }, { "epoch": 0.32242990654205606, "grad_norm": 0.244140625, "learning_rate": 6.822429906542056e-06, "loss": 1.4378, "step": 69 }, { "epoch": 0.32710280373831774, "grad_norm": 0.2451171875, "learning_rate": 6.77570093457944e-06, "loss": 1.4582, "step": 70 }, { "epoch": 0.3317757009345794, "grad_norm": 0.244140625, "learning_rate": 6.728971962616823e-06, "loss": 1.4322, "step": 71 }, { "epoch": 0.3364485981308411, "grad_norm": 0.2333984375, "learning_rate": 6.682242990654206e-06, "loss": 1.4347, "step": 72 }, { "epoch": 0.3411214953271028, "grad_norm": 0.2373046875, "learning_rate": 6.635514018691589e-06, "loss": 1.4228, "step": 73 }, { "epoch": 0.34579439252336447, "grad_norm": 0.232421875, "learning_rate": 6.588785046728972e-06, "loss": 1.4103, "step": 74 }, { "epoch": 0.35046728971962615, "grad_norm": 0.224609375, "learning_rate": 6.542056074766355e-06, "loss": 1.4139, "step": 75 }, { "epoch": 0.35514018691588783, "grad_norm": 0.234375, "learning_rate": 6.495327102803739e-06, "loss": 1.4196, "step": 76 }, { "epoch": 0.3598130841121495, "grad_norm": 0.2265625, "learning_rate": 6.448598130841122e-06, "loss": 1.3918, "step": 77 }, { "epoch": 0.3644859813084112, "grad_norm": 0.224609375, "learning_rate": 6.401869158878505e-06, "loss": 1.4162, "step": 78 }, { "epoch": 0.3691588785046729, "grad_norm": 0.2119140625, "learning_rate": 6.355140186915888e-06, "loss": 1.3968, "step": 79 }, { "epoch": 0.37383177570093457, "grad_norm": 0.22265625, "learning_rate": 6.308411214953272e-06, "loss": 1.3754, "step": 80 }, { "epoch": 0.37850467289719625, "grad_norm": 0.2109375, "learning_rate": 6.2616822429906544e-06, "loss": 1.4086, "step": 81 }, { "epoch": 0.38317757009345793, "grad_norm": 0.2138671875, "learning_rate": 6.214953271028038e-06, "loss": 1.3931, "step": 82 }, { "epoch": 0.3878504672897196, "grad_norm": 0.216796875, "learning_rate": 6.16822429906542e-06, "loss": 1.3829, "step": 83 }, { "epoch": 0.3925233644859813, "grad_norm": 0.20703125, "learning_rate": 6.121495327102805e-06, "loss": 1.3711, "step": 84 }, { "epoch": 0.397196261682243, "grad_norm": 0.2060546875, "learning_rate": 6.074766355140187e-06, "loss": 1.3579, "step": 85 }, { "epoch": 0.40186915887850466, "grad_norm": 0.20703125, "learning_rate": 6.028037383177571e-06, "loss": 1.3727, "step": 86 }, { "epoch": 0.40654205607476634, "grad_norm": 0.2060546875, "learning_rate": 5.981308411214953e-06, "loss": 1.3692, "step": 87 }, { "epoch": 0.411214953271028, "grad_norm": 0.2021484375, "learning_rate": 5.9345794392523374e-06, "loss": 1.3706, "step": 88 }, { "epoch": 0.4158878504672897, "grad_norm": 0.1982421875, "learning_rate": 5.88785046728972e-06, "loss": 1.3446, "step": 89 }, { "epoch": 0.4205607476635514, "grad_norm": 0.216796875, "learning_rate": 5.841121495327103e-06, "loss": 1.3499, "step": 90 }, { "epoch": 0.4252336448598131, "grad_norm": 0.2158203125, "learning_rate": 5.794392523364486e-06, "loss": 1.3261, "step": 91 }, { "epoch": 0.42990654205607476, "grad_norm": 0.2099609375, "learning_rate": 5.747663551401869e-06, "loss": 1.3459, "step": 92 }, { "epoch": 0.43457943925233644, "grad_norm": 0.2060546875, "learning_rate": 5.700934579439253e-06, "loss": 1.3223, "step": 93 }, { "epoch": 0.4392523364485981, "grad_norm": 0.2001953125, "learning_rate": 5.654205607476636e-06, "loss": 1.3515, "step": 94 }, { "epoch": 0.4439252336448598, "grad_norm": 0.2021484375, "learning_rate": 5.607476635514019e-06, "loss": 1.3395, "step": 95 }, { "epoch": 0.4485981308411215, "grad_norm": 0.1982421875, "learning_rate": 5.560747663551402e-06, "loss": 1.3372, "step": 96 }, { "epoch": 0.4532710280373832, "grad_norm": 0.2119140625, "learning_rate": 5.514018691588785e-06, "loss": 1.3435, "step": 97 }, { "epoch": 0.45794392523364486, "grad_norm": 0.1953125, "learning_rate": 5.467289719626169e-06, "loss": 1.3129, "step": 98 }, { "epoch": 0.46261682242990654, "grad_norm": 0.2060546875, "learning_rate": 5.4205607476635515e-06, "loss": 1.3223, "step": 99 }, { "epoch": 0.4672897196261682, "grad_norm": 0.1962890625, "learning_rate": 5.373831775700935e-06, "loss": 1.3134, "step": 100 }, { "epoch": 0.4719626168224299, "grad_norm": 0.1943359375, "learning_rate": 5.3271028037383174e-06, "loss": 1.3067, "step": 101 }, { "epoch": 0.4766355140186916, "grad_norm": 0.2119140625, "learning_rate": 5.280373831775702e-06, "loss": 1.3027, "step": 102 }, { "epoch": 0.48130841121495327, "grad_norm": 0.2060546875, "learning_rate": 5.233644859813084e-06, "loss": 1.3162, "step": 103 }, { "epoch": 0.48598130841121495, "grad_norm": 0.2021484375, "learning_rate": 5.186915887850468e-06, "loss": 1.3141, "step": 104 }, { "epoch": 0.49065420560747663, "grad_norm": 0.201171875, "learning_rate": 5.14018691588785e-06, "loss": 1.3171, "step": 105 }, { "epoch": 0.4953271028037383, "grad_norm": 0.2021484375, "learning_rate": 5.0934579439252344e-06, "loss": 1.3113, "step": 106 }, { "epoch": 0.5, "grad_norm": 0.1953125, "learning_rate": 5.046728971962617e-06, "loss": 1.3141, "step": 107 }, { "epoch": 0.5046728971962616, "grad_norm": 0.1953125, "learning_rate": 5e-06, "loss": 1.2936, "step": 108 }, { "epoch": 0.5093457943925234, "grad_norm": 0.1982421875, "learning_rate": 4.953271028037384e-06, "loss": 1.3028, "step": 109 }, { "epoch": 0.514018691588785, "grad_norm": 0.19921875, "learning_rate": 4.906542056074766e-06, "loss": 1.322, "step": 110 }, { "epoch": 0.5186915887850467, "grad_norm": 0.205078125, "learning_rate": 4.85981308411215e-06, "loss": 1.2738, "step": 111 }, { "epoch": 0.5233644859813084, "grad_norm": 0.1943359375, "learning_rate": 4.813084112149533e-06, "loss": 1.2831, "step": 112 }, { "epoch": 0.5280373831775701, "grad_norm": 0.2021484375, "learning_rate": 4.766355140186917e-06, "loss": 1.2763, "step": 113 }, { "epoch": 0.5327102803738317, "grad_norm": 0.1884765625, "learning_rate": 4.719626168224299e-06, "loss": 1.2935, "step": 114 }, { "epoch": 0.5373831775700935, "grad_norm": 0.21875, "learning_rate": 4.6728971962616825e-06, "loss": 1.2767, "step": 115 }, { "epoch": 0.5420560747663551, "grad_norm": 0.19921875, "learning_rate": 4.626168224299066e-06, "loss": 1.2702, "step": 116 }, { "epoch": 0.5467289719626168, "grad_norm": 0.2138671875, "learning_rate": 4.579439252336449e-06, "loss": 1.3022, "step": 117 }, { "epoch": 0.5514018691588785, "grad_norm": 0.2236328125, "learning_rate": 4.532710280373832e-06, "loss": 1.2769, "step": 118 }, { "epoch": 0.5560747663551402, "grad_norm": 0.19921875, "learning_rate": 4.485981308411215e-06, "loss": 1.2875, "step": 119 }, { "epoch": 0.5607476635514018, "grad_norm": 0.1923828125, "learning_rate": 4.439252336448599e-06, "loss": 1.2648, "step": 120 }, { "epoch": 0.5654205607476636, "grad_norm": 0.208984375, "learning_rate": 4.392523364485981e-06, "loss": 1.2776, "step": 121 }, { "epoch": 0.5700934579439252, "grad_norm": 0.2041015625, "learning_rate": 4.345794392523365e-06, "loss": 1.2699, "step": 122 }, { "epoch": 0.5747663551401869, "grad_norm": 0.1904296875, "learning_rate": 4.299065420560748e-06, "loss": 1.282, "step": 123 }, { "epoch": 0.5794392523364486, "grad_norm": 0.1962890625, "learning_rate": 4.2523364485981315e-06, "loss": 1.275, "step": 124 }, { "epoch": 0.5841121495327103, "grad_norm": 0.19921875, "learning_rate": 4.205607476635514e-06, "loss": 1.2507, "step": 125 }, { "epoch": 0.5887850467289719, "grad_norm": 0.2216796875, "learning_rate": 4.1588785046728974e-06, "loss": 1.2808, "step": 126 }, { "epoch": 0.5934579439252337, "grad_norm": 0.2109375, "learning_rate": 4.112149532710281e-06, "loss": 1.2779, "step": 127 }, { "epoch": 0.5981308411214953, "grad_norm": 0.2021484375, "learning_rate": 4.065420560747663e-06, "loss": 1.2905, "step": 128 }, { "epoch": 0.602803738317757, "grad_norm": 0.197265625, "learning_rate": 4.018691588785047e-06, "loss": 1.2584, "step": 129 }, { "epoch": 0.6074766355140186, "grad_norm": 0.19140625, "learning_rate": 3.97196261682243e-06, "loss": 1.2554, "step": 130 }, { "epoch": 0.6121495327102804, "grad_norm": 0.205078125, "learning_rate": 3.925233644859814e-06, "loss": 1.2852, "step": 131 }, { "epoch": 0.616822429906542, "grad_norm": 0.1982421875, "learning_rate": 3.878504672897196e-06, "loss": 1.263, "step": 132 }, { "epoch": 0.6214953271028038, "grad_norm": 0.193359375, "learning_rate": 3.8317757009345796e-06, "loss": 1.2651, "step": 133 }, { "epoch": 0.6261682242990654, "grad_norm": 0.2001953125, "learning_rate": 3.785046728971963e-06, "loss": 1.2573, "step": 134 }, { "epoch": 0.6308411214953271, "grad_norm": 0.1865234375, "learning_rate": 3.738317757009346e-06, "loss": 1.2498, "step": 135 }, { "epoch": 0.6355140186915887, "grad_norm": 0.193359375, "learning_rate": 3.691588785046729e-06, "loss": 1.2546, "step": 136 }, { "epoch": 0.6401869158878505, "grad_norm": 0.1982421875, "learning_rate": 3.6448598130841123e-06, "loss": 1.2684, "step": 137 }, { "epoch": 0.6448598130841121, "grad_norm": 0.2119140625, "learning_rate": 3.5981308411214953e-06, "loss": 1.2763, "step": 138 }, { "epoch": 0.6495327102803738, "grad_norm": 0.1962890625, "learning_rate": 3.5514018691588787e-06, "loss": 1.2619, "step": 139 }, { "epoch": 0.6542056074766355, "grad_norm": 0.2041015625, "learning_rate": 3.5046728971962617e-06, "loss": 1.2547, "step": 140 }, { "epoch": 0.6588785046728972, "grad_norm": 0.2041015625, "learning_rate": 3.457943925233645e-06, "loss": 1.2526, "step": 141 }, { "epoch": 0.6635514018691588, "grad_norm": 0.2060546875, "learning_rate": 3.411214953271028e-06, "loss": 1.248, "step": 142 }, { "epoch": 0.6682242990654206, "grad_norm": 0.2177734375, "learning_rate": 3.3644859813084115e-06, "loss": 1.2441, "step": 143 }, { "epoch": 0.6728971962616822, "grad_norm": 0.1943359375, "learning_rate": 3.3177570093457945e-06, "loss": 1.2472, "step": 144 }, { "epoch": 0.677570093457944, "grad_norm": 0.19921875, "learning_rate": 3.2710280373831774e-06, "loss": 1.2524, "step": 145 }, { "epoch": 0.6822429906542056, "grad_norm": 0.2021484375, "learning_rate": 3.224299065420561e-06, "loss": 1.2423, "step": 146 }, { "epoch": 0.6869158878504673, "grad_norm": 0.2216796875, "learning_rate": 3.177570093457944e-06, "loss": 1.2397, "step": 147 }, { "epoch": 0.6915887850467289, "grad_norm": 0.205078125, "learning_rate": 3.1308411214953272e-06, "loss": 1.2681, "step": 148 }, { "epoch": 0.6962616822429907, "grad_norm": 0.2041015625, "learning_rate": 3.08411214953271e-06, "loss": 1.2502, "step": 149 }, { "epoch": 0.7009345794392523, "grad_norm": 0.1884765625, "learning_rate": 3.0373831775700936e-06, "loss": 1.2574, "step": 150 }, { "epoch": 0.705607476635514, "grad_norm": 0.1923828125, "learning_rate": 2.9906542056074766e-06, "loss": 1.2453, "step": 151 }, { "epoch": 0.7102803738317757, "grad_norm": 0.1953125, "learning_rate": 2.94392523364486e-06, "loss": 1.2515, "step": 152 }, { "epoch": 0.7149532710280374, "grad_norm": 0.19921875, "learning_rate": 2.897196261682243e-06, "loss": 1.2589, "step": 153 }, { "epoch": 0.719626168224299, "grad_norm": 0.2080078125, "learning_rate": 2.8504672897196264e-06, "loss": 1.2567, "step": 154 }, { "epoch": 0.7242990654205608, "grad_norm": 0.2265625, "learning_rate": 2.8037383177570094e-06, "loss": 1.2359, "step": 155 }, { "epoch": 0.7289719626168224, "grad_norm": 0.2158203125, "learning_rate": 2.7570093457943923e-06, "loss": 1.2416, "step": 156 }, { "epoch": 0.7336448598130841, "grad_norm": 0.193359375, "learning_rate": 2.7102803738317757e-06, "loss": 1.2358, "step": 157 }, { "epoch": 0.7383177570093458, "grad_norm": 0.1904296875, "learning_rate": 2.6635514018691587e-06, "loss": 1.2224, "step": 158 }, { "epoch": 0.7429906542056075, "grad_norm": 0.2099609375, "learning_rate": 2.616822429906542e-06, "loss": 1.2458, "step": 159 }, { "epoch": 0.7476635514018691, "grad_norm": 0.193359375, "learning_rate": 2.570093457943925e-06, "loss": 1.2339, "step": 160 }, { "epoch": 0.7523364485981309, "grad_norm": 0.1953125, "learning_rate": 2.5233644859813085e-06, "loss": 1.2271, "step": 161 }, { "epoch": 0.7570093457943925, "grad_norm": 0.2060546875, "learning_rate": 2.476635514018692e-06, "loss": 1.2401, "step": 162 }, { "epoch": 0.7616822429906542, "grad_norm": 0.2060546875, "learning_rate": 2.429906542056075e-06, "loss": 1.2336, "step": 163 }, { "epoch": 0.7663551401869159, "grad_norm": 0.2099609375, "learning_rate": 2.3831775700934583e-06, "loss": 1.2179, "step": 164 }, { "epoch": 0.7710280373831776, "grad_norm": 0.2109375, "learning_rate": 2.3364485981308413e-06, "loss": 1.2422, "step": 165 }, { "epoch": 0.7757009345794392, "grad_norm": 0.21875, "learning_rate": 2.2897196261682247e-06, "loss": 1.2491, "step": 166 }, { "epoch": 0.780373831775701, "grad_norm": 0.1904296875, "learning_rate": 2.2429906542056077e-06, "loss": 1.2234, "step": 167 }, { "epoch": 0.7850467289719626, "grad_norm": 0.1923828125, "learning_rate": 2.1962616822429906e-06, "loss": 1.225, "step": 168 }, { "epoch": 0.7897196261682243, "grad_norm": 0.19140625, "learning_rate": 2.149532710280374e-06, "loss": 1.2219, "step": 169 }, { "epoch": 0.794392523364486, "grad_norm": 0.2041015625, "learning_rate": 2.102803738317757e-06, "loss": 1.2187, "step": 170 }, { "epoch": 0.7990654205607477, "grad_norm": 0.1904296875, "learning_rate": 2.0560747663551404e-06, "loss": 1.2221, "step": 171 }, { "epoch": 0.8037383177570093, "grad_norm": 0.2060546875, "learning_rate": 2.0093457943925234e-06, "loss": 1.2361, "step": 172 }, { "epoch": 0.8084112149532711, "grad_norm": 0.185546875, "learning_rate": 1.962616822429907e-06, "loss": 1.2293, "step": 173 }, { "epoch": 0.8130841121495327, "grad_norm": 0.2138671875, "learning_rate": 1.9158878504672898e-06, "loss": 1.2535, "step": 174 }, { "epoch": 0.8177570093457944, "grad_norm": 0.19921875, "learning_rate": 1.869158878504673e-06, "loss": 1.2332, "step": 175 }, { "epoch": 0.822429906542056, "grad_norm": 0.19921875, "learning_rate": 1.8224299065420562e-06, "loss": 1.2292, "step": 176 }, { "epoch": 0.8271028037383178, "grad_norm": 0.1962890625, "learning_rate": 1.7757009345794394e-06, "loss": 1.2325, "step": 177 }, { "epoch": 0.8317757009345794, "grad_norm": 0.2216796875, "learning_rate": 1.7289719626168225e-06, "loss": 1.2314, "step": 178 }, { "epoch": 0.8364485981308412, "grad_norm": 0.197265625, "learning_rate": 1.6822429906542057e-06, "loss": 1.2482, "step": 179 }, { "epoch": 0.8411214953271028, "grad_norm": 0.2109375, "learning_rate": 1.6355140186915887e-06, "loss": 1.2413, "step": 180 }, { "epoch": 0.8457943925233645, "grad_norm": 0.1923828125, "learning_rate": 1.588785046728972e-06, "loss": 1.2187, "step": 181 }, { "epoch": 0.8504672897196262, "grad_norm": 0.240234375, "learning_rate": 1.542056074766355e-06, "loss": 1.2416, "step": 182 }, { "epoch": 0.8551401869158879, "grad_norm": 0.1962890625, "learning_rate": 1.4953271028037383e-06, "loss": 1.2207, "step": 183 }, { "epoch": 0.8598130841121495, "grad_norm": 0.1962890625, "learning_rate": 1.4485981308411215e-06, "loss": 1.2447, "step": 184 }, { "epoch": 0.8644859813084113, "grad_norm": 0.21484375, "learning_rate": 1.4018691588785047e-06, "loss": 1.2384, "step": 185 }, { "epoch": 0.8691588785046729, "grad_norm": 0.197265625, "learning_rate": 1.3551401869158879e-06, "loss": 1.2225, "step": 186 }, { "epoch": 0.8738317757009346, "grad_norm": 0.1943359375, "learning_rate": 1.308411214953271e-06, "loss": 1.2498, "step": 187 }, { "epoch": 0.8785046728971962, "grad_norm": 0.193359375, "learning_rate": 1.2616822429906543e-06, "loss": 1.2368, "step": 188 }, { "epoch": 0.883177570093458, "grad_norm": 0.189453125, "learning_rate": 1.2149532710280374e-06, "loss": 1.2277, "step": 189 }, { "epoch": 0.8878504672897196, "grad_norm": 0.1875, "learning_rate": 1.1682242990654206e-06, "loss": 1.2207, "step": 190 }, { "epoch": 0.8925233644859814, "grad_norm": 0.2333984375, "learning_rate": 1.1214953271028038e-06, "loss": 1.229, "step": 191 }, { "epoch": 0.897196261682243, "grad_norm": 0.2041015625, "learning_rate": 1.074766355140187e-06, "loss": 1.2369, "step": 192 }, { "epoch": 0.9018691588785047, "grad_norm": 0.208984375, "learning_rate": 1.0280373831775702e-06, "loss": 1.2409, "step": 193 }, { "epoch": 0.9065420560747663, "grad_norm": 0.1982421875, "learning_rate": 9.813084112149534e-07, "loss": 1.2302, "step": 194 }, { "epoch": 0.9112149532710281, "grad_norm": 0.203125, "learning_rate": 9.345794392523365e-07, "loss": 1.2332, "step": 195 }, { "epoch": 0.9158878504672897, "grad_norm": 0.22265625, "learning_rate": 8.878504672897197e-07, "loss": 1.2479, "step": 196 }, { "epoch": 0.9205607476635514, "grad_norm": 0.19921875, "learning_rate": 8.411214953271029e-07, "loss": 1.2266, "step": 197 }, { "epoch": 0.9252336448598131, "grad_norm": 0.1943359375, "learning_rate": 7.94392523364486e-07, "loss": 1.2167, "step": 198 }, { "epoch": 0.9299065420560748, "grad_norm": 0.220703125, "learning_rate": 7.476635514018691e-07, "loss": 1.216, "step": 199 }, { "epoch": 0.9345794392523364, "grad_norm": 0.2109375, "learning_rate": 7.009345794392523e-07, "loss": 1.2391, "step": 200 }, { "epoch": 0.9392523364485982, "grad_norm": 0.20703125, "learning_rate": 6.542056074766355e-07, "loss": 1.2361, "step": 201 }, { "epoch": 0.9439252336448598, "grad_norm": 0.197265625, "learning_rate": 6.074766355140187e-07, "loss": 1.2316, "step": 202 }, { "epoch": 0.9485981308411215, "grad_norm": 0.1982421875, "learning_rate": 5.607476635514019e-07, "loss": 1.2201, "step": 203 }, { "epoch": 0.9532710280373832, "grad_norm": 0.203125, "learning_rate": 5.140186915887851e-07, "loss": 1.221, "step": 204 }, { "epoch": 0.9579439252336449, "grad_norm": 0.2060546875, "learning_rate": 4.6728971962616824e-07, "loss": 1.2392, "step": 205 }, { "epoch": 0.9626168224299065, "grad_norm": 0.18359375, "learning_rate": 4.2056074766355143e-07, "loss": 1.2295, "step": 206 }, { "epoch": 0.9672897196261683, "grad_norm": 0.2021484375, "learning_rate": 3.7383177570093457e-07, "loss": 1.2338, "step": 207 }, { "epoch": 0.9719626168224299, "grad_norm": 0.1953125, "learning_rate": 3.2710280373831776e-07, "loss": 1.224, "step": 208 }, { "epoch": 0.9766355140186916, "grad_norm": 0.2109375, "learning_rate": 2.8037383177570096e-07, "loss": 1.2317, "step": 209 }, { "epoch": 0.9813084112149533, "grad_norm": 0.193359375, "learning_rate": 2.3364485981308412e-07, "loss": 1.2235, "step": 210 }, { "epoch": 0.985981308411215, "grad_norm": 0.20703125, "learning_rate": 1.8691588785046729e-07, "loss": 1.2238, "step": 211 }, { "epoch": 0.9906542056074766, "grad_norm": 0.2080078125, "learning_rate": 1.4018691588785048e-07, "loss": 1.2146, "step": 212 }, { "epoch": 0.9953271028037384, "grad_norm": 0.203125, "learning_rate": 9.345794392523364e-08, "loss": 1.231, "step": 213 }, { "epoch": 1.0, "grad_norm": 0.2099609375, "learning_rate": 4.672897196261682e-08, "loss": 1.2306, "step": 214 }, { "epoch": 1.0, "eval_loss": 1.2332667112350464, "eval_runtime": 7.4216, "eval_samples_per_second": 3.099, "eval_steps_per_second": 0.404, "step": 214 } ], "logging_steps": 1.0, "max_steps": 214, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.321152639100518e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }