{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 214,
  "global_step": 214,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004672897196261682,
      "grad_norm": 1.5234375,
      "learning_rate": 1e-05,
      "loss": 2.9069,
      "step": 1
    },
    {
      "epoch": 0.009345794392523364,
      "grad_norm": 1.546875,
      "learning_rate": 9.953271028037384e-06,
      "loss": 2.9462,
      "step": 2
    },
    {
      "epoch": 0.014018691588785047,
      "grad_norm": 1.4375,
      "learning_rate": 9.906542056074768e-06,
      "loss": 2.8437,
      "step": 3
    },
    {
      "epoch": 0.018691588785046728,
      "grad_norm": 1.4140625,
      "learning_rate": 9.859813084112151e-06,
      "loss": 2.8093,
      "step": 4
    },
    {
      "epoch": 0.02336448598130841,
      "grad_norm": 1.140625,
      "learning_rate": 9.813084112149533e-06,
      "loss": 2.7097,
      "step": 5
    },
    {
      "epoch": 0.028037383177570093,
      "grad_norm": 1.0703125,
      "learning_rate": 9.766355140186918e-06,
      "loss": 2.7076,
      "step": 6
    },
    {
      "epoch": 0.03271028037383177,
      "grad_norm": 0.953125,
      "learning_rate": 9.7196261682243e-06,
      "loss": 2.6449,
      "step": 7
    },
    {
      "epoch": 0.037383177570093455,
      "grad_norm": 0.8984375,
      "learning_rate": 9.672897196261683e-06,
      "loss": 2.6143,
      "step": 8
    },
    {
      "epoch": 0.04205607476635514,
      "grad_norm": 0.91015625,
      "learning_rate": 9.626168224299066e-06,
      "loss": 2.5539,
      "step": 9
    },
    {
      "epoch": 0.04672897196261682,
      "grad_norm": 0.85546875,
      "learning_rate": 9.57943925233645e-06,
      "loss": 2.4964,
      "step": 10
    },
    {
      "epoch": 0.0514018691588785,
      "grad_norm": 0.73046875,
      "learning_rate": 9.532710280373833e-06,
      "loss": 2.4061,
      "step": 11
    },
    {
      "epoch": 0.056074766355140186,
      "grad_norm": 0.6875,
      "learning_rate": 9.485981308411217e-06,
      "loss": 2.3791,
      "step": 12
    },
    {
      "epoch": 0.06074766355140187,
      "grad_norm": 0.66796875,
      "learning_rate": 9.439252336448598e-06,
      "loss": 2.3691,
      "step": 13
    },
    {
      "epoch": 0.06542056074766354,
      "grad_norm": 0.6484375,
      "learning_rate": 9.392523364485983e-06,
      "loss": 2.3177,
      "step": 14
    },
    {
      "epoch": 0.07009345794392523,
      "grad_norm": 0.59375,
      "learning_rate": 9.345794392523365e-06,
      "loss": 2.2444,
      "step": 15
    },
    {
      "epoch": 0.07476635514018691,
      "grad_norm": 0.59375,
      "learning_rate": 9.299065420560748e-06,
      "loss": 2.261,
      "step": 16
    },
    {
      "epoch": 0.0794392523364486,
      "grad_norm": 0.609375,
      "learning_rate": 9.252336448598132e-06,
      "loss": 2.2963,
      "step": 17
    },
    {
      "epoch": 0.08411214953271028,
      "grad_norm": 0.5859375,
      "learning_rate": 9.205607476635515e-06,
      "loss": 2.2225,
      "step": 18
    },
    {
      "epoch": 0.08878504672897196,
      "grad_norm": 0.54296875,
      "learning_rate": 9.158878504672899e-06,
      "loss": 2.1646,
      "step": 19
    },
    {
      "epoch": 0.09345794392523364,
      "grad_norm": 0.5625,
      "learning_rate": 9.112149532710282e-06,
      "loss": 2.1857,
      "step": 20
    },
    {
      "epoch": 0.09813084112149532,
      "grad_norm": 0.578125,
      "learning_rate": 9.065420560747664e-06,
      "loss": 2.1392,
      "step": 21
    },
    {
      "epoch": 0.102803738317757,
      "grad_norm": 0.57421875,
      "learning_rate": 9.018691588785047e-06,
      "loss": 2.1135,
      "step": 22
    },
    {
      "epoch": 0.10747663551401869,
      "grad_norm": 0.55859375,
      "learning_rate": 8.97196261682243e-06,
      "loss": 2.1053,
      "step": 23
    },
    {
      "epoch": 0.11214953271028037,
      "grad_norm": 0.546875,
      "learning_rate": 8.925233644859814e-06,
      "loss": 2.0868,
      "step": 24
    },
    {
      "epoch": 0.11682242990654206,
      "grad_norm": 0.5234375,
      "learning_rate": 8.878504672897197e-06,
      "loss": 2.0354,
      "step": 25
    },
    {
      "epoch": 0.12149532710280374,
      "grad_norm": 0.5,
      "learning_rate": 8.83177570093458e-06,
      "loss": 2.0114,
      "step": 26
    },
    {
      "epoch": 0.1261682242990654,
      "grad_norm": 0.498046875,
      "learning_rate": 8.785046728971963e-06,
      "loss": 2.0158,
      "step": 27
    },
    {
      "epoch": 0.1308411214953271,
      "grad_norm": 0.498046875,
      "learning_rate": 8.738317757009348e-06,
      "loss": 2.0091,
      "step": 28
    },
    {
      "epoch": 0.13551401869158877,
      "grad_norm": 0.47265625,
      "learning_rate": 8.69158878504673e-06,
      "loss": 1.9591,
      "step": 29
    },
    {
      "epoch": 0.14018691588785046,
      "grad_norm": 0.45703125,
      "learning_rate": 8.644859813084113e-06,
      "loss": 1.9576,
      "step": 30
    },
    {
      "epoch": 0.14485981308411214,
      "grad_norm": 0.4609375,
      "learning_rate": 8.598130841121496e-06,
      "loss": 1.9389,
      "step": 31
    },
    {
      "epoch": 0.14953271028037382,
      "grad_norm": 0.44921875,
      "learning_rate": 8.55140186915888e-06,
      "loss": 1.8883,
      "step": 32
    },
    {
      "epoch": 0.1542056074766355,
      "grad_norm": 0.44921875,
      "learning_rate": 8.504672897196263e-06,
      "loss": 1.8913,
      "step": 33
    },
    {
      "epoch": 0.1588785046728972,
      "grad_norm": 0.431640625,
      "learning_rate": 8.457943925233646e-06,
      "loss": 1.8813,
      "step": 34
    },
    {
      "epoch": 0.16355140186915887,
      "grad_norm": 0.44140625,
      "learning_rate": 8.411214953271028e-06,
      "loss": 1.8649,
      "step": 35
    },
    {
      "epoch": 0.16822429906542055,
      "grad_norm": 0.427734375,
      "learning_rate": 8.364485981308411e-06,
      "loss": 1.8555,
      "step": 36
    },
    {
      "epoch": 0.17289719626168223,
      "grad_norm": 0.4140625,
      "learning_rate": 8.317757009345795e-06,
      "loss": 1.8295,
      "step": 37
    },
    {
      "epoch": 0.17757009345794392,
      "grad_norm": 0.40234375,
      "learning_rate": 8.271028037383178e-06,
      "loss": 1.8116,
      "step": 38
    },
    {
      "epoch": 0.1822429906542056,
      "grad_norm": 0.40234375,
      "learning_rate": 8.224299065420562e-06,
      "loss": 1.7998,
      "step": 39
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 0.38671875,
      "learning_rate": 8.177570093457945e-06,
      "loss": 1.7818,
      "step": 40
    },
    {
      "epoch": 0.19158878504672897,
      "grad_norm": 0.38671875,
      "learning_rate": 8.130841121495327e-06,
      "loss": 1.7595,
      "step": 41
    },
    {
      "epoch": 0.19626168224299065,
      "grad_norm": 0.38671875,
      "learning_rate": 8.084112149532712e-06,
      "loss": 1.7451,
      "step": 42
    },
    {
      "epoch": 0.20093457943925233,
      "grad_norm": 0.384765625,
      "learning_rate": 8.037383177570094e-06,
      "loss": 1.7456,
      "step": 43
    },
    {
      "epoch": 0.205607476635514,
      "grad_norm": 0.37109375,
      "learning_rate": 7.990654205607477e-06,
      "loss": 1.7068,
      "step": 44
    },
    {
      "epoch": 0.2102803738317757,
      "grad_norm": 0.361328125,
      "learning_rate": 7.94392523364486e-06,
      "loss": 1.7142,
      "step": 45
    },
    {
      "epoch": 0.21495327102803738,
      "grad_norm": 0.361328125,
      "learning_rate": 7.897196261682244e-06,
      "loss": 1.6928,
      "step": 46
    },
    {
      "epoch": 0.21962616822429906,
      "grad_norm": 0.353515625,
      "learning_rate": 7.850467289719627e-06,
      "loss": 1.665,
      "step": 47
    },
    {
      "epoch": 0.22429906542056074,
      "grad_norm": 0.345703125,
      "learning_rate": 7.80373831775701e-06,
      "loss": 1.6624,
      "step": 48
    },
    {
      "epoch": 0.22897196261682243,
      "grad_norm": 0.3359375,
      "learning_rate": 7.757009345794392e-06,
      "loss": 1.6481,
      "step": 49
    },
    {
      "epoch": 0.2336448598130841,
      "grad_norm": 0.34375,
      "learning_rate": 7.710280373831777e-06,
      "loss": 1.6482,
      "step": 50
    },
    {
      "epoch": 0.2383177570093458,
      "grad_norm": 0.333984375,
      "learning_rate": 7.663551401869159e-06,
      "loss": 1.6233,
      "step": 51
    },
    {
      "epoch": 0.24299065420560748,
      "grad_norm": 0.330078125,
      "learning_rate": 7.616822429906543e-06,
      "loss": 1.6163,
      "step": 52
    },
    {
      "epoch": 0.24766355140186916,
      "grad_norm": 0.32421875,
      "learning_rate": 7.570093457943926e-06,
      "loss": 1.5861,
      "step": 53
    },
    {
      "epoch": 0.2523364485981308,
      "grad_norm": 0.326171875,
      "learning_rate": 7.523364485981309e-06,
      "loss": 1.59,
      "step": 54
    },
    {
      "epoch": 0.2570093457943925,
      "grad_norm": 0.31640625,
      "learning_rate": 7.476635514018692e-06,
      "loss": 1.582,
      "step": 55
    },
    {
      "epoch": 0.2616822429906542,
      "grad_norm": 0.318359375,
      "learning_rate": 7.429906542056075e-06,
      "loss": 1.5681,
      "step": 56
    },
    {
      "epoch": 0.26635514018691586,
      "grad_norm": 0.3125,
      "learning_rate": 7.383177570093458e-06,
      "loss": 1.5647,
      "step": 57
    },
    {
      "epoch": 0.27102803738317754,
      "grad_norm": 0.310546875,
      "learning_rate": 7.336448598130842e-06,
      "loss": 1.539,
      "step": 58
    },
    {
      "epoch": 0.2757009345794392,
      "grad_norm": 0.302734375,
      "learning_rate": 7.289719626168225e-06,
      "loss": 1.5371,
      "step": 59
    },
    {
      "epoch": 0.2803738317757009,
      "grad_norm": 0.29296875,
      "learning_rate": 7.242990654205608e-06,
      "loss": 1.5024,
      "step": 60
    },
    {
      "epoch": 0.2850467289719626,
      "grad_norm": 0.27734375,
      "learning_rate": 7.196261682242991e-06,
      "loss": 1.516,
      "step": 61
    },
    {
      "epoch": 0.2897196261682243,
      "grad_norm": 0.271484375,
      "learning_rate": 7.149532710280375e-06,
      "loss": 1.5012,
      "step": 62
    },
    {
      "epoch": 0.29439252336448596,
      "grad_norm": 0.26171875,
      "learning_rate": 7.1028037383177574e-06,
      "loss": 1.4923,
      "step": 63
    },
    {
      "epoch": 0.29906542056074764,
      "grad_norm": 0.265625,
      "learning_rate": 7.056074766355141e-06,
      "loss": 1.4793,
      "step": 64
    },
    {
      "epoch": 0.3037383177570093,
      "grad_norm": 0.25,
      "learning_rate": 7.009345794392523e-06,
      "loss": 1.486,
      "step": 65
    },
    {
      "epoch": 0.308411214953271,
      "grad_norm": 0.255859375,
      "learning_rate": 6.962616822429908e-06,
      "loss": 1.4806,
      "step": 66
    },
    {
      "epoch": 0.3130841121495327,
      "grad_norm": 0.251953125,
      "learning_rate": 6.91588785046729e-06,
      "loss": 1.4681,
      "step": 67
    },
    {
      "epoch": 0.3177570093457944,
      "grad_norm": 0.248046875,
      "learning_rate": 6.869158878504674e-06,
      "loss": 1.4514,
      "step": 68
    },
    {
      "epoch": 0.32242990654205606,
      "grad_norm": 0.244140625,
      "learning_rate": 6.822429906542056e-06,
      "loss": 1.4378,
      "step": 69
    },
    {
      "epoch": 0.32710280373831774,
      "grad_norm": 0.2451171875,
      "learning_rate": 6.77570093457944e-06,
      "loss": 1.4582,
      "step": 70
    },
    {
      "epoch": 0.3317757009345794,
      "grad_norm": 0.244140625,
      "learning_rate": 6.728971962616823e-06,
      "loss": 1.4322,
      "step": 71
    },
    {
      "epoch": 0.3364485981308411,
      "grad_norm": 0.2333984375,
      "learning_rate": 6.682242990654206e-06,
      "loss": 1.4347,
      "step": 72
    },
    {
      "epoch": 0.3411214953271028,
      "grad_norm": 0.2373046875,
      "learning_rate": 6.635514018691589e-06,
      "loss": 1.4228,
      "step": 73
    },
    {
      "epoch": 0.34579439252336447,
      "grad_norm": 0.232421875,
      "learning_rate": 6.588785046728972e-06,
      "loss": 1.4103,
      "step": 74
    },
    {
      "epoch": 0.35046728971962615,
      "grad_norm": 0.224609375,
      "learning_rate": 6.542056074766355e-06,
      "loss": 1.4139,
      "step": 75
    },
    {
      "epoch": 0.35514018691588783,
      "grad_norm": 0.234375,
      "learning_rate": 6.495327102803739e-06,
      "loss": 1.4196,
      "step": 76
    },
    {
      "epoch": 0.3598130841121495,
      "grad_norm": 0.2265625,
      "learning_rate": 6.448598130841122e-06,
      "loss": 1.3918,
      "step": 77
    },
    {
      "epoch": 0.3644859813084112,
      "grad_norm": 0.224609375,
      "learning_rate": 6.401869158878505e-06,
      "loss": 1.4162,
      "step": 78
    },
    {
      "epoch": 0.3691588785046729,
      "grad_norm": 0.2119140625,
      "learning_rate": 6.355140186915888e-06,
      "loss": 1.3968,
      "step": 79
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 0.22265625,
      "learning_rate": 6.308411214953272e-06,
      "loss": 1.3754,
      "step": 80
    },
    {
      "epoch": 0.37850467289719625,
      "grad_norm": 0.2109375,
      "learning_rate": 6.2616822429906544e-06,
      "loss": 1.4086,
      "step": 81
    },
    {
      "epoch": 0.38317757009345793,
      "grad_norm": 0.2138671875,
      "learning_rate": 6.214953271028038e-06,
      "loss": 1.3931,
      "step": 82
    },
    {
      "epoch": 0.3878504672897196,
      "grad_norm": 0.216796875,
      "learning_rate": 6.16822429906542e-06,
      "loss": 1.3829,
      "step": 83
    },
    {
      "epoch": 0.3925233644859813,
      "grad_norm": 0.20703125,
      "learning_rate": 6.121495327102805e-06,
      "loss": 1.3711,
      "step": 84
    },
    {
      "epoch": 0.397196261682243,
      "grad_norm": 0.2060546875,
      "learning_rate": 6.074766355140187e-06,
      "loss": 1.3579,
      "step": 85
    },
    {
      "epoch": 0.40186915887850466,
      "grad_norm": 0.20703125,
      "learning_rate": 6.028037383177571e-06,
      "loss": 1.3727,
      "step": 86
    },
    {
      "epoch": 0.40654205607476634,
      "grad_norm": 0.2060546875,
      "learning_rate": 5.981308411214953e-06,
      "loss": 1.3692,
      "step": 87
    },
    {
      "epoch": 0.411214953271028,
      "grad_norm": 0.2021484375,
      "learning_rate": 5.9345794392523374e-06,
      "loss": 1.3706,
      "step": 88
    },
    {
      "epoch": 0.4158878504672897,
      "grad_norm": 0.1982421875,
      "learning_rate": 5.88785046728972e-06,
      "loss": 1.3446,
      "step": 89
    },
    {
      "epoch": 0.4205607476635514,
      "grad_norm": 0.216796875,
      "learning_rate": 5.841121495327103e-06,
      "loss": 1.3499,
      "step": 90
    },
    {
      "epoch": 0.4252336448598131,
      "grad_norm": 0.2158203125,
      "learning_rate": 5.794392523364486e-06,
      "loss": 1.3261,
      "step": 91
    },
    {
      "epoch": 0.42990654205607476,
      "grad_norm": 0.2099609375,
      "learning_rate": 5.747663551401869e-06,
      "loss": 1.3459,
      "step": 92
    },
    {
      "epoch": 0.43457943925233644,
      "grad_norm": 0.2060546875,
      "learning_rate": 5.700934579439253e-06,
      "loss": 1.3223,
      "step": 93
    },
    {
      "epoch": 0.4392523364485981,
      "grad_norm": 0.2001953125,
      "learning_rate": 5.654205607476636e-06,
      "loss": 1.3515,
      "step": 94
    },
    {
      "epoch": 0.4439252336448598,
      "grad_norm": 0.2021484375,
      "learning_rate": 5.607476635514019e-06,
      "loss": 1.3395,
      "step": 95
    },
    {
      "epoch": 0.4485981308411215,
      "grad_norm": 0.1982421875,
      "learning_rate": 5.560747663551402e-06,
      "loss": 1.3372,
      "step": 96
    },
    {
      "epoch": 0.4532710280373832,
      "grad_norm": 0.2119140625,
      "learning_rate": 5.514018691588785e-06,
      "loss": 1.3435,
      "step": 97
    },
    {
      "epoch": 0.45794392523364486,
      "grad_norm": 0.1953125,
      "learning_rate": 5.467289719626169e-06,
      "loss": 1.3129,
      "step": 98
    },
    {
      "epoch": 0.46261682242990654,
      "grad_norm": 0.2060546875,
      "learning_rate": 5.4205607476635515e-06,
      "loss": 1.3223,
      "step": 99
    },
    {
      "epoch": 0.4672897196261682,
      "grad_norm": 0.1962890625,
      "learning_rate": 5.373831775700935e-06,
      "loss": 1.3134,
      "step": 100
    },
    {
      "epoch": 0.4719626168224299,
      "grad_norm": 0.1943359375,
      "learning_rate": 5.3271028037383174e-06,
      "loss": 1.3067,
      "step": 101
    },
    {
      "epoch": 0.4766355140186916,
      "grad_norm": 0.2119140625,
      "learning_rate": 5.280373831775702e-06,
      "loss": 1.3027,
      "step": 102
    },
    {
      "epoch": 0.48130841121495327,
      "grad_norm": 0.2060546875,
      "learning_rate": 5.233644859813084e-06,
      "loss": 1.3162,
      "step": 103
    },
    {
      "epoch": 0.48598130841121495,
      "grad_norm": 0.2021484375,
      "learning_rate": 5.186915887850468e-06,
      "loss": 1.3141,
      "step": 104
    },
    {
      "epoch": 0.49065420560747663,
      "grad_norm": 0.201171875,
      "learning_rate": 5.14018691588785e-06,
      "loss": 1.3171,
      "step": 105
    },
    {
      "epoch": 0.4953271028037383,
      "grad_norm": 0.2021484375,
      "learning_rate": 5.0934579439252344e-06,
      "loss": 1.3113,
      "step": 106
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.1953125,
      "learning_rate": 5.046728971962617e-06,
      "loss": 1.3141,
      "step": 107
    },
    {
      "epoch": 0.5046728971962616,
      "grad_norm": 0.1953125,
      "learning_rate": 5e-06,
      "loss": 1.2936,
      "step": 108
    },
    {
      "epoch": 0.5093457943925234,
      "grad_norm": 0.1982421875,
      "learning_rate": 4.953271028037384e-06,
      "loss": 1.3028,
      "step": 109
    },
    {
      "epoch": 0.514018691588785,
      "grad_norm": 0.19921875,
      "learning_rate": 4.906542056074766e-06,
      "loss": 1.322,
      "step": 110
    },
    {
      "epoch": 0.5186915887850467,
      "grad_norm": 0.205078125,
      "learning_rate": 4.85981308411215e-06,
      "loss": 1.2738,
      "step": 111
    },
    {
      "epoch": 0.5233644859813084,
      "grad_norm": 0.1943359375,
      "learning_rate": 4.813084112149533e-06,
      "loss": 1.2831,
      "step": 112
    },
    {
      "epoch": 0.5280373831775701,
      "grad_norm": 0.2021484375,
      "learning_rate": 4.766355140186917e-06,
      "loss": 1.2763,
      "step": 113
    },
    {
      "epoch": 0.5327102803738317,
      "grad_norm": 0.1884765625,
      "learning_rate": 4.719626168224299e-06,
      "loss": 1.2935,
      "step": 114
    },
    {
      "epoch": 0.5373831775700935,
      "grad_norm": 0.21875,
      "learning_rate": 4.6728971962616825e-06,
      "loss": 1.2767,
      "step": 115
    },
    {
      "epoch": 0.5420560747663551,
      "grad_norm": 0.19921875,
      "learning_rate": 4.626168224299066e-06,
      "loss": 1.2702,
      "step": 116
    },
    {
      "epoch": 0.5467289719626168,
      "grad_norm": 0.2138671875,
      "learning_rate": 4.579439252336449e-06,
      "loss": 1.3022,
      "step": 117
    },
    {
      "epoch": 0.5514018691588785,
      "grad_norm": 0.2236328125,
      "learning_rate": 4.532710280373832e-06,
      "loss": 1.2769,
      "step": 118
    },
    {
      "epoch": 0.5560747663551402,
      "grad_norm": 0.19921875,
      "learning_rate": 4.485981308411215e-06,
      "loss": 1.2875,
      "step": 119
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 0.1923828125,
      "learning_rate": 4.439252336448599e-06,
      "loss": 1.2648,
      "step": 120
    },
    {
      "epoch": 0.5654205607476636,
      "grad_norm": 0.208984375,
      "learning_rate": 4.392523364485981e-06,
      "loss": 1.2776,
      "step": 121
    },
    {
      "epoch": 0.5700934579439252,
      "grad_norm": 0.2041015625,
      "learning_rate": 4.345794392523365e-06,
      "loss": 1.2699,
      "step": 122
    },
    {
      "epoch": 0.5747663551401869,
      "grad_norm": 0.1904296875,
      "learning_rate": 4.299065420560748e-06,
      "loss": 1.282,
      "step": 123
    },
    {
      "epoch": 0.5794392523364486,
      "grad_norm": 0.1962890625,
      "learning_rate": 4.2523364485981315e-06,
      "loss": 1.275,
      "step": 124
    },
    {
      "epoch": 0.5841121495327103,
      "grad_norm": 0.19921875,
      "learning_rate": 4.205607476635514e-06,
      "loss": 1.2507,
      "step": 125
    },
    {
      "epoch": 0.5887850467289719,
      "grad_norm": 0.2216796875,
      "learning_rate": 4.1588785046728974e-06,
      "loss": 1.2808,
      "step": 126
    },
    {
      "epoch": 0.5934579439252337,
      "grad_norm": 0.2109375,
      "learning_rate": 4.112149532710281e-06,
      "loss": 1.2779,
      "step": 127
    },
    {
      "epoch": 0.5981308411214953,
      "grad_norm": 0.2021484375,
      "learning_rate": 4.065420560747663e-06,
      "loss": 1.2905,
      "step": 128
    },
    {
      "epoch": 0.602803738317757,
      "grad_norm": 0.197265625,
      "learning_rate": 4.018691588785047e-06,
      "loss": 1.2584,
      "step": 129
    },
    {
      "epoch": 0.6074766355140186,
      "grad_norm": 0.19140625,
      "learning_rate": 3.97196261682243e-06,
      "loss": 1.2554,
      "step": 130
    },
    {
      "epoch": 0.6121495327102804,
      "grad_norm": 0.205078125,
      "learning_rate": 3.925233644859814e-06,
      "loss": 1.2852,
      "step": 131
    },
    {
      "epoch": 0.616822429906542,
      "grad_norm": 0.1982421875,
      "learning_rate": 3.878504672897196e-06,
      "loss": 1.263,
      "step": 132
    },
    {
      "epoch": 0.6214953271028038,
      "grad_norm": 0.193359375,
      "learning_rate": 3.8317757009345796e-06,
      "loss": 1.2651,
      "step": 133
    },
    {
      "epoch": 0.6261682242990654,
      "grad_norm": 0.2001953125,
      "learning_rate": 3.785046728971963e-06,
      "loss": 1.2573,
      "step": 134
    },
    {
      "epoch": 0.6308411214953271,
      "grad_norm": 0.1865234375,
      "learning_rate": 3.738317757009346e-06,
      "loss": 1.2498,
      "step": 135
    },
    {
      "epoch": 0.6355140186915887,
      "grad_norm": 0.193359375,
      "learning_rate": 3.691588785046729e-06,
      "loss": 1.2546,
      "step": 136
    },
    {
      "epoch": 0.6401869158878505,
      "grad_norm": 0.1982421875,
      "learning_rate": 3.6448598130841123e-06,
      "loss": 1.2684,
      "step": 137
    },
    {
      "epoch": 0.6448598130841121,
      "grad_norm": 0.2119140625,
      "learning_rate": 3.5981308411214953e-06,
      "loss": 1.2763,
      "step": 138
    },
    {
      "epoch": 0.6495327102803738,
      "grad_norm": 0.1962890625,
      "learning_rate": 3.5514018691588787e-06,
      "loss": 1.2619,
      "step": 139
    },
    {
      "epoch": 0.6542056074766355,
      "grad_norm": 0.2041015625,
      "learning_rate": 3.5046728971962617e-06,
      "loss": 1.2547,
      "step": 140
    },
    {
      "epoch": 0.6588785046728972,
      "grad_norm": 0.2041015625,
      "learning_rate": 3.457943925233645e-06,
      "loss": 1.2526,
      "step": 141
    },
    {
      "epoch": 0.6635514018691588,
      "grad_norm": 0.2060546875,
      "learning_rate": 3.411214953271028e-06,
      "loss": 1.248,
      "step": 142
    },
    {
      "epoch": 0.6682242990654206,
      "grad_norm": 0.2177734375,
      "learning_rate": 3.3644859813084115e-06,
      "loss": 1.2441,
      "step": 143
    },
    {
      "epoch": 0.6728971962616822,
      "grad_norm": 0.1943359375,
      "learning_rate": 3.3177570093457945e-06,
      "loss": 1.2472,
      "step": 144
    },
    {
      "epoch": 0.677570093457944,
      "grad_norm": 0.19921875,
      "learning_rate": 3.2710280373831774e-06,
      "loss": 1.2524,
      "step": 145
    },
    {
      "epoch": 0.6822429906542056,
      "grad_norm": 0.2021484375,
      "learning_rate": 3.224299065420561e-06,
      "loss": 1.2423,
      "step": 146
    },
    {
      "epoch": 0.6869158878504673,
      "grad_norm": 0.2216796875,
      "learning_rate": 3.177570093457944e-06,
      "loss": 1.2397,
      "step": 147
    },
    {
      "epoch": 0.6915887850467289,
      "grad_norm": 0.205078125,
      "learning_rate": 3.1308411214953272e-06,
      "loss": 1.2681,
      "step": 148
    },
    {
      "epoch": 0.6962616822429907,
      "grad_norm": 0.2041015625,
      "learning_rate": 3.08411214953271e-06,
      "loss": 1.2502,
      "step": 149
    },
    {
      "epoch": 0.7009345794392523,
      "grad_norm": 0.1884765625,
      "learning_rate": 3.0373831775700936e-06,
      "loss": 1.2574,
      "step": 150
    },
    {
      "epoch": 0.705607476635514,
      "grad_norm": 0.1923828125,
      "learning_rate": 2.9906542056074766e-06,
      "loss": 1.2453,
      "step": 151
    },
    {
      "epoch": 0.7102803738317757,
      "grad_norm": 0.1953125,
      "learning_rate": 2.94392523364486e-06,
      "loss": 1.2515,
      "step": 152
    },
    {
      "epoch": 0.7149532710280374,
      "grad_norm": 0.19921875,
      "learning_rate": 2.897196261682243e-06,
      "loss": 1.2589,
      "step": 153
    },
    {
      "epoch": 0.719626168224299,
      "grad_norm": 0.2080078125,
      "learning_rate": 2.8504672897196264e-06,
      "loss": 1.2567,
      "step": 154
    },
    {
      "epoch": 0.7242990654205608,
      "grad_norm": 0.2265625,
      "learning_rate": 2.8037383177570094e-06,
      "loss": 1.2359,
      "step": 155
    },
    {
      "epoch": 0.7289719626168224,
      "grad_norm": 0.2158203125,
      "learning_rate": 2.7570093457943923e-06,
      "loss": 1.2416,
      "step": 156
    },
    {
      "epoch": 0.7336448598130841,
      "grad_norm": 0.193359375,
      "learning_rate": 2.7102803738317757e-06,
      "loss": 1.2358,
      "step": 157
    },
    {
      "epoch": 0.7383177570093458,
      "grad_norm": 0.1904296875,
      "learning_rate": 2.6635514018691587e-06,
      "loss": 1.2224,
      "step": 158
    },
    {
      "epoch": 0.7429906542056075,
      "grad_norm": 0.2099609375,
      "learning_rate": 2.616822429906542e-06,
      "loss": 1.2458,
      "step": 159
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 0.193359375,
      "learning_rate": 2.570093457943925e-06,
      "loss": 1.2339,
      "step": 160
    },
    {
      "epoch": 0.7523364485981309,
      "grad_norm": 0.1953125,
      "learning_rate": 2.5233644859813085e-06,
      "loss": 1.2271,
      "step": 161
    },
    {
      "epoch": 0.7570093457943925,
      "grad_norm": 0.2060546875,
      "learning_rate": 2.476635514018692e-06,
      "loss": 1.2401,
      "step": 162
    },
    {
      "epoch": 0.7616822429906542,
      "grad_norm": 0.2060546875,
      "learning_rate": 2.429906542056075e-06,
      "loss": 1.2336,
      "step": 163
    },
    {
      "epoch": 0.7663551401869159,
      "grad_norm": 0.2099609375,
      "learning_rate": 2.3831775700934583e-06,
      "loss": 1.2179,
      "step": 164
    },
    {
      "epoch": 0.7710280373831776,
      "grad_norm": 0.2109375,
      "learning_rate": 2.3364485981308413e-06,
      "loss": 1.2422,
      "step": 165
    },
    {
      "epoch": 0.7757009345794392,
      "grad_norm": 0.21875,
      "learning_rate": 2.2897196261682247e-06,
      "loss": 1.2491,
      "step": 166
    },
    {
      "epoch": 0.780373831775701,
      "grad_norm": 0.1904296875,
      "learning_rate": 2.2429906542056077e-06,
      "loss": 1.2234,
      "step": 167
    },
    {
      "epoch": 0.7850467289719626,
      "grad_norm": 0.1923828125,
      "learning_rate": 2.1962616822429906e-06,
      "loss": 1.225,
      "step": 168
    },
    {
      "epoch": 0.7897196261682243,
      "grad_norm": 0.19140625,
      "learning_rate": 2.149532710280374e-06,
      "loss": 1.2219,
      "step": 169
    },
    {
      "epoch": 0.794392523364486,
      "grad_norm": 0.2041015625,
      "learning_rate": 2.102803738317757e-06,
      "loss": 1.2187,
      "step": 170
    },
    {
      "epoch": 0.7990654205607477,
      "grad_norm": 0.1904296875,
      "learning_rate": 2.0560747663551404e-06,
      "loss": 1.2221,
      "step": 171
    },
    {
      "epoch": 0.8037383177570093,
      "grad_norm": 0.2060546875,
      "learning_rate": 2.0093457943925234e-06,
      "loss": 1.2361,
      "step": 172
    },
    {
      "epoch": 0.8084112149532711,
      "grad_norm": 0.185546875,
      "learning_rate": 1.962616822429907e-06,
      "loss": 1.2293,
      "step": 173
    },
    {
      "epoch": 0.8130841121495327,
      "grad_norm": 0.2138671875,
      "learning_rate": 1.9158878504672898e-06,
      "loss": 1.2535,
      "step": 174
    },
    {
      "epoch": 0.8177570093457944,
      "grad_norm": 0.19921875,
      "learning_rate": 1.869158878504673e-06,
      "loss": 1.2332,
      "step": 175
    },
    {
      "epoch": 0.822429906542056,
      "grad_norm": 0.19921875,
      "learning_rate": 1.8224299065420562e-06,
      "loss": 1.2292,
      "step": 176
    },
    {
      "epoch": 0.8271028037383178,
      "grad_norm": 0.1962890625,
      "learning_rate": 1.7757009345794394e-06,
      "loss": 1.2325,
      "step": 177
    },
    {
      "epoch": 0.8317757009345794,
      "grad_norm": 0.2216796875,
      "learning_rate": 1.7289719626168225e-06,
      "loss": 1.2314,
      "step": 178
    },
    {
      "epoch": 0.8364485981308412,
      "grad_norm": 0.197265625,
      "learning_rate": 1.6822429906542057e-06,
      "loss": 1.2482,
      "step": 179
    },
    {
      "epoch": 0.8411214953271028,
      "grad_norm": 0.2109375,
      "learning_rate": 1.6355140186915887e-06,
      "loss": 1.2413,
      "step": 180
    },
    {
      "epoch": 0.8457943925233645,
      "grad_norm": 0.1923828125,
      "learning_rate": 1.588785046728972e-06,
      "loss": 1.2187,
      "step": 181
    },
    {
      "epoch": 0.8504672897196262,
      "grad_norm": 0.240234375,
      "learning_rate": 1.542056074766355e-06,
      "loss": 1.2416,
      "step": 182
    },
    {
      "epoch": 0.8551401869158879,
      "grad_norm": 0.1962890625,
      "learning_rate": 1.4953271028037383e-06,
      "loss": 1.2207,
      "step": 183
    },
    {
      "epoch": 0.8598130841121495,
      "grad_norm": 0.1962890625,
      "learning_rate": 1.4485981308411215e-06,
      "loss": 1.2447,
      "step": 184
    },
    {
      "epoch": 0.8644859813084113,
      "grad_norm": 0.21484375,
      "learning_rate": 1.4018691588785047e-06,
      "loss": 1.2384,
      "step": 185
    },
    {
      "epoch": 0.8691588785046729,
      "grad_norm": 0.197265625,
      "learning_rate": 1.3551401869158879e-06,
      "loss": 1.2225,
      "step": 186
    },
    {
      "epoch": 0.8738317757009346,
      "grad_norm": 0.1943359375,
      "learning_rate": 1.308411214953271e-06,
      "loss": 1.2498,
      "step": 187
    },
    {
      "epoch": 0.8785046728971962,
      "grad_norm": 0.193359375,
      "learning_rate": 1.2616822429906543e-06,
      "loss": 1.2368,
      "step": 188
    },
    {
      "epoch": 0.883177570093458,
      "grad_norm": 0.189453125,
      "learning_rate": 1.2149532710280374e-06,
      "loss": 1.2277,
      "step": 189
    },
    {
      "epoch": 0.8878504672897196,
      "grad_norm": 0.1875,
      "learning_rate": 1.1682242990654206e-06,
      "loss": 1.2207,
      "step": 190
    },
    {
      "epoch": 0.8925233644859814,
      "grad_norm": 0.2333984375,
      "learning_rate": 1.1214953271028038e-06,
      "loss": 1.229,
      "step": 191
    },
    {
      "epoch": 0.897196261682243,
      "grad_norm": 0.2041015625,
      "learning_rate": 1.074766355140187e-06,
      "loss": 1.2369,
      "step": 192
    },
    {
      "epoch": 0.9018691588785047,
      "grad_norm": 0.208984375,
      "learning_rate": 1.0280373831775702e-06,
      "loss": 1.2409,
      "step": 193
    },
    {
      "epoch": 0.9065420560747663,
      "grad_norm": 0.1982421875,
      "learning_rate": 9.813084112149534e-07,
      "loss": 1.2302,
      "step": 194
    },
    {
      "epoch": 0.9112149532710281,
      "grad_norm": 0.203125,
      "learning_rate": 9.345794392523365e-07,
      "loss": 1.2332,
      "step": 195
    },
    {
      "epoch": 0.9158878504672897,
      "grad_norm": 0.22265625,
      "learning_rate": 8.878504672897197e-07,
      "loss": 1.2479,
      "step": 196
    },
    {
      "epoch": 0.9205607476635514,
      "grad_norm": 0.19921875,
      "learning_rate": 8.411214953271029e-07,
      "loss": 1.2266,
      "step": 197
    },
    {
      "epoch": 0.9252336448598131,
      "grad_norm": 0.1943359375,
      "learning_rate": 7.94392523364486e-07,
      "loss": 1.2167,
      "step": 198
    },
    {
      "epoch": 0.9299065420560748,
      "grad_norm": 0.220703125,
      "learning_rate": 7.476635514018691e-07,
      "loss": 1.216,
      "step": 199
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 0.2109375,
      "learning_rate": 7.009345794392523e-07,
      "loss": 1.2391,
      "step": 200
    },
    {
      "epoch": 0.9392523364485982,
      "grad_norm": 0.20703125,
      "learning_rate": 6.542056074766355e-07,
      "loss": 1.2361,
      "step": 201
    },
    {
      "epoch": 0.9439252336448598,
      "grad_norm": 0.197265625,
      "learning_rate": 6.074766355140187e-07,
      "loss": 1.2316,
      "step": 202
    },
    {
      "epoch": 0.9485981308411215,
      "grad_norm": 0.1982421875,
      "learning_rate": 5.607476635514019e-07,
      "loss": 1.2201,
      "step": 203
    },
    {
      "epoch": 0.9532710280373832,
      "grad_norm": 0.203125,
      "learning_rate": 5.140186915887851e-07,
      "loss": 1.221,
      "step": 204
    },
    {
      "epoch": 0.9579439252336449,
      "grad_norm": 0.2060546875,
      "learning_rate": 4.6728971962616824e-07,
      "loss": 1.2392,
      "step": 205
    },
    {
      "epoch": 0.9626168224299065,
      "grad_norm": 0.18359375,
      "learning_rate": 4.2056074766355143e-07,
      "loss": 1.2295,
      "step": 206
    },
    {
      "epoch": 0.9672897196261683,
      "grad_norm": 0.2021484375,
      "learning_rate": 3.7383177570093457e-07,
      "loss": 1.2338,
      "step": 207
    },
    {
      "epoch": 0.9719626168224299,
      "grad_norm": 0.1953125,
      "learning_rate": 3.2710280373831776e-07,
      "loss": 1.224,
      "step": 208
    },
    {
      "epoch": 0.9766355140186916,
      "grad_norm": 0.2109375,
      "learning_rate": 2.8037383177570096e-07,
      "loss": 1.2317,
      "step": 209
    },
    {
      "epoch": 0.9813084112149533,
      "grad_norm": 0.193359375,
      "learning_rate": 2.3364485981308412e-07,
      "loss": 1.2235,
      "step": 210
    },
    {
      "epoch": 0.985981308411215,
      "grad_norm": 0.20703125,
      "learning_rate": 1.8691588785046729e-07,
      "loss": 1.2238,
      "step": 211
    },
    {
      "epoch": 0.9906542056074766,
      "grad_norm": 0.2080078125,
      "learning_rate": 1.4018691588785048e-07,
      "loss": 1.2146,
      "step": 212
    },
    {
      "epoch": 0.9953271028037384,
      "grad_norm": 0.203125,
      "learning_rate": 9.345794392523364e-08,
      "loss": 1.231,
      "step": 213
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.2099609375,
      "learning_rate": 4.672897196261682e-08,
      "loss": 1.2306,
      "step": 214
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.2332667112350464,
      "eval_runtime": 7.4216,
      "eval_samples_per_second": 3.099,
      "eval_steps_per_second": 0.404,
      "step": 214
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 214,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.321152639100518e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}