| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 0, | |
| "global_step": 123, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008130081300813009, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 9.91869918699187e-06, | |
| "loss": 1.7914, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.016260162601626018, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 9.837398373983741e-06, | |
| "loss": 1.8308, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.024390243902439025, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 9.756097560975611e-06, | |
| "loss": 1.8346, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.032520325203252036, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 9.67479674796748e-06, | |
| "loss": 1.8078, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.04065040650406504, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 9.59349593495935e-06, | |
| "loss": 1.83, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.04878048780487805, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 9.51219512195122e-06, | |
| "loss": 1.9395, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.056910569105691054, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 9.43089430894309e-06, | |
| "loss": 1.8757, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.06504065040650407, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 9.34959349593496e-06, | |
| "loss": 1.8858, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.07317073170731707, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 9.268292682926831e-06, | |
| "loss": 1.6752, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.08130081300813008, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 9.1869918699187e-06, | |
| "loss": 1.7763, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08943089430894309, | |
| "grad_norm": 0.27734375, | |
| "learning_rate": 9.10569105691057e-06, | |
| "loss": 1.7691, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0975609756097561, | |
| "grad_norm": 0.2734375, | |
| "learning_rate": 9.02439024390244e-06, | |
| "loss": 1.6506, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.10569105691056911, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 8.94308943089431e-06, | |
| "loss": 1.733, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.11382113821138211, | |
| "grad_norm": 0.26171875, | |
| "learning_rate": 8.86178861788618e-06, | |
| "loss": 1.7344, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.12195121951219512, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 8.78048780487805e-06, | |
| "loss": 1.7125, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.13008130081300814, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 8.69918699186992e-06, | |
| "loss": 1.7315, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.13821138211382114, | |
| "grad_norm": 0.2470703125, | |
| "learning_rate": 8.617886178861789e-06, | |
| "loss": 1.6819, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.14634146341463414, | |
| "grad_norm": 0.2333984375, | |
| "learning_rate": 8.536585365853658e-06, | |
| "loss": 1.6793, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.15447154471544716, | |
| "grad_norm": 0.263671875, | |
| "learning_rate": 8.45528455284553e-06, | |
| "loss": 1.6847, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.16260162601626016, | |
| "grad_norm": 0.2333984375, | |
| "learning_rate": 8.373983739837399e-06, | |
| "loss": 1.6838, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.17073170731707318, | |
| "grad_norm": 0.2099609375, | |
| "learning_rate": 8.292682926829268e-06, | |
| "loss": 1.63, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.17886178861788618, | |
| "grad_norm": 0.2392578125, | |
| "learning_rate": 8.21138211382114e-06, | |
| "loss": 1.6067, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.18699186991869918, | |
| "grad_norm": 0.1962890625, | |
| "learning_rate": 8.130081300813009e-06, | |
| "loss": 1.638, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.1951219512195122, | |
| "grad_norm": 0.1982421875, | |
| "learning_rate": 8.048780487804879e-06, | |
| "loss": 1.6493, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.2032520325203252, | |
| "grad_norm": 0.19140625, | |
| "learning_rate": 7.967479674796748e-06, | |
| "loss": 1.6309, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.21138211382113822, | |
| "grad_norm": 0.212890625, | |
| "learning_rate": 7.886178861788618e-06, | |
| "loss": 1.653, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.21951219512195122, | |
| "grad_norm": 0.2236328125, | |
| "learning_rate": 7.804878048780489e-06, | |
| "loss": 1.5796, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.22764227642276422, | |
| "grad_norm": 0.1787109375, | |
| "learning_rate": 7.723577235772358e-06, | |
| "loss": 1.6617, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.23577235772357724, | |
| "grad_norm": 0.1923828125, | |
| "learning_rate": 7.64227642276423e-06, | |
| "loss": 1.6065, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 0.17578125, | |
| "learning_rate": 7.560975609756098e-06, | |
| "loss": 1.635, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.25203252032520324, | |
| "grad_norm": 0.1787109375, | |
| "learning_rate": 7.4796747967479676e-06, | |
| "loss": 1.593, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.2601626016260163, | |
| "grad_norm": 0.201171875, | |
| "learning_rate": 7.398373983739838e-06, | |
| "loss": 1.6044, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2682926829268293, | |
| "grad_norm": 0.1806640625, | |
| "learning_rate": 7.317073170731707e-06, | |
| "loss": 1.6211, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.2764227642276423, | |
| "grad_norm": 0.1689453125, | |
| "learning_rate": 7.2357723577235786e-06, | |
| "loss": 1.6082, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.2845528455284553, | |
| "grad_norm": 0.1875, | |
| "learning_rate": 7.154471544715448e-06, | |
| "loss": 1.5812, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.2926829268292683, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 7.0731707317073175e-06, | |
| "loss": 1.6133, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.3008130081300813, | |
| "grad_norm": 0.1923828125, | |
| "learning_rate": 6.991869918699188e-06, | |
| "loss": 1.5672, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.3089430894308943, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 6.910569105691057e-06, | |
| "loss": 1.6293, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.3170731707317073, | |
| "grad_norm": 0.197265625, | |
| "learning_rate": 6.829268292682928e-06, | |
| "loss": 1.6276, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.3252032520325203, | |
| "grad_norm": 0.1572265625, | |
| "learning_rate": 6.747967479674797e-06, | |
| "loss": 1.5441, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.1787109375, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 1.564, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.34146341463414637, | |
| "grad_norm": 0.1962890625, | |
| "learning_rate": 6.585365853658538e-06, | |
| "loss": 1.5339, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.34959349593495936, | |
| "grad_norm": 0.1923828125, | |
| "learning_rate": 6.504065040650407e-06, | |
| "loss": 1.5444, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.35772357723577236, | |
| "grad_norm": 0.201171875, | |
| "learning_rate": 6.422764227642278e-06, | |
| "loss": 1.5525, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.36585365853658536, | |
| "grad_norm": 0.169921875, | |
| "learning_rate": 6.341463414634147e-06, | |
| "loss": 1.5702, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.37398373983739835, | |
| "grad_norm": 0.27734375, | |
| "learning_rate": 6.260162601626017e-06, | |
| "loss": 1.6304, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.3821138211382114, | |
| "grad_norm": 0.1962890625, | |
| "learning_rate": 6.178861788617887e-06, | |
| "loss": 1.5507, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.3902439024390244, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 6.0975609756097564e-06, | |
| "loss": 1.4688, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.3983739837398374, | |
| "grad_norm": 0.19140625, | |
| "learning_rate": 6.016260162601627e-06, | |
| "loss": 1.5426, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.4065040650406504, | |
| "grad_norm": 0.1767578125, | |
| "learning_rate": 5.934959349593496e-06, | |
| "loss": 1.575, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.4146341463414634, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 5.853658536585366e-06, | |
| "loss": 1.572, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.42276422764227645, | |
| "grad_norm": 0.197265625, | |
| "learning_rate": 5.772357723577237e-06, | |
| "loss": 1.5698, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.43089430894308944, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 5.691056910569106e-06, | |
| "loss": 1.5296, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.43902439024390244, | |
| "grad_norm": 0.1708984375, | |
| "learning_rate": 5.609756097560977e-06, | |
| "loss": 1.5567, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.44715447154471544, | |
| "grad_norm": 0.1708984375, | |
| "learning_rate": 5.528455284552846e-06, | |
| "loss": 1.5867, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.45528455284552843, | |
| "grad_norm": 0.173828125, | |
| "learning_rate": 5.447154471544716e-06, | |
| "loss": 1.522, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.4634146341463415, | |
| "grad_norm": 0.16015625, | |
| "learning_rate": 5.365853658536586e-06, | |
| "loss": 1.5443, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.4715447154471545, | |
| "grad_norm": 0.2099609375, | |
| "learning_rate": 5.2845528455284555e-06, | |
| "loss": 1.5125, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.4796747967479675, | |
| "grad_norm": 0.212890625, | |
| "learning_rate": 5.203252032520326e-06, | |
| "loss": 1.6069, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 5.121951219512195e-06, | |
| "loss": 1.5464, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.4959349593495935, | |
| "grad_norm": 0.2109375, | |
| "learning_rate": 5.040650406504065e-06, | |
| "loss": 1.5834, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.5040650406504065, | |
| "grad_norm": 0.1796875, | |
| "learning_rate": 4.959349593495935e-06, | |
| "loss": 1.5126, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.5121951219512195, | |
| "grad_norm": 0.177734375, | |
| "learning_rate": 4.8780487804878055e-06, | |
| "loss": 1.5238, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.5203252032520326, | |
| "grad_norm": 0.1455078125, | |
| "learning_rate": 4.796747967479675e-06, | |
| "loss": 1.5456, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.5284552845528455, | |
| "grad_norm": 0.1513671875, | |
| "learning_rate": 4.715447154471545e-06, | |
| "loss": 1.5332, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.5365853658536586, | |
| "grad_norm": 0.1484375, | |
| "learning_rate": 4.634146341463416e-06, | |
| "loss": 1.5083, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.5447154471544715, | |
| "grad_norm": 0.150390625, | |
| "learning_rate": 4.552845528455285e-06, | |
| "loss": 1.5709, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.5528455284552846, | |
| "grad_norm": 0.138671875, | |
| "learning_rate": 4.471544715447155e-06, | |
| "loss": 1.5517, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.5609756097560976, | |
| "grad_norm": 0.14453125, | |
| "learning_rate": 4.390243902439025e-06, | |
| "loss": 1.4947, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.5691056910569106, | |
| "grad_norm": 0.244140625, | |
| "learning_rate": 4.308943089430894e-06, | |
| "loss": 1.4829, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.5772357723577236, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 4.227642276422765e-06, | |
| "loss": 1.5183, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.5853658536585366, | |
| "grad_norm": 0.171875, | |
| "learning_rate": 4.146341463414634e-06, | |
| "loss": 1.5429, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.5934959349593496, | |
| "grad_norm": 0.13671875, | |
| "learning_rate": 4.0650406504065046e-06, | |
| "loss": 1.5305, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.6016260162601627, | |
| "grad_norm": 0.1533203125, | |
| "learning_rate": 3.983739837398374e-06, | |
| "loss": 1.4979, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.6097560975609756, | |
| "grad_norm": 0.1640625, | |
| "learning_rate": 3.902439024390244e-06, | |
| "loss": 1.5252, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.6178861788617886, | |
| "grad_norm": 0.1591796875, | |
| "learning_rate": 3.821138211382115e-06, | |
| "loss": 1.4131, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.6260162601626016, | |
| "grad_norm": 0.1484375, | |
| "learning_rate": 3.7398373983739838e-06, | |
| "loss": 1.4648, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.6341463414634146, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 3.6585365853658537e-06, | |
| "loss": 1.5375, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.6422764227642277, | |
| "grad_norm": 0.2119140625, | |
| "learning_rate": 3.577235772357724e-06, | |
| "loss": 1.4335, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.6504065040650406, | |
| "grad_norm": 0.142578125, | |
| "learning_rate": 3.495934959349594e-06, | |
| "loss": 1.5052, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.6585365853658537, | |
| "grad_norm": 0.1474609375, | |
| "learning_rate": 3.414634146341464e-06, | |
| "loss": 1.5025, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.248046875, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.4299, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.6747967479674797, | |
| "grad_norm": 0.1689453125, | |
| "learning_rate": 3.2520325203252037e-06, | |
| "loss": 1.4853, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.6829268292682927, | |
| "grad_norm": 0.138671875, | |
| "learning_rate": 3.1707317073170736e-06, | |
| "loss": 1.5545, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.6910569105691057, | |
| "grad_norm": 0.158203125, | |
| "learning_rate": 3.0894308943089435e-06, | |
| "loss": 1.4281, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.6991869918699187, | |
| "grad_norm": 0.1396484375, | |
| "learning_rate": 3.0081300813008134e-06, | |
| "loss": 1.4572, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.7073170731707317, | |
| "grad_norm": 0.1357421875, | |
| "learning_rate": 2.926829268292683e-06, | |
| "loss": 1.4792, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.7154471544715447, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 2.845528455284553e-06, | |
| "loss": 1.4081, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.7235772357723578, | |
| "grad_norm": 0.13671875, | |
| "learning_rate": 2.764227642276423e-06, | |
| "loss": 1.4979, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 0.1298828125, | |
| "learning_rate": 2.682926829268293e-06, | |
| "loss": 1.4775, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.7398373983739838, | |
| "grad_norm": 0.140625, | |
| "learning_rate": 2.601626016260163e-06, | |
| "loss": 1.4734, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.7479674796747967, | |
| "grad_norm": 0.1455078125, | |
| "learning_rate": 2.5203252032520324e-06, | |
| "loss": 1.3895, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.7560975609756098, | |
| "grad_norm": 0.1611328125, | |
| "learning_rate": 2.4390243902439027e-06, | |
| "loss": 1.4338, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.7642276422764228, | |
| "grad_norm": 0.150390625, | |
| "learning_rate": 2.3577235772357727e-06, | |
| "loss": 1.4483, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.7723577235772358, | |
| "grad_norm": 0.1396484375, | |
| "learning_rate": 2.2764227642276426e-06, | |
| "loss": 1.4468, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.7804878048780488, | |
| "grad_norm": 0.16796875, | |
| "learning_rate": 2.1951219512195125e-06, | |
| "loss": 1.4327, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.7886178861788617, | |
| "grad_norm": 0.1796875, | |
| "learning_rate": 2.1138211382113824e-06, | |
| "loss": 1.5092, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.7967479674796748, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 2.0325203252032523e-06, | |
| "loss": 1.499, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.8048780487804879, | |
| "grad_norm": 0.1474609375, | |
| "learning_rate": 1.951219512195122e-06, | |
| "loss": 1.4513, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.8130081300813008, | |
| "grad_norm": 0.2099609375, | |
| "learning_rate": 1.8699186991869919e-06, | |
| "loss": 1.473, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.8211382113821138, | |
| "grad_norm": 0.185546875, | |
| "learning_rate": 1.788617886178862e-06, | |
| "loss": 1.4495, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.8292682926829268, | |
| "grad_norm": 0.1416015625, | |
| "learning_rate": 1.707317073170732e-06, | |
| "loss": 1.504, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.8373983739837398, | |
| "grad_norm": 0.20703125, | |
| "learning_rate": 1.6260162601626018e-06, | |
| "loss": 1.4803, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.8455284552845529, | |
| "grad_norm": 0.1376953125, | |
| "learning_rate": 1.5447154471544717e-06, | |
| "loss": 1.4936, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.8536585365853658, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 1.4634146341463414e-06, | |
| "loss": 1.5159, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.8617886178861789, | |
| "grad_norm": 0.140625, | |
| "learning_rate": 1.3821138211382116e-06, | |
| "loss": 1.5042, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.8699186991869918, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 1.3008130081300815e-06, | |
| "loss": 1.3785, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.8780487804878049, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 1.2195121951219514e-06, | |
| "loss": 1.4519, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.8861788617886179, | |
| "grad_norm": 0.2080078125, | |
| "learning_rate": 1.1382113821138213e-06, | |
| "loss": 1.4291, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.8943089430894309, | |
| "grad_norm": 0.158203125, | |
| "learning_rate": 1.0569105691056912e-06, | |
| "loss": 1.3927, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.9024390243902439, | |
| "grad_norm": 0.138671875, | |
| "learning_rate": 9.75609756097561e-07, | |
| "loss": 1.4396, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.9105691056910569, | |
| "grad_norm": 0.1416015625, | |
| "learning_rate": 8.94308943089431e-07, | |
| "loss": 1.4591, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.9186991869918699, | |
| "grad_norm": 0.1748046875, | |
| "learning_rate": 8.130081300813009e-07, | |
| "loss": 1.5032, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.926829268292683, | |
| "grad_norm": 0.15234375, | |
| "learning_rate": 7.317073170731707e-07, | |
| "loss": 1.4562, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.9349593495934959, | |
| "grad_norm": 0.169921875, | |
| "learning_rate": 6.504065040650407e-07, | |
| "loss": 1.4683, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.943089430894309, | |
| "grad_norm": 0.1396484375, | |
| "learning_rate": 5.691056910569106e-07, | |
| "loss": 1.5243, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.9512195121951219, | |
| "grad_norm": 0.1884765625, | |
| "learning_rate": 4.878048780487805e-07, | |
| "loss": 1.4146, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.959349593495935, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 4.0650406504065046e-07, | |
| "loss": 1.3936, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.967479674796748, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 3.2520325203252037e-07, | |
| "loss": 1.4421, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 0.251953125, | |
| "learning_rate": 2.439024390243903e-07, | |
| "loss": 1.3564, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.983739837398374, | |
| "grad_norm": 0.1708984375, | |
| "learning_rate": 1.6260162601626018e-07, | |
| "loss": 1.5465, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.991869918699187, | |
| "grad_norm": 0.19140625, | |
| "learning_rate": 8.130081300813009e-08, | |
| "loss": 1.4304, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.1474609375, | |
| "learning_rate": 0.0, | |
| "loss": 1.4253, | |
| "step": 123 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 123, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.31844774623445e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |