| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 0, |
| "global_step": 306, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0032679738562091504, |
| "grad_norm": 0.50390625, |
| "learning_rate": 9.96732026143791e-06, |
| "loss": 1.7678, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006535947712418301, |
| "grad_norm": 0.451171875, |
| "learning_rate": 9.934640522875818e-06, |
| "loss": 1.7126, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00980392156862745, |
| "grad_norm": 0.439453125, |
| "learning_rate": 9.901960784313727e-06, |
| "loss": 1.6738, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.013071895424836602, |
| "grad_norm": 0.4296875, |
| "learning_rate": 9.869281045751634e-06, |
| "loss": 1.6994, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.016339869281045753, |
| "grad_norm": 0.41796875, |
| "learning_rate": 9.836601307189542e-06, |
| "loss": 1.6942, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0196078431372549, |
| "grad_norm": 0.404296875, |
| "learning_rate": 9.803921568627451e-06, |
| "loss": 1.6875, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02287581699346405, |
| "grad_norm": 0.3671875, |
| "learning_rate": 9.77124183006536e-06, |
| "loss": 1.5903, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.026143790849673203, |
| "grad_norm": 0.337890625, |
| "learning_rate": 9.738562091503268e-06, |
| "loss": 1.6187, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.029411764705882353, |
| "grad_norm": 0.3203125, |
| "learning_rate": 9.705882352941177e-06, |
| "loss": 1.6379, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.032679738562091505, |
| "grad_norm": 0.265625, |
| "learning_rate": 9.673202614379087e-06, |
| "loss": 1.5965, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03594771241830065, |
| "grad_norm": 0.259765625, |
| "learning_rate": 9.640522875816994e-06, |
| "loss": 1.6126, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0392156862745098, |
| "grad_norm": 0.296875, |
| "learning_rate": 9.607843137254903e-06, |
| "loss": 1.7813, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.042483660130718956, |
| "grad_norm": 0.244140625, |
| "learning_rate": 9.575163398692811e-06, |
| "loss": 1.6, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0457516339869281, |
| "grad_norm": 0.2353515625, |
| "learning_rate": 9.54248366013072e-06, |
| "loss": 1.6003, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.049019607843137254, |
| "grad_norm": 0.234375, |
| "learning_rate": 9.509803921568628e-06, |
| "loss": 1.5348, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05228758169934641, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 9.477124183006537e-06, |
| "loss": 1.5196, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.05555555555555555, |
| "grad_norm": 0.2412109375, |
| "learning_rate": 9.444444444444445e-06, |
| "loss": 1.5982, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 9.411764705882354e-06, |
| "loss": 1.5775, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06209150326797386, |
| "grad_norm": 0.1982421875, |
| "learning_rate": 9.379084967320261e-06, |
| "loss": 1.48, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.06535947712418301, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 9.34640522875817e-06, |
| "loss": 1.5339, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06862745098039216, |
| "grad_norm": 0.193359375, |
| "learning_rate": 9.31372549019608e-06, |
| "loss": 1.5514, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0718954248366013, |
| "grad_norm": 0.1845703125, |
| "learning_rate": 9.281045751633987e-06, |
| "loss": 1.5424, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.07516339869281045, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 9.248366013071897e-06, |
| "loss": 1.5233, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0784313725490196, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 9.215686274509804e-06, |
| "loss": 1.506, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08169934640522876, |
| "grad_norm": 0.15234375, |
| "learning_rate": 9.183006535947713e-06, |
| "loss": 1.4921, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08496732026143791, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 9.150326797385621e-06, |
| "loss": 1.4648, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.08823529411764706, |
| "grad_norm": 0.154296875, |
| "learning_rate": 9.11764705882353e-06, |
| "loss": 1.4648, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0915032679738562, |
| "grad_norm": 0.138671875, |
| "learning_rate": 9.084967320261438e-06, |
| "loss": 1.4776, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.09477124183006536, |
| "grad_norm": 0.1875, |
| "learning_rate": 9.052287581699347e-06, |
| "loss": 1.5891, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.09803921568627451, |
| "grad_norm": 0.140625, |
| "learning_rate": 9.019607843137256e-06, |
| "loss": 1.4656, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10130718954248366, |
| "grad_norm": 0.24609375, |
| "learning_rate": 8.986928104575164e-06, |
| "loss": 1.4948, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.10457516339869281, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 8.954248366013073e-06, |
| "loss": 1.4988, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.10784313725490197, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 8.921568627450982e-06, |
| "loss": 1.4855, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 1.4475, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.11437908496732026, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 8.856209150326798e-06, |
| "loss": 1.4693, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 8.823529411764707e-06, |
| "loss": 1.456, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.12091503267973856, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 8.790849673202614e-06, |
| "loss": 1.4638, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.12418300653594772, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 8.758169934640524e-06, |
| "loss": 1.4378, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.12745098039215685, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 8.725490196078433e-06, |
| "loss": 1.4161, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.13071895424836602, |
| "grad_norm": 0.126953125, |
| "learning_rate": 8.69281045751634e-06, |
| "loss": 1.4582, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13398692810457516, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 8.66013071895425e-06, |
| "loss": 1.4215, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.13725490196078433, |
| "grad_norm": 0.173828125, |
| "learning_rate": 8.627450980392157e-06, |
| "loss": 1.5164, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.14052287581699346, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 8.594771241830066e-06, |
| "loss": 1.3956, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1437908496732026, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 8.562091503267974e-06, |
| "loss": 1.4147, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.14705882352941177, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 8.529411764705883e-06, |
| "loss": 1.4664, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1503267973856209, |
| "grad_norm": 0.12060546875, |
| "learning_rate": 8.496732026143791e-06, |
| "loss": 1.402, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.15359477124183007, |
| "grad_norm": 0.11279296875, |
| "learning_rate": 8.4640522875817e-06, |
| "loss": 1.455, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1568627450980392, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 8.43137254901961e-06, |
| "loss": 1.4874, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.16013071895424835, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 8.398692810457517e-06, |
| "loss": 1.5623, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.16339869281045752, |
| "grad_norm": 0.11083984375, |
| "learning_rate": 8.366013071895426e-06, |
| "loss": 1.4036, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 1.4001, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.16993464052287582, |
| "grad_norm": 0.1484375, |
| "learning_rate": 8.300653594771243e-06, |
| "loss": 1.5234, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.17320261437908496, |
| "grad_norm": 0.126953125, |
| "learning_rate": 8.26797385620915e-06, |
| "loss": 1.4116, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 0.1220703125, |
| "learning_rate": 8.23529411764706e-06, |
| "loss": 1.4592, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.17973856209150327, |
| "grad_norm": 0.10546875, |
| "learning_rate": 8.202614379084967e-06, |
| "loss": 1.4332, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1830065359477124, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 8.169934640522877e-06, |
| "loss": 1.4171, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.18627450980392157, |
| "grad_norm": 0.1015625, |
| "learning_rate": 8.137254901960784e-06, |
| "loss": 1.3787, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1895424836601307, |
| "grad_norm": 0.10595703125, |
| "learning_rate": 8.104575163398693e-06, |
| "loss": 1.4223, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.19281045751633988, |
| "grad_norm": 0.107421875, |
| "learning_rate": 8.071895424836603e-06, |
| "loss": 1.4104, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 0.11572265625, |
| "learning_rate": 8.03921568627451e-06, |
| "loss": 1.4097, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.19934640522875818, |
| "grad_norm": 0.10791015625, |
| "learning_rate": 8.00653594771242e-06, |
| "loss": 1.4235, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.20261437908496732, |
| "grad_norm": 0.10205078125, |
| "learning_rate": 7.973856209150329e-06, |
| "loss": 1.3798, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.20588235294117646, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 7.941176470588236e-06, |
| "loss": 1.4078, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.20915032679738563, |
| "grad_norm": 0.099609375, |
| "learning_rate": 7.908496732026144e-06, |
| "loss": 1.3696, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.21241830065359477, |
| "grad_norm": 0.09716796875, |
| "learning_rate": 7.875816993464053e-06, |
| "loss": 1.3765, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.21568627450980393, |
| "grad_norm": 0.10205078125, |
| "learning_rate": 7.84313725490196e-06, |
| "loss": 1.3658, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.21895424836601307, |
| "grad_norm": 0.1025390625, |
| "learning_rate": 7.81045751633987e-06, |
| "loss": 1.3527, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.107421875, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 1.346, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.22549019607843138, |
| "grad_norm": 0.1005859375, |
| "learning_rate": 7.745098039215687e-06, |
| "loss": 1.3623, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.22875816993464052, |
| "grad_norm": 0.10400390625, |
| "learning_rate": 7.712418300653596e-06, |
| "loss": 1.3469, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.23202614379084968, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 7.679738562091504e-06, |
| "loss": 1.385, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 0.1005859375, |
| "learning_rate": 7.647058823529411e-06, |
| "loss": 1.3932, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.238562091503268, |
| "grad_norm": 0.130859375, |
| "learning_rate": 7.61437908496732e-06, |
| "loss": 1.3533, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.24183006535947713, |
| "grad_norm": 0.10009765625, |
| "learning_rate": 7.581699346405229e-06, |
| "loss": 1.3961, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.24509803921568626, |
| "grad_norm": 0.1064453125, |
| "learning_rate": 7.549019607843138e-06, |
| "loss": 1.3901, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.24836601307189543, |
| "grad_norm": 0.10546875, |
| "learning_rate": 7.516339869281046e-06, |
| "loss": 1.3643, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.25163398692810457, |
| "grad_norm": 0.095703125, |
| "learning_rate": 7.483660130718955e-06, |
| "loss": 1.3425, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2549019607843137, |
| "grad_norm": 0.1083984375, |
| "learning_rate": 7.450980392156863e-06, |
| "loss": 1.4032, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2581699346405229, |
| "grad_norm": 0.134765625, |
| "learning_rate": 7.4183006535947725e-06, |
| "loss": 1.3228, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.26143790849673204, |
| "grad_norm": 0.1005859375, |
| "learning_rate": 7.385620915032681e-06, |
| "loss": 1.3964, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2647058823529412, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 7.352941176470589e-06, |
| "loss": 1.3605, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.2679738562091503, |
| "grad_norm": 0.09765625, |
| "learning_rate": 7.320261437908497e-06, |
| "loss": 1.3084, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.27124183006535946, |
| "grad_norm": 0.09521484375, |
| "learning_rate": 7.287581699346405e-06, |
| "loss": 1.3851, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.27450980392156865, |
| "grad_norm": 0.0966796875, |
| "learning_rate": 7.2549019607843145e-06, |
| "loss": 1.3596, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 0.419921875, |
| "learning_rate": 7.222222222222223e-06, |
| "loss": 1.6292, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.28104575163398693, |
| "grad_norm": 0.09521484375, |
| "learning_rate": 7.189542483660131e-06, |
| "loss": 1.3443, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.28431372549019607, |
| "grad_norm": 0.0986328125, |
| "learning_rate": 7.15686274509804e-06, |
| "loss": 1.3978, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2875816993464052, |
| "grad_norm": 0.10107421875, |
| "learning_rate": 7.124183006535948e-06, |
| "loss": 1.3581, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2908496732026144, |
| "grad_norm": 0.09619140625, |
| "learning_rate": 7.091503267973857e-06, |
| "loss": 1.3509, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 0.099609375, |
| "learning_rate": 7.058823529411766e-06, |
| "loss": 1.3522, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2973856209150327, |
| "grad_norm": 0.09423828125, |
| "learning_rate": 7.026143790849673e-06, |
| "loss": 1.3553, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.3006535947712418, |
| "grad_norm": 0.1015625, |
| "learning_rate": 6.993464052287582e-06, |
| "loss": 1.2874, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.30392156862745096, |
| "grad_norm": 0.146484375, |
| "learning_rate": 6.96078431372549e-06, |
| "loss": 1.3791, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.30718954248366015, |
| "grad_norm": 0.09521484375, |
| "learning_rate": 6.928104575163399e-06, |
| "loss": 1.3514, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.3104575163398693, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 6.895424836601308e-06, |
| "loss": 1.3437, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3137254901960784, |
| "grad_norm": 0.103515625, |
| "learning_rate": 6.862745098039216e-06, |
| "loss": 1.3763, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.31699346405228757, |
| "grad_norm": 0.09912109375, |
| "learning_rate": 6.830065359477125e-06, |
| "loss": 1.3632, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.3202614379084967, |
| "grad_norm": 0.09716796875, |
| "learning_rate": 6.797385620915034e-06, |
| "loss": 1.3221, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3235294117647059, |
| "grad_norm": 0.0927734375, |
| "learning_rate": 6.764705882352942e-06, |
| "loss": 1.3214, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.32679738562091504, |
| "grad_norm": 0.095703125, |
| "learning_rate": 6.732026143790851e-06, |
| "loss": 1.3447, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3300653594771242, |
| "grad_norm": 0.1025390625, |
| "learning_rate": 6.699346405228758e-06, |
| "loss": 1.3675, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.11376953125, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.3547, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3366013071895425, |
| "grad_norm": 0.09765625, |
| "learning_rate": 6.633986928104575e-06, |
| "loss": 1.323, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.33986928104575165, |
| "grad_norm": 0.0986328125, |
| "learning_rate": 6.601307189542484e-06, |
| "loss": 1.3437, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3431372549019608, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 6.568627450980393e-06, |
| "loss": 1.3564, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3464052287581699, |
| "grad_norm": 0.103515625, |
| "learning_rate": 6.535947712418301e-06, |
| "loss": 1.3487, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.34967320261437906, |
| "grad_norm": 0.09619140625, |
| "learning_rate": 6.5032679738562095e-06, |
| "loss": 1.3322, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 0.09326171875, |
| "learning_rate": 6.470588235294119e-06, |
| "loss": 1.3215, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3562091503267974, |
| "grad_norm": 0.09375, |
| "learning_rate": 6.437908496732027e-06, |
| "loss": 1.3258, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.35947712418300654, |
| "grad_norm": 0.1015625, |
| "learning_rate": 6.405228758169935e-06, |
| "loss": 1.3013, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3627450980392157, |
| "grad_norm": 0.1044921875, |
| "learning_rate": 6.372549019607843e-06, |
| "loss": 1.3078, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3660130718954248, |
| "grad_norm": 0.10107421875, |
| "learning_rate": 6.3398692810457515e-06, |
| "loss": 1.3178, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.369281045751634, |
| "grad_norm": 0.09423828125, |
| "learning_rate": 6.307189542483661e-06, |
| "loss": 1.3343, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.37254901960784315, |
| "grad_norm": 0.091796875, |
| "learning_rate": 6.274509803921569e-06, |
| "loss": 1.3418, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3758169934640523, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 6.241830065359478e-06, |
| "loss": 1.3293, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3790849673202614, |
| "grad_norm": 0.09228515625, |
| "learning_rate": 6.209150326797386e-06, |
| "loss": 1.3046, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.38235294117647056, |
| "grad_norm": 0.0908203125, |
| "learning_rate": 6.176470588235295e-06, |
| "loss": 1.3192, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.38562091503267976, |
| "grad_norm": 0.09130859375, |
| "learning_rate": 6.143790849673204e-06, |
| "loss": 1.2923, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3888888888888889, |
| "grad_norm": 0.10205078125, |
| "learning_rate": 6.111111111111112e-06, |
| "loss": 1.34, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 0.0986328125, |
| "learning_rate": 6.07843137254902e-06, |
| "loss": 1.3105, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3954248366013072, |
| "grad_norm": 0.09619140625, |
| "learning_rate": 6.045751633986928e-06, |
| "loss": 1.3412, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.39869281045751637, |
| "grad_norm": 0.09423828125, |
| "learning_rate": 6.0130718954248365e-06, |
| "loss": 1.3483, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.4019607843137255, |
| "grad_norm": 0.103515625, |
| "learning_rate": 5.980392156862746e-06, |
| "loss": 1.3248, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.40522875816993464, |
| "grad_norm": 0.10205078125, |
| "learning_rate": 5.947712418300654e-06, |
| "loss": 1.3346, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.4084967320261438, |
| "grad_norm": 0.1103515625, |
| "learning_rate": 5.9150326797385625e-06, |
| "loss": 1.3605, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 0.095703125, |
| "learning_rate": 5.882352941176471e-06, |
| "loss": 1.3217, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.4150326797385621, |
| "grad_norm": 0.095703125, |
| "learning_rate": 5.84967320261438e-06, |
| "loss": 1.3464, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.41830065359477125, |
| "grad_norm": 0.115234375, |
| "learning_rate": 5.816993464052289e-06, |
| "loss": 1.322, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4215686274509804, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 5.784313725490197e-06, |
| "loss": 1.3154, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.42483660130718953, |
| "grad_norm": 0.111328125, |
| "learning_rate": 5.7516339869281045e-06, |
| "loss": 1.2998, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.42810457516339867, |
| "grad_norm": 0.095703125, |
| "learning_rate": 5.718954248366013e-06, |
| "loss": 1.286, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.43137254901960786, |
| "grad_norm": 0.10400390625, |
| "learning_rate": 5.686274509803922e-06, |
| "loss": 1.3048, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.434640522875817, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 5.653594771241831e-06, |
| "loss": 1.2749, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.43790849673202614, |
| "grad_norm": 0.103515625, |
| "learning_rate": 5.620915032679739e-06, |
| "loss": 1.3243, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.4411764705882353, |
| "grad_norm": 0.10498046875, |
| "learning_rate": 5.588235294117647e-06, |
| "loss": 1.3356, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.134765625, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 1.2729, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.4477124183006536, |
| "grad_norm": 0.103515625, |
| "learning_rate": 5.522875816993465e-06, |
| "loss": 1.2757, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.45098039215686275, |
| "grad_norm": 0.10205078125, |
| "learning_rate": 5.4901960784313735e-06, |
| "loss": 1.2965, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4542483660130719, |
| "grad_norm": 0.095703125, |
| "learning_rate": 5.457516339869281e-06, |
| "loss": 1.3195, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.45751633986928103, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 5.4248366013071894e-06, |
| "loss": 1.3627, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.46078431372549017, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 5.392156862745098e-06, |
| "loss": 1.3071, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.46405228758169936, |
| "grad_norm": 0.146484375, |
| "learning_rate": 5.359477124183007e-06, |
| "loss": 1.2768, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4673202614379085, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 5.3267973856209155e-06, |
| "loss": 1.3083, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.09716796875, |
| "learning_rate": 5.294117647058824e-06, |
| "loss": 1.2987, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4738562091503268, |
| "grad_norm": 0.09912109375, |
| "learning_rate": 5.261437908496732e-06, |
| "loss": 1.3562, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.477124183006536, |
| "grad_norm": 0.10791015625, |
| "learning_rate": 5.2287581699346416e-06, |
| "loss": 1.3431, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.4803921568627451, |
| "grad_norm": 0.09912109375, |
| "learning_rate": 5.19607843137255e-06, |
| "loss": 1.28, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.48366013071895425, |
| "grad_norm": 0.09912109375, |
| "learning_rate": 5.163398692810458e-06, |
| "loss": 1.3059, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.4869281045751634, |
| "grad_norm": 0.09765625, |
| "learning_rate": 5.130718954248366e-06, |
| "loss": 1.3164, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.49019607843137253, |
| "grad_norm": 0.09716796875, |
| "learning_rate": 5.098039215686274e-06, |
| "loss": 1.2918, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4934640522875817, |
| "grad_norm": 0.1171875, |
| "learning_rate": 5.065359477124184e-06, |
| "loss": 1.305, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.49673202614379086, |
| "grad_norm": 0.099609375, |
| "learning_rate": 5.032679738562092e-06, |
| "loss": 1.2707, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.10009765625, |
| "learning_rate": 5e-06, |
| "loss": 1.2975, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.5032679738562091, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 4.967320261437909e-06, |
| "loss": 1.343, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.5065359477124183, |
| "grad_norm": 0.11572265625, |
| "learning_rate": 4.934640522875817e-06, |
| "loss": 1.273, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5098039215686274, |
| "grad_norm": 0.09716796875, |
| "learning_rate": 4.901960784313726e-06, |
| "loss": 1.2939, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.5130718954248366, |
| "grad_norm": 0.134765625, |
| "learning_rate": 4.869281045751634e-06, |
| "loss": 1.3252, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5163398692810458, |
| "grad_norm": 0.1181640625, |
| "learning_rate": 4.836601307189543e-06, |
| "loss": 1.3202, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5196078431372549, |
| "grad_norm": 0.107421875, |
| "learning_rate": 4.803921568627452e-06, |
| "loss": 1.3185, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.5228758169934641, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 4.77124183006536e-06, |
| "loss": 1.3852, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5261437908496732, |
| "grad_norm": 0.1103515625, |
| "learning_rate": 4.7385620915032685e-06, |
| "loss": 1.3733, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 4.705882352941177e-06, |
| "loss": 1.3023, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5326797385620915, |
| "grad_norm": 0.1748046875, |
| "learning_rate": 4.673202614379085e-06, |
| "loss": 1.3231, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5359477124183006, |
| "grad_norm": 0.1240234375, |
| "learning_rate": 4.640522875816994e-06, |
| "loss": 1.3121, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5392156862745098, |
| "grad_norm": 0.11083984375, |
| "learning_rate": 4.607843137254902e-06, |
| "loss": 1.3771, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5424836601307189, |
| "grad_norm": 0.107421875, |
| "learning_rate": 4.5751633986928105e-06, |
| "loss": 1.3331, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.545751633986928, |
| "grad_norm": 0.1142578125, |
| "learning_rate": 4.542483660130719e-06, |
| "loss": 1.2652, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5490196078431373, |
| "grad_norm": 0.12353515625, |
| "learning_rate": 4.509803921568628e-06, |
| "loss": 1.3278, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5522875816993464, |
| "grad_norm": 0.11572265625, |
| "learning_rate": 4.477124183006537e-06, |
| "loss": 1.3639, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.0986328125, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 1.3325, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5588235294117647, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 4.411764705882353e-06, |
| "loss": 1.2689, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5620915032679739, |
| "grad_norm": 0.11279296875, |
| "learning_rate": 4.379084967320262e-06, |
| "loss": 1.2967, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.565359477124183, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 4.34640522875817e-06, |
| "loss": 1.2843, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5686274509803921, |
| "grad_norm": 0.1064453125, |
| "learning_rate": 4.313725490196079e-06, |
| "loss": 1.3083, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5718954248366013, |
| "grad_norm": 0.1240234375, |
| "learning_rate": 4.281045751633987e-06, |
| "loss": 1.3492, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5751633986928104, |
| "grad_norm": 0.1064453125, |
| "learning_rate": 4.2483660130718954e-06, |
| "loss": 1.2731, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5784313725490197, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 4.215686274509805e-06, |
| "loss": 1.241, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5816993464052288, |
| "grad_norm": 0.10546875, |
| "learning_rate": 4.183006535947713e-06, |
| "loss": 1.3399, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5849673202614379, |
| "grad_norm": 0.1015625, |
| "learning_rate": 4.1503267973856215e-06, |
| "loss": 1.3507, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.11328125, |
| "learning_rate": 4.11764705882353e-06, |
| "loss": 1.2953, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5915032679738562, |
| "grad_norm": 0.099609375, |
| "learning_rate": 4.084967320261438e-06, |
| "loss": 1.2757, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5947712418300654, |
| "grad_norm": 0.11279296875, |
| "learning_rate": 4.052287581699347e-06, |
| "loss": 1.3029, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5980392156862745, |
| "grad_norm": 0.10302734375, |
| "learning_rate": 4.019607843137255e-06, |
| "loss": 1.2806, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.6013071895424836, |
| "grad_norm": 0.11376953125, |
| "learning_rate": 3.986928104575164e-06, |
| "loss": 1.3346, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.6045751633986928, |
| "grad_norm": 0.103515625, |
| "learning_rate": 3.954248366013072e-06, |
| "loss": 1.3607, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.6078431372549019, |
| "grad_norm": 0.1103515625, |
| "learning_rate": 3.92156862745098e-06, |
| "loss": 1.3136, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.6111111111111112, |
| "grad_norm": 0.130859375, |
| "learning_rate": 3.88888888888889e-06, |
| "loss": 1.3384, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6143790849673203, |
| "grad_norm": 0.1181640625, |
| "learning_rate": 3.856209150326798e-06, |
| "loss": 1.308, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.6176470588235294, |
| "grad_norm": 0.10302734375, |
| "learning_rate": 3.8235294117647055e-06, |
| "loss": 1.2922, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6209150326797386, |
| "grad_norm": 0.1044921875, |
| "learning_rate": 3.7908496732026144e-06, |
| "loss": 1.3052, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6241830065359477, |
| "grad_norm": 0.15234375, |
| "learning_rate": 3.758169934640523e-06, |
| "loss": 1.3447, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.6274509803921569, |
| "grad_norm": 0.10107421875, |
| "learning_rate": 3.7254901960784316e-06, |
| "loss": 1.319, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.630718954248366, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 3.6928104575163404e-06, |
| "loss": 1.2778, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.6339869281045751, |
| "grad_norm": 0.16015625, |
| "learning_rate": 3.6601307189542484e-06, |
| "loss": 1.3332, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.6372549019607843, |
| "grad_norm": 0.1064453125, |
| "learning_rate": 3.6274509803921573e-06, |
| "loss": 1.3033, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6405228758169934, |
| "grad_norm": 0.1123046875, |
| "learning_rate": 3.5947712418300657e-06, |
| "loss": 1.2795, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6437908496732027, |
| "grad_norm": 0.10888671875, |
| "learning_rate": 3.562091503267974e-06, |
| "loss": 1.3305, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 0.1123046875, |
| "learning_rate": 3.529411764705883e-06, |
| "loss": 1.2839, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6503267973856209, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 3.496732026143791e-06, |
| "loss": 1.3031, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.6535947712418301, |
| "grad_norm": 0.158203125, |
| "learning_rate": 3.4640522875816997e-06, |
| "loss": 1.2619, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6568627450980392, |
| "grad_norm": 0.126953125, |
| "learning_rate": 3.431372549019608e-06, |
| "loss": 1.3154, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.6601307189542484, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 3.398692810457517e-06, |
| "loss": 1.332, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6633986928104575, |
| "grad_norm": 0.11474609375, |
| "learning_rate": 3.3660130718954253e-06, |
| "loss": 1.331, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.10546875, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.3, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.6699346405228758, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 3.300653594771242e-06, |
| "loss": 1.3237, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.673202614379085, |
| "grad_norm": 0.1640625, |
| "learning_rate": 3.2679738562091506e-06, |
| "loss": 1.4259, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.6764705882352942, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 3.2352941176470594e-06, |
| "loss": 1.3311, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.6797385620915033, |
| "grad_norm": 0.109375, |
| "learning_rate": 3.2026143790849674e-06, |
| "loss": 1.3069, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6830065359477124, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 3.1699346405228758e-06, |
| "loss": 1.3022, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6862745098039216, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 3.1372549019607846e-06, |
| "loss": 1.3439, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6895424836601307, |
| "grad_norm": 0.10888671875, |
| "learning_rate": 3.104575163398693e-06, |
| "loss": 1.2768, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6928104575163399, |
| "grad_norm": 0.1171875, |
| "learning_rate": 3.071895424836602e-06, |
| "loss": 1.3184, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.696078431372549, |
| "grad_norm": 0.107421875, |
| "learning_rate": 3.03921568627451e-06, |
| "loss": 1.328, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6993464052287581, |
| "grad_norm": 0.109375, |
| "learning_rate": 3.0065359477124182e-06, |
| "loss": 1.287, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.7026143790849673, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 2.973856209150327e-06, |
| "loss": 1.2768, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 0.119140625, |
| "learning_rate": 2.9411764705882355e-06, |
| "loss": 1.3145, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.7091503267973857, |
| "grad_norm": 0.11572265625, |
| "learning_rate": 2.9084967320261443e-06, |
| "loss": 1.2902, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.7124183006535948, |
| "grad_norm": 0.10888671875, |
| "learning_rate": 2.8758169934640523e-06, |
| "loss": 1.3166, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.7156862745098039, |
| "grad_norm": 0.115234375, |
| "learning_rate": 2.843137254901961e-06, |
| "loss": 1.3041, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.7189542483660131, |
| "grad_norm": 0.10791015625, |
| "learning_rate": 2.8104575163398695e-06, |
| "loss": 1.2861, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7222222222222222, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 1.2623, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.7254901960784313, |
| "grad_norm": 0.111328125, |
| "learning_rate": 2.7450980392156867e-06, |
| "loss": 1.268, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.7287581699346405, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 2.7124183006535947e-06, |
| "loss": 1.2768, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.7320261437908496, |
| "grad_norm": 0.173828125, |
| "learning_rate": 2.6797385620915036e-06, |
| "loss": 1.4249, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.7352941176470589, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 2.647058823529412e-06, |
| "loss": 1.3141, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.738562091503268, |
| "grad_norm": 0.1171875, |
| "learning_rate": 2.6143790849673208e-06, |
| "loss": 1.2831, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.7418300653594772, |
| "grad_norm": 0.126953125, |
| "learning_rate": 2.581699346405229e-06, |
| "loss": 1.3071, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.7450980392156863, |
| "grad_norm": 0.1787109375, |
| "learning_rate": 2.549019607843137e-06, |
| "loss": 1.3029, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7483660130718954, |
| "grad_norm": 0.11474609375, |
| "learning_rate": 2.516339869281046e-06, |
| "loss": 1.2983, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.7516339869281046, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 2.4836601307189544e-06, |
| "loss": 1.2885, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7549019607843137, |
| "grad_norm": 0.11083984375, |
| "learning_rate": 2.450980392156863e-06, |
| "loss": 1.2684, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.7581699346405228, |
| "grad_norm": 0.150390625, |
| "learning_rate": 2.4183006535947716e-06, |
| "loss": 1.233, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.761437908496732, |
| "grad_norm": 0.11328125, |
| "learning_rate": 2.38562091503268e-06, |
| "loss": 1.3528, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 0.1123046875, |
| "learning_rate": 2.3529411764705885e-06, |
| "loss": 1.2915, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.7679738562091504, |
| "grad_norm": 0.111328125, |
| "learning_rate": 2.320261437908497e-06, |
| "loss": 1.238, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7712418300653595, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 2.2875816993464053e-06, |
| "loss": 1.2826, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7745098039215687, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 2.254901960784314e-06, |
| "loss": 1.2619, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.126953125, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 1.3, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7810457516339869, |
| "grad_norm": 0.162109375, |
| "learning_rate": 2.189542483660131e-06, |
| "loss": 1.3125, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 0.11572265625, |
| "learning_rate": 2.1568627450980393e-06, |
| "loss": 1.3028, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7875816993464052, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 2.1241830065359477e-06, |
| "loss": 1.2957, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.7908496732026143, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 2.0915032679738565e-06, |
| "loss": 1.2738, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.7941176470588235, |
| "grad_norm": 0.1064453125, |
| "learning_rate": 2.058823529411765e-06, |
| "loss": 1.2864, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.7973856209150327, |
| "grad_norm": 0.109375, |
| "learning_rate": 2.0261437908496734e-06, |
| "loss": 1.3131, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.8006535947712419, |
| "grad_norm": 0.11572265625, |
| "learning_rate": 1.993464052287582e-06, |
| "loss": 1.3015, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.803921568627451, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 1.96078431372549e-06, |
| "loss": 1.3037, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.8071895424836601, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 1.928104575163399e-06, |
| "loss": 1.2882, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.8104575163398693, |
| "grad_norm": 0.1240234375, |
| "learning_rate": 1.8954248366013072e-06, |
| "loss": 1.3064, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.8137254901960784, |
| "grad_norm": 0.11083984375, |
| "learning_rate": 1.8627450980392158e-06, |
| "loss": 1.2951, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.8169934640522876, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 1.8300653594771242e-06, |
| "loss": 1.2906, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8202614379084967, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 1.7973856209150328e-06, |
| "loss": 1.3138, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 1.7647058823529414e-06, |
| "loss": 1.24, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.826797385620915, |
| "grad_norm": 0.134765625, |
| "learning_rate": 1.7320261437908499e-06, |
| "loss": 1.2741, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.8300653594771242, |
| "grad_norm": 0.107421875, |
| "learning_rate": 1.6993464052287585e-06, |
| "loss": 1.2784, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.15625, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 1.2769, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8366013071895425, |
| "grad_norm": 0.11328125, |
| "learning_rate": 1.6339869281045753e-06, |
| "loss": 1.3121, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.8398692810457516, |
| "grad_norm": 0.11083984375, |
| "learning_rate": 1.6013071895424837e-06, |
| "loss": 1.3014, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.8431372549019608, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 1.5686274509803923e-06, |
| "loss": 1.2812, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.8464052287581699, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 1.535947712418301e-06, |
| "loss": 1.3214, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.8496732026143791, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 1.5032679738562091e-06, |
| "loss": 1.3066, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8529411764705882, |
| "grad_norm": 0.1806640625, |
| "learning_rate": 1.4705882352941177e-06, |
| "loss": 1.3292, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.8562091503267973, |
| "grad_norm": 0.1103515625, |
| "learning_rate": 1.4379084967320261e-06, |
| "loss": 1.2693, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.8594771241830066, |
| "grad_norm": 0.1484375, |
| "learning_rate": 1.4052287581699348e-06, |
| "loss": 1.2824, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.8627450980392157, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 1.3725490196078434e-06, |
| "loss": 1.2549, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.8660130718954249, |
| "grad_norm": 0.125, |
| "learning_rate": 1.3398692810457518e-06, |
| "loss": 1.2826, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.869281045751634, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 1.3071895424836604e-06, |
| "loss": 1.3209, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.8725490196078431, |
| "grad_norm": 0.1044921875, |
| "learning_rate": 1.2745098039215686e-06, |
| "loss": 1.2802, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8758169934640523, |
| "grad_norm": 0.111328125, |
| "learning_rate": 1.2418300653594772e-06, |
| "loss": 1.2775, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.8790849673202614, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 1.2091503267973858e-06, |
| "loss": 1.2438, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 1.1764705882352942e-06, |
| "loss": 1.3108, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8856209150326797, |
| "grad_norm": 0.1142578125, |
| "learning_rate": 1.1437908496732026e-06, |
| "loss": 1.2374, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.109375, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 1.2826, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8921568627450981, |
| "grad_norm": 0.12451171875, |
| "learning_rate": 1.0784313725490197e-06, |
| "loss": 1.3142, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.8954248366013072, |
| "grad_norm": 0.1044921875, |
| "learning_rate": 1.0457516339869283e-06, |
| "loss": 1.2863, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.8986928104575164, |
| "grad_norm": 0.12890625, |
| "learning_rate": 1.0130718954248367e-06, |
| "loss": 1.269, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.9019607843137255, |
| "grad_norm": 0.1083984375, |
| "learning_rate": 9.80392156862745e-07, |
| "loss": 1.2899, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.9052287581699346, |
| "grad_norm": 0.169921875, |
| "learning_rate": 9.477124183006536e-07, |
| "loss": 1.3393, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.9084967320261438, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 9.150326797385621e-07, |
| "loss": 1.2714, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.9117647058823529, |
| "grad_norm": 0.10400390625, |
| "learning_rate": 8.823529411764707e-07, |
| "loss": 1.2991, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.9150326797385621, |
| "grad_norm": 0.140625, |
| "learning_rate": 8.496732026143792e-07, |
| "loss": 1.3242, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9183006535947712, |
| "grad_norm": 0.1083984375, |
| "learning_rate": 8.169934640522876e-07, |
| "loss": 1.2888, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.9215686274509803, |
| "grad_norm": 0.13671875, |
| "learning_rate": 7.843137254901962e-07, |
| "loss": 1.2745, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.9248366013071896, |
| "grad_norm": 0.10400390625, |
| "learning_rate": 7.516339869281046e-07, |
| "loss": 1.2467, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.9281045751633987, |
| "grad_norm": 0.1025390625, |
| "learning_rate": 7.189542483660131e-07, |
| "loss": 1.3205, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.9313725490196079, |
| "grad_norm": 0.115234375, |
| "learning_rate": 6.862745098039217e-07, |
| "loss": 1.2527, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.934640522875817, |
| "grad_norm": 0.1240234375, |
| "learning_rate": 6.535947712418302e-07, |
| "loss": 1.335, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.9379084967320261, |
| "grad_norm": 0.10302734375, |
| "learning_rate": 6.209150326797386e-07, |
| "loss": 1.2643, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 0.123046875, |
| "learning_rate": 5.882352941176471e-07, |
| "loss": 1.3361, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.9444444444444444, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 1.291, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.9477124183006536, |
| "grad_norm": 0.10498046875, |
| "learning_rate": 5.228758169934641e-07, |
| "loss": 1.3035, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9509803921568627, |
| "grad_norm": 0.1083984375, |
| "learning_rate": 4.901960784313725e-07, |
| "loss": 1.2796, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.954248366013072, |
| "grad_norm": 0.109375, |
| "learning_rate": 4.5751633986928105e-07, |
| "loss": 1.2886, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.9575163398692811, |
| "grad_norm": 0.134765625, |
| "learning_rate": 4.248366013071896e-07, |
| "loss": 1.3668, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.9607843137254902, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 3.921568627450981e-07, |
| "loss": 1.3172, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.9640522875816994, |
| "grad_norm": 0.1162109375, |
| "learning_rate": 3.5947712418300653e-07, |
| "loss": 1.2876, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9673202614379085, |
| "grad_norm": 0.11865234375, |
| "learning_rate": 3.267973856209151e-07, |
| "loss": 1.2527, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.9705882352941176, |
| "grad_norm": 0.10546875, |
| "learning_rate": 2.9411764705882356e-07, |
| "loss": 1.2697, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.9738562091503268, |
| "grad_norm": 0.103515625, |
| "learning_rate": 2.6143790849673207e-07, |
| "loss": 1.2876, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.9771241830065359, |
| "grad_norm": 0.1064453125, |
| "learning_rate": 2.2875816993464053e-07, |
| "loss": 1.2968, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.9803921568627451, |
| "grad_norm": 0.1171875, |
| "learning_rate": 1.9607843137254904e-07, |
| "loss": 1.3265, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9836601307189542, |
| "grad_norm": 0.1171875, |
| "learning_rate": 1.6339869281045755e-07, |
| "loss": 1.2475, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.9869281045751634, |
| "grad_norm": 0.1201171875, |
| "learning_rate": 1.3071895424836603e-07, |
| "loss": 1.2667, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.9901960784313726, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 9.803921568627452e-08, |
| "loss": 1.3196, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.9934640522875817, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 6.535947712418302e-08, |
| "loss": 1.2911, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.9967320261437909, |
| "grad_norm": 0.1142578125, |
| "learning_rate": 3.267973856209151e-08, |
| "loss": 1.3154, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 0.0, |
| "loss": 1.2803, |
| "step": 306 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 306, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.566222808116101e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|