{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 0,
"global_step": 306,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0032679738562091504,
"grad_norm": 0.50390625,
"learning_rate": 9.96732026143791e-06,
"loss": 1.7678,
"step": 1
},
{
"epoch": 0.006535947712418301,
"grad_norm": 0.451171875,
"learning_rate": 9.934640522875818e-06,
"loss": 1.7126,
"step": 2
},
{
"epoch": 0.00980392156862745,
"grad_norm": 0.439453125,
"learning_rate": 9.901960784313727e-06,
"loss": 1.6738,
"step": 3
},
{
"epoch": 0.013071895424836602,
"grad_norm": 0.4296875,
"learning_rate": 9.869281045751634e-06,
"loss": 1.6994,
"step": 4
},
{
"epoch": 0.016339869281045753,
"grad_norm": 0.41796875,
"learning_rate": 9.836601307189542e-06,
"loss": 1.6942,
"step": 5
},
{
"epoch": 0.0196078431372549,
"grad_norm": 0.404296875,
"learning_rate": 9.803921568627451e-06,
"loss": 1.6875,
"step": 6
},
{
"epoch": 0.02287581699346405,
"grad_norm": 0.3671875,
"learning_rate": 9.77124183006536e-06,
"loss": 1.5903,
"step": 7
},
{
"epoch": 0.026143790849673203,
"grad_norm": 0.337890625,
"learning_rate": 9.738562091503268e-06,
"loss": 1.6187,
"step": 8
},
{
"epoch": 0.029411764705882353,
"grad_norm": 0.3203125,
"learning_rate": 9.705882352941177e-06,
"loss": 1.6379,
"step": 9
},
{
"epoch": 0.032679738562091505,
"grad_norm": 0.265625,
"learning_rate": 9.673202614379087e-06,
"loss": 1.5965,
"step": 10
},
{
"epoch": 0.03594771241830065,
"grad_norm": 0.259765625,
"learning_rate": 9.640522875816994e-06,
"loss": 1.6126,
"step": 11
},
{
"epoch": 0.0392156862745098,
"grad_norm": 0.296875,
"learning_rate": 9.607843137254903e-06,
"loss": 1.7813,
"step": 12
},
{
"epoch": 0.042483660130718956,
"grad_norm": 0.244140625,
"learning_rate": 9.575163398692811e-06,
"loss": 1.6,
"step": 13
},
{
"epoch": 0.0457516339869281,
"grad_norm": 0.2353515625,
"learning_rate": 9.54248366013072e-06,
"loss": 1.6003,
"step": 14
},
{
"epoch": 0.049019607843137254,
"grad_norm": 0.234375,
"learning_rate": 9.509803921568628e-06,
"loss": 1.5348,
"step": 15
},
{
"epoch": 0.05228758169934641,
"grad_norm": 0.2216796875,
"learning_rate": 9.477124183006537e-06,
"loss": 1.5196,
"step": 16
},
{
"epoch": 0.05555555555555555,
"grad_norm": 0.2412109375,
"learning_rate": 9.444444444444445e-06,
"loss": 1.5982,
"step": 17
},
{
"epoch": 0.058823529411764705,
"grad_norm": 0.2236328125,
"learning_rate": 9.411764705882354e-06,
"loss": 1.5775,
"step": 18
},
{
"epoch": 0.06209150326797386,
"grad_norm": 0.1982421875,
"learning_rate": 9.379084967320261e-06,
"loss": 1.48,
"step": 19
},
{
"epoch": 0.06535947712418301,
"grad_norm": 0.1943359375,
"learning_rate": 9.34640522875817e-06,
"loss": 1.5339,
"step": 20
},
{
"epoch": 0.06862745098039216,
"grad_norm": 0.193359375,
"learning_rate": 9.31372549019608e-06,
"loss": 1.5514,
"step": 21
},
{
"epoch": 0.0718954248366013,
"grad_norm": 0.1845703125,
"learning_rate": 9.281045751633987e-06,
"loss": 1.5424,
"step": 22
},
{
"epoch": 0.07516339869281045,
"grad_norm": 0.2138671875,
"learning_rate": 9.248366013071897e-06,
"loss": 1.5233,
"step": 23
},
{
"epoch": 0.0784313725490196,
"grad_norm": 0.1591796875,
"learning_rate": 9.215686274509804e-06,
"loss": 1.506,
"step": 24
},
{
"epoch": 0.08169934640522876,
"grad_norm": 0.15234375,
"learning_rate": 9.183006535947713e-06,
"loss": 1.4921,
"step": 25
},
{
"epoch": 0.08496732026143791,
"grad_norm": 0.1689453125,
"learning_rate": 9.150326797385621e-06,
"loss": 1.4648,
"step": 26
},
{
"epoch": 0.08823529411764706,
"grad_norm": 0.154296875,
"learning_rate": 9.11764705882353e-06,
"loss": 1.4648,
"step": 27
},
{
"epoch": 0.0915032679738562,
"grad_norm": 0.138671875,
"learning_rate": 9.084967320261438e-06,
"loss": 1.4776,
"step": 28
},
{
"epoch": 0.09477124183006536,
"grad_norm": 0.1875,
"learning_rate": 9.052287581699347e-06,
"loss": 1.5891,
"step": 29
},
{
"epoch": 0.09803921568627451,
"grad_norm": 0.140625,
"learning_rate": 9.019607843137256e-06,
"loss": 1.4656,
"step": 30
},
{
"epoch": 0.10130718954248366,
"grad_norm": 0.24609375,
"learning_rate": 8.986928104575164e-06,
"loss": 1.4948,
"step": 31
},
{
"epoch": 0.10457516339869281,
"grad_norm": 0.1357421875,
"learning_rate": 8.954248366013073e-06,
"loss": 1.4988,
"step": 32
},
{
"epoch": 0.10784313725490197,
"grad_norm": 0.1474609375,
"learning_rate": 8.921568627450982e-06,
"loss": 1.4855,
"step": 33
},
{
"epoch": 0.1111111111111111,
"grad_norm": 0.1279296875,
"learning_rate": 8.888888888888888e-06,
"loss": 1.4475,
"step": 34
},
{
"epoch": 0.11437908496732026,
"grad_norm": 0.1279296875,
"learning_rate": 8.856209150326798e-06,
"loss": 1.4693,
"step": 35
},
{
"epoch": 0.11764705882352941,
"grad_norm": 0.1435546875,
"learning_rate": 8.823529411764707e-06,
"loss": 1.456,
"step": 36
},
{
"epoch": 0.12091503267973856,
"grad_norm": 0.1416015625,
"learning_rate": 8.790849673202614e-06,
"loss": 1.4638,
"step": 37
},
{
"epoch": 0.12418300653594772,
"grad_norm": 0.1259765625,
"learning_rate": 8.758169934640524e-06,
"loss": 1.4378,
"step": 38
},
{
"epoch": 0.12745098039215685,
"grad_norm": 0.1396484375,
"learning_rate": 8.725490196078433e-06,
"loss": 1.4161,
"step": 39
},
{
"epoch": 0.13071895424836602,
"grad_norm": 0.126953125,
"learning_rate": 8.69281045751634e-06,
"loss": 1.4582,
"step": 40
},
{
"epoch": 0.13398692810457516,
"grad_norm": 0.12255859375,
"learning_rate": 8.66013071895425e-06,
"loss": 1.4215,
"step": 41
},
{
"epoch": 0.13725490196078433,
"grad_norm": 0.173828125,
"learning_rate": 8.627450980392157e-06,
"loss": 1.5164,
"step": 42
},
{
"epoch": 0.14052287581699346,
"grad_norm": 0.12451171875,
"learning_rate": 8.594771241830066e-06,
"loss": 1.3956,
"step": 43
},
{
"epoch": 0.1437908496732026,
"grad_norm": 0.12158203125,
"learning_rate": 8.562091503267974e-06,
"loss": 1.4147,
"step": 44
},
{
"epoch": 0.14705882352941177,
"grad_norm": 0.1279296875,
"learning_rate": 8.529411764705883e-06,
"loss": 1.4664,
"step": 45
},
{
"epoch": 0.1503267973856209,
"grad_norm": 0.12060546875,
"learning_rate": 8.496732026143791e-06,
"loss": 1.402,
"step": 46
},
{
"epoch": 0.15359477124183007,
"grad_norm": 0.11279296875,
"learning_rate": 8.4640522875817e-06,
"loss": 1.455,
"step": 47
},
{
"epoch": 0.1568627450980392,
"grad_norm": 0.1337890625,
"learning_rate": 8.43137254901961e-06,
"loss": 1.4874,
"step": 48
},
{
"epoch": 0.16013071895424835,
"grad_norm": 0.1552734375,
"learning_rate": 8.398692810457517e-06,
"loss": 1.5623,
"step": 49
},
{
"epoch": 0.16339869281045752,
"grad_norm": 0.11083984375,
"learning_rate": 8.366013071895426e-06,
"loss": 1.4036,
"step": 50
},
{
"epoch": 0.16666666666666666,
"grad_norm": 0.10986328125,
"learning_rate": 8.333333333333334e-06,
"loss": 1.4001,
"step": 51
},
{
"epoch": 0.16993464052287582,
"grad_norm": 0.1484375,
"learning_rate": 8.300653594771243e-06,
"loss": 1.5234,
"step": 52
},
{
"epoch": 0.17320261437908496,
"grad_norm": 0.126953125,
"learning_rate": 8.26797385620915e-06,
"loss": 1.4116,
"step": 53
},
{
"epoch": 0.17647058823529413,
"grad_norm": 0.1220703125,
"learning_rate": 8.23529411764706e-06,
"loss": 1.4592,
"step": 54
},
{
"epoch": 0.17973856209150327,
"grad_norm": 0.10546875,
"learning_rate": 8.202614379084967e-06,
"loss": 1.4332,
"step": 55
},
{
"epoch": 0.1830065359477124,
"grad_norm": 0.11181640625,
"learning_rate": 8.169934640522877e-06,
"loss": 1.4171,
"step": 56
},
{
"epoch": 0.18627450980392157,
"grad_norm": 0.1015625,
"learning_rate": 8.137254901960784e-06,
"loss": 1.3787,
"step": 57
},
{
"epoch": 0.1895424836601307,
"grad_norm": 0.10595703125,
"learning_rate": 8.104575163398693e-06,
"loss": 1.4223,
"step": 58
},
{
"epoch": 0.19281045751633988,
"grad_norm": 0.107421875,
"learning_rate": 8.071895424836603e-06,
"loss": 1.4104,
"step": 59
},
{
"epoch": 0.19607843137254902,
"grad_norm": 0.11572265625,
"learning_rate": 8.03921568627451e-06,
"loss": 1.4097,
"step": 60
},
{
"epoch": 0.19934640522875818,
"grad_norm": 0.10791015625,
"learning_rate": 8.00653594771242e-06,
"loss": 1.4235,
"step": 61
},
{
"epoch": 0.20261437908496732,
"grad_norm": 0.10205078125,
"learning_rate": 7.973856209150329e-06,
"loss": 1.3798,
"step": 62
},
{
"epoch": 0.20588235294117646,
"grad_norm": 0.1513671875,
"learning_rate": 7.941176470588236e-06,
"loss": 1.4078,
"step": 63
},
{
"epoch": 0.20915032679738563,
"grad_norm": 0.099609375,
"learning_rate": 7.908496732026144e-06,
"loss": 1.3696,
"step": 64
},
{
"epoch": 0.21241830065359477,
"grad_norm": 0.09716796875,
"learning_rate": 7.875816993464053e-06,
"loss": 1.3765,
"step": 65
},
{
"epoch": 0.21568627450980393,
"grad_norm": 0.10205078125,
"learning_rate": 7.84313725490196e-06,
"loss": 1.3658,
"step": 66
},
{
"epoch": 0.21895424836601307,
"grad_norm": 0.1025390625,
"learning_rate": 7.81045751633987e-06,
"loss": 1.3527,
"step": 67
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.107421875,
"learning_rate": 7.77777777777778e-06,
"loss": 1.346,
"step": 68
},
{
"epoch": 0.22549019607843138,
"grad_norm": 0.1005859375,
"learning_rate": 7.745098039215687e-06,
"loss": 1.3623,
"step": 69
},
{
"epoch": 0.22875816993464052,
"grad_norm": 0.10400390625,
"learning_rate": 7.712418300653596e-06,
"loss": 1.3469,
"step": 70
},
{
"epoch": 0.23202614379084968,
"grad_norm": 0.11669921875,
"learning_rate": 7.679738562091504e-06,
"loss": 1.385,
"step": 71
},
{
"epoch": 0.23529411764705882,
"grad_norm": 0.1005859375,
"learning_rate": 7.647058823529411e-06,
"loss": 1.3932,
"step": 72
},
{
"epoch": 0.238562091503268,
"grad_norm": 0.130859375,
"learning_rate": 7.61437908496732e-06,
"loss": 1.3533,
"step": 73
},
{
"epoch": 0.24183006535947713,
"grad_norm": 0.10009765625,
"learning_rate": 7.581699346405229e-06,
"loss": 1.3961,
"step": 74
},
{
"epoch": 0.24509803921568626,
"grad_norm": 0.1064453125,
"learning_rate": 7.549019607843138e-06,
"loss": 1.3901,
"step": 75
},
{
"epoch": 0.24836601307189543,
"grad_norm": 0.10546875,
"learning_rate": 7.516339869281046e-06,
"loss": 1.3643,
"step": 76
},
{
"epoch": 0.25163398692810457,
"grad_norm": 0.095703125,
"learning_rate": 7.483660130718955e-06,
"loss": 1.3425,
"step": 77
},
{
"epoch": 0.2549019607843137,
"grad_norm": 0.1083984375,
"learning_rate": 7.450980392156863e-06,
"loss": 1.4032,
"step": 78
},
{
"epoch": 0.2581699346405229,
"grad_norm": 0.134765625,
"learning_rate": 7.4183006535947725e-06,
"loss": 1.3228,
"step": 79
},
{
"epoch": 0.26143790849673204,
"grad_norm": 0.1005859375,
"learning_rate": 7.385620915032681e-06,
"loss": 1.3964,
"step": 80
},
{
"epoch": 0.2647058823529412,
"grad_norm": 0.11767578125,
"learning_rate": 7.352941176470589e-06,
"loss": 1.3605,
"step": 81
},
{
"epoch": 0.2679738562091503,
"grad_norm": 0.09765625,
"learning_rate": 7.320261437908497e-06,
"loss": 1.3084,
"step": 82
},
{
"epoch": 0.27124183006535946,
"grad_norm": 0.09521484375,
"learning_rate": 7.287581699346405e-06,
"loss": 1.3851,
"step": 83
},
{
"epoch": 0.27450980392156865,
"grad_norm": 0.0966796875,
"learning_rate": 7.2549019607843145e-06,
"loss": 1.3596,
"step": 84
},
{
"epoch": 0.2777777777777778,
"grad_norm": 0.419921875,
"learning_rate": 7.222222222222223e-06,
"loss": 1.6292,
"step": 85
},
{
"epoch": 0.28104575163398693,
"grad_norm": 0.09521484375,
"learning_rate": 7.189542483660131e-06,
"loss": 1.3443,
"step": 86
},
{
"epoch": 0.28431372549019607,
"grad_norm": 0.0986328125,
"learning_rate": 7.15686274509804e-06,
"loss": 1.3978,
"step": 87
},
{
"epoch": 0.2875816993464052,
"grad_norm": 0.10107421875,
"learning_rate": 7.124183006535948e-06,
"loss": 1.3581,
"step": 88
},
{
"epoch": 0.2908496732026144,
"grad_norm": 0.09619140625,
"learning_rate": 7.091503267973857e-06,
"loss": 1.3509,
"step": 89
},
{
"epoch": 0.29411764705882354,
"grad_norm": 0.099609375,
"learning_rate": 7.058823529411766e-06,
"loss": 1.3522,
"step": 90
},
{
"epoch": 0.2973856209150327,
"grad_norm": 0.09423828125,
"learning_rate": 7.026143790849673e-06,
"loss": 1.3553,
"step": 91
},
{
"epoch": 0.3006535947712418,
"grad_norm": 0.1015625,
"learning_rate": 6.993464052287582e-06,
"loss": 1.2874,
"step": 92
},
{
"epoch": 0.30392156862745096,
"grad_norm": 0.146484375,
"learning_rate": 6.96078431372549e-06,
"loss": 1.3791,
"step": 93
},
{
"epoch": 0.30718954248366015,
"grad_norm": 0.09521484375,
"learning_rate": 6.928104575163399e-06,
"loss": 1.3514,
"step": 94
},
{
"epoch": 0.3104575163398693,
"grad_norm": 0.1376953125,
"learning_rate": 6.895424836601308e-06,
"loss": 1.3437,
"step": 95
},
{
"epoch": 0.3137254901960784,
"grad_norm": 0.103515625,
"learning_rate": 6.862745098039216e-06,
"loss": 1.3763,
"step": 96
},
{
"epoch": 0.31699346405228757,
"grad_norm": 0.09912109375,
"learning_rate": 6.830065359477125e-06,
"loss": 1.3632,
"step": 97
},
{
"epoch": 0.3202614379084967,
"grad_norm": 0.09716796875,
"learning_rate": 6.797385620915034e-06,
"loss": 1.3221,
"step": 98
},
{
"epoch": 0.3235294117647059,
"grad_norm": 0.0927734375,
"learning_rate": 6.764705882352942e-06,
"loss": 1.3214,
"step": 99
},
{
"epoch": 0.32679738562091504,
"grad_norm": 0.095703125,
"learning_rate": 6.732026143790851e-06,
"loss": 1.3447,
"step": 100
},
{
"epoch": 0.3300653594771242,
"grad_norm": 0.1025390625,
"learning_rate": 6.699346405228758e-06,
"loss": 1.3675,
"step": 101
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.11376953125,
"learning_rate": 6.666666666666667e-06,
"loss": 1.3547,
"step": 102
},
{
"epoch": 0.3366013071895425,
"grad_norm": 0.09765625,
"learning_rate": 6.633986928104575e-06,
"loss": 1.323,
"step": 103
},
{
"epoch": 0.33986928104575165,
"grad_norm": 0.0986328125,
"learning_rate": 6.601307189542484e-06,
"loss": 1.3437,
"step": 104
},
{
"epoch": 0.3431372549019608,
"grad_norm": 0.1357421875,
"learning_rate": 6.568627450980393e-06,
"loss": 1.3564,
"step": 105
},
{
"epoch": 0.3464052287581699,
"grad_norm": 0.103515625,
"learning_rate": 6.535947712418301e-06,
"loss": 1.3487,
"step": 106
},
{
"epoch": 0.34967320261437906,
"grad_norm": 0.09619140625,
"learning_rate": 6.5032679738562095e-06,
"loss": 1.3322,
"step": 107
},
{
"epoch": 0.35294117647058826,
"grad_norm": 0.09326171875,
"learning_rate": 6.470588235294119e-06,
"loss": 1.3215,
"step": 108
},
{
"epoch": 0.3562091503267974,
"grad_norm": 0.09375,
"learning_rate": 6.437908496732027e-06,
"loss": 1.3258,
"step": 109
},
{
"epoch": 0.35947712418300654,
"grad_norm": 0.1015625,
"learning_rate": 6.405228758169935e-06,
"loss": 1.3013,
"step": 110
},
{
"epoch": 0.3627450980392157,
"grad_norm": 0.1044921875,
"learning_rate": 6.372549019607843e-06,
"loss": 1.3078,
"step": 111
},
{
"epoch": 0.3660130718954248,
"grad_norm": 0.10107421875,
"learning_rate": 6.3398692810457515e-06,
"loss": 1.3178,
"step": 112
},
{
"epoch": 0.369281045751634,
"grad_norm": 0.09423828125,
"learning_rate": 6.307189542483661e-06,
"loss": 1.3343,
"step": 113
},
{
"epoch": 0.37254901960784315,
"grad_norm": 0.091796875,
"learning_rate": 6.274509803921569e-06,
"loss": 1.3418,
"step": 114
},
{
"epoch": 0.3758169934640523,
"grad_norm": 0.10693359375,
"learning_rate": 6.241830065359478e-06,
"loss": 1.3293,
"step": 115
},
{
"epoch": 0.3790849673202614,
"grad_norm": 0.09228515625,
"learning_rate": 6.209150326797386e-06,
"loss": 1.3046,
"step": 116
},
{
"epoch": 0.38235294117647056,
"grad_norm": 0.0908203125,
"learning_rate": 6.176470588235295e-06,
"loss": 1.3192,
"step": 117
},
{
"epoch": 0.38562091503267976,
"grad_norm": 0.09130859375,
"learning_rate": 6.143790849673204e-06,
"loss": 1.2923,
"step": 118
},
{
"epoch": 0.3888888888888889,
"grad_norm": 0.10205078125,
"learning_rate": 6.111111111111112e-06,
"loss": 1.34,
"step": 119
},
{
"epoch": 0.39215686274509803,
"grad_norm": 0.0986328125,
"learning_rate": 6.07843137254902e-06,
"loss": 1.3105,
"step": 120
},
{
"epoch": 0.3954248366013072,
"grad_norm": 0.09619140625,
"learning_rate": 6.045751633986928e-06,
"loss": 1.3412,
"step": 121
},
{
"epoch": 0.39869281045751637,
"grad_norm": 0.09423828125,
"learning_rate": 6.0130718954248365e-06,
"loss": 1.3483,
"step": 122
},
{
"epoch": 0.4019607843137255,
"grad_norm": 0.103515625,
"learning_rate": 5.980392156862746e-06,
"loss": 1.3248,
"step": 123
},
{
"epoch": 0.40522875816993464,
"grad_norm": 0.10205078125,
"learning_rate": 5.947712418300654e-06,
"loss": 1.3346,
"step": 124
},
{
"epoch": 0.4084967320261438,
"grad_norm": 0.1103515625,
"learning_rate": 5.9150326797385625e-06,
"loss": 1.3605,
"step": 125
},
{
"epoch": 0.4117647058823529,
"grad_norm": 0.095703125,
"learning_rate": 5.882352941176471e-06,
"loss": 1.3217,
"step": 126
},
{
"epoch": 0.4150326797385621,
"grad_norm": 0.095703125,
"learning_rate": 5.84967320261438e-06,
"loss": 1.3464,
"step": 127
},
{
"epoch": 0.41830065359477125,
"grad_norm": 0.115234375,
"learning_rate": 5.816993464052289e-06,
"loss": 1.322,
"step": 128
},
{
"epoch": 0.4215686274509804,
"grad_norm": 0.11181640625,
"learning_rate": 5.784313725490197e-06,
"loss": 1.3154,
"step": 129
},
{
"epoch": 0.42483660130718953,
"grad_norm": 0.111328125,
"learning_rate": 5.7516339869281045e-06,
"loss": 1.2998,
"step": 130
},
{
"epoch": 0.42810457516339867,
"grad_norm": 0.095703125,
"learning_rate": 5.718954248366013e-06,
"loss": 1.286,
"step": 131
},
{
"epoch": 0.43137254901960786,
"grad_norm": 0.10400390625,
"learning_rate": 5.686274509803922e-06,
"loss": 1.3048,
"step": 132
},
{
"epoch": 0.434640522875817,
"grad_norm": 0.12353515625,
"learning_rate": 5.653594771241831e-06,
"loss": 1.2749,
"step": 133
},
{
"epoch": 0.43790849673202614,
"grad_norm": 0.103515625,
"learning_rate": 5.620915032679739e-06,
"loss": 1.3243,
"step": 134
},
{
"epoch": 0.4411764705882353,
"grad_norm": 0.10498046875,
"learning_rate": 5.588235294117647e-06,
"loss": 1.3356,
"step": 135
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.134765625,
"learning_rate": 5.555555555555557e-06,
"loss": 1.2729,
"step": 136
},
{
"epoch": 0.4477124183006536,
"grad_norm": 0.103515625,
"learning_rate": 5.522875816993465e-06,
"loss": 1.2757,
"step": 137
},
{
"epoch": 0.45098039215686275,
"grad_norm": 0.10205078125,
"learning_rate": 5.4901960784313735e-06,
"loss": 1.2965,
"step": 138
},
{
"epoch": 0.4542483660130719,
"grad_norm": 0.095703125,
"learning_rate": 5.457516339869281e-06,
"loss": 1.3195,
"step": 139
},
{
"epoch": 0.45751633986928103,
"grad_norm": 0.1337890625,
"learning_rate": 5.4248366013071894e-06,
"loss": 1.3627,
"step": 140
},
{
"epoch": 0.46078431372549017,
"grad_norm": 0.1416015625,
"learning_rate": 5.392156862745098e-06,
"loss": 1.3071,
"step": 141
},
{
"epoch": 0.46405228758169936,
"grad_norm": 0.146484375,
"learning_rate": 5.359477124183007e-06,
"loss": 1.2768,
"step": 142
},
{
"epoch": 0.4673202614379085,
"grad_norm": 0.1337890625,
"learning_rate": 5.3267973856209155e-06,
"loss": 1.3083,
"step": 143
},
{
"epoch": 0.47058823529411764,
"grad_norm": 0.09716796875,
"learning_rate": 5.294117647058824e-06,
"loss": 1.2987,
"step": 144
},
{
"epoch": 0.4738562091503268,
"grad_norm": 0.09912109375,
"learning_rate": 5.261437908496732e-06,
"loss": 1.3562,
"step": 145
},
{
"epoch": 0.477124183006536,
"grad_norm": 0.10791015625,
"learning_rate": 5.2287581699346416e-06,
"loss": 1.3431,
"step": 146
},
{
"epoch": 0.4803921568627451,
"grad_norm": 0.09912109375,
"learning_rate": 5.19607843137255e-06,
"loss": 1.28,
"step": 147
},
{
"epoch": 0.48366013071895425,
"grad_norm": 0.09912109375,
"learning_rate": 5.163398692810458e-06,
"loss": 1.3059,
"step": 148
},
{
"epoch": 0.4869281045751634,
"grad_norm": 0.09765625,
"learning_rate": 5.130718954248366e-06,
"loss": 1.3164,
"step": 149
},
{
"epoch": 0.49019607843137253,
"grad_norm": 0.09716796875,
"learning_rate": 5.098039215686274e-06,
"loss": 1.2918,
"step": 150
},
{
"epoch": 0.4934640522875817,
"grad_norm": 0.1171875,
"learning_rate": 5.065359477124184e-06,
"loss": 1.305,
"step": 151
},
{
"epoch": 0.49673202614379086,
"grad_norm": 0.099609375,
"learning_rate": 5.032679738562092e-06,
"loss": 1.2707,
"step": 152
},
{
"epoch": 0.5,
"grad_norm": 0.10009765625,
"learning_rate": 5e-06,
"loss": 1.2975,
"step": 153
},
{
"epoch": 0.5032679738562091,
"grad_norm": 0.12255859375,
"learning_rate": 4.967320261437909e-06,
"loss": 1.343,
"step": 154
},
{
"epoch": 0.5065359477124183,
"grad_norm": 0.11572265625,
"learning_rate": 4.934640522875817e-06,
"loss": 1.273,
"step": 155
},
{
"epoch": 0.5098039215686274,
"grad_norm": 0.09716796875,
"learning_rate": 4.901960784313726e-06,
"loss": 1.2939,
"step": 156
},
{
"epoch": 0.5130718954248366,
"grad_norm": 0.134765625,
"learning_rate": 4.869281045751634e-06,
"loss": 1.3252,
"step": 157
},
{
"epoch": 0.5163398692810458,
"grad_norm": 0.1181640625,
"learning_rate": 4.836601307189543e-06,
"loss": 1.3202,
"step": 158
},
{
"epoch": 0.5196078431372549,
"grad_norm": 0.107421875,
"learning_rate": 4.803921568627452e-06,
"loss": 1.3185,
"step": 159
},
{
"epoch": 0.5228758169934641,
"grad_norm": 0.1357421875,
"learning_rate": 4.77124183006536e-06,
"loss": 1.3852,
"step": 160
},
{
"epoch": 0.5261437908496732,
"grad_norm": 0.1103515625,
"learning_rate": 4.7385620915032685e-06,
"loss": 1.3733,
"step": 161
},
{
"epoch": 0.5294117647058824,
"grad_norm": 0.10693359375,
"learning_rate": 4.705882352941177e-06,
"loss": 1.3023,
"step": 162
},
{
"epoch": 0.5326797385620915,
"grad_norm": 0.1748046875,
"learning_rate": 4.673202614379085e-06,
"loss": 1.3231,
"step": 163
},
{
"epoch": 0.5359477124183006,
"grad_norm": 0.1240234375,
"learning_rate": 4.640522875816994e-06,
"loss": 1.3121,
"step": 164
},
{
"epoch": 0.5392156862745098,
"grad_norm": 0.11083984375,
"learning_rate": 4.607843137254902e-06,
"loss": 1.3771,
"step": 165
},
{
"epoch": 0.5424836601307189,
"grad_norm": 0.107421875,
"learning_rate": 4.5751633986928105e-06,
"loss": 1.3331,
"step": 166
},
{
"epoch": 0.545751633986928,
"grad_norm": 0.1142578125,
"learning_rate": 4.542483660130719e-06,
"loss": 1.2652,
"step": 167
},
{
"epoch": 0.5490196078431373,
"grad_norm": 0.12353515625,
"learning_rate": 4.509803921568628e-06,
"loss": 1.3278,
"step": 168
},
{
"epoch": 0.5522875816993464,
"grad_norm": 0.11572265625,
"learning_rate": 4.477124183006537e-06,
"loss": 1.3639,
"step": 169
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.0986328125,
"learning_rate": 4.444444444444444e-06,
"loss": 1.3325,
"step": 170
},
{
"epoch": 0.5588235294117647,
"grad_norm": 0.10693359375,
"learning_rate": 4.411764705882353e-06,
"loss": 1.2689,
"step": 171
},
{
"epoch": 0.5620915032679739,
"grad_norm": 0.11279296875,
"learning_rate": 4.379084967320262e-06,
"loss": 1.2967,
"step": 172
},
{
"epoch": 0.565359477124183,
"grad_norm": 0.11767578125,
"learning_rate": 4.34640522875817e-06,
"loss": 1.2843,
"step": 173
},
{
"epoch": 0.5686274509803921,
"grad_norm": 0.1064453125,
"learning_rate": 4.313725490196079e-06,
"loss": 1.3083,
"step": 174
},
{
"epoch": 0.5718954248366013,
"grad_norm": 0.1240234375,
"learning_rate": 4.281045751633987e-06,
"loss": 1.3492,
"step": 175
},
{
"epoch": 0.5751633986928104,
"grad_norm": 0.1064453125,
"learning_rate": 4.2483660130718954e-06,
"loss": 1.2731,
"step": 176
},
{
"epoch": 0.5784313725490197,
"grad_norm": 0.1572265625,
"learning_rate": 4.215686274509805e-06,
"loss": 1.241,
"step": 177
},
{
"epoch": 0.5816993464052288,
"grad_norm": 0.10546875,
"learning_rate": 4.183006535947713e-06,
"loss": 1.3399,
"step": 178
},
{
"epoch": 0.5849673202614379,
"grad_norm": 0.1015625,
"learning_rate": 4.1503267973856215e-06,
"loss": 1.3507,
"step": 179
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.11328125,
"learning_rate": 4.11764705882353e-06,
"loss": 1.2953,
"step": 180
},
{
"epoch": 0.5915032679738562,
"grad_norm": 0.099609375,
"learning_rate": 4.084967320261438e-06,
"loss": 1.2757,
"step": 181
},
{
"epoch": 0.5947712418300654,
"grad_norm": 0.11279296875,
"learning_rate": 4.052287581699347e-06,
"loss": 1.3029,
"step": 182
},
{
"epoch": 0.5980392156862745,
"grad_norm": 0.10302734375,
"learning_rate": 4.019607843137255e-06,
"loss": 1.2806,
"step": 183
},
{
"epoch": 0.6013071895424836,
"grad_norm": 0.11376953125,
"learning_rate": 3.986928104575164e-06,
"loss": 1.3346,
"step": 184
},
{
"epoch": 0.6045751633986928,
"grad_norm": 0.103515625,
"learning_rate": 3.954248366013072e-06,
"loss": 1.3607,
"step": 185
},
{
"epoch": 0.6078431372549019,
"grad_norm": 0.1103515625,
"learning_rate": 3.92156862745098e-06,
"loss": 1.3136,
"step": 186
},
{
"epoch": 0.6111111111111112,
"grad_norm": 0.130859375,
"learning_rate": 3.88888888888889e-06,
"loss": 1.3384,
"step": 187
},
{
"epoch": 0.6143790849673203,
"grad_norm": 0.1181640625,
"learning_rate": 3.856209150326798e-06,
"loss": 1.308,
"step": 188
},
{
"epoch": 0.6176470588235294,
"grad_norm": 0.10302734375,
"learning_rate": 3.8235294117647055e-06,
"loss": 1.2922,
"step": 189
},
{
"epoch": 0.6209150326797386,
"grad_norm": 0.1044921875,
"learning_rate": 3.7908496732026144e-06,
"loss": 1.3052,
"step": 190
},
{
"epoch": 0.6241830065359477,
"grad_norm": 0.15234375,
"learning_rate": 3.758169934640523e-06,
"loss": 1.3447,
"step": 191
},
{
"epoch": 0.6274509803921569,
"grad_norm": 0.10107421875,
"learning_rate": 3.7254901960784316e-06,
"loss": 1.319,
"step": 192
},
{
"epoch": 0.630718954248366,
"grad_norm": 0.1279296875,
"learning_rate": 3.6928104575163404e-06,
"loss": 1.2778,
"step": 193
},
{
"epoch": 0.6339869281045751,
"grad_norm": 0.16015625,
"learning_rate": 3.6601307189542484e-06,
"loss": 1.3332,
"step": 194
},
{
"epoch": 0.6372549019607843,
"grad_norm": 0.1064453125,
"learning_rate": 3.6274509803921573e-06,
"loss": 1.3033,
"step": 195
},
{
"epoch": 0.6405228758169934,
"grad_norm": 0.1123046875,
"learning_rate": 3.5947712418300657e-06,
"loss": 1.2795,
"step": 196
},
{
"epoch": 0.6437908496732027,
"grad_norm": 0.10888671875,
"learning_rate": 3.562091503267974e-06,
"loss": 1.3305,
"step": 197
},
{
"epoch": 0.6470588235294118,
"grad_norm": 0.1123046875,
"learning_rate": 3.529411764705883e-06,
"loss": 1.2839,
"step": 198
},
{
"epoch": 0.6503267973856209,
"grad_norm": 0.12255859375,
"learning_rate": 3.496732026143791e-06,
"loss": 1.3031,
"step": 199
},
{
"epoch": 0.6535947712418301,
"grad_norm": 0.158203125,
"learning_rate": 3.4640522875816997e-06,
"loss": 1.2619,
"step": 200
},
{
"epoch": 0.6568627450980392,
"grad_norm": 0.126953125,
"learning_rate": 3.431372549019608e-06,
"loss": 1.3154,
"step": 201
},
{
"epoch": 0.6601307189542484,
"grad_norm": 0.1611328125,
"learning_rate": 3.398692810457517e-06,
"loss": 1.332,
"step": 202
},
{
"epoch": 0.6633986928104575,
"grad_norm": 0.11474609375,
"learning_rate": 3.3660130718954253e-06,
"loss": 1.331,
"step": 203
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.10546875,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.3,
"step": 204
},
{
"epoch": 0.6699346405228758,
"grad_norm": 0.10986328125,
"learning_rate": 3.300653594771242e-06,
"loss": 1.3237,
"step": 205
},
{
"epoch": 0.673202614379085,
"grad_norm": 0.1640625,
"learning_rate": 3.2679738562091506e-06,
"loss": 1.4259,
"step": 206
},
{
"epoch": 0.6764705882352942,
"grad_norm": 0.12158203125,
"learning_rate": 3.2352941176470594e-06,
"loss": 1.3311,
"step": 207
},
{
"epoch": 0.6797385620915033,
"grad_norm": 0.109375,
"learning_rate": 3.2026143790849674e-06,
"loss": 1.3069,
"step": 208
},
{
"epoch": 0.6830065359477124,
"grad_norm": 0.1337890625,
"learning_rate": 3.1699346405228758e-06,
"loss": 1.3022,
"step": 209
},
{
"epoch": 0.6862745098039216,
"grad_norm": 0.12158203125,
"learning_rate": 3.1372549019607846e-06,
"loss": 1.3439,
"step": 210
},
{
"epoch": 0.6895424836601307,
"grad_norm": 0.10888671875,
"learning_rate": 3.104575163398693e-06,
"loss": 1.2768,
"step": 211
},
{
"epoch": 0.6928104575163399,
"grad_norm": 0.1171875,
"learning_rate": 3.071895424836602e-06,
"loss": 1.3184,
"step": 212
},
{
"epoch": 0.696078431372549,
"grad_norm": 0.107421875,
"learning_rate": 3.03921568627451e-06,
"loss": 1.328,
"step": 213
},
{
"epoch": 0.6993464052287581,
"grad_norm": 0.109375,
"learning_rate": 3.0065359477124182e-06,
"loss": 1.287,
"step": 214
},
{
"epoch": 0.7026143790849673,
"grad_norm": 0.11181640625,
"learning_rate": 2.973856209150327e-06,
"loss": 1.2768,
"step": 215
},
{
"epoch": 0.7058823529411765,
"grad_norm": 0.119140625,
"learning_rate": 2.9411764705882355e-06,
"loss": 1.3145,
"step": 216
},
{
"epoch": 0.7091503267973857,
"grad_norm": 0.11572265625,
"learning_rate": 2.9084967320261443e-06,
"loss": 1.2902,
"step": 217
},
{
"epoch": 0.7124183006535948,
"grad_norm": 0.10888671875,
"learning_rate": 2.8758169934640523e-06,
"loss": 1.3166,
"step": 218
},
{
"epoch": 0.7156862745098039,
"grad_norm": 0.115234375,
"learning_rate": 2.843137254901961e-06,
"loss": 1.3041,
"step": 219
},
{
"epoch": 0.7189542483660131,
"grad_norm": 0.10791015625,
"learning_rate": 2.8104575163398695e-06,
"loss": 1.2861,
"step": 220
},
{
"epoch": 0.7222222222222222,
"grad_norm": 0.11767578125,
"learning_rate": 2.7777777777777783e-06,
"loss": 1.2623,
"step": 221
},
{
"epoch": 0.7254901960784313,
"grad_norm": 0.111328125,
"learning_rate": 2.7450980392156867e-06,
"loss": 1.268,
"step": 222
},
{
"epoch": 0.7287581699346405,
"grad_norm": 0.12451171875,
"learning_rate": 2.7124183006535947e-06,
"loss": 1.2768,
"step": 223
},
{
"epoch": 0.7320261437908496,
"grad_norm": 0.173828125,
"learning_rate": 2.6797385620915036e-06,
"loss": 1.4249,
"step": 224
},
{
"epoch": 0.7352941176470589,
"grad_norm": 0.11181640625,
"learning_rate": 2.647058823529412e-06,
"loss": 1.3141,
"step": 225
},
{
"epoch": 0.738562091503268,
"grad_norm": 0.1171875,
"learning_rate": 2.6143790849673208e-06,
"loss": 1.2831,
"step": 226
},
{
"epoch": 0.7418300653594772,
"grad_norm": 0.126953125,
"learning_rate": 2.581699346405229e-06,
"loss": 1.3071,
"step": 227
},
{
"epoch": 0.7450980392156863,
"grad_norm": 0.1787109375,
"learning_rate": 2.549019607843137e-06,
"loss": 1.3029,
"step": 228
},
{
"epoch": 0.7483660130718954,
"grad_norm": 0.11474609375,
"learning_rate": 2.516339869281046e-06,
"loss": 1.2983,
"step": 229
},
{
"epoch": 0.7516339869281046,
"grad_norm": 0.11669921875,
"learning_rate": 2.4836601307189544e-06,
"loss": 1.2885,
"step": 230
},
{
"epoch": 0.7549019607843137,
"grad_norm": 0.11083984375,
"learning_rate": 2.450980392156863e-06,
"loss": 1.2684,
"step": 231
},
{
"epoch": 0.7581699346405228,
"grad_norm": 0.150390625,
"learning_rate": 2.4183006535947716e-06,
"loss": 1.233,
"step": 232
},
{
"epoch": 0.761437908496732,
"grad_norm": 0.11328125,
"learning_rate": 2.38562091503268e-06,
"loss": 1.3528,
"step": 233
},
{
"epoch": 0.7647058823529411,
"grad_norm": 0.1123046875,
"learning_rate": 2.3529411764705885e-06,
"loss": 1.2915,
"step": 234
},
{
"epoch": 0.7679738562091504,
"grad_norm": 0.111328125,
"learning_rate": 2.320261437908497e-06,
"loss": 1.238,
"step": 235
},
{
"epoch": 0.7712418300653595,
"grad_norm": 0.1279296875,
"learning_rate": 2.2875816993464053e-06,
"loss": 1.2826,
"step": 236
},
{
"epoch": 0.7745098039215687,
"grad_norm": 0.10986328125,
"learning_rate": 2.254901960784314e-06,
"loss": 1.2619,
"step": 237
},
{
"epoch": 0.7777777777777778,
"grad_norm": 0.126953125,
"learning_rate": 2.222222222222222e-06,
"loss": 1.3,
"step": 238
},
{
"epoch": 0.7810457516339869,
"grad_norm": 0.162109375,
"learning_rate": 2.189542483660131e-06,
"loss": 1.3125,
"step": 239
},
{
"epoch": 0.7843137254901961,
"grad_norm": 0.11572265625,
"learning_rate": 2.1568627450980393e-06,
"loss": 1.3028,
"step": 240
},
{
"epoch": 0.7875816993464052,
"grad_norm": 0.1201171875,
"learning_rate": 2.1241830065359477e-06,
"loss": 1.2957,
"step": 241
},
{
"epoch": 0.7908496732026143,
"grad_norm": 0.10693359375,
"learning_rate": 2.0915032679738565e-06,
"loss": 1.2738,
"step": 242
},
{
"epoch": 0.7941176470588235,
"grad_norm": 0.1064453125,
"learning_rate": 2.058823529411765e-06,
"loss": 1.2864,
"step": 243
},
{
"epoch": 0.7973856209150327,
"grad_norm": 0.109375,
"learning_rate": 2.0261437908496734e-06,
"loss": 1.3131,
"step": 244
},
{
"epoch": 0.8006535947712419,
"grad_norm": 0.11572265625,
"learning_rate": 1.993464052287582e-06,
"loss": 1.3015,
"step": 245
},
{
"epoch": 0.803921568627451,
"grad_norm": 0.1318359375,
"learning_rate": 1.96078431372549e-06,
"loss": 1.3037,
"step": 246
},
{
"epoch": 0.8071895424836601,
"grad_norm": 0.1357421875,
"learning_rate": 1.928104575163399e-06,
"loss": 1.2882,
"step": 247
},
{
"epoch": 0.8104575163398693,
"grad_norm": 0.1240234375,
"learning_rate": 1.8954248366013072e-06,
"loss": 1.3064,
"step": 248
},
{
"epoch": 0.8137254901960784,
"grad_norm": 0.11083984375,
"learning_rate": 1.8627450980392158e-06,
"loss": 1.2951,
"step": 249
},
{
"epoch": 0.8169934640522876,
"grad_norm": 0.12158203125,
"learning_rate": 1.8300653594771242e-06,
"loss": 1.2906,
"step": 250
},
{
"epoch": 0.8202614379084967,
"grad_norm": 0.12158203125,
"learning_rate": 1.7973856209150328e-06,
"loss": 1.3138,
"step": 251
},
{
"epoch": 0.8235294117647058,
"grad_norm": 0.1591796875,
"learning_rate": 1.7647058823529414e-06,
"loss": 1.24,
"step": 252
},
{
"epoch": 0.826797385620915,
"grad_norm": 0.134765625,
"learning_rate": 1.7320261437908499e-06,
"loss": 1.2741,
"step": 253
},
{
"epoch": 0.8300653594771242,
"grad_norm": 0.107421875,
"learning_rate": 1.6993464052287585e-06,
"loss": 1.2784,
"step": 254
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.15625,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.2769,
"step": 255
},
{
"epoch": 0.8366013071895425,
"grad_norm": 0.11328125,
"learning_rate": 1.6339869281045753e-06,
"loss": 1.3121,
"step": 256
},
{
"epoch": 0.8398692810457516,
"grad_norm": 0.11083984375,
"learning_rate": 1.6013071895424837e-06,
"loss": 1.3014,
"step": 257
},
{
"epoch": 0.8431372549019608,
"grad_norm": 0.10693359375,
"learning_rate": 1.5686274509803923e-06,
"loss": 1.2812,
"step": 258
},
{
"epoch": 0.8464052287581699,
"grad_norm": 0.12158203125,
"learning_rate": 1.535947712418301e-06,
"loss": 1.3214,
"step": 259
},
{
"epoch": 0.8496732026143791,
"grad_norm": 0.12451171875,
"learning_rate": 1.5032679738562091e-06,
"loss": 1.3066,
"step": 260
},
{
"epoch": 0.8529411764705882,
"grad_norm": 0.1806640625,
"learning_rate": 1.4705882352941177e-06,
"loss": 1.3292,
"step": 261
},
{
"epoch": 0.8562091503267973,
"grad_norm": 0.1103515625,
"learning_rate": 1.4379084967320261e-06,
"loss": 1.2693,
"step": 262
},
{
"epoch": 0.8594771241830066,
"grad_norm": 0.1484375,
"learning_rate": 1.4052287581699348e-06,
"loss": 1.2824,
"step": 263
},
{
"epoch": 0.8627450980392157,
"grad_norm": 0.10986328125,
"learning_rate": 1.3725490196078434e-06,
"loss": 1.2549,
"step": 264
},
{
"epoch": 0.8660130718954249,
"grad_norm": 0.125,
"learning_rate": 1.3398692810457518e-06,
"loss": 1.2826,
"step": 265
},
{
"epoch": 0.869281045751634,
"grad_norm": 0.12451171875,
"learning_rate": 1.3071895424836604e-06,
"loss": 1.3209,
"step": 266
},
{
"epoch": 0.8725490196078431,
"grad_norm": 0.1044921875,
"learning_rate": 1.2745098039215686e-06,
"loss": 1.2802,
"step": 267
},
{
"epoch": 0.8758169934640523,
"grad_norm": 0.111328125,
"learning_rate": 1.2418300653594772e-06,
"loss": 1.2775,
"step": 268
},
{
"epoch": 0.8790849673202614,
"grad_norm": 0.10693359375,
"learning_rate": 1.2091503267973858e-06,
"loss": 1.2438,
"step": 269
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.1259765625,
"learning_rate": 1.1764705882352942e-06,
"loss": 1.3108,
"step": 270
},
{
"epoch": 0.8856209150326797,
"grad_norm": 0.1142578125,
"learning_rate": 1.1437908496732026e-06,
"loss": 1.2374,
"step": 271
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.109375,
"learning_rate": 1.111111111111111e-06,
"loss": 1.2826,
"step": 272
},
{
"epoch": 0.8921568627450981,
"grad_norm": 0.12451171875,
"learning_rate": 1.0784313725490197e-06,
"loss": 1.3142,
"step": 273
},
{
"epoch": 0.8954248366013072,
"grad_norm": 0.1044921875,
"learning_rate": 1.0457516339869283e-06,
"loss": 1.2863,
"step": 274
},
{
"epoch": 0.8986928104575164,
"grad_norm": 0.12890625,
"learning_rate": 1.0130718954248367e-06,
"loss": 1.269,
"step": 275
},
{
"epoch": 0.9019607843137255,
"grad_norm": 0.1083984375,
"learning_rate": 9.80392156862745e-07,
"loss": 1.2899,
"step": 276
},
{
"epoch": 0.9052287581699346,
"grad_norm": 0.169921875,
"learning_rate": 9.477124183006536e-07,
"loss": 1.3393,
"step": 277
},
{
"epoch": 0.9084967320261438,
"grad_norm": 0.1455078125,
"learning_rate": 9.150326797385621e-07,
"loss": 1.2714,
"step": 278
},
{
"epoch": 0.9117647058823529,
"grad_norm": 0.10400390625,
"learning_rate": 8.823529411764707e-07,
"loss": 1.2991,
"step": 279
},
{
"epoch": 0.9150326797385621,
"grad_norm": 0.140625,
"learning_rate": 8.496732026143792e-07,
"loss": 1.3242,
"step": 280
},
{
"epoch": 0.9183006535947712,
"grad_norm": 0.1083984375,
"learning_rate": 8.169934640522876e-07,
"loss": 1.2888,
"step": 281
},
{
"epoch": 0.9215686274509803,
"grad_norm": 0.13671875,
"learning_rate": 7.843137254901962e-07,
"loss": 1.2745,
"step": 282
},
{
"epoch": 0.9248366013071896,
"grad_norm": 0.10400390625,
"learning_rate": 7.516339869281046e-07,
"loss": 1.2467,
"step": 283
},
{
"epoch": 0.9281045751633987,
"grad_norm": 0.1025390625,
"learning_rate": 7.189542483660131e-07,
"loss": 1.3205,
"step": 284
},
{
"epoch": 0.9313725490196079,
"grad_norm": 0.115234375,
"learning_rate": 6.862745098039217e-07,
"loss": 1.2527,
"step": 285
},
{
"epoch": 0.934640522875817,
"grad_norm": 0.1240234375,
"learning_rate": 6.535947712418302e-07,
"loss": 1.335,
"step": 286
},
{
"epoch": 0.9379084967320261,
"grad_norm": 0.10302734375,
"learning_rate": 6.209150326797386e-07,
"loss": 1.2643,
"step": 287
},
{
"epoch": 0.9411764705882353,
"grad_norm": 0.123046875,
"learning_rate": 5.882352941176471e-07,
"loss": 1.3361,
"step": 288
},
{
"epoch": 0.9444444444444444,
"grad_norm": 0.11767578125,
"learning_rate": 5.555555555555555e-07,
"loss": 1.291,
"step": 289
},
{
"epoch": 0.9477124183006536,
"grad_norm": 0.10498046875,
"learning_rate": 5.228758169934641e-07,
"loss": 1.3035,
"step": 290
},
{
"epoch": 0.9509803921568627,
"grad_norm": 0.1083984375,
"learning_rate": 4.901960784313725e-07,
"loss": 1.2796,
"step": 291
},
{
"epoch": 0.954248366013072,
"grad_norm": 0.109375,
"learning_rate": 4.5751633986928105e-07,
"loss": 1.2886,
"step": 292
},
{
"epoch": 0.9575163398692811,
"grad_norm": 0.134765625,
"learning_rate": 4.248366013071896e-07,
"loss": 1.3668,
"step": 293
},
{
"epoch": 0.9607843137254902,
"grad_norm": 0.1533203125,
"learning_rate": 3.921568627450981e-07,
"loss": 1.3172,
"step": 294
},
{
"epoch": 0.9640522875816994,
"grad_norm": 0.1162109375,
"learning_rate": 3.5947712418300653e-07,
"loss": 1.2876,
"step": 295
},
{
"epoch": 0.9673202614379085,
"grad_norm": 0.11865234375,
"learning_rate": 3.267973856209151e-07,
"loss": 1.2527,
"step": 296
},
{
"epoch": 0.9705882352941176,
"grad_norm": 0.10546875,
"learning_rate": 2.9411764705882356e-07,
"loss": 1.2697,
"step": 297
},
{
"epoch": 0.9738562091503268,
"grad_norm": 0.103515625,
"learning_rate": 2.6143790849673207e-07,
"loss": 1.2876,
"step": 298
},
{
"epoch": 0.9771241830065359,
"grad_norm": 0.1064453125,
"learning_rate": 2.2875816993464053e-07,
"loss": 1.2968,
"step": 299
},
{
"epoch": 0.9803921568627451,
"grad_norm": 0.1171875,
"learning_rate": 1.9607843137254904e-07,
"loss": 1.3265,
"step": 300
},
{
"epoch": 0.9836601307189542,
"grad_norm": 0.1171875,
"learning_rate": 1.6339869281045755e-07,
"loss": 1.2475,
"step": 301
},
{
"epoch": 0.9869281045751634,
"grad_norm": 0.1201171875,
"learning_rate": 1.3071895424836603e-07,
"loss": 1.2667,
"step": 302
},
{
"epoch": 0.9901960784313726,
"grad_norm": 0.1728515625,
"learning_rate": 9.803921568627452e-08,
"loss": 1.3196,
"step": 303
},
{
"epoch": 0.9934640522875817,
"grad_norm": 0.11181640625,
"learning_rate": 6.535947712418302e-08,
"loss": 1.2911,
"step": 304
},
{
"epoch": 0.9967320261437909,
"grad_norm": 0.1142578125,
"learning_rate": 3.267973856209151e-08,
"loss": 1.3154,
"step": 305
},
{
"epoch": 1.0,
"grad_norm": 0.12158203125,
"learning_rate": 0.0,
"loss": 1.2803,
"step": 306
}
],
"logging_steps": 1.0,
"max_steps": 306,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.566222808116101e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}