model-translator-lfm-1 / trainer_state.json
Ba2han's picture
Upload folder using huggingface_hub
8833284 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.45023865498325855,
"eval_steps": 1580,
"global_step": 1580,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000284961174040037,
"grad_norm": 29.125,
"learning_rate": 0.0,
"loss": 2.6234,
"step": 1
},
{
"epoch": 0.000569922348080074,
"grad_norm": 21.5,
"learning_rate": 6.329113924050633e-07,
"loss": 2.6285,
"step": 2
},
{
"epoch": 0.0008548835221201111,
"grad_norm": 23.5,
"learning_rate": 1.2658227848101265e-06,
"loss": 2.6019,
"step": 3
},
{
"epoch": 0.001139844696160148,
"grad_norm": 19.75,
"learning_rate": 1.8987341772151901e-06,
"loss": 2.5364,
"step": 4
},
{
"epoch": 0.0014248058702001853,
"grad_norm": 14.5625,
"learning_rate": 2.531645569620253e-06,
"loss": 2.2591,
"step": 5
},
{
"epoch": 0.0017097670442402222,
"grad_norm": 16.5,
"learning_rate": 3.1645569620253167e-06,
"loss": 2.2259,
"step": 6
},
{
"epoch": 0.001994728218280259,
"grad_norm": 30.375,
"learning_rate": 3.7974683544303802e-06,
"loss": 2.757,
"step": 7
},
{
"epoch": 0.002279689392320296,
"grad_norm": 16.375,
"learning_rate": 4.430379746835443e-06,
"loss": 2.431,
"step": 8
},
{
"epoch": 0.0025646505663603335,
"grad_norm": 15.5625,
"learning_rate": 5.063291139240506e-06,
"loss": 2.3019,
"step": 9
},
{
"epoch": 0.0028496117404003705,
"grad_norm": 13.75,
"learning_rate": 5.69620253164557e-06,
"loss": 2.2385,
"step": 10
},
{
"epoch": 0.0031345729144404075,
"grad_norm": 12.8125,
"learning_rate": 6.329113924050633e-06,
"loss": 2.3134,
"step": 11
},
{
"epoch": 0.0034195340884804444,
"grad_norm": 12.875,
"learning_rate": 6.9620253164556965e-06,
"loss": 2.3087,
"step": 12
},
{
"epoch": 0.0037044952625204814,
"grad_norm": 11.0625,
"learning_rate": 7.5949367088607605e-06,
"loss": 1.9818,
"step": 13
},
{
"epoch": 0.003989456436560518,
"grad_norm": 9.9375,
"learning_rate": 8.227848101265822e-06,
"loss": 2.0041,
"step": 14
},
{
"epoch": 0.004274417610600555,
"grad_norm": 9.4375,
"learning_rate": 8.860759493670886e-06,
"loss": 2.0196,
"step": 15
},
{
"epoch": 0.004559378784640592,
"grad_norm": 8.0625,
"learning_rate": 9.49367088607595e-06,
"loss": 1.8285,
"step": 16
},
{
"epoch": 0.00484433995868063,
"grad_norm": 7.4375,
"learning_rate": 1.0126582278481012e-05,
"loss": 1.9578,
"step": 17
},
{
"epoch": 0.005129301132720667,
"grad_norm": 5.625,
"learning_rate": 1.0759493670886076e-05,
"loss": 1.8962,
"step": 18
},
{
"epoch": 0.005414262306760704,
"grad_norm": 4.125,
"learning_rate": 1.139240506329114e-05,
"loss": 1.7924,
"step": 19
},
{
"epoch": 0.005699223480800741,
"grad_norm": 3.3125,
"learning_rate": 1.2025316455696203e-05,
"loss": 1.718,
"step": 20
},
{
"epoch": 0.005984184654840778,
"grad_norm": 3.234375,
"learning_rate": 1.2658227848101267e-05,
"loss": 1.6582,
"step": 21
},
{
"epoch": 0.006269145828880815,
"grad_norm": 3.109375,
"learning_rate": 1.3291139240506329e-05,
"loss": 1.6866,
"step": 22
},
{
"epoch": 0.006554107002920852,
"grad_norm": 2.8125,
"learning_rate": 1.3924050632911393e-05,
"loss": 1.7584,
"step": 23
},
{
"epoch": 0.006839068176960889,
"grad_norm": 2.109375,
"learning_rate": 1.4556962025316457e-05,
"loss": 1.6102,
"step": 24
},
{
"epoch": 0.007124029351000926,
"grad_norm": 2.0625,
"learning_rate": 1.5189873417721521e-05,
"loss": 1.6869,
"step": 25
},
{
"epoch": 0.007408990525040963,
"grad_norm": 2.09375,
"learning_rate": 1.5822784810126583e-05,
"loss": 1.6164,
"step": 26
},
{
"epoch": 0.007693951699081001,
"grad_norm": 1.953125,
"learning_rate": 1.6455696202531644e-05,
"loss": 1.7301,
"step": 27
},
{
"epoch": 0.007978912873121037,
"grad_norm": 1.640625,
"learning_rate": 1.7088607594936708e-05,
"loss": 1.3851,
"step": 28
},
{
"epoch": 0.008263874047161075,
"grad_norm": 1.625,
"learning_rate": 1.7721518987341772e-05,
"loss": 1.6785,
"step": 29
},
{
"epoch": 0.00854883522120111,
"grad_norm": 1.4765625,
"learning_rate": 1.8354430379746836e-05,
"loss": 1.3419,
"step": 30
},
{
"epoch": 0.008833796395241148,
"grad_norm": 1.4375,
"learning_rate": 1.89873417721519e-05,
"loss": 1.4765,
"step": 31
},
{
"epoch": 0.009118757569281185,
"grad_norm": 1.296875,
"learning_rate": 1.962025316455696e-05,
"loss": 1.3954,
"step": 32
},
{
"epoch": 0.009403718743321222,
"grad_norm": 1.484375,
"learning_rate": 2.0253164556962025e-05,
"loss": 1.576,
"step": 33
},
{
"epoch": 0.00968867991736126,
"grad_norm": 1.3828125,
"learning_rate": 2.088607594936709e-05,
"loss": 1.4133,
"step": 34
},
{
"epoch": 0.009973641091401296,
"grad_norm": 1.34375,
"learning_rate": 2.1518987341772153e-05,
"loss": 1.6026,
"step": 35
},
{
"epoch": 0.010258602265441334,
"grad_norm": 1.5859375,
"learning_rate": 2.2151898734177217e-05,
"loss": 1.6557,
"step": 36
},
{
"epoch": 0.01054356343948137,
"grad_norm": 1.34375,
"learning_rate": 2.278481012658228e-05,
"loss": 1.5608,
"step": 37
},
{
"epoch": 0.010828524613521408,
"grad_norm": 1.3203125,
"learning_rate": 2.341772151898734e-05,
"loss": 1.4184,
"step": 38
},
{
"epoch": 0.011113485787561444,
"grad_norm": 1.3515625,
"learning_rate": 2.4050632911392405e-05,
"loss": 1.6377,
"step": 39
},
{
"epoch": 0.011398446961601482,
"grad_norm": 1.21875,
"learning_rate": 2.468354430379747e-05,
"loss": 1.3586,
"step": 40
},
{
"epoch": 0.011683408135641518,
"grad_norm": 1.21875,
"learning_rate": 2.5316455696202533e-05,
"loss": 1.4761,
"step": 41
},
{
"epoch": 0.011968369309681556,
"grad_norm": 1.2421875,
"learning_rate": 2.5949367088607597e-05,
"loss": 1.3997,
"step": 42
},
{
"epoch": 0.012253330483721592,
"grad_norm": 1.1484375,
"learning_rate": 2.6582278481012658e-05,
"loss": 1.5065,
"step": 43
},
{
"epoch": 0.01253829165776163,
"grad_norm": 1.203125,
"learning_rate": 2.7215189873417722e-05,
"loss": 1.3738,
"step": 44
},
{
"epoch": 0.012823252831801668,
"grad_norm": 1.25,
"learning_rate": 2.7848101265822786e-05,
"loss": 1.4361,
"step": 45
},
{
"epoch": 0.013108214005841704,
"grad_norm": 1.2734375,
"learning_rate": 2.848101265822785e-05,
"loss": 1.3365,
"step": 46
},
{
"epoch": 0.013393175179881742,
"grad_norm": 1.140625,
"learning_rate": 2.9113924050632914e-05,
"loss": 1.3863,
"step": 47
},
{
"epoch": 0.013678136353921778,
"grad_norm": 1.1953125,
"learning_rate": 2.9746835443037974e-05,
"loss": 1.465,
"step": 48
},
{
"epoch": 0.013963097527961816,
"grad_norm": 1.265625,
"learning_rate": 3.0379746835443042e-05,
"loss": 1.5108,
"step": 49
},
{
"epoch": 0.014248058702001852,
"grad_norm": 1.1953125,
"learning_rate": 3.10126582278481e-05,
"loss": 1.3891,
"step": 50
},
{
"epoch": 0.01453301987604189,
"grad_norm": 1.1328125,
"learning_rate": 3.1645569620253167e-05,
"loss": 1.285,
"step": 51
},
{
"epoch": 0.014817981050081926,
"grad_norm": 1.1640625,
"learning_rate": 3.227848101265823e-05,
"loss": 1.3574,
"step": 52
},
{
"epoch": 0.015102942224121963,
"grad_norm": 1.09375,
"learning_rate": 3.291139240506329e-05,
"loss": 1.3037,
"step": 53
},
{
"epoch": 0.015387903398162001,
"grad_norm": 1.1484375,
"learning_rate": 3.354430379746836e-05,
"loss": 1.3654,
"step": 54
},
{
"epoch": 0.015672864572202037,
"grad_norm": 1.1484375,
"learning_rate": 3.4177215189873416e-05,
"loss": 1.4181,
"step": 55
},
{
"epoch": 0.015957825746242073,
"grad_norm": 1.078125,
"learning_rate": 3.4810126582278487e-05,
"loss": 1.2465,
"step": 56
},
{
"epoch": 0.016242786920282113,
"grad_norm": 1.171875,
"learning_rate": 3.5443037974683544e-05,
"loss": 1.29,
"step": 57
},
{
"epoch": 0.01652774809432215,
"grad_norm": 1.25,
"learning_rate": 3.607594936708861e-05,
"loss": 1.4819,
"step": 58
},
{
"epoch": 0.016812709268362185,
"grad_norm": 1.0703125,
"learning_rate": 3.670886075949367e-05,
"loss": 1.2337,
"step": 59
},
{
"epoch": 0.01709767044240222,
"grad_norm": 1.078125,
"learning_rate": 3.7341772151898736e-05,
"loss": 1.4545,
"step": 60
},
{
"epoch": 0.01738263161644226,
"grad_norm": 1.078125,
"learning_rate": 3.79746835443038e-05,
"loss": 1.4053,
"step": 61
},
{
"epoch": 0.017667592790482297,
"grad_norm": 1.2109375,
"learning_rate": 3.8607594936708864e-05,
"loss": 1.3166,
"step": 62
},
{
"epoch": 0.017952553964522333,
"grad_norm": 1.2890625,
"learning_rate": 3.924050632911392e-05,
"loss": 1.495,
"step": 63
},
{
"epoch": 0.01823751513856237,
"grad_norm": 1.09375,
"learning_rate": 3.987341772151899e-05,
"loss": 1.3391,
"step": 64
},
{
"epoch": 0.01852247631260241,
"grad_norm": 1.1484375,
"learning_rate": 4.050632911392405e-05,
"loss": 1.3754,
"step": 65
},
{
"epoch": 0.018807437486642445,
"grad_norm": 1.0703125,
"learning_rate": 4.113924050632912e-05,
"loss": 1.35,
"step": 66
},
{
"epoch": 0.01909239866068248,
"grad_norm": 1.046875,
"learning_rate": 4.177215189873418e-05,
"loss": 1.359,
"step": 67
},
{
"epoch": 0.01937735983472252,
"grad_norm": 1.0703125,
"learning_rate": 4.240506329113924e-05,
"loss": 1.3325,
"step": 68
},
{
"epoch": 0.019662321008762557,
"grad_norm": 1.1171875,
"learning_rate": 4.3037974683544305e-05,
"loss": 1.4202,
"step": 69
},
{
"epoch": 0.019947282182802593,
"grad_norm": 1.03125,
"learning_rate": 4.367088607594937e-05,
"loss": 1.2194,
"step": 70
},
{
"epoch": 0.02023224335684263,
"grad_norm": 1.140625,
"learning_rate": 4.430379746835443e-05,
"loss": 1.4287,
"step": 71
},
{
"epoch": 0.02051720453088267,
"grad_norm": 1.1015625,
"learning_rate": 4.49367088607595e-05,
"loss": 1.2697,
"step": 72
},
{
"epoch": 0.020802165704922704,
"grad_norm": 1.0859375,
"learning_rate": 4.556962025316456e-05,
"loss": 1.4088,
"step": 73
},
{
"epoch": 0.02108712687896274,
"grad_norm": 1.09375,
"learning_rate": 4.6202531645569625e-05,
"loss": 1.3963,
"step": 74
},
{
"epoch": 0.021372088053002777,
"grad_norm": 1.1171875,
"learning_rate": 4.683544303797468e-05,
"loss": 1.5456,
"step": 75
},
{
"epoch": 0.021657049227042816,
"grad_norm": 1.078125,
"learning_rate": 4.7468354430379746e-05,
"loss": 1.2655,
"step": 76
},
{
"epoch": 0.021942010401082852,
"grad_norm": 1.0703125,
"learning_rate": 4.810126582278481e-05,
"loss": 1.3796,
"step": 77
},
{
"epoch": 0.02222697157512289,
"grad_norm": 1.0078125,
"learning_rate": 4.8734177215189874e-05,
"loss": 1.2177,
"step": 78
},
{
"epoch": 0.022511932749162928,
"grad_norm": 1.09375,
"learning_rate": 4.936708860759494e-05,
"loss": 1.5553,
"step": 79
},
{
"epoch": 0.022796893923202964,
"grad_norm": 1.1015625,
"learning_rate": 5e-05,
"loss": 1.3957,
"step": 80
},
{
"epoch": 0.023081855097243,
"grad_norm": 1.15625,
"learning_rate": 5.0632911392405066e-05,
"loss": 1.4037,
"step": 81
},
{
"epoch": 0.023366816271283036,
"grad_norm": 1.0390625,
"learning_rate": 5.1265822784810124e-05,
"loss": 1.2413,
"step": 82
},
{
"epoch": 0.023651777445323076,
"grad_norm": 0.98828125,
"learning_rate": 5.1898734177215194e-05,
"loss": 1.2343,
"step": 83
},
{
"epoch": 0.023936738619363112,
"grad_norm": 0.9453125,
"learning_rate": 5.253164556962026e-05,
"loss": 1.148,
"step": 84
},
{
"epoch": 0.024221699793403148,
"grad_norm": 1.0859375,
"learning_rate": 5.3164556962025316e-05,
"loss": 1.5004,
"step": 85
},
{
"epoch": 0.024506660967443184,
"grad_norm": 0.98046875,
"learning_rate": 5.379746835443038e-05,
"loss": 1.2534,
"step": 86
},
{
"epoch": 0.024791622141483224,
"grad_norm": 1.015625,
"learning_rate": 5.4430379746835444e-05,
"loss": 1.2575,
"step": 87
},
{
"epoch": 0.02507658331552326,
"grad_norm": 1.0859375,
"learning_rate": 5.5063291139240514e-05,
"loss": 1.2185,
"step": 88
},
{
"epoch": 0.025361544489563296,
"grad_norm": 1.0078125,
"learning_rate": 5.569620253164557e-05,
"loss": 1.3606,
"step": 89
},
{
"epoch": 0.025646505663603335,
"grad_norm": 1.0,
"learning_rate": 5.6329113924050636e-05,
"loss": 1.3193,
"step": 90
},
{
"epoch": 0.02593146683764337,
"grad_norm": 1.0546875,
"learning_rate": 5.69620253164557e-05,
"loss": 1.2805,
"step": 91
},
{
"epoch": 0.026216428011683408,
"grad_norm": 1.1796875,
"learning_rate": 5.759493670886076e-05,
"loss": 1.433,
"step": 92
},
{
"epoch": 0.026501389185723444,
"grad_norm": 0.94140625,
"learning_rate": 5.822784810126583e-05,
"loss": 1.1702,
"step": 93
},
{
"epoch": 0.026786350359763483,
"grad_norm": 1.265625,
"learning_rate": 5.886075949367089e-05,
"loss": 1.4432,
"step": 94
},
{
"epoch": 0.02707131153380352,
"grad_norm": 1.1015625,
"learning_rate": 5.949367088607595e-05,
"loss": 1.419,
"step": 95
},
{
"epoch": 0.027356272707843556,
"grad_norm": 1.0546875,
"learning_rate": 6.012658227848101e-05,
"loss": 1.2796,
"step": 96
},
{
"epoch": 0.027641233881883595,
"grad_norm": 1.1484375,
"learning_rate": 6.0759493670886084e-05,
"loss": 1.2824,
"step": 97
},
{
"epoch": 0.02792619505592363,
"grad_norm": 1.0234375,
"learning_rate": 6.139240506329115e-05,
"loss": 1.3208,
"step": 98
},
{
"epoch": 0.028211156229963667,
"grad_norm": 1.015625,
"learning_rate": 6.20253164556962e-05,
"loss": 1.2591,
"step": 99
},
{
"epoch": 0.028496117404003703,
"grad_norm": 0.984375,
"learning_rate": 6.265822784810128e-05,
"loss": 1.185,
"step": 100
},
{
"epoch": 0.028781078578043743,
"grad_norm": 1.0546875,
"learning_rate": 6.329113924050633e-05,
"loss": 1.3322,
"step": 101
},
{
"epoch": 0.02906603975208378,
"grad_norm": 1.0703125,
"learning_rate": 6.392405063291139e-05,
"loss": 1.4192,
"step": 102
},
{
"epoch": 0.029351000926123815,
"grad_norm": 1.015625,
"learning_rate": 6.455696202531646e-05,
"loss": 1.3446,
"step": 103
},
{
"epoch": 0.02963596210016385,
"grad_norm": 1.015625,
"learning_rate": 6.518987341772153e-05,
"loss": 1.3051,
"step": 104
},
{
"epoch": 0.02992092327420389,
"grad_norm": 0.93359375,
"learning_rate": 6.582278481012658e-05,
"loss": 1.2271,
"step": 105
},
{
"epoch": 0.030205884448243927,
"grad_norm": 0.98046875,
"learning_rate": 6.645569620253165e-05,
"loss": 1.3005,
"step": 106
},
{
"epoch": 0.030490845622283963,
"grad_norm": 1.0390625,
"learning_rate": 6.708860759493672e-05,
"loss": 1.3054,
"step": 107
},
{
"epoch": 0.030775806796324003,
"grad_norm": 0.953125,
"learning_rate": 6.772151898734177e-05,
"loss": 1.1751,
"step": 108
},
{
"epoch": 0.03106076797036404,
"grad_norm": 1.09375,
"learning_rate": 6.835443037974683e-05,
"loss": 1.2837,
"step": 109
},
{
"epoch": 0.031345729144404075,
"grad_norm": 1.0546875,
"learning_rate": 6.89873417721519e-05,
"loss": 1.284,
"step": 110
},
{
"epoch": 0.031630690318444114,
"grad_norm": 1.0390625,
"learning_rate": 6.962025316455697e-05,
"loss": 1.1764,
"step": 111
},
{
"epoch": 0.03191565149248415,
"grad_norm": 1.015625,
"learning_rate": 7.025316455696203e-05,
"loss": 1.3151,
"step": 112
},
{
"epoch": 0.032200612666524187,
"grad_norm": 1.0390625,
"learning_rate": 7.088607594936709e-05,
"loss": 1.3396,
"step": 113
},
{
"epoch": 0.032485573840564226,
"grad_norm": 1.046875,
"learning_rate": 7.151898734177216e-05,
"loss": 1.3189,
"step": 114
},
{
"epoch": 0.03277053501460426,
"grad_norm": 1.03125,
"learning_rate": 7.215189873417722e-05,
"loss": 1.3304,
"step": 115
},
{
"epoch": 0.0330554961886443,
"grad_norm": 1.0,
"learning_rate": 7.278481012658229e-05,
"loss": 1.3019,
"step": 116
},
{
"epoch": 0.03334045736268433,
"grad_norm": 0.94140625,
"learning_rate": 7.341772151898734e-05,
"loss": 1.1286,
"step": 117
},
{
"epoch": 0.03362541853672437,
"grad_norm": 1.078125,
"learning_rate": 7.40506329113924e-05,
"loss": 1.2951,
"step": 118
},
{
"epoch": 0.03391037971076441,
"grad_norm": 0.98828125,
"learning_rate": 7.468354430379747e-05,
"loss": 1.3084,
"step": 119
},
{
"epoch": 0.03419534088480444,
"grad_norm": 0.99609375,
"learning_rate": 7.531645569620254e-05,
"loss": 1.2161,
"step": 120
},
{
"epoch": 0.03448030205884448,
"grad_norm": 1.0703125,
"learning_rate": 7.59493670886076e-05,
"loss": 1.3622,
"step": 121
},
{
"epoch": 0.03476526323288452,
"grad_norm": 1.0859375,
"learning_rate": 7.658227848101266e-05,
"loss": 1.2539,
"step": 122
},
{
"epoch": 0.035050224406924554,
"grad_norm": 1.1015625,
"learning_rate": 7.721518987341773e-05,
"loss": 1.3334,
"step": 123
},
{
"epoch": 0.035335185580964594,
"grad_norm": 1.0234375,
"learning_rate": 7.78481012658228e-05,
"loss": 1.3351,
"step": 124
},
{
"epoch": 0.035620146755004634,
"grad_norm": 1.0,
"learning_rate": 7.848101265822784e-05,
"loss": 1.2245,
"step": 125
},
{
"epoch": 0.035905107929044666,
"grad_norm": 1.0546875,
"learning_rate": 7.911392405063291e-05,
"loss": 1.3739,
"step": 126
},
{
"epoch": 0.036190069103084706,
"grad_norm": 0.95703125,
"learning_rate": 7.974683544303798e-05,
"loss": 1.2191,
"step": 127
},
{
"epoch": 0.03647503027712474,
"grad_norm": 1.0390625,
"learning_rate": 8.037974683544304e-05,
"loss": 1.304,
"step": 128
},
{
"epoch": 0.03675999145116478,
"grad_norm": 1.0234375,
"learning_rate": 8.10126582278481e-05,
"loss": 1.2241,
"step": 129
},
{
"epoch": 0.03704495262520482,
"grad_norm": 1.03125,
"learning_rate": 8.164556962025317e-05,
"loss": 1.0792,
"step": 130
},
{
"epoch": 0.03732991379924485,
"grad_norm": 0.9609375,
"learning_rate": 8.227848101265824e-05,
"loss": 1.218,
"step": 131
},
{
"epoch": 0.03761487497328489,
"grad_norm": 1.0703125,
"learning_rate": 8.29113924050633e-05,
"loss": 1.2508,
"step": 132
},
{
"epoch": 0.03789983614732493,
"grad_norm": 1.03125,
"learning_rate": 8.354430379746835e-05,
"loss": 1.3057,
"step": 133
},
{
"epoch": 0.03818479732136496,
"grad_norm": 1.0390625,
"learning_rate": 8.417721518987342e-05,
"loss": 1.3126,
"step": 134
},
{
"epoch": 0.038469758495405,
"grad_norm": 1.0234375,
"learning_rate": 8.481012658227848e-05,
"loss": 1.3134,
"step": 135
},
{
"epoch": 0.03875471966944504,
"grad_norm": 1.0234375,
"learning_rate": 8.544303797468355e-05,
"loss": 1.2166,
"step": 136
},
{
"epoch": 0.039039680843485074,
"grad_norm": 1.0,
"learning_rate": 8.607594936708861e-05,
"loss": 1.2348,
"step": 137
},
{
"epoch": 0.03932464201752511,
"grad_norm": 1.015625,
"learning_rate": 8.670886075949367e-05,
"loss": 1.1484,
"step": 138
},
{
"epoch": 0.039609603191565146,
"grad_norm": 1.0390625,
"learning_rate": 8.734177215189874e-05,
"loss": 1.3253,
"step": 139
},
{
"epoch": 0.039894564365605185,
"grad_norm": 1.125,
"learning_rate": 8.797468354430381e-05,
"loss": 1.3117,
"step": 140
},
{
"epoch": 0.040179525539645225,
"grad_norm": 0.91015625,
"learning_rate": 8.860759493670887e-05,
"loss": 1.1099,
"step": 141
},
{
"epoch": 0.04046448671368526,
"grad_norm": 1.0546875,
"learning_rate": 8.924050632911392e-05,
"loss": 1.3046,
"step": 142
},
{
"epoch": 0.0407494478877253,
"grad_norm": 1.0625,
"learning_rate": 8.9873417721519e-05,
"loss": 1.2424,
"step": 143
},
{
"epoch": 0.04103440906176534,
"grad_norm": 1.0546875,
"learning_rate": 9.050632911392407e-05,
"loss": 1.3855,
"step": 144
},
{
"epoch": 0.04131937023580537,
"grad_norm": 1.1171875,
"learning_rate": 9.113924050632912e-05,
"loss": 1.2624,
"step": 145
},
{
"epoch": 0.04160433140984541,
"grad_norm": 1.015625,
"learning_rate": 9.177215189873418e-05,
"loss": 1.2397,
"step": 146
},
{
"epoch": 0.04188929258388545,
"grad_norm": 1.0546875,
"learning_rate": 9.240506329113925e-05,
"loss": 1.3379,
"step": 147
},
{
"epoch": 0.04217425375792548,
"grad_norm": 1.0078125,
"learning_rate": 9.303797468354431e-05,
"loss": 1.2333,
"step": 148
},
{
"epoch": 0.04245921493196552,
"grad_norm": 1.0703125,
"learning_rate": 9.367088607594936e-05,
"loss": 1.2861,
"step": 149
},
{
"epoch": 0.04274417610600555,
"grad_norm": 0.94921875,
"learning_rate": 9.430379746835444e-05,
"loss": 1.1509,
"step": 150
},
{
"epoch": 0.04302913728004559,
"grad_norm": 0.87890625,
"learning_rate": 9.493670886075949e-05,
"loss": 1.0674,
"step": 151
},
{
"epoch": 0.04331409845408563,
"grad_norm": 0.9765625,
"learning_rate": 9.556962025316456e-05,
"loss": 1.2168,
"step": 152
},
{
"epoch": 0.043599059628125665,
"grad_norm": 1.0390625,
"learning_rate": 9.620253164556962e-05,
"loss": 1.3691,
"step": 153
},
{
"epoch": 0.043884020802165705,
"grad_norm": 1.0078125,
"learning_rate": 9.683544303797469e-05,
"loss": 1.377,
"step": 154
},
{
"epoch": 0.044168981976205744,
"grad_norm": 0.92578125,
"learning_rate": 9.746835443037975e-05,
"loss": 1.1519,
"step": 155
},
{
"epoch": 0.04445394315024578,
"grad_norm": 0.94921875,
"learning_rate": 9.810126582278482e-05,
"loss": 1.1929,
"step": 156
},
{
"epoch": 0.044738904324285816,
"grad_norm": 0.94140625,
"learning_rate": 9.873417721518988e-05,
"loss": 1.2098,
"step": 157
},
{
"epoch": 0.045023865498325856,
"grad_norm": 0.94140625,
"learning_rate": 9.936708860759493e-05,
"loss": 1.2152,
"step": 158
},
{
"epoch": 0.04530882667236589,
"grad_norm": 0.89453125,
"learning_rate": 0.0001,
"loss": 1.143,
"step": 159
},
{
"epoch": 0.04559378784640593,
"grad_norm": 0.9609375,
"learning_rate": 9.999999053963213e-05,
"loss": 1.183,
"step": 160
},
{
"epoch": 0.04587874902044596,
"grad_norm": 0.90625,
"learning_rate": 9.999996215853209e-05,
"loss": 1.1637,
"step": 161
},
{
"epoch": 0.046163710194486,
"grad_norm": 0.91015625,
"learning_rate": 9.999991485671061e-05,
"loss": 1.3169,
"step": 162
},
{
"epoch": 0.04644867136852604,
"grad_norm": 0.95703125,
"learning_rate": 9.99998486341856e-05,
"loss": 1.216,
"step": 163
},
{
"epoch": 0.04673363254256607,
"grad_norm": 1.109375,
"learning_rate": 9.999976349098214e-05,
"loss": 1.3819,
"step": 164
},
{
"epoch": 0.04701859371660611,
"grad_norm": 0.9609375,
"learning_rate": 9.999965942713241e-05,
"loss": 1.3172,
"step": 165
},
{
"epoch": 0.04730355489064615,
"grad_norm": 0.87890625,
"learning_rate": 9.99995364426758e-05,
"loss": 1.1758,
"step": 166
},
{
"epoch": 0.047588516064686184,
"grad_norm": 0.8828125,
"learning_rate": 9.999939453765888e-05,
"loss": 1.1493,
"step": 167
},
{
"epoch": 0.047873477238726224,
"grad_norm": 0.89453125,
"learning_rate": 9.999923371213531e-05,
"loss": 1.2477,
"step": 168
},
{
"epoch": 0.048158438412766263,
"grad_norm": 0.90234375,
"learning_rate": 9.999905396616598e-05,
"loss": 1.2091,
"step": 169
},
{
"epoch": 0.048443399586806296,
"grad_norm": 0.8671875,
"learning_rate": 9.999885529981888e-05,
"loss": 1.1595,
"step": 170
},
{
"epoch": 0.048728360760846336,
"grad_norm": 0.9921875,
"learning_rate": 9.999863771316922e-05,
"loss": 1.2418,
"step": 171
},
{
"epoch": 0.04901332193488637,
"grad_norm": 0.84765625,
"learning_rate": 9.99984012062993e-05,
"loss": 1.1701,
"step": 172
},
{
"epoch": 0.04929828310892641,
"grad_norm": 0.890625,
"learning_rate": 9.999814577929864e-05,
"loss": 1.19,
"step": 173
},
{
"epoch": 0.04958324428296645,
"grad_norm": 0.8984375,
"learning_rate": 9.99978714322639e-05,
"loss": 1.1907,
"step": 174
},
{
"epoch": 0.04986820545700648,
"grad_norm": 0.9140625,
"learning_rate": 9.999757816529889e-05,
"loss": 1.209,
"step": 175
},
{
"epoch": 0.05015316663104652,
"grad_norm": 0.87890625,
"learning_rate": 9.99972659785146e-05,
"loss": 1.152,
"step": 176
},
{
"epoch": 0.05043812780508656,
"grad_norm": 0.859375,
"learning_rate": 9.999693487202915e-05,
"loss": 1.1788,
"step": 177
},
{
"epoch": 0.05072308897912659,
"grad_norm": 0.9609375,
"learning_rate": 9.999658484596782e-05,
"loss": 1.4722,
"step": 178
},
{
"epoch": 0.05100805015316663,
"grad_norm": 0.92578125,
"learning_rate": 9.999621590046311e-05,
"loss": 1.3028,
"step": 179
},
{
"epoch": 0.05129301132720667,
"grad_norm": 0.87109375,
"learning_rate": 9.999582803565459e-05,
"loss": 1.078,
"step": 180
},
{
"epoch": 0.051577972501246704,
"grad_norm": 0.91015625,
"learning_rate": 9.999542125168906e-05,
"loss": 1.1153,
"step": 181
},
{
"epoch": 0.05186293367528674,
"grad_norm": 0.9765625,
"learning_rate": 9.999499554872045e-05,
"loss": 1.3579,
"step": 182
},
{
"epoch": 0.052147894849326776,
"grad_norm": 0.84375,
"learning_rate": 9.999455092690985e-05,
"loss": 1.1063,
"step": 183
},
{
"epoch": 0.052432856023366815,
"grad_norm": 0.8359375,
"learning_rate": 9.999408738642551e-05,
"loss": 1.1352,
"step": 184
},
{
"epoch": 0.052717817197406855,
"grad_norm": 0.984375,
"learning_rate": 9.999360492744283e-05,
"loss": 1.3186,
"step": 185
},
{
"epoch": 0.05300277837144689,
"grad_norm": 0.984375,
"learning_rate": 9.99931035501444e-05,
"loss": 1.2133,
"step": 186
},
{
"epoch": 0.05328773954548693,
"grad_norm": 0.8828125,
"learning_rate": 9.999258325471994e-05,
"loss": 1.0683,
"step": 187
},
{
"epoch": 0.05357270071952697,
"grad_norm": 0.9765625,
"learning_rate": 9.999204404136633e-05,
"loss": 1.2635,
"step": 188
},
{
"epoch": 0.053857661893567,
"grad_norm": 0.875,
"learning_rate": 9.999148591028762e-05,
"loss": 1.079,
"step": 189
},
{
"epoch": 0.05414262306760704,
"grad_norm": 0.87109375,
"learning_rate": 9.999090886169502e-05,
"loss": 1.1602,
"step": 190
},
{
"epoch": 0.05442758424164708,
"grad_norm": 0.875,
"learning_rate": 9.999031289580689e-05,
"loss": 1.2393,
"step": 191
},
{
"epoch": 0.05471254541568711,
"grad_norm": 0.92578125,
"learning_rate": 9.998969801284877e-05,
"loss": 1.2645,
"step": 192
},
{
"epoch": 0.05499750658972715,
"grad_norm": 0.87890625,
"learning_rate": 9.99890642130533e-05,
"loss": 1.1608,
"step": 193
},
{
"epoch": 0.05528246776376719,
"grad_norm": 0.89453125,
"learning_rate": 9.998841149666036e-05,
"loss": 1.1396,
"step": 194
},
{
"epoch": 0.05556742893780722,
"grad_norm": 0.7734375,
"learning_rate": 9.998773986391692e-05,
"loss": 0.9592,
"step": 195
},
{
"epoch": 0.05585239011184726,
"grad_norm": 0.8046875,
"learning_rate": 9.998704931507716e-05,
"loss": 1.0386,
"step": 196
},
{
"epoch": 0.056137351285887295,
"grad_norm": 0.81640625,
"learning_rate": 9.998633985040235e-05,
"loss": 1.0778,
"step": 197
},
{
"epoch": 0.056422312459927335,
"grad_norm": 0.859375,
"learning_rate": 9.998561147016103e-05,
"loss": 1.1591,
"step": 198
},
{
"epoch": 0.056707273633967374,
"grad_norm": 0.90234375,
"learning_rate": 9.998486417462879e-05,
"loss": 1.2099,
"step": 199
},
{
"epoch": 0.05699223480800741,
"grad_norm": 1.0,
"learning_rate": 9.998409796408839e-05,
"loss": 1.2088,
"step": 200
},
{
"epoch": 0.057277195982047446,
"grad_norm": 0.81640625,
"learning_rate": 9.998331283882981e-05,
"loss": 1.0929,
"step": 201
},
{
"epoch": 0.057562157156087486,
"grad_norm": 0.98828125,
"learning_rate": 9.998250879915017e-05,
"loss": 1.2388,
"step": 202
},
{
"epoch": 0.05784711833012752,
"grad_norm": 0.84765625,
"learning_rate": 9.998168584535368e-05,
"loss": 1.1363,
"step": 203
},
{
"epoch": 0.05813207950416756,
"grad_norm": 0.8984375,
"learning_rate": 9.998084397775181e-05,
"loss": 1.3204,
"step": 204
},
{
"epoch": 0.0584170406782076,
"grad_norm": 0.890625,
"learning_rate": 9.997998319666311e-05,
"loss": 1.159,
"step": 205
},
{
"epoch": 0.05870200185224763,
"grad_norm": 0.8359375,
"learning_rate": 9.997910350241329e-05,
"loss": 1.2258,
"step": 206
},
{
"epoch": 0.05898696302628767,
"grad_norm": 0.875,
"learning_rate": 9.997820489533529e-05,
"loss": 1.1889,
"step": 207
},
{
"epoch": 0.0592719242003277,
"grad_norm": 0.87890625,
"learning_rate": 9.997728737576912e-05,
"loss": 1.11,
"step": 208
},
{
"epoch": 0.05955688537436774,
"grad_norm": 0.953125,
"learning_rate": 9.997635094406198e-05,
"loss": 1.2151,
"step": 209
},
{
"epoch": 0.05984184654840778,
"grad_norm": 0.85546875,
"learning_rate": 9.997539560056826e-05,
"loss": 1.0552,
"step": 210
},
{
"epoch": 0.060126807722447814,
"grad_norm": 0.91015625,
"learning_rate": 9.997442134564944e-05,
"loss": 1.1783,
"step": 211
},
{
"epoch": 0.060411768896487854,
"grad_norm": 0.8359375,
"learning_rate": 9.997342817967421e-05,
"loss": 1.1427,
"step": 212
},
{
"epoch": 0.06069673007052789,
"grad_norm": 0.94921875,
"learning_rate": 9.997241610301841e-05,
"loss": 1.2496,
"step": 213
},
{
"epoch": 0.060981691244567926,
"grad_norm": 0.921875,
"learning_rate": 9.997138511606501e-05,
"loss": 1.293,
"step": 214
},
{
"epoch": 0.061266652418607966,
"grad_norm": 0.94140625,
"learning_rate": 9.997033521920415e-05,
"loss": 1.2313,
"step": 215
},
{
"epoch": 0.061551613592648005,
"grad_norm": 0.859375,
"learning_rate": 9.996926641283314e-05,
"loss": 1.1479,
"step": 216
},
{
"epoch": 0.06183657476668804,
"grad_norm": 0.8203125,
"learning_rate": 9.996817869735642e-05,
"loss": 1.0872,
"step": 217
},
{
"epoch": 0.06212153594072808,
"grad_norm": 0.87890625,
"learning_rate": 9.996707207318558e-05,
"loss": 1.2486,
"step": 218
},
{
"epoch": 0.06240649711476811,
"grad_norm": 1.0,
"learning_rate": 9.996594654073943e-05,
"loss": 1.2349,
"step": 219
},
{
"epoch": 0.06269145828880815,
"grad_norm": 0.84765625,
"learning_rate": 9.996480210044384e-05,
"loss": 1.2223,
"step": 220
},
{
"epoch": 0.06297641946284818,
"grad_norm": 0.84765625,
"learning_rate": 9.996363875273192e-05,
"loss": 1.2541,
"step": 221
},
{
"epoch": 0.06326138063688823,
"grad_norm": 0.93359375,
"learning_rate": 9.996245649804386e-05,
"loss": 1.2676,
"step": 222
},
{
"epoch": 0.06354634181092826,
"grad_norm": 0.83984375,
"learning_rate": 9.996125533682708e-05,
"loss": 1.1631,
"step": 223
},
{
"epoch": 0.0638313029849683,
"grad_norm": 0.78515625,
"learning_rate": 9.99600352695361e-05,
"loss": 1.1795,
"step": 224
},
{
"epoch": 0.06411626415900834,
"grad_norm": 0.84765625,
"learning_rate": 9.99587962966326e-05,
"loss": 1.1826,
"step": 225
},
{
"epoch": 0.06440122533304837,
"grad_norm": 0.85546875,
"learning_rate": 9.995753841858546e-05,
"loss": 1.0396,
"step": 226
},
{
"epoch": 0.0646861865070884,
"grad_norm": 0.87109375,
"learning_rate": 9.995626163587065e-05,
"loss": 1.2267,
"step": 227
},
{
"epoch": 0.06497114768112845,
"grad_norm": 0.828125,
"learning_rate": 9.995496594897132e-05,
"loss": 1.2575,
"step": 228
},
{
"epoch": 0.06525610885516848,
"grad_norm": 0.91015625,
"learning_rate": 9.99536513583778e-05,
"loss": 1.2338,
"step": 229
},
{
"epoch": 0.06554107002920852,
"grad_norm": 0.8359375,
"learning_rate": 9.995231786458754e-05,
"loss": 1.1742,
"step": 230
},
{
"epoch": 0.06582603120324856,
"grad_norm": 0.9140625,
"learning_rate": 9.995096546810514e-05,
"loss": 1.28,
"step": 231
},
{
"epoch": 0.0661109923772886,
"grad_norm": 0.87109375,
"learning_rate": 9.994959416944238e-05,
"loss": 1.3021,
"step": 232
},
{
"epoch": 0.06639595355132863,
"grad_norm": 0.82421875,
"learning_rate": 9.994820396911819e-05,
"loss": 1.1945,
"step": 233
},
{
"epoch": 0.06668091472536866,
"grad_norm": 0.87890625,
"learning_rate": 9.994679486765863e-05,
"loss": 1.3251,
"step": 234
},
{
"epoch": 0.06696587589940871,
"grad_norm": 0.85546875,
"learning_rate": 9.994536686559692e-05,
"loss": 1.1902,
"step": 235
},
{
"epoch": 0.06725083707344874,
"grad_norm": 0.78125,
"learning_rate": 9.994391996347344e-05,
"loss": 1.0474,
"step": 236
},
{
"epoch": 0.06753579824748877,
"grad_norm": 0.86328125,
"learning_rate": 9.994245416183572e-05,
"loss": 1.125,
"step": 237
},
{
"epoch": 0.06782075942152882,
"grad_norm": 0.84765625,
"learning_rate": 9.994096946123846e-05,
"loss": 1.116,
"step": 238
},
{
"epoch": 0.06810572059556885,
"grad_norm": 0.80859375,
"learning_rate": 9.993946586224346e-05,
"loss": 1.1467,
"step": 239
},
{
"epoch": 0.06839068176960889,
"grad_norm": 0.82421875,
"learning_rate": 9.993794336541972e-05,
"loss": 1.0831,
"step": 240
},
{
"epoch": 0.06867564294364893,
"grad_norm": 0.859375,
"learning_rate": 9.993640197134338e-05,
"loss": 1.1449,
"step": 241
},
{
"epoch": 0.06896060411768896,
"grad_norm": 0.7890625,
"learning_rate": 9.99348416805977e-05,
"loss": 1.0175,
"step": 242
},
{
"epoch": 0.069245565291729,
"grad_norm": 0.78515625,
"learning_rate": 9.993326249377316e-05,
"loss": 1.1124,
"step": 243
},
{
"epoch": 0.06953052646576904,
"grad_norm": 0.87109375,
"learning_rate": 9.993166441146732e-05,
"loss": 1.1746,
"step": 244
},
{
"epoch": 0.06981548763980908,
"grad_norm": 0.87109375,
"learning_rate": 9.993004743428491e-05,
"loss": 1.1883,
"step": 245
},
{
"epoch": 0.07010044881384911,
"grad_norm": 0.7734375,
"learning_rate": 9.992841156283786e-05,
"loss": 1.0455,
"step": 246
},
{
"epoch": 0.07038540998788916,
"grad_norm": 0.83984375,
"learning_rate": 9.992675679774515e-05,
"loss": 1.2231,
"step": 247
},
{
"epoch": 0.07067037116192919,
"grad_norm": 0.8203125,
"learning_rate": 9.9925083139633e-05,
"loss": 1.1612,
"step": 248
},
{
"epoch": 0.07095533233596922,
"grad_norm": 0.78515625,
"learning_rate": 9.992339058913475e-05,
"loss": 1.1999,
"step": 249
},
{
"epoch": 0.07124029351000927,
"grad_norm": 0.8828125,
"learning_rate": 9.992167914689087e-05,
"loss": 1.1173,
"step": 250
},
{
"epoch": 0.0715252546840493,
"grad_norm": 0.78125,
"learning_rate": 9.991994881354903e-05,
"loss": 1.0398,
"step": 251
},
{
"epoch": 0.07181021585808933,
"grad_norm": 0.796875,
"learning_rate": 9.991819958976396e-05,
"loss": 1.1054,
"step": 252
},
{
"epoch": 0.07209517703212938,
"grad_norm": 0.80078125,
"learning_rate": 9.991643147619762e-05,
"loss": 1.0642,
"step": 253
},
{
"epoch": 0.07238013820616941,
"grad_norm": 0.94140625,
"learning_rate": 9.99146444735191e-05,
"loss": 1.1822,
"step": 254
},
{
"epoch": 0.07266509938020944,
"grad_norm": 0.81640625,
"learning_rate": 9.991283858240462e-05,
"loss": 1.1526,
"step": 255
},
{
"epoch": 0.07295006055424948,
"grad_norm": 0.84765625,
"learning_rate": 9.991101380353756e-05,
"loss": 1.2147,
"step": 256
},
{
"epoch": 0.07323502172828952,
"grad_norm": 0.87109375,
"learning_rate": 9.990917013760841e-05,
"loss": 1.2225,
"step": 257
},
{
"epoch": 0.07351998290232956,
"grad_norm": 0.7421875,
"learning_rate": 9.990730758531489e-05,
"loss": 1.0918,
"step": 258
},
{
"epoch": 0.07380494407636959,
"grad_norm": 0.80859375,
"learning_rate": 9.990542614736178e-05,
"loss": 0.9888,
"step": 259
},
{
"epoch": 0.07408990525040964,
"grad_norm": 0.78515625,
"learning_rate": 9.990352582446108e-05,
"loss": 1.03,
"step": 260
},
{
"epoch": 0.07437486642444967,
"grad_norm": 0.8359375,
"learning_rate": 9.990160661733185e-05,
"loss": 1.1679,
"step": 261
},
{
"epoch": 0.0746598275984897,
"grad_norm": 0.828125,
"learning_rate": 9.989966852670039e-05,
"loss": 1.1835,
"step": 262
},
{
"epoch": 0.07494478877252975,
"grad_norm": 0.8046875,
"learning_rate": 9.989771155330008e-05,
"loss": 1.1622,
"step": 263
},
{
"epoch": 0.07522974994656978,
"grad_norm": 0.87890625,
"learning_rate": 9.989573569787148e-05,
"loss": 1.2752,
"step": 264
},
{
"epoch": 0.07551471112060981,
"grad_norm": 0.796875,
"learning_rate": 9.989374096116229e-05,
"loss": 1.2444,
"step": 265
},
{
"epoch": 0.07579967229464986,
"grad_norm": 0.7734375,
"learning_rate": 9.989172734392731e-05,
"loss": 1.0388,
"step": 266
},
{
"epoch": 0.07608463346868989,
"grad_norm": 0.8203125,
"learning_rate": 9.988969484692855e-05,
"loss": 1.1025,
"step": 267
},
{
"epoch": 0.07636959464272992,
"grad_norm": 0.828125,
"learning_rate": 9.988764347093515e-05,
"loss": 1.1958,
"step": 268
},
{
"epoch": 0.07665455581676997,
"grad_norm": 0.8046875,
"learning_rate": 9.988557321672334e-05,
"loss": 1.1299,
"step": 269
},
{
"epoch": 0.07693951699081,
"grad_norm": 0.80859375,
"learning_rate": 9.988348408507657e-05,
"loss": 1.1703,
"step": 270
},
{
"epoch": 0.07722447816485004,
"grad_norm": 0.8671875,
"learning_rate": 9.988137607678541e-05,
"loss": 1.2471,
"step": 271
},
{
"epoch": 0.07750943933889008,
"grad_norm": 0.78515625,
"learning_rate": 9.987924919264751e-05,
"loss": 1.1829,
"step": 272
},
{
"epoch": 0.07779440051293011,
"grad_norm": 0.80859375,
"learning_rate": 9.987710343346774e-05,
"loss": 1.187,
"step": 273
},
{
"epoch": 0.07807936168697015,
"grad_norm": 0.79296875,
"learning_rate": 9.987493880005812e-05,
"loss": 1.0768,
"step": 274
},
{
"epoch": 0.0783643228610102,
"grad_norm": 0.7734375,
"learning_rate": 9.987275529323772e-05,
"loss": 1.0325,
"step": 275
},
{
"epoch": 0.07864928403505023,
"grad_norm": 0.79296875,
"learning_rate": 9.987055291383285e-05,
"loss": 1.0838,
"step": 276
},
{
"epoch": 0.07893424520909026,
"grad_norm": 0.86328125,
"learning_rate": 9.986833166267691e-05,
"loss": 1.2686,
"step": 277
},
{
"epoch": 0.07921920638313029,
"grad_norm": 0.81640625,
"learning_rate": 9.986609154061047e-05,
"loss": 1.1666,
"step": 278
},
{
"epoch": 0.07950416755717034,
"grad_norm": 0.79296875,
"learning_rate": 9.98638325484812e-05,
"loss": 1.2386,
"step": 279
},
{
"epoch": 0.07978912873121037,
"grad_norm": 0.8125,
"learning_rate": 9.986155468714394e-05,
"loss": 1.1716,
"step": 280
},
{
"epoch": 0.0800740899052504,
"grad_norm": 0.84375,
"learning_rate": 9.985925795746068e-05,
"loss": 1.1143,
"step": 281
},
{
"epoch": 0.08035905107929045,
"grad_norm": 0.8515625,
"learning_rate": 9.985694236030054e-05,
"loss": 1.0497,
"step": 282
},
{
"epoch": 0.08064401225333048,
"grad_norm": 0.74609375,
"learning_rate": 9.985460789653976e-05,
"loss": 1.0611,
"step": 283
},
{
"epoch": 0.08092897342737052,
"grad_norm": 0.78515625,
"learning_rate": 9.985225456706174e-05,
"loss": 1.1523,
"step": 284
},
{
"epoch": 0.08121393460141056,
"grad_norm": 0.80859375,
"learning_rate": 9.984988237275703e-05,
"loss": 1.1303,
"step": 285
},
{
"epoch": 0.0814988957754506,
"grad_norm": 0.76953125,
"learning_rate": 9.984749131452327e-05,
"loss": 1.0736,
"step": 286
},
{
"epoch": 0.08178385694949063,
"grad_norm": 0.84765625,
"learning_rate": 9.98450813932653e-05,
"loss": 1.1636,
"step": 287
},
{
"epoch": 0.08206881812353067,
"grad_norm": 0.8125,
"learning_rate": 9.984265260989506e-05,
"loss": 1.2484,
"step": 288
},
{
"epoch": 0.0823537792975707,
"grad_norm": 0.73828125,
"learning_rate": 9.984020496533165e-05,
"loss": 1.0472,
"step": 289
},
{
"epoch": 0.08263874047161074,
"grad_norm": 0.8046875,
"learning_rate": 9.983773846050126e-05,
"loss": 1.2355,
"step": 290
},
{
"epoch": 0.08292370164565079,
"grad_norm": 0.76171875,
"learning_rate": 9.983525309633729e-05,
"loss": 1.1402,
"step": 291
},
{
"epoch": 0.08320866281969082,
"grad_norm": 0.77734375,
"learning_rate": 9.983274887378022e-05,
"loss": 1.1719,
"step": 292
},
{
"epoch": 0.08349362399373085,
"grad_norm": 0.765625,
"learning_rate": 9.983022579377768e-05,
"loss": 1.1649,
"step": 293
},
{
"epoch": 0.0837785851677709,
"grad_norm": 0.80859375,
"learning_rate": 9.982768385728446e-05,
"loss": 1.2616,
"step": 294
},
{
"epoch": 0.08406354634181093,
"grad_norm": 0.86328125,
"learning_rate": 9.982512306526245e-05,
"loss": 1.2156,
"step": 295
},
{
"epoch": 0.08434850751585096,
"grad_norm": 0.84375,
"learning_rate": 9.98225434186807e-05,
"loss": 1.3357,
"step": 296
},
{
"epoch": 0.08463346868989101,
"grad_norm": 0.8046875,
"learning_rate": 9.981994491851537e-05,
"loss": 1.3112,
"step": 297
},
{
"epoch": 0.08491842986393104,
"grad_norm": 0.77734375,
"learning_rate": 9.981732756574979e-05,
"loss": 1.2327,
"step": 298
},
{
"epoch": 0.08520339103797107,
"grad_norm": 0.80859375,
"learning_rate": 9.98146913613744e-05,
"loss": 1.242,
"step": 299
},
{
"epoch": 0.0854883522120111,
"grad_norm": 0.77734375,
"learning_rate": 9.981203630638678e-05,
"loss": 1.1656,
"step": 300
},
{
"epoch": 0.08577331338605115,
"grad_norm": 0.765625,
"learning_rate": 9.980936240179163e-05,
"loss": 1.1681,
"step": 301
},
{
"epoch": 0.08605827456009119,
"grad_norm": 0.69921875,
"learning_rate": 9.980666964860081e-05,
"loss": 0.935,
"step": 302
},
{
"epoch": 0.08634323573413122,
"grad_norm": 0.8046875,
"learning_rate": 9.980395804783329e-05,
"loss": 1.071,
"step": 303
},
{
"epoch": 0.08662819690817126,
"grad_norm": 0.734375,
"learning_rate": 9.980122760051518e-05,
"loss": 1.0744,
"step": 304
},
{
"epoch": 0.0869131580822113,
"grad_norm": 1.0234375,
"learning_rate": 9.979847830767971e-05,
"loss": 1.1554,
"step": 305
},
{
"epoch": 0.08719811925625133,
"grad_norm": 0.79296875,
"learning_rate": 9.979571017036727e-05,
"loss": 1.1006,
"step": 306
},
{
"epoch": 0.08748308043029138,
"grad_norm": 0.8203125,
"learning_rate": 9.979292318962537e-05,
"loss": 1.1425,
"step": 307
},
{
"epoch": 0.08776804160433141,
"grad_norm": 0.73046875,
"learning_rate": 9.979011736650862e-05,
"loss": 1.0564,
"step": 308
},
{
"epoch": 0.08805300277837144,
"grad_norm": 0.79296875,
"learning_rate": 9.97872927020788e-05,
"loss": 1.2216,
"step": 309
},
{
"epoch": 0.08833796395241149,
"grad_norm": 0.7421875,
"learning_rate": 9.97844491974048e-05,
"loss": 1.0732,
"step": 310
},
{
"epoch": 0.08862292512645152,
"grad_norm": 0.72265625,
"learning_rate": 9.978158685356265e-05,
"loss": 0.9132,
"step": 311
},
{
"epoch": 0.08890788630049155,
"grad_norm": 0.76953125,
"learning_rate": 9.97787056716355e-05,
"loss": 1.1084,
"step": 312
},
{
"epoch": 0.0891928474745316,
"grad_norm": 0.7734375,
"learning_rate": 9.977580565271362e-05,
"loss": 1.1568,
"step": 313
},
{
"epoch": 0.08947780864857163,
"grad_norm": 0.79296875,
"learning_rate": 9.977288679789446e-05,
"loss": 1.1736,
"step": 314
},
{
"epoch": 0.08976276982261167,
"grad_norm": 0.75390625,
"learning_rate": 9.976994910828249e-05,
"loss": 1.1166,
"step": 315
},
{
"epoch": 0.09004773099665171,
"grad_norm": 0.7109375,
"learning_rate": 9.976699258498943e-05,
"loss": 1.0088,
"step": 316
},
{
"epoch": 0.09033269217069174,
"grad_norm": 0.8203125,
"learning_rate": 9.976401722913406e-05,
"loss": 1.2103,
"step": 317
},
{
"epoch": 0.09061765334473178,
"grad_norm": 0.75,
"learning_rate": 9.976102304184229e-05,
"loss": 1.1212,
"step": 318
},
{
"epoch": 0.09090261451877182,
"grad_norm": 0.859375,
"learning_rate": 9.975801002424715e-05,
"loss": 1.2837,
"step": 319
},
{
"epoch": 0.09118757569281186,
"grad_norm": 0.76171875,
"learning_rate": 9.975497817748886e-05,
"loss": 1.1607,
"step": 320
},
{
"epoch": 0.09147253686685189,
"grad_norm": 0.73046875,
"learning_rate": 9.975192750271467e-05,
"loss": 1.1187,
"step": 321
},
{
"epoch": 0.09175749804089192,
"grad_norm": 0.84375,
"learning_rate": 9.9748858001079e-05,
"loss": 1.2933,
"step": 322
},
{
"epoch": 0.09204245921493197,
"grad_norm": 0.73828125,
"learning_rate": 9.974576967374343e-05,
"loss": 1.0885,
"step": 323
},
{
"epoch": 0.092327420388972,
"grad_norm": 0.7890625,
"learning_rate": 9.97426625218766e-05,
"loss": 1.286,
"step": 324
},
{
"epoch": 0.09261238156301203,
"grad_norm": 0.8046875,
"learning_rate": 9.97395365466543e-05,
"loss": 1.1598,
"step": 325
},
{
"epoch": 0.09289734273705208,
"grad_norm": 0.7265625,
"learning_rate": 9.973639174925946e-05,
"loss": 1.0932,
"step": 326
},
{
"epoch": 0.09318230391109211,
"grad_norm": 0.7890625,
"learning_rate": 9.973322813088211e-05,
"loss": 1.2748,
"step": 327
},
{
"epoch": 0.09346726508513215,
"grad_norm": 0.765625,
"learning_rate": 9.973004569271942e-05,
"loss": 1.1366,
"step": 328
},
{
"epoch": 0.09375222625917219,
"grad_norm": 0.79296875,
"learning_rate": 9.972684443597565e-05,
"loss": 1.1482,
"step": 329
},
{
"epoch": 0.09403718743321222,
"grad_norm": 0.73046875,
"learning_rate": 9.972362436186223e-05,
"loss": 1.191,
"step": 330
},
{
"epoch": 0.09432214860725226,
"grad_norm": 0.75,
"learning_rate": 9.972038547159765e-05,
"loss": 1.135,
"step": 331
},
{
"epoch": 0.0946071097812923,
"grad_norm": 0.75390625,
"learning_rate": 9.97171277664076e-05,
"loss": 1.0789,
"step": 332
},
{
"epoch": 0.09489207095533234,
"grad_norm": 0.74609375,
"learning_rate": 9.971385124752479e-05,
"loss": 1.1557,
"step": 333
},
{
"epoch": 0.09517703212937237,
"grad_norm": 0.74609375,
"learning_rate": 9.971055591618915e-05,
"loss": 1.0509,
"step": 334
},
{
"epoch": 0.09546199330341242,
"grad_norm": 0.8359375,
"learning_rate": 9.970724177364762e-05,
"loss": 1.0501,
"step": 335
},
{
"epoch": 0.09574695447745245,
"grad_norm": 0.734375,
"learning_rate": 9.970390882115442e-05,
"loss": 1.0781,
"step": 336
},
{
"epoch": 0.09603191565149248,
"grad_norm": 0.79296875,
"learning_rate": 9.970055705997069e-05,
"loss": 1.1743,
"step": 337
},
{
"epoch": 0.09631687682553253,
"grad_norm": 0.83203125,
"learning_rate": 9.969718649136484e-05,
"loss": 1.1001,
"step": 338
},
{
"epoch": 0.09660183799957256,
"grad_norm": 0.76953125,
"learning_rate": 9.969379711661232e-05,
"loss": 1.1502,
"step": 339
},
{
"epoch": 0.09688679917361259,
"grad_norm": 0.7890625,
"learning_rate": 9.969038893699573e-05,
"loss": 1.0567,
"step": 340
},
{
"epoch": 0.09717176034765264,
"grad_norm": 0.765625,
"learning_rate": 9.968696195380479e-05,
"loss": 1.1007,
"step": 341
},
{
"epoch": 0.09745672152169267,
"grad_norm": 0.734375,
"learning_rate": 9.968351616833626e-05,
"loss": 1.1599,
"step": 342
},
{
"epoch": 0.0977416826957327,
"grad_norm": 0.69921875,
"learning_rate": 9.968005158189415e-05,
"loss": 1.0283,
"step": 343
},
{
"epoch": 0.09802664386977274,
"grad_norm": 0.83984375,
"learning_rate": 9.967656819578948e-05,
"loss": 1.1361,
"step": 344
},
{
"epoch": 0.09831160504381278,
"grad_norm": 0.88671875,
"learning_rate": 9.967306601134042e-05,
"loss": 1.1803,
"step": 345
},
{
"epoch": 0.09859656621785282,
"grad_norm": 0.78515625,
"learning_rate": 9.966954502987222e-05,
"loss": 1.1143,
"step": 346
},
{
"epoch": 0.09888152739189285,
"grad_norm": 0.74609375,
"learning_rate": 9.966600525271732e-05,
"loss": 1.0432,
"step": 347
},
{
"epoch": 0.0991664885659329,
"grad_norm": 0.94921875,
"learning_rate": 9.966244668121518e-05,
"loss": 1.2893,
"step": 348
},
{
"epoch": 0.09945144973997293,
"grad_norm": 0.73828125,
"learning_rate": 9.965886931671245e-05,
"loss": 1.0628,
"step": 349
},
{
"epoch": 0.09973641091401296,
"grad_norm": 0.7265625,
"learning_rate": 9.965527316056282e-05,
"loss": 0.9833,
"step": 350
},
{
"epoch": 0.100021372088053,
"grad_norm": 0.7265625,
"learning_rate": 9.965165821412716e-05,
"loss": 1.1057,
"step": 351
},
{
"epoch": 0.10030633326209304,
"grad_norm": 0.734375,
"learning_rate": 9.964802447877341e-05,
"loss": 1.1369,
"step": 352
},
{
"epoch": 0.10059129443613307,
"grad_norm": 0.69921875,
"learning_rate": 9.964437195587662e-05,
"loss": 0.9499,
"step": 353
},
{
"epoch": 0.10087625561017312,
"grad_norm": 1.0234375,
"learning_rate": 9.964070064681897e-05,
"loss": 1.1867,
"step": 354
},
{
"epoch": 0.10116121678421315,
"grad_norm": 0.73046875,
"learning_rate": 9.963701055298972e-05,
"loss": 1.0414,
"step": 355
},
{
"epoch": 0.10144617795825318,
"grad_norm": 0.80078125,
"learning_rate": 9.963330167578529e-05,
"loss": 1.1084,
"step": 356
},
{
"epoch": 0.10173113913229323,
"grad_norm": 0.796875,
"learning_rate": 9.962957401660915e-05,
"loss": 1.1254,
"step": 357
},
{
"epoch": 0.10201610030633326,
"grad_norm": 0.7578125,
"learning_rate": 9.96258275768719e-05,
"loss": 1.1325,
"step": 358
},
{
"epoch": 0.1023010614803733,
"grad_norm": 0.8203125,
"learning_rate": 9.962206235799124e-05,
"loss": 1.1208,
"step": 359
},
{
"epoch": 0.10258602265441334,
"grad_norm": 0.734375,
"learning_rate": 9.961827836139201e-05,
"loss": 1.1003,
"step": 360
},
{
"epoch": 0.10287098382845337,
"grad_norm": 0.78125,
"learning_rate": 9.96144755885061e-05,
"loss": 1.1071,
"step": 361
},
{
"epoch": 0.10315594500249341,
"grad_norm": 0.703125,
"learning_rate": 9.961065404077257e-05,
"loss": 1.0503,
"step": 362
},
{
"epoch": 0.10344090617653345,
"grad_norm": 0.8203125,
"learning_rate": 9.960681371963751e-05,
"loss": 1.2441,
"step": 363
},
{
"epoch": 0.10372586735057349,
"grad_norm": 0.71484375,
"learning_rate": 9.960295462655418e-05,
"loss": 1.0898,
"step": 364
},
{
"epoch": 0.10401082852461352,
"grad_norm": 0.765625,
"learning_rate": 9.959907676298293e-05,
"loss": 1.2068,
"step": 365
},
{
"epoch": 0.10429578969865355,
"grad_norm": 0.76953125,
"learning_rate": 9.959518013039118e-05,
"loss": 1.1186,
"step": 366
},
{
"epoch": 0.1045807508726936,
"grad_norm": 0.7734375,
"learning_rate": 9.959126473025347e-05,
"loss": 1.1527,
"step": 367
},
{
"epoch": 0.10486571204673363,
"grad_norm": 0.72265625,
"learning_rate": 9.958733056405143e-05,
"loss": 1.0711,
"step": 368
},
{
"epoch": 0.10515067322077366,
"grad_norm": 0.72265625,
"learning_rate": 9.958337763327385e-05,
"loss": 1.0928,
"step": 369
},
{
"epoch": 0.10543563439481371,
"grad_norm": 0.734375,
"learning_rate": 9.957940593941655e-05,
"loss": 1.0705,
"step": 370
},
{
"epoch": 0.10572059556885374,
"grad_norm": 0.703125,
"learning_rate": 9.957541548398249e-05,
"loss": 0.9933,
"step": 371
},
{
"epoch": 0.10600555674289378,
"grad_norm": 0.7578125,
"learning_rate": 9.957140626848169e-05,
"loss": 1.1651,
"step": 372
},
{
"epoch": 0.10629051791693382,
"grad_norm": 0.8125,
"learning_rate": 9.956737829443132e-05,
"loss": 1.1076,
"step": 373
},
{
"epoch": 0.10657547909097385,
"grad_norm": 0.7890625,
"learning_rate": 9.956333156335564e-05,
"loss": 1.1624,
"step": 374
},
{
"epoch": 0.10686044026501389,
"grad_norm": 0.70703125,
"learning_rate": 9.955926607678596e-05,
"loss": 0.9549,
"step": 375
},
{
"epoch": 0.10714540143905393,
"grad_norm": 0.76171875,
"learning_rate": 9.955518183626073e-05,
"loss": 1.0584,
"step": 376
},
{
"epoch": 0.10743036261309397,
"grad_norm": 0.7578125,
"learning_rate": 9.955107884332549e-05,
"loss": 1.1332,
"step": 377
},
{
"epoch": 0.107715323787134,
"grad_norm": 0.75,
"learning_rate": 9.954695709953287e-05,
"loss": 1.0829,
"step": 378
},
{
"epoch": 0.10800028496117405,
"grad_norm": 0.72265625,
"learning_rate": 9.95428166064426e-05,
"loss": 1.0994,
"step": 379
},
{
"epoch": 0.10828524613521408,
"grad_norm": 0.72265625,
"learning_rate": 9.953865736562151e-05,
"loss": 1.0907,
"step": 380
},
{
"epoch": 0.10857020730925411,
"grad_norm": 0.75390625,
"learning_rate": 9.953447937864351e-05,
"loss": 1.1457,
"step": 381
},
{
"epoch": 0.10885516848329416,
"grad_norm": 0.7109375,
"learning_rate": 9.953028264708962e-05,
"loss": 0.93,
"step": 382
},
{
"epoch": 0.10914012965733419,
"grad_norm": 0.7578125,
"learning_rate": 9.952606717254793e-05,
"loss": 1.1132,
"step": 383
},
{
"epoch": 0.10942509083137422,
"grad_norm": 0.78515625,
"learning_rate": 9.952183295661365e-05,
"loss": 1.0453,
"step": 384
},
{
"epoch": 0.10971005200541427,
"grad_norm": 0.76171875,
"learning_rate": 9.951758000088906e-05,
"loss": 1.0444,
"step": 385
},
{
"epoch": 0.1099950131794543,
"grad_norm": 0.7890625,
"learning_rate": 9.951330830698356e-05,
"loss": 0.9644,
"step": 386
},
{
"epoch": 0.11027997435349433,
"grad_norm": 0.6796875,
"learning_rate": 9.950901787651358e-05,
"loss": 1.0284,
"step": 387
},
{
"epoch": 0.11056493552753438,
"grad_norm": 0.74609375,
"learning_rate": 9.950470871110274e-05,
"loss": 1.0884,
"step": 388
},
{
"epoch": 0.11084989670157441,
"grad_norm": 0.7109375,
"learning_rate": 9.950038081238166e-05,
"loss": 1.1775,
"step": 389
},
{
"epoch": 0.11113485787561445,
"grad_norm": 0.71875,
"learning_rate": 9.949603418198808e-05,
"loss": 1.0584,
"step": 390
},
{
"epoch": 0.11141981904965448,
"grad_norm": 0.76171875,
"learning_rate": 9.949166882156681e-05,
"loss": 1.0921,
"step": 391
},
{
"epoch": 0.11170478022369452,
"grad_norm": 0.68359375,
"learning_rate": 9.948728473276982e-05,
"loss": 1.0868,
"step": 392
},
{
"epoch": 0.11198974139773456,
"grad_norm": 0.7265625,
"learning_rate": 9.948288191725607e-05,
"loss": 1.0989,
"step": 393
},
{
"epoch": 0.11227470257177459,
"grad_norm": 0.76953125,
"learning_rate": 9.947846037669166e-05,
"loss": 1.1235,
"step": 394
},
{
"epoch": 0.11255966374581464,
"grad_norm": 0.71875,
"learning_rate": 9.947402011274977e-05,
"loss": 1.0322,
"step": 395
},
{
"epoch": 0.11284462491985467,
"grad_norm": 0.73046875,
"learning_rate": 9.946956112711066e-05,
"loss": 1.1093,
"step": 396
},
{
"epoch": 0.1131295860938947,
"grad_norm": 0.73046875,
"learning_rate": 9.946508342146168e-05,
"loss": 1.0425,
"step": 397
},
{
"epoch": 0.11341454726793475,
"grad_norm": 0.765625,
"learning_rate": 9.946058699749725e-05,
"loss": 1.1546,
"step": 398
},
{
"epoch": 0.11369950844197478,
"grad_norm": 0.765625,
"learning_rate": 9.945607185691887e-05,
"loss": 1.1101,
"step": 399
},
{
"epoch": 0.11398446961601481,
"grad_norm": 0.76953125,
"learning_rate": 9.945153800143518e-05,
"loss": 1.1504,
"step": 400
},
{
"epoch": 0.11426943079005486,
"grad_norm": 0.7578125,
"learning_rate": 9.944698543276184e-05,
"loss": 1.1959,
"step": 401
},
{
"epoch": 0.11455439196409489,
"grad_norm": 0.76953125,
"learning_rate": 9.944241415262157e-05,
"loss": 1.1331,
"step": 402
},
{
"epoch": 0.11483935313813493,
"grad_norm": 0.66796875,
"learning_rate": 9.943782416274425e-05,
"loss": 0.9313,
"step": 403
},
{
"epoch": 0.11512431431217497,
"grad_norm": 0.69921875,
"learning_rate": 9.94332154648668e-05,
"loss": 1.0915,
"step": 404
},
{
"epoch": 0.115409275486215,
"grad_norm": 0.71484375,
"learning_rate": 9.94285880607332e-05,
"loss": 1.1251,
"step": 405
},
{
"epoch": 0.11569423666025504,
"grad_norm": 0.7734375,
"learning_rate": 9.942394195209454e-05,
"loss": 1.1683,
"step": 406
},
{
"epoch": 0.11597919783429508,
"grad_norm": 0.73046875,
"learning_rate": 9.941927714070897e-05,
"loss": 1.1182,
"step": 407
},
{
"epoch": 0.11626415900833512,
"grad_norm": 0.70703125,
"learning_rate": 9.941459362834173e-05,
"loss": 1.0603,
"step": 408
},
{
"epoch": 0.11654912018237515,
"grad_norm": 0.6953125,
"learning_rate": 9.940989141676512e-05,
"loss": 0.9558,
"step": 409
},
{
"epoch": 0.1168340813564152,
"grad_norm": 0.72265625,
"learning_rate": 9.940517050775852e-05,
"loss": 1.0887,
"step": 410
},
{
"epoch": 0.11711904253045523,
"grad_norm": 0.71484375,
"learning_rate": 9.940043090310843e-05,
"loss": 1.0875,
"step": 411
},
{
"epoch": 0.11740400370449526,
"grad_norm": 0.8671875,
"learning_rate": 9.939567260460835e-05,
"loss": 1.0975,
"step": 412
},
{
"epoch": 0.1176889648785353,
"grad_norm": 0.73046875,
"learning_rate": 9.93908956140589e-05,
"loss": 1.0867,
"step": 413
},
{
"epoch": 0.11797392605257534,
"grad_norm": 0.6875,
"learning_rate": 9.938609993326776e-05,
"loss": 1.0554,
"step": 414
},
{
"epoch": 0.11825888722661537,
"grad_norm": 0.65625,
"learning_rate": 9.938128556404969e-05,
"loss": 0.9474,
"step": 415
},
{
"epoch": 0.1185438484006554,
"grad_norm": 0.703125,
"learning_rate": 9.937645250822652e-05,
"loss": 0.9967,
"step": 416
},
{
"epoch": 0.11882880957469545,
"grad_norm": 0.71484375,
"learning_rate": 9.937160076762714e-05,
"loss": 1.0917,
"step": 417
},
{
"epoch": 0.11911377074873548,
"grad_norm": 0.828125,
"learning_rate": 9.936673034408752e-05,
"loss": 1.1522,
"step": 418
},
{
"epoch": 0.11939873192277552,
"grad_norm": 0.6875,
"learning_rate": 9.936184123945073e-05,
"loss": 1.0427,
"step": 419
},
{
"epoch": 0.11968369309681556,
"grad_norm": 0.74609375,
"learning_rate": 9.935693345556685e-05,
"loss": 1.0779,
"step": 420
},
{
"epoch": 0.1199686542708556,
"grad_norm": 0.73046875,
"learning_rate": 9.935200699429305e-05,
"loss": 1.1378,
"step": 421
},
{
"epoch": 0.12025361544489563,
"grad_norm": 0.71875,
"learning_rate": 9.93470618574936e-05,
"loss": 1.0899,
"step": 422
},
{
"epoch": 0.12053857661893567,
"grad_norm": 0.75,
"learning_rate": 9.93420980470398e-05,
"loss": 1.1852,
"step": 423
},
{
"epoch": 0.12082353779297571,
"grad_norm": 0.69140625,
"learning_rate": 9.933711556481003e-05,
"loss": 1.0719,
"step": 424
},
{
"epoch": 0.12110849896701574,
"grad_norm": 0.74609375,
"learning_rate": 9.933211441268972e-05,
"loss": 1.1908,
"step": 425
},
{
"epoch": 0.12139346014105579,
"grad_norm": 0.69921875,
"learning_rate": 9.932709459257141e-05,
"loss": 0.9042,
"step": 426
},
{
"epoch": 0.12167842131509582,
"grad_norm": 0.73046875,
"learning_rate": 9.932205610635465e-05,
"loss": 1.092,
"step": 427
},
{
"epoch": 0.12196338248913585,
"grad_norm": 0.74609375,
"learning_rate": 9.93169989559461e-05,
"loss": 1.1415,
"step": 428
},
{
"epoch": 0.1222483436631759,
"grad_norm": 0.74609375,
"learning_rate": 9.931192314325944e-05,
"loss": 1.1678,
"step": 429
},
{
"epoch": 0.12253330483721593,
"grad_norm": 0.7109375,
"learning_rate": 9.930682867021543e-05,
"loss": 1.1055,
"step": 430
},
{
"epoch": 0.12281826601125596,
"grad_norm": 0.70703125,
"learning_rate": 9.930171553874192e-05,
"loss": 0.9951,
"step": 431
},
{
"epoch": 0.12310322718529601,
"grad_norm": 0.69140625,
"learning_rate": 9.929658375077376e-05,
"loss": 1.0517,
"step": 432
},
{
"epoch": 0.12338818835933604,
"grad_norm": 0.75390625,
"learning_rate": 9.929143330825291e-05,
"loss": 1.2472,
"step": 433
},
{
"epoch": 0.12367314953337608,
"grad_norm": 0.7265625,
"learning_rate": 9.928626421312838e-05,
"loss": 1.0593,
"step": 434
},
{
"epoch": 0.12395811070741611,
"grad_norm": 0.73828125,
"learning_rate": 9.928107646735622e-05,
"loss": 1.1089,
"step": 435
},
{
"epoch": 0.12424307188145615,
"grad_norm": 0.75,
"learning_rate": 9.927587007289955e-05,
"loss": 1.1667,
"step": 436
},
{
"epoch": 0.12452803305549619,
"grad_norm": 0.6953125,
"learning_rate": 9.927064503172857e-05,
"loss": 1.1203,
"step": 437
},
{
"epoch": 0.12481299422953622,
"grad_norm": 0.71875,
"learning_rate": 9.926540134582048e-05,
"loss": 1.1971,
"step": 438
},
{
"epoch": 0.12509795540357627,
"grad_norm": 0.71484375,
"learning_rate": 9.926013901715958e-05,
"loss": 1.1693,
"step": 439
},
{
"epoch": 0.1253829165776163,
"grad_norm": 0.6796875,
"learning_rate": 9.925485804773721e-05,
"loss": 0.8981,
"step": 440
},
{
"epoch": 0.12566787775165633,
"grad_norm": 0.6796875,
"learning_rate": 9.924955843955177e-05,
"loss": 1.0178,
"step": 441
},
{
"epoch": 0.12595283892569636,
"grad_norm": 0.640625,
"learning_rate": 9.924424019460872e-05,
"loss": 0.942,
"step": 442
},
{
"epoch": 0.12623780009973642,
"grad_norm": 0.703125,
"learning_rate": 9.923890331492055e-05,
"loss": 1.0282,
"step": 443
},
{
"epoch": 0.12652276127377646,
"grad_norm": 0.6796875,
"learning_rate": 9.923354780250681e-05,
"loss": 0.91,
"step": 444
},
{
"epoch": 0.1268077224478165,
"grad_norm": 0.671875,
"learning_rate": 9.922817365939412e-05,
"loss": 1.0535,
"step": 445
},
{
"epoch": 0.12709268362185652,
"grad_norm": 0.67578125,
"learning_rate": 9.92227808876161e-05,
"loss": 1.1114,
"step": 446
},
{
"epoch": 0.12737764479589656,
"grad_norm": 0.73828125,
"learning_rate": 9.921736948921351e-05,
"loss": 1.2053,
"step": 447
},
{
"epoch": 0.1276626059699366,
"grad_norm": 0.734375,
"learning_rate": 9.921193946623406e-05,
"loss": 1.0866,
"step": 448
},
{
"epoch": 0.12794756714397662,
"grad_norm": 0.69140625,
"learning_rate": 9.920649082073255e-05,
"loss": 1.0868,
"step": 449
},
{
"epoch": 0.12823252831801668,
"grad_norm": 0.69140625,
"learning_rate": 9.920102355477086e-05,
"loss": 1.0975,
"step": 450
},
{
"epoch": 0.1285174894920567,
"grad_norm": 0.74609375,
"learning_rate": 9.919553767041785e-05,
"loss": 1.13,
"step": 451
},
{
"epoch": 0.12880245066609675,
"grad_norm": 0.71875,
"learning_rate": 9.919003316974949e-05,
"loss": 1.0777,
"step": 452
},
{
"epoch": 0.12908741184013678,
"grad_norm": 0.73046875,
"learning_rate": 9.918451005484873e-05,
"loss": 1.0536,
"step": 453
},
{
"epoch": 0.1293723730141768,
"grad_norm": 0.6484375,
"learning_rate": 9.917896832780563e-05,
"loss": 0.9851,
"step": 454
},
{
"epoch": 0.12965733418821684,
"grad_norm": 0.7265625,
"learning_rate": 9.917340799071724e-05,
"loss": 1.1509,
"step": 455
},
{
"epoch": 0.1299422953622569,
"grad_norm": 0.78125,
"learning_rate": 9.916782904568767e-05,
"loss": 1.2241,
"step": 456
},
{
"epoch": 0.13022725653629694,
"grad_norm": 0.7109375,
"learning_rate": 9.91622314948281e-05,
"loss": 1.0991,
"step": 457
},
{
"epoch": 0.13051221771033697,
"grad_norm": 0.72265625,
"learning_rate": 9.91566153402567e-05,
"loss": 1.1697,
"step": 458
},
{
"epoch": 0.130797178884377,
"grad_norm": 0.68359375,
"learning_rate": 9.915098058409873e-05,
"loss": 1.0856,
"step": 459
},
{
"epoch": 0.13108214005841703,
"grad_norm": 0.66015625,
"learning_rate": 9.914532722848644e-05,
"loss": 1.0753,
"step": 460
},
{
"epoch": 0.13136710123245707,
"grad_norm": 0.69921875,
"learning_rate": 9.913965527555916e-05,
"loss": 1.0088,
"step": 461
},
{
"epoch": 0.13165206240649713,
"grad_norm": 0.76953125,
"learning_rate": 9.913396472746324e-05,
"loss": 1.0529,
"step": 462
},
{
"epoch": 0.13193702358053716,
"grad_norm": 0.6875,
"learning_rate": 9.912825558635204e-05,
"loss": 1.0326,
"step": 463
},
{
"epoch": 0.1322219847545772,
"grad_norm": 0.73828125,
"learning_rate": 9.912252785438603e-05,
"loss": 1.0233,
"step": 464
},
{
"epoch": 0.13250694592861723,
"grad_norm": 0.68359375,
"learning_rate": 9.911678153373262e-05,
"loss": 1.0682,
"step": 465
},
{
"epoch": 0.13279190710265726,
"grad_norm": 0.72265625,
"learning_rate": 9.911101662656633e-05,
"loss": 1.0957,
"step": 466
},
{
"epoch": 0.1330768682766973,
"grad_norm": 0.69140625,
"learning_rate": 9.910523313506868e-05,
"loss": 1.0413,
"step": 467
},
{
"epoch": 0.13336182945073732,
"grad_norm": 0.69140625,
"learning_rate": 9.909943106142823e-05,
"loss": 0.9981,
"step": 468
},
{
"epoch": 0.13364679062477738,
"grad_norm": 0.71484375,
"learning_rate": 9.909361040784057e-05,
"loss": 1.1892,
"step": 469
},
{
"epoch": 0.13393175179881742,
"grad_norm": 0.69140625,
"learning_rate": 9.908777117650833e-05,
"loss": 0.9979,
"step": 470
},
{
"epoch": 0.13421671297285745,
"grad_norm": 0.76171875,
"learning_rate": 9.908191336964115e-05,
"loss": 1.1893,
"step": 471
},
{
"epoch": 0.13450167414689748,
"grad_norm": 0.703125,
"learning_rate": 9.907603698945569e-05,
"loss": 1.1279,
"step": 472
},
{
"epoch": 0.13478663532093751,
"grad_norm": 0.69140625,
"learning_rate": 9.907014203817571e-05,
"loss": 1.0797,
"step": 473
},
{
"epoch": 0.13507159649497755,
"grad_norm": 0.71484375,
"learning_rate": 9.906422851803189e-05,
"loss": 1.1168,
"step": 474
},
{
"epoch": 0.1353565576690176,
"grad_norm": 0.73046875,
"learning_rate": 9.905829643126204e-05,
"loss": 1.062,
"step": 475
},
{
"epoch": 0.13564151884305764,
"grad_norm": 0.75390625,
"learning_rate": 9.905234578011091e-05,
"loss": 1.0797,
"step": 476
},
{
"epoch": 0.13592648001709767,
"grad_norm": 0.7421875,
"learning_rate": 9.904637656683033e-05,
"loss": 1.0835,
"step": 477
},
{
"epoch": 0.1362114411911377,
"grad_norm": 0.68359375,
"learning_rate": 9.904038879367915e-05,
"loss": 0.9696,
"step": 478
},
{
"epoch": 0.13649640236517774,
"grad_norm": 0.7265625,
"learning_rate": 9.903438246292323e-05,
"loss": 1.1078,
"step": 479
},
{
"epoch": 0.13678136353921777,
"grad_norm": 0.703125,
"learning_rate": 9.902835757683541e-05,
"loss": 1.1059,
"step": 480
},
{
"epoch": 0.13706632471325783,
"grad_norm": 0.68359375,
"learning_rate": 9.902231413769568e-05,
"loss": 0.9766,
"step": 481
},
{
"epoch": 0.13735128588729786,
"grad_norm": 0.671875,
"learning_rate": 9.901625214779089e-05,
"loss": 0.9869,
"step": 482
},
{
"epoch": 0.1376362470613379,
"grad_norm": 0.6953125,
"learning_rate": 9.901017160941501e-05,
"loss": 1.0538,
"step": 483
},
{
"epoch": 0.13792120823537793,
"grad_norm": 0.734375,
"learning_rate": 9.900407252486902e-05,
"loss": 1.1201,
"step": 484
},
{
"epoch": 0.13820616940941796,
"grad_norm": 0.75390625,
"learning_rate": 9.899795489646088e-05,
"loss": 1.137,
"step": 485
},
{
"epoch": 0.138491130583458,
"grad_norm": 0.71484375,
"learning_rate": 9.899181872650562e-05,
"loss": 1.1196,
"step": 486
},
{
"epoch": 0.13877609175749805,
"grad_norm": 0.7265625,
"learning_rate": 9.898566401732523e-05,
"loss": 1.0865,
"step": 487
},
{
"epoch": 0.1390610529315381,
"grad_norm": 0.7265625,
"learning_rate": 9.897949077124876e-05,
"loss": 1.0929,
"step": 488
},
{
"epoch": 0.13934601410557812,
"grad_norm": 0.6640625,
"learning_rate": 9.897329899061225e-05,
"loss": 1.0018,
"step": 489
},
{
"epoch": 0.13963097527961815,
"grad_norm": 0.7109375,
"learning_rate": 9.896708867775874e-05,
"loss": 1.1109,
"step": 490
},
{
"epoch": 0.13991593645365819,
"grad_norm": 0.69921875,
"learning_rate": 9.896085983503833e-05,
"loss": 1.0882,
"step": 491
},
{
"epoch": 0.14020089762769822,
"grad_norm": 0.69140625,
"learning_rate": 9.895461246480812e-05,
"loss": 1.0533,
"step": 492
},
{
"epoch": 0.14048585880173825,
"grad_norm": 0.71484375,
"learning_rate": 9.894834656943217e-05,
"loss": 1.0354,
"step": 493
},
{
"epoch": 0.1407708199757783,
"grad_norm": 0.82421875,
"learning_rate": 9.894206215128161e-05,
"loss": 1.1812,
"step": 494
},
{
"epoch": 0.14105578114981834,
"grad_norm": 0.72265625,
"learning_rate": 9.893575921273455e-05,
"loss": 1.0564,
"step": 495
},
{
"epoch": 0.14134074232385838,
"grad_norm": 0.71484375,
"learning_rate": 9.892943775617612e-05,
"loss": 1.0851,
"step": 496
},
{
"epoch": 0.1416257034978984,
"grad_norm": 0.671875,
"learning_rate": 9.892309778399843e-05,
"loss": 0.9474,
"step": 497
},
{
"epoch": 0.14191066467193844,
"grad_norm": 0.7578125,
"learning_rate": 9.891673929860065e-05,
"loss": 1.1343,
"step": 498
},
{
"epoch": 0.14219562584597847,
"grad_norm": 0.6796875,
"learning_rate": 9.891036230238891e-05,
"loss": 0.9444,
"step": 499
},
{
"epoch": 0.14248058702001853,
"grad_norm": 0.71875,
"learning_rate": 9.890396679777634e-05,
"loss": 1.1215,
"step": 500
},
{
"epoch": 0.14276554819405857,
"grad_norm": 0.6796875,
"learning_rate": 9.889755278718313e-05,
"loss": 1.0703,
"step": 501
},
{
"epoch": 0.1430505093680986,
"grad_norm": 0.67578125,
"learning_rate": 9.889112027303642e-05,
"loss": 0.9277,
"step": 502
},
{
"epoch": 0.14333547054213863,
"grad_norm": 0.70703125,
"learning_rate": 9.888466925777036e-05,
"loss": 1.0271,
"step": 503
},
{
"epoch": 0.14362043171617866,
"grad_norm": 0.7734375,
"learning_rate": 9.887819974382612e-05,
"loss": 1.0812,
"step": 504
},
{
"epoch": 0.1439053928902187,
"grad_norm": 0.6875,
"learning_rate": 9.887171173365184e-05,
"loss": 1.0638,
"step": 505
},
{
"epoch": 0.14419035406425876,
"grad_norm": 0.73046875,
"learning_rate": 9.886520522970271e-05,
"loss": 1.1232,
"step": 506
},
{
"epoch": 0.1444753152382988,
"grad_norm": 0.6875,
"learning_rate": 9.885868023444087e-05,
"loss": 1.0195,
"step": 507
},
{
"epoch": 0.14476027641233882,
"grad_norm": 0.71484375,
"learning_rate": 9.885213675033547e-05,
"loss": 1.0605,
"step": 508
},
{
"epoch": 0.14504523758637886,
"grad_norm": 0.6875,
"learning_rate": 9.884557477986266e-05,
"loss": 1.0994,
"step": 509
},
{
"epoch": 0.1453301987604189,
"grad_norm": 0.72265625,
"learning_rate": 9.883899432550559e-05,
"loss": 1.1552,
"step": 510
},
{
"epoch": 0.14561515993445892,
"grad_norm": 0.67578125,
"learning_rate": 9.883239538975442e-05,
"loss": 1.0431,
"step": 511
},
{
"epoch": 0.14590012110849895,
"grad_norm": 0.66015625,
"learning_rate": 9.882577797510624e-05,
"loss": 1.0683,
"step": 512
},
{
"epoch": 0.14618508228253901,
"grad_norm": 0.6875,
"learning_rate": 9.881914208406522e-05,
"loss": 1.0933,
"step": 513
},
{
"epoch": 0.14647004345657905,
"grad_norm": 0.65625,
"learning_rate": 9.881248771914247e-05,
"loss": 0.9927,
"step": 514
},
{
"epoch": 0.14675500463061908,
"grad_norm": 0.734375,
"learning_rate": 9.880581488285607e-05,
"loss": 1.1179,
"step": 515
},
{
"epoch": 0.1470399658046591,
"grad_norm": 0.7578125,
"learning_rate": 9.879912357773115e-05,
"loss": 1.0898,
"step": 516
},
{
"epoch": 0.14732492697869914,
"grad_norm": 0.7734375,
"learning_rate": 9.87924138062998e-05,
"loss": 1.1643,
"step": 517
},
{
"epoch": 0.14760988815273918,
"grad_norm": 0.71875,
"learning_rate": 9.878568557110108e-05,
"loss": 1.0915,
"step": 518
},
{
"epoch": 0.14789484932677924,
"grad_norm": 0.6875,
"learning_rate": 9.877893887468107e-05,
"loss": 1.0033,
"step": 519
},
{
"epoch": 0.14817981050081927,
"grad_norm": 0.75,
"learning_rate": 9.877217371959277e-05,
"loss": 1.1139,
"step": 520
},
{
"epoch": 0.1484647716748593,
"grad_norm": 0.66796875,
"learning_rate": 9.876539010839629e-05,
"loss": 1.007,
"step": 521
},
{
"epoch": 0.14874973284889934,
"grad_norm": 0.6796875,
"learning_rate": 9.87585880436586e-05,
"loss": 0.9808,
"step": 522
},
{
"epoch": 0.14903469402293937,
"grad_norm": 0.6328125,
"learning_rate": 9.87517675279537e-05,
"loss": 0.9804,
"step": 523
},
{
"epoch": 0.1493196551969794,
"grad_norm": 0.7109375,
"learning_rate": 9.87449285638626e-05,
"loss": 1.0908,
"step": 524
},
{
"epoch": 0.14960461637101946,
"grad_norm": 0.73828125,
"learning_rate": 9.873807115397325e-05,
"loss": 1.1514,
"step": 525
},
{
"epoch": 0.1498895775450595,
"grad_norm": 0.71875,
"learning_rate": 9.87311953008806e-05,
"loss": 1.0292,
"step": 526
},
{
"epoch": 0.15017453871909953,
"grad_norm": 0.7109375,
"learning_rate": 9.872430100718655e-05,
"loss": 1.1896,
"step": 527
},
{
"epoch": 0.15045949989313956,
"grad_norm": 0.7109375,
"learning_rate": 9.871738827550003e-05,
"loss": 1.1468,
"step": 528
},
{
"epoch": 0.1507444610671796,
"grad_norm": 0.6953125,
"learning_rate": 9.871045710843691e-05,
"loss": 1.0774,
"step": 529
},
{
"epoch": 0.15102942224121962,
"grad_norm": 0.734375,
"learning_rate": 9.870350750862006e-05,
"loss": 1.149,
"step": 530
},
{
"epoch": 0.15131438341525968,
"grad_norm": 0.703125,
"learning_rate": 9.869653947867928e-05,
"loss": 0.9858,
"step": 531
},
{
"epoch": 0.15159934458929972,
"grad_norm": 0.78515625,
"learning_rate": 9.86895530212514e-05,
"loss": 1.2171,
"step": 532
},
{
"epoch": 0.15188430576333975,
"grad_norm": 0.6953125,
"learning_rate": 9.86825481389802e-05,
"loss": 1.0752,
"step": 533
},
{
"epoch": 0.15216926693737978,
"grad_norm": 0.71484375,
"learning_rate": 9.86755248345164e-05,
"loss": 1.0845,
"step": 534
},
{
"epoch": 0.15245422811141982,
"grad_norm": 0.65625,
"learning_rate": 9.866848311051775e-05,
"loss": 0.9647,
"step": 535
},
{
"epoch": 0.15273918928545985,
"grad_norm": 0.7734375,
"learning_rate": 9.866142296964893e-05,
"loss": 1.1936,
"step": 536
},
{
"epoch": 0.15302415045949988,
"grad_norm": 0.75,
"learning_rate": 9.865434441458162e-05,
"loss": 1.05,
"step": 537
},
{
"epoch": 0.15330911163353994,
"grad_norm": 0.67578125,
"learning_rate": 9.864724744799443e-05,
"loss": 0.9444,
"step": 538
},
{
"epoch": 0.15359407280757997,
"grad_norm": 0.703125,
"learning_rate": 9.864013207257296e-05,
"loss": 0.9836,
"step": 539
},
{
"epoch": 0.15387903398162,
"grad_norm": 0.66796875,
"learning_rate": 9.863299829100978e-05,
"loss": 1.0449,
"step": 540
},
{
"epoch": 0.15416399515566004,
"grad_norm": 0.671875,
"learning_rate": 9.86258461060044e-05,
"loss": 1.0249,
"step": 541
},
{
"epoch": 0.15444895632970007,
"grad_norm": 0.70703125,
"learning_rate": 9.861867552026334e-05,
"loss": 1.0781,
"step": 542
},
{
"epoch": 0.1547339175037401,
"grad_norm": 0.73046875,
"learning_rate": 9.861148653650003e-05,
"loss": 1.2094,
"step": 543
},
{
"epoch": 0.15501887867778016,
"grad_norm": 0.70703125,
"learning_rate": 9.86042791574349e-05,
"loss": 1.022,
"step": 544
},
{
"epoch": 0.1553038398518202,
"grad_norm": 0.71484375,
"learning_rate": 9.859705338579533e-05,
"loss": 1.1364,
"step": 545
},
{
"epoch": 0.15558880102586023,
"grad_norm": 0.6796875,
"learning_rate": 9.858980922431565e-05,
"loss": 1.0014,
"step": 546
},
{
"epoch": 0.15587376219990026,
"grad_norm": 0.68359375,
"learning_rate": 9.858254667573715e-05,
"loss": 1.0937,
"step": 547
},
{
"epoch": 0.1561587233739403,
"grad_norm": 0.6015625,
"learning_rate": 9.857526574280811e-05,
"loss": 0.9719,
"step": 548
},
{
"epoch": 0.15644368454798033,
"grad_norm": 0.71875,
"learning_rate": 9.856796642828372e-05,
"loss": 1.0532,
"step": 549
},
{
"epoch": 0.1567286457220204,
"grad_norm": 0.671875,
"learning_rate": 9.856064873492616e-05,
"loss": 0.9626,
"step": 550
},
{
"epoch": 0.15701360689606042,
"grad_norm": 0.7109375,
"learning_rate": 9.855331266550455e-05,
"loss": 1.0518,
"step": 551
},
{
"epoch": 0.15729856807010045,
"grad_norm": 0.70703125,
"learning_rate": 9.854595822279496e-05,
"loss": 1.1948,
"step": 552
},
{
"epoch": 0.15758352924414049,
"grad_norm": 0.66015625,
"learning_rate": 9.853858540958043e-05,
"loss": 0.9794,
"step": 553
},
{
"epoch": 0.15786849041818052,
"grad_norm": 0.671875,
"learning_rate": 9.853119422865094e-05,
"loss": 0.9747,
"step": 554
},
{
"epoch": 0.15815345159222055,
"grad_norm": 0.6640625,
"learning_rate": 9.852378468280341e-05,
"loss": 0.9863,
"step": 555
},
{
"epoch": 0.15843841276626058,
"grad_norm": 0.75390625,
"learning_rate": 9.851635677484174e-05,
"loss": 1.1277,
"step": 556
},
{
"epoch": 0.15872337394030064,
"grad_norm": 0.640625,
"learning_rate": 9.850891050757674e-05,
"loss": 0.9376,
"step": 557
},
{
"epoch": 0.15900833511434068,
"grad_norm": 0.703125,
"learning_rate": 9.85014458838262e-05,
"loss": 1.0365,
"step": 558
},
{
"epoch": 0.1592932962883807,
"grad_norm": 0.7109375,
"learning_rate": 9.849396290641483e-05,
"loss": 1.1507,
"step": 559
},
{
"epoch": 0.15957825746242074,
"grad_norm": 0.67578125,
"learning_rate": 9.848646157817432e-05,
"loss": 1.0318,
"step": 560
},
{
"epoch": 0.15986321863646077,
"grad_norm": 0.7109375,
"learning_rate": 9.847894190194327e-05,
"loss": 1.0928,
"step": 561
},
{
"epoch": 0.1601481798105008,
"grad_norm": 0.67578125,
"learning_rate": 9.847140388056724e-05,
"loss": 1.099,
"step": 562
},
{
"epoch": 0.16043314098454087,
"grad_norm": 0.69140625,
"learning_rate": 9.846384751689872e-05,
"loss": 1.0512,
"step": 563
},
{
"epoch": 0.1607181021585809,
"grad_norm": 0.640625,
"learning_rate": 9.845627281379714e-05,
"loss": 0.9933,
"step": 564
},
{
"epoch": 0.16100306333262093,
"grad_norm": 0.6640625,
"learning_rate": 9.844867977412892e-05,
"loss": 0.9725,
"step": 565
},
{
"epoch": 0.16128802450666097,
"grad_norm": 0.6953125,
"learning_rate": 9.844106840076734e-05,
"loss": 0.9792,
"step": 566
},
{
"epoch": 0.161572985680701,
"grad_norm": 0.6171875,
"learning_rate": 9.843343869659267e-05,
"loss": 0.892,
"step": 567
},
{
"epoch": 0.16185794685474103,
"grad_norm": 0.703125,
"learning_rate": 9.84257906644921e-05,
"loss": 1.0554,
"step": 568
},
{
"epoch": 0.1621429080287811,
"grad_norm": 0.64453125,
"learning_rate": 9.841812430735974e-05,
"loss": 0.9122,
"step": 569
},
{
"epoch": 0.16242786920282112,
"grad_norm": 0.640625,
"learning_rate": 9.841043962809669e-05,
"loss": 0.9609,
"step": 570
},
{
"epoch": 0.16271283037686116,
"grad_norm": 0.640625,
"learning_rate": 9.840273662961092e-05,
"loss": 0.9692,
"step": 571
},
{
"epoch": 0.1629977915509012,
"grad_norm": 0.7265625,
"learning_rate": 9.839501531481736e-05,
"loss": 1.0936,
"step": 572
},
{
"epoch": 0.16328275272494122,
"grad_norm": 0.703125,
"learning_rate": 9.838727568663787e-05,
"loss": 1.065,
"step": 573
},
{
"epoch": 0.16356771389898125,
"grad_norm": 0.7109375,
"learning_rate": 9.837951774800125e-05,
"loss": 1.0459,
"step": 574
},
{
"epoch": 0.16385267507302131,
"grad_norm": 0.703125,
"learning_rate": 9.83717415018432e-05,
"loss": 1.1155,
"step": 575
},
{
"epoch": 0.16413763624706135,
"grad_norm": 0.6640625,
"learning_rate": 9.836394695110639e-05,
"loss": 1.0229,
"step": 576
},
{
"epoch": 0.16442259742110138,
"grad_norm": 0.671875,
"learning_rate": 9.835613409874038e-05,
"loss": 1.0946,
"step": 577
},
{
"epoch": 0.1647075585951414,
"grad_norm": 0.7109375,
"learning_rate": 9.834830294770165e-05,
"loss": 1.1005,
"step": 578
},
{
"epoch": 0.16499251976918144,
"grad_norm": 0.63671875,
"learning_rate": 9.834045350095364e-05,
"loss": 1.0206,
"step": 579
},
{
"epoch": 0.16527748094322148,
"grad_norm": 0.7109375,
"learning_rate": 9.833258576146671e-05,
"loss": 0.986,
"step": 580
},
{
"epoch": 0.1655624421172615,
"grad_norm": 0.65625,
"learning_rate": 9.832469973221812e-05,
"loss": 1.0962,
"step": 581
},
{
"epoch": 0.16584740329130157,
"grad_norm": 0.71484375,
"learning_rate": 9.831679541619203e-05,
"loss": 1.0193,
"step": 582
},
{
"epoch": 0.1661323644653416,
"grad_norm": 0.62890625,
"learning_rate": 9.830887281637959e-05,
"loss": 0.895,
"step": 583
},
{
"epoch": 0.16641732563938164,
"grad_norm": 0.68359375,
"learning_rate": 9.830093193577881e-05,
"loss": 1.0524,
"step": 584
},
{
"epoch": 0.16670228681342167,
"grad_norm": 0.6875,
"learning_rate": 9.829297277739465e-05,
"loss": 1.0872,
"step": 585
},
{
"epoch": 0.1669872479874617,
"grad_norm": 0.703125,
"learning_rate": 9.828499534423894e-05,
"loss": 1.1642,
"step": 586
},
{
"epoch": 0.16727220916150173,
"grad_norm": 0.7734375,
"learning_rate": 9.827699963933048e-05,
"loss": 1.1197,
"step": 587
},
{
"epoch": 0.1675571703355418,
"grad_norm": 0.671875,
"learning_rate": 9.826898566569495e-05,
"loss": 1.0563,
"step": 588
},
{
"epoch": 0.16784213150958183,
"grad_norm": 0.69921875,
"learning_rate": 9.8260953426365e-05,
"loss": 1.1249,
"step": 589
},
{
"epoch": 0.16812709268362186,
"grad_norm": 0.65234375,
"learning_rate": 9.82529029243801e-05,
"loss": 0.987,
"step": 590
},
{
"epoch": 0.1684120538576619,
"grad_norm": 0.734375,
"learning_rate": 9.824483416278669e-05,
"loss": 1.1132,
"step": 591
},
{
"epoch": 0.16869701503170192,
"grad_norm": 0.8125,
"learning_rate": 9.823674714463811e-05,
"loss": 1.0831,
"step": 592
},
{
"epoch": 0.16898197620574196,
"grad_norm": 0.65234375,
"learning_rate": 9.82286418729946e-05,
"loss": 1.0008,
"step": 593
},
{
"epoch": 0.16926693737978202,
"grad_norm": 0.671875,
"learning_rate": 9.822051835092332e-05,
"loss": 1.0579,
"step": 594
},
{
"epoch": 0.16955189855382205,
"grad_norm": 0.734375,
"learning_rate": 9.821237658149834e-05,
"loss": 1.0904,
"step": 595
},
{
"epoch": 0.16983685972786208,
"grad_norm": 0.67578125,
"learning_rate": 9.820421656780062e-05,
"loss": 1.0895,
"step": 596
},
{
"epoch": 0.17012182090190212,
"grad_norm": 0.63671875,
"learning_rate": 9.819603831291803e-05,
"loss": 0.9854,
"step": 597
},
{
"epoch": 0.17040678207594215,
"grad_norm": 0.61328125,
"learning_rate": 9.818784181994532e-05,
"loss": 0.966,
"step": 598
},
{
"epoch": 0.17069174324998218,
"grad_norm": 0.6875,
"learning_rate": 9.817962709198421e-05,
"loss": 1.0342,
"step": 599
},
{
"epoch": 0.1709767044240222,
"grad_norm": 0.7265625,
"learning_rate": 9.817139413214323e-05,
"loss": 1.1733,
"step": 600
},
{
"epoch": 0.17126166559806227,
"grad_norm": 0.63671875,
"learning_rate": 9.816314294353785e-05,
"loss": 0.9962,
"step": 601
},
{
"epoch": 0.1715466267721023,
"grad_norm": 0.734375,
"learning_rate": 9.815487352929048e-05,
"loss": 1.1545,
"step": 602
},
{
"epoch": 0.17183158794614234,
"grad_norm": 0.671875,
"learning_rate": 9.814658589253037e-05,
"loss": 1.0865,
"step": 603
},
{
"epoch": 0.17211654912018237,
"grad_norm": 0.6640625,
"learning_rate": 9.813828003639367e-05,
"loss": 1.0435,
"step": 604
},
{
"epoch": 0.1724015102942224,
"grad_norm": 0.65625,
"learning_rate": 9.812995596402346e-05,
"loss": 1.0579,
"step": 605
},
{
"epoch": 0.17268647146826244,
"grad_norm": 0.64453125,
"learning_rate": 9.812161367856968e-05,
"loss": 0.9626,
"step": 606
},
{
"epoch": 0.1729714326423025,
"grad_norm": 0.6875,
"learning_rate": 9.811325318318916e-05,
"loss": 1.0333,
"step": 607
},
{
"epoch": 0.17325639381634253,
"grad_norm": 0.66796875,
"learning_rate": 9.810487448104568e-05,
"loss": 1.0203,
"step": 608
},
{
"epoch": 0.17354135499038256,
"grad_norm": 0.69140625,
"learning_rate": 9.809647757530981e-05,
"loss": 1.1151,
"step": 609
},
{
"epoch": 0.1738263161644226,
"grad_norm": 0.6796875,
"learning_rate": 9.808806246915909e-05,
"loss": 1.074,
"step": 610
},
{
"epoch": 0.17411127733846263,
"grad_norm": 0.71875,
"learning_rate": 9.807962916577792e-05,
"loss": 1.1281,
"step": 611
},
{
"epoch": 0.17439623851250266,
"grad_norm": 0.66015625,
"learning_rate": 9.807117766835758e-05,
"loss": 0.9904,
"step": 612
},
{
"epoch": 0.17468119968654272,
"grad_norm": 0.66796875,
"learning_rate": 9.806270798009624e-05,
"loss": 1.0156,
"step": 613
},
{
"epoch": 0.17496616086058275,
"grad_norm": 0.671875,
"learning_rate": 9.805422010419897e-05,
"loss": 1.1171,
"step": 614
},
{
"epoch": 0.17525112203462279,
"grad_norm": 0.73828125,
"learning_rate": 9.804571404387768e-05,
"loss": 1.1166,
"step": 615
},
{
"epoch": 0.17553608320866282,
"grad_norm": 0.69140625,
"learning_rate": 9.803718980235121e-05,
"loss": 1.1091,
"step": 616
},
{
"epoch": 0.17582104438270285,
"grad_norm": 0.6796875,
"learning_rate": 9.802864738284527e-05,
"loss": 1.1542,
"step": 617
},
{
"epoch": 0.17610600555674288,
"grad_norm": 0.74609375,
"learning_rate": 9.80200867885924e-05,
"loss": 1.1295,
"step": 618
},
{
"epoch": 0.17639096673078294,
"grad_norm": 0.609375,
"learning_rate": 9.801150802283207e-05,
"loss": 0.9426,
"step": 619
},
{
"epoch": 0.17667592790482298,
"grad_norm": 0.69140625,
"learning_rate": 9.800291108881063e-05,
"loss": 1.0658,
"step": 620
},
{
"epoch": 0.176960889078863,
"grad_norm": 0.6640625,
"learning_rate": 9.799429598978127e-05,
"loss": 0.965,
"step": 621
},
{
"epoch": 0.17724585025290304,
"grad_norm": 0.6640625,
"learning_rate": 9.798566272900404e-05,
"loss": 1.0469,
"step": 622
},
{
"epoch": 0.17753081142694307,
"grad_norm": 0.68359375,
"learning_rate": 9.797701130974597e-05,
"loss": 1.1454,
"step": 623
},
{
"epoch": 0.1778157726009831,
"grad_norm": 0.62890625,
"learning_rate": 9.796834173528081e-05,
"loss": 0.8913,
"step": 624
},
{
"epoch": 0.17810073377502314,
"grad_norm": 0.6875,
"learning_rate": 9.79596540088893e-05,
"loss": 1.0771,
"step": 625
},
{
"epoch": 0.1783856949490632,
"grad_norm": 0.6796875,
"learning_rate": 9.795094813385898e-05,
"loss": 1.1235,
"step": 626
},
{
"epoch": 0.17867065612310323,
"grad_norm": 0.69921875,
"learning_rate": 9.794222411348429e-05,
"loss": 1.0831,
"step": 627
},
{
"epoch": 0.17895561729714327,
"grad_norm": 0.66015625,
"learning_rate": 9.793348195106652e-05,
"loss": 0.9887,
"step": 628
},
{
"epoch": 0.1792405784711833,
"grad_norm": 0.66015625,
"learning_rate": 9.792472164991384e-05,
"loss": 1.0003,
"step": 629
},
{
"epoch": 0.17952553964522333,
"grad_norm": 0.66796875,
"learning_rate": 9.79159432133413e-05,
"loss": 0.9718,
"step": 630
},
{
"epoch": 0.17981050081926336,
"grad_norm": 0.68359375,
"learning_rate": 9.790714664467073e-05,
"loss": 0.9599,
"step": 631
},
{
"epoch": 0.18009546199330342,
"grad_norm": 0.65625,
"learning_rate": 9.789833194723094e-05,
"loss": 1.0511,
"step": 632
},
{
"epoch": 0.18038042316734346,
"grad_norm": 0.66796875,
"learning_rate": 9.78894991243575e-05,
"loss": 1.0931,
"step": 633
},
{
"epoch": 0.1806653843413835,
"grad_norm": 0.69140625,
"learning_rate": 9.788064817939291e-05,
"loss": 1.2157,
"step": 634
},
{
"epoch": 0.18095034551542352,
"grad_norm": 0.67578125,
"learning_rate": 9.787177911568648e-05,
"loss": 1.0819,
"step": 635
},
{
"epoch": 0.18123530668946355,
"grad_norm": 0.6171875,
"learning_rate": 9.786289193659441e-05,
"loss": 0.8964,
"step": 636
},
{
"epoch": 0.1815202678635036,
"grad_norm": 0.63671875,
"learning_rate": 9.785398664547973e-05,
"loss": 0.9387,
"step": 637
},
{
"epoch": 0.18180522903754365,
"grad_norm": 0.69140625,
"learning_rate": 9.784506324571232e-05,
"loss": 1.0766,
"step": 638
},
{
"epoch": 0.18209019021158368,
"grad_norm": 0.6796875,
"learning_rate": 9.783612174066894e-05,
"loss": 1.0544,
"step": 639
},
{
"epoch": 0.1823751513856237,
"grad_norm": 0.69140625,
"learning_rate": 9.782716213373318e-05,
"loss": 1.1305,
"step": 640
},
{
"epoch": 0.18266011255966375,
"grad_norm": 0.6484375,
"learning_rate": 9.781818442829552e-05,
"loss": 0.9968,
"step": 641
},
{
"epoch": 0.18294507373370378,
"grad_norm": 0.6875,
"learning_rate": 9.780918862775319e-05,
"loss": 1.0873,
"step": 642
},
{
"epoch": 0.1832300349077438,
"grad_norm": 0.703125,
"learning_rate": 9.78001747355104e-05,
"loss": 1.0623,
"step": 643
},
{
"epoch": 0.18351499608178384,
"grad_norm": 0.73046875,
"learning_rate": 9.77911427549781e-05,
"loss": 1.0687,
"step": 644
},
{
"epoch": 0.1837999572558239,
"grad_norm": 0.6328125,
"learning_rate": 9.778209268957414e-05,
"loss": 1.016,
"step": 645
},
{
"epoch": 0.18408491842986394,
"grad_norm": 0.6875,
"learning_rate": 9.777302454272319e-05,
"loss": 1.0787,
"step": 646
},
{
"epoch": 0.18436987960390397,
"grad_norm": 0.66796875,
"learning_rate": 9.776393831785677e-05,
"loss": 1.0606,
"step": 647
},
{
"epoch": 0.184654840777944,
"grad_norm": 0.65625,
"learning_rate": 9.775483401841325e-05,
"loss": 0.996,
"step": 648
},
{
"epoch": 0.18493980195198403,
"grad_norm": 0.65625,
"learning_rate": 9.774571164783782e-05,
"loss": 1.0357,
"step": 649
},
{
"epoch": 0.18522476312602407,
"grad_norm": 0.6640625,
"learning_rate": 9.773657120958252e-05,
"loss": 0.9564,
"step": 650
},
{
"epoch": 0.18550972430006413,
"grad_norm": 0.69140625,
"learning_rate": 9.772741270710626e-05,
"loss": 1.2073,
"step": 651
},
{
"epoch": 0.18579468547410416,
"grad_norm": 0.6640625,
"learning_rate": 9.771823614387469e-05,
"loss": 1.0351,
"step": 652
},
{
"epoch": 0.1860796466481442,
"grad_norm": 0.66796875,
"learning_rate": 9.77090415233604e-05,
"loss": 1.0352,
"step": 653
},
{
"epoch": 0.18636460782218423,
"grad_norm": 0.67578125,
"learning_rate": 9.769982884904276e-05,
"loss": 1.0765,
"step": 654
},
{
"epoch": 0.18664956899622426,
"grad_norm": 0.61328125,
"learning_rate": 9.769059812440799e-05,
"loss": 0.9166,
"step": 655
},
{
"epoch": 0.1869345301702643,
"grad_norm": 0.69921875,
"learning_rate": 9.768134935294912e-05,
"loss": 1.1236,
"step": 656
},
{
"epoch": 0.18721949134430435,
"grad_norm": 0.6953125,
"learning_rate": 9.767208253816602e-05,
"loss": 1.0862,
"step": 657
},
{
"epoch": 0.18750445251834438,
"grad_norm": 0.6015625,
"learning_rate": 9.766279768356539e-05,
"loss": 0.9412,
"step": 658
},
{
"epoch": 0.18778941369238442,
"grad_norm": 0.6796875,
"learning_rate": 9.765349479266078e-05,
"loss": 1.0688,
"step": 659
},
{
"epoch": 0.18807437486642445,
"grad_norm": 0.68359375,
"learning_rate": 9.764417386897249e-05,
"loss": 1.1256,
"step": 660
},
{
"epoch": 0.18835933604046448,
"grad_norm": 0.65234375,
"learning_rate": 9.763483491602773e-05,
"loss": 1.01,
"step": 661
},
{
"epoch": 0.1886442972145045,
"grad_norm": 0.69921875,
"learning_rate": 9.762547793736049e-05,
"loss": 1.0967,
"step": 662
},
{
"epoch": 0.18892925838854457,
"grad_norm": 0.69140625,
"learning_rate": 9.76161029365116e-05,
"loss": 1.0862,
"step": 663
},
{
"epoch": 0.1892142195625846,
"grad_norm": 0.68359375,
"learning_rate": 9.760670991702869e-05,
"loss": 1.1208,
"step": 664
},
{
"epoch": 0.18949918073662464,
"grad_norm": 0.6328125,
"learning_rate": 9.759729888246623e-05,
"loss": 0.9391,
"step": 665
},
{
"epoch": 0.18978414191066467,
"grad_norm": 0.62890625,
"learning_rate": 9.758786983638545e-05,
"loss": 0.9891,
"step": 666
},
{
"epoch": 0.1900691030847047,
"grad_norm": 0.703125,
"learning_rate": 9.757842278235449e-05,
"loss": 1.0646,
"step": 667
},
{
"epoch": 0.19035406425874474,
"grad_norm": 0.70703125,
"learning_rate": 9.756895772394821e-05,
"loss": 1.1456,
"step": 668
},
{
"epoch": 0.19063902543278477,
"grad_norm": 0.640625,
"learning_rate": 9.755947466474838e-05,
"loss": 0.9196,
"step": 669
},
{
"epoch": 0.19092398660682483,
"grad_norm": 0.6484375,
"learning_rate": 9.754997360834348e-05,
"loss": 1.0306,
"step": 670
},
{
"epoch": 0.19120894778086486,
"grad_norm": 0.6484375,
"learning_rate": 9.754045455832888e-05,
"loss": 0.9923,
"step": 671
},
{
"epoch": 0.1914939089549049,
"grad_norm": 0.6328125,
"learning_rate": 9.75309175183067e-05,
"loss": 0.912,
"step": 672
},
{
"epoch": 0.19177887012894493,
"grad_norm": 0.703125,
"learning_rate": 9.752136249188593e-05,
"loss": 1.1638,
"step": 673
},
{
"epoch": 0.19206383130298496,
"grad_norm": 0.69140625,
"learning_rate": 9.751178948268231e-05,
"loss": 1.1008,
"step": 674
},
{
"epoch": 0.192348792477025,
"grad_norm": 0.6875,
"learning_rate": 9.750219849431841e-05,
"loss": 1.0502,
"step": 675
},
{
"epoch": 0.19263375365106505,
"grad_norm": 0.66015625,
"learning_rate": 9.749258953042362e-05,
"loss": 1.0533,
"step": 676
},
{
"epoch": 0.1929187148251051,
"grad_norm": 0.734375,
"learning_rate": 9.748296259463407e-05,
"loss": 1.2064,
"step": 677
},
{
"epoch": 0.19320367599914512,
"grad_norm": 0.6640625,
"learning_rate": 9.74733176905928e-05,
"loss": 1.0515,
"step": 678
},
{
"epoch": 0.19348863717318515,
"grad_norm": 0.68359375,
"learning_rate": 9.746365482194952e-05,
"loss": 1.0417,
"step": 679
},
{
"epoch": 0.19377359834722518,
"grad_norm": 0.66796875,
"learning_rate": 9.745397399236085e-05,
"loss": 1.0463,
"step": 680
},
{
"epoch": 0.19405855952126522,
"grad_norm": 0.625,
"learning_rate": 9.744427520549011e-05,
"loss": 0.9756,
"step": 681
},
{
"epoch": 0.19434352069530528,
"grad_norm": 0.62890625,
"learning_rate": 9.74345584650075e-05,
"loss": 0.9914,
"step": 682
},
{
"epoch": 0.1946284818693453,
"grad_norm": 0.67578125,
"learning_rate": 9.742482377458999e-05,
"loss": 1.0467,
"step": 683
},
{
"epoch": 0.19491344304338534,
"grad_norm": 0.66796875,
"learning_rate": 9.741507113792128e-05,
"loss": 0.9449,
"step": 684
},
{
"epoch": 0.19519840421742538,
"grad_norm": 0.875,
"learning_rate": 9.740530055869194e-05,
"loss": 1.1425,
"step": 685
},
{
"epoch": 0.1954833653914654,
"grad_norm": 0.69140625,
"learning_rate": 9.739551204059932e-05,
"loss": 1.128,
"step": 686
},
{
"epoch": 0.19576832656550544,
"grad_norm": 0.6328125,
"learning_rate": 9.73857055873475e-05,
"loss": 0.9958,
"step": 687
},
{
"epoch": 0.19605328773954547,
"grad_norm": 0.82421875,
"learning_rate": 9.737588120264743e-05,
"loss": 0.9836,
"step": 688
},
{
"epoch": 0.19633824891358553,
"grad_norm": 0.64453125,
"learning_rate": 9.736603889021676e-05,
"loss": 0.9003,
"step": 689
},
{
"epoch": 0.19662321008762557,
"grad_norm": 0.6875,
"learning_rate": 9.735617865377997e-05,
"loss": 1.0004,
"step": 690
},
{
"epoch": 0.1969081712616656,
"grad_norm": 0.6171875,
"learning_rate": 9.734630049706835e-05,
"loss": 0.9171,
"step": 691
},
{
"epoch": 0.19719313243570563,
"grad_norm": 0.66015625,
"learning_rate": 9.733640442381993e-05,
"loss": 1.0012,
"step": 692
},
{
"epoch": 0.19747809360974566,
"grad_norm": 0.69140625,
"learning_rate": 9.732649043777951e-05,
"loss": 1.0959,
"step": 693
},
{
"epoch": 0.1977630547837857,
"grad_norm": 0.75,
"learning_rate": 9.731655854269869e-05,
"loss": 1.1731,
"step": 694
},
{
"epoch": 0.19804801595782576,
"grad_norm": 0.67578125,
"learning_rate": 9.730660874233586e-05,
"loss": 1.0082,
"step": 695
},
{
"epoch": 0.1983329771318658,
"grad_norm": 0.6875,
"learning_rate": 9.729664104045618e-05,
"loss": 1.1264,
"step": 696
},
{
"epoch": 0.19861793830590582,
"grad_norm": 0.63671875,
"learning_rate": 9.728665544083154e-05,
"loss": 1.0095,
"step": 697
},
{
"epoch": 0.19890289947994585,
"grad_norm": 0.66796875,
"learning_rate": 9.727665194724067e-05,
"loss": 1.0986,
"step": 698
},
{
"epoch": 0.1991878606539859,
"grad_norm": 0.66015625,
"learning_rate": 9.726663056346903e-05,
"loss": 1.0622,
"step": 699
},
{
"epoch": 0.19947282182802592,
"grad_norm": 0.6953125,
"learning_rate": 9.725659129330885e-05,
"loss": 1.0092,
"step": 700
},
{
"epoch": 0.19975778300206598,
"grad_norm": 0.7109375,
"learning_rate": 9.724653414055916e-05,
"loss": 1.2674,
"step": 701
},
{
"epoch": 0.200042744176106,
"grad_norm": 0.67578125,
"learning_rate": 9.72364591090257e-05,
"loss": 1.1078,
"step": 702
},
{
"epoch": 0.20032770535014605,
"grad_norm": 0.6484375,
"learning_rate": 9.722636620252103e-05,
"loss": 1.0586,
"step": 703
},
{
"epoch": 0.20061266652418608,
"grad_norm": 0.62109375,
"learning_rate": 9.721625542486446e-05,
"loss": 0.9358,
"step": 704
},
{
"epoch": 0.2008976276982261,
"grad_norm": 0.68359375,
"learning_rate": 9.720612677988206e-05,
"loss": 1.1108,
"step": 705
},
{
"epoch": 0.20118258887226614,
"grad_norm": 0.7109375,
"learning_rate": 9.719598027140663e-05,
"loss": 1.1469,
"step": 706
},
{
"epoch": 0.2014675500463062,
"grad_norm": 0.69140625,
"learning_rate": 9.718581590327777e-05,
"loss": 1.1143,
"step": 707
},
{
"epoch": 0.20175251122034624,
"grad_norm": 0.6796875,
"learning_rate": 9.717563367934186e-05,
"loss": 1.0907,
"step": 708
},
{
"epoch": 0.20203747239438627,
"grad_norm": 0.6640625,
"learning_rate": 9.716543360345197e-05,
"loss": 1.0438,
"step": 709
},
{
"epoch": 0.2023224335684263,
"grad_norm": 0.65234375,
"learning_rate": 9.715521567946797e-05,
"loss": 1.0072,
"step": 710
},
{
"epoch": 0.20260739474246633,
"grad_norm": 0.671875,
"learning_rate": 9.714497991125644e-05,
"loss": 0.9971,
"step": 711
},
{
"epoch": 0.20289235591650637,
"grad_norm": 0.65625,
"learning_rate": 9.713472630269081e-05,
"loss": 1.0122,
"step": 712
},
{
"epoch": 0.2031773170905464,
"grad_norm": 0.69140625,
"learning_rate": 9.712445485765114e-05,
"loss": 1.06,
"step": 713
},
{
"epoch": 0.20346227826458646,
"grad_norm": 0.68359375,
"learning_rate": 9.71141655800243e-05,
"loss": 0.9886,
"step": 714
},
{
"epoch": 0.2037472394386265,
"grad_norm": 0.6171875,
"learning_rate": 9.710385847370394e-05,
"loss": 0.8964,
"step": 715
},
{
"epoch": 0.20403220061266653,
"grad_norm": 0.6328125,
"learning_rate": 9.709353354259041e-05,
"loss": 0.9988,
"step": 716
},
{
"epoch": 0.20431716178670656,
"grad_norm": 0.6796875,
"learning_rate": 9.70831907905908e-05,
"loss": 1.0312,
"step": 717
},
{
"epoch": 0.2046021229607466,
"grad_norm": 0.671875,
"learning_rate": 9.707283022161896e-05,
"loss": 1.0767,
"step": 718
},
{
"epoch": 0.20488708413478662,
"grad_norm": 0.66015625,
"learning_rate": 9.706245183959548e-05,
"loss": 0.9476,
"step": 719
},
{
"epoch": 0.20517204530882668,
"grad_norm": 0.62109375,
"learning_rate": 9.705205564844773e-05,
"loss": 0.9307,
"step": 720
},
{
"epoch": 0.20545700648286672,
"grad_norm": 0.66796875,
"learning_rate": 9.704164165210972e-05,
"loss": 1.1137,
"step": 721
},
{
"epoch": 0.20574196765690675,
"grad_norm": 0.71875,
"learning_rate": 9.70312098545223e-05,
"loss": 1.2179,
"step": 722
},
{
"epoch": 0.20602692883094678,
"grad_norm": 0.65625,
"learning_rate": 9.702076025963303e-05,
"loss": 1.1086,
"step": 723
},
{
"epoch": 0.20631189000498681,
"grad_norm": 0.62109375,
"learning_rate": 9.701029287139614e-05,
"loss": 0.9571,
"step": 724
},
{
"epoch": 0.20659685117902685,
"grad_norm": 0.66015625,
"learning_rate": 9.699980769377269e-05,
"loss": 1.062,
"step": 725
},
{
"epoch": 0.2068818123530669,
"grad_norm": 0.7109375,
"learning_rate": 9.698930473073038e-05,
"loss": 1.1846,
"step": 726
},
{
"epoch": 0.20716677352710694,
"grad_norm": 0.64453125,
"learning_rate": 9.697878398624374e-05,
"loss": 0.9158,
"step": 727
},
{
"epoch": 0.20745173470114697,
"grad_norm": 0.59765625,
"learning_rate": 9.696824546429393e-05,
"loss": 0.9724,
"step": 728
},
{
"epoch": 0.207736695875187,
"grad_norm": 0.6796875,
"learning_rate": 9.695768916886892e-05,
"loss": 1.1375,
"step": 729
},
{
"epoch": 0.20802165704922704,
"grad_norm": 0.609375,
"learning_rate": 9.694711510396333e-05,
"loss": 0.8772,
"step": 730
},
{
"epoch": 0.20830661822326707,
"grad_norm": 0.6640625,
"learning_rate": 9.693652327357856e-05,
"loss": 1.0808,
"step": 731
},
{
"epoch": 0.2085915793973071,
"grad_norm": 0.6875,
"learning_rate": 9.692591368172271e-05,
"loss": 1.1139,
"step": 732
},
{
"epoch": 0.20887654057134716,
"grad_norm": 0.66796875,
"learning_rate": 9.691528633241061e-05,
"loss": 1.0693,
"step": 733
},
{
"epoch": 0.2091615017453872,
"grad_norm": 0.6953125,
"learning_rate": 9.69046412296638e-05,
"loss": 1.081,
"step": 734
},
{
"epoch": 0.20944646291942723,
"grad_norm": 0.6484375,
"learning_rate": 9.689397837751058e-05,
"loss": 1.0401,
"step": 735
},
{
"epoch": 0.20973142409346726,
"grad_norm": 0.64453125,
"learning_rate": 9.688329777998586e-05,
"loss": 1.0267,
"step": 736
},
{
"epoch": 0.2100163852675073,
"grad_norm": 0.62890625,
"learning_rate": 9.687259944113138e-05,
"loss": 1.0412,
"step": 737
},
{
"epoch": 0.21030134644154733,
"grad_norm": 0.62109375,
"learning_rate": 9.686188336499555e-05,
"loss": 0.916,
"step": 738
},
{
"epoch": 0.2105863076155874,
"grad_norm": 0.66015625,
"learning_rate": 9.685114955563349e-05,
"loss": 1.0391,
"step": 739
},
{
"epoch": 0.21087126878962742,
"grad_norm": 0.66015625,
"learning_rate": 9.6840398017107e-05,
"loss": 1.1002,
"step": 740
},
{
"epoch": 0.21115622996366745,
"grad_norm": 0.71875,
"learning_rate": 9.682962875348467e-05,
"loss": 1.141,
"step": 741
},
{
"epoch": 0.21144119113770748,
"grad_norm": 0.6953125,
"learning_rate": 9.68188417688417e-05,
"loss": 0.9937,
"step": 742
},
{
"epoch": 0.21172615231174752,
"grad_norm": 0.68359375,
"learning_rate": 9.680803706726007e-05,
"loss": 1.2662,
"step": 743
},
{
"epoch": 0.21201111348578755,
"grad_norm": 0.6953125,
"learning_rate": 9.679721465282845e-05,
"loss": 1.1191,
"step": 744
},
{
"epoch": 0.2122960746598276,
"grad_norm": 0.6796875,
"learning_rate": 9.678637452964217e-05,
"loss": 1.1676,
"step": 745
},
{
"epoch": 0.21258103583386764,
"grad_norm": 0.640625,
"learning_rate": 9.677551670180331e-05,
"loss": 1.0041,
"step": 746
},
{
"epoch": 0.21286599700790768,
"grad_norm": 0.82421875,
"learning_rate": 9.676464117342063e-05,
"loss": 1.0965,
"step": 747
},
{
"epoch": 0.2131509581819477,
"grad_norm": 0.66015625,
"learning_rate": 9.675374794860958e-05,
"loss": 0.9738,
"step": 748
},
{
"epoch": 0.21343591935598774,
"grad_norm": 0.67578125,
"learning_rate": 9.674283703149234e-05,
"loss": 1.0534,
"step": 749
},
{
"epoch": 0.21372088053002777,
"grad_norm": 0.6640625,
"learning_rate": 9.673190842619774e-05,
"loss": 0.9893,
"step": 750
},
{
"epoch": 0.21400584170406783,
"grad_norm": 0.6171875,
"learning_rate": 9.672096213686133e-05,
"loss": 0.9149,
"step": 751
},
{
"epoch": 0.21429080287810787,
"grad_norm": 0.6484375,
"learning_rate": 9.670999816762536e-05,
"loss": 1.032,
"step": 752
},
{
"epoch": 0.2145757640521479,
"grad_norm": 0.6640625,
"learning_rate": 9.669901652263873e-05,
"loss": 1.0288,
"step": 753
},
{
"epoch": 0.21486072522618793,
"grad_norm": 0.640625,
"learning_rate": 9.668801720605709e-05,
"loss": 0.9815,
"step": 754
},
{
"epoch": 0.21514568640022796,
"grad_norm": 0.66015625,
"learning_rate": 9.667700022204272e-05,
"loss": 0.9901,
"step": 755
},
{
"epoch": 0.215430647574268,
"grad_norm": 0.69140625,
"learning_rate": 9.66659655747646e-05,
"loss": 1.1246,
"step": 756
},
{
"epoch": 0.21571560874830803,
"grad_norm": 0.6640625,
"learning_rate": 9.665491326839843e-05,
"loss": 1.1472,
"step": 757
},
{
"epoch": 0.2160005699223481,
"grad_norm": 0.61328125,
"learning_rate": 9.664384330712655e-05,
"loss": 0.9106,
"step": 758
},
{
"epoch": 0.21628553109638812,
"grad_norm": 0.671875,
"learning_rate": 9.6632755695138e-05,
"loss": 1.0301,
"step": 759
},
{
"epoch": 0.21657049227042816,
"grad_norm": 0.65625,
"learning_rate": 9.66216504366285e-05,
"loss": 1.0129,
"step": 760
},
{
"epoch": 0.2168554534444682,
"grad_norm": 0.74609375,
"learning_rate": 9.661052753580041e-05,
"loss": 1.0987,
"step": 761
},
{
"epoch": 0.21714041461850822,
"grad_norm": 0.6484375,
"learning_rate": 9.659938699686286e-05,
"loss": 0.984,
"step": 762
},
{
"epoch": 0.21742537579254825,
"grad_norm": 0.6484375,
"learning_rate": 9.658822882403156e-05,
"loss": 1.0109,
"step": 763
},
{
"epoch": 0.2177103369665883,
"grad_norm": 0.62890625,
"learning_rate": 9.65770530215289e-05,
"loss": 1.0126,
"step": 764
},
{
"epoch": 0.21799529814062835,
"grad_norm": 0.64453125,
"learning_rate": 9.656585959358402e-05,
"loss": 0.9701,
"step": 765
},
{
"epoch": 0.21828025931466838,
"grad_norm": 0.625,
"learning_rate": 9.655464854443266e-05,
"loss": 0.9953,
"step": 766
},
{
"epoch": 0.2185652204887084,
"grad_norm": 0.6015625,
"learning_rate": 9.654341987831721e-05,
"loss": 0.9398,
"step": 767
},
{
"epoch": 0.21885018166274844,
"grad_norm": 0.62890625,
"learning_rate": 9.653217359948681e-05,
"loss": 0.978,
"step": 768
},
{
"epoch": 0.21913514283678848,
"grad_norm": 0.65234375,
"learning_rate": 9.65209097121972e-05,
"loss": 1.0498,
"step": 769
},
{
"epoch": 0.21942010401082854,
"grad_norm": 0.640625,
"learning_rate": 9.65096282207108e-05,
"loss": 1.021,
"step": 770
},
{
"epoch": 0.21970506518486857,
"grad_norm": 0.6171875,
"learning_rate": 9.649832912929669e-05,
"loss": 0.8944,
"step": 771
},
{
"epoch": 0.2199900263589086,
"grad_norm": 0.66015625,
"learning_rate": 9.648701244223062e-05,
"loss": 1.0971,
"step": 772
},
{
"epoch": 0.22027498753294864,
"grad_norm": 0.67578125,
"learning_rate": 9.647567816379496e-05,
"loss": 1.1193,
"step": 773
},
{
"epoch": 0.22055994870698867,
"grad_norm": 0.60546875,
"learning_rate": 9.646432629827883e-05,
"loss": 0.9676,
"step": 774
},
{
"epoch": 0.2208449098810287,
"grad_norm": 0.60546875,
"learning_rate": 9.645295684997789e-05,
"loss": 0.9198,
"step": 775
},
{
"epoch": 0.22112987105506876,
"grad_norm": 0.66796875,
"learning_rate": 9.644156982319452e-05,
"loss": 1.064,
"step": 776
},
{
"epoch": 0.2214148322291088,
"grad_norm": 0.6328125,
"learning_rate": 9.643016522223776e-05,
"loss": 0.943,
"step": 777
},
{
"epoch": 0.22169979340314883,
"grad_norm": 0.63671875,
"learning_rate": 9.641874305142324e-05,
"loss": 1.0012,
"step": 778
},
{
"epoch": 0.22198475457718886,
"grad_norm": 0.6484375,
"learning_rate": 9.640730331507331e-05,
"loss": 1.022,
"step": 779
},
{
"epoch": 0.2222697157512289,
"grad_norm": 0.64453125,
"learning_rate": 9.639584601751691e-05,
"loss": 1.1066,
"step": 780
},
{
"epoch": 0.22255467692526892,
"grad_norm": 0.609375,
"learning_rate": 9.638437116308967e-05,
"loss": 0.9619,
"step": 781
},
{
"epoch": 0.22283963809930896,
"grad_norm": 0.63671875,
"learning_rate": 9.637287875613384e-05,
"loss": 0.9892,
"step": 782
},
{
"epoch": 0.22312459927334902,
"grad_norm": 0.6796875,
"learning_rate": 9.636136880099831e-05,
"loss": 1.0539,
"step": 783
},
{
"epoch": 0.22340956044738905,
"grad_norm": 0.6171875,
"learning_rate": 9.634984130203861e-05,
"loss": 0.9384,
"step": 784
},
{
"epoch": 0.22369452162142908,
"grad_norm": 0.61328125,
"learning_rate": 9.633829626361695e-05,
"loss": 0.9422,
"step": 785
},
{
"epoch": 0.22397948279546911,
"grad_norm": 0.640625,
"learning_rate": 9.632673369010208e-05,
"loss": 1.015,
"step": 786
},
{
"epoch": 0.22426444396950915,
"grad_norm": 0.67578125,
"learning_rate": 9.631515358586952e-05,
"loss": 1.0725,
"step": 787
},
{
"epoch": 0.22454940514354918,
"grad_norm": 0.62109375,
"learning_rate": 9.630355595530129e-05,
"loss": 0.9548,
"step": 788
},
{
"epoch": 0.22483436631758924,
"grad_norm": 0.7109375,
"learning_rate": 9.629194080278614e-05,
"loss": 1.0999,
"step": 789
},
{
"epoch": 0.22511932749162927,
"grad_norm": 0.6328125,
"learning_rate": 9.628030813271939e-05,
"loss": 1.1077,
"step": 790
},
{
"epoch": 0.2254042886656693,
"grad_norm": 0.6640625,
"learning_rate": 9.626865794950303e-05,
"loss": 1.0716,
"step": 791
},
{
"epoch": 0.22568924983970934,
"grad_norm": 0.625,
"learning_rate": 9.625699025754569e-05,
"loss": 1.0403,
"step": 792
},
{
"epoch": 0.22597421101374937,
"grad_norm": 0.62109375,
"learning_rate": 9.624530506126254e-05,
"loss": 0.996,
"step": 793
},
{
"epoch": 0.2262591721877894,
"grad_norm": 0.6484375,
"learning_rate": 9.623360236507546e-05,
"loss": 0.984,
"step": 794
},
{
"epoch": 0.22654413336182946,
"grad_norm": 0.609375,
"learning_rate": 9.622188217341292e-05,
"loss": 0.8911,
"step": 795
},
{
"epoch": 0.2268290945358695,
"grad_norm": 0.609375,
"learning_rate": 9.621014449071002e-05,
"loss": 0.9672,
"step": 796
},
{
"epoch": 0.22711405570990953,
"grad_norm": 0.6484375,
"learning_rate": 9.619838932140845e-05,
"loss": 1.0569,
"step": 797
},
{
"epoch": 0.22739901688394956,
"grad_norm": 0.6484375,
"learning_rate": 9.618661666995656e-05,
"loss": 0.9946,
"step": 798
},
{
"epoch": 0.2276839780579896,
"grad_norm": 0.60546875,
"learning_rate": 9.617482654080927e-05,
"loss": 0.8449,
"step": 799
},
{
"epoch": 0.22796893923202963,
"grad_norm": 0.62890625,
"learning_rate": 9.616301893842817e-05,
"loss": 1.0208,
"step": 800
},
{
"epoch": 0.22825390040606966,
"grad_norm": 0.58984375,
"learning_rate": 9.615119386728142e-05,
"loss": 0.8984,
"step": 801
},
{
"epoch": 0.22853886158010972,
"grad_norm": 0.60546875,
"learning_rate": 9.613935133184378e-05,
"loss": 0.9595,
"step": 802
},
{
"epoch": 0.22882382275414975,
"grad_norm": 0.65234375,
"learning_rate": 9.612749133659666e-05,
"loss": 1.0099,
"step": 803
},
{
"epoch": 0.22910878392818979,
"grad_norm": 0.61328125,
"learning_rate": 9.611561388602805e-05,
"loss": 0.9618,
"step": 804
},
{
"epoch": 0.22939374510222982,
"grad_norm": 0.6328125,
"learning_rate": 9.610371898463257e-05,
"loss": 0.9654,
"step": 805
},
{
"epoch": 0.22967870627626985,
"grad_norm": 0.640625,
"learning_rate": 9.60918066369114e-05,
"loss": 1.0519,
"step": 806
},
{
"epoch": 0.22996366745030988,
"grad_norm": 0.6640625,
"learning_rate": 9.607987684737237e-05,
"loss": 1.1106,
"step": 807
},
{
"epoch": 0.23024862862434994,
"grad_norm": 0.6171875,
"learning_rate": 9.606792962052986e-05,
"loss": 0.968,
"step": 808
},
{
"epoch": 0.23053358979838998,
"grad_norm": 0.62890625,
"learning_rate": 9.60559649609049e-05,
"loss": 1.0045,
"step": 809
},
{
"epoch": 0.23081855097243,
"grad_norm": 0.62109375,
"learning_rate": 9.604398287302509e-05,
"loss": 0.9767,
"step": 810
},
{
"epoch": 0.23110351214647004,
"grad_norm": 0.6640625,
"learning_rate": 9.60319833614246e-05,
"loss": 1.0603,
"step": 811
},
{
"epoch": 0.23138847332051007,
"grad_norm": 0.6953125,
"learning_rate": 9.601996643064428e-05,
"loss": 1.106,
"step": 812
},
{
"epoch": 0.2316734344945501,
"grad_norm": 0.58203125,
"learning_rate": 9.600793208523147e-05,
"loss": 0.9162,
"step": 813
},
{
"epoch": 0.23195839566859017,
"grad_norm": 0.7109375,
"learning_rate": 9.599588032974014e-05,
"loss": 0.9119,
"step": 814
},
{
"epoch": 0.2322433568426302,
"grad_norm": 0.625,
"learning_rate": 9.598381116873088e-05,
"loss": 0.9307,
"step": 815
},
{
"epoch": 0.23252831801667023,
"grad_norm": 0.59765625,
"learning_rate": 9.597172460677079e-05,
"loss": 0.9318,
"step": 816
},
{
"epoch": 0.23281327919071026,
"grad_norm": 0.64453125,
"learning_rate": 9.595962064843367e-05,
"loss": 1.0533,
"step": 817
},
{
"epoch": 0.2330982403647503,
"grad_norm": 0.6328125,
"learning_rate": 9.594749929829979e-05,
"loss": 0.9443,
"step": 818
},
{
"epoch": 0.23338320153879033,
"grad_norm": 0.6171875,
"learning_rate": 9.593536056095606e-05,
"loss": 1.0741,
"step": 819
},
{
"epoch": 0.2336681627128304,
"grad_norm": 0.62890625,
"learning_rate": 9.592320444099595e-05,
"loss": 1.0224,
"step": 820
},
{
"epoch": 0.23395312388687042,
"grad_norm": 0.61328125,
"learning_rate": 9.591103094301952e-05,
"loss": 0.9979,
"step": 821
},
{
"epoch": 0.23423808506091046,
"grad_norm": 0.63671875,
"learning_rate": 9.589884007163341e-05,
"loss": 1.0549,
"step": 822
},
{
"epoch": 0.2345230462349505,
"grad_norm": 0.63671875,
"learning_rate": 9.588663183145082e-05,
"loss": 1.0795,
"step": 823
},
{
"epoch": 0.23480800740899052,
"grad_norm": 0.578125,
"learning_rate": 9.58744062270915e-05,
"loss": 1.0403,
"step": 824
},
{
"epoch": 0.23509296858303055,
"grad_norm": 0.640625,
"learning_rate": 9.586216326318185e-05,
"loss": 1.0365,
"step": 825
},
{
"epoch": 0.2353779297570706,
"grad_norm": 0.62890625,
"learning_rate": 9.584990294435474e-05,
"loss": 0.8826,
"step": 826
},
{
"epoch": 0.23566289093111065,
"grad_norm": 0.64453125,
"learning_rate": 9.583762527524968e-05,
"loss": 1.0307,
"step": 827
},
{
"epoch": 0.23594785210515068,
"grad_norm": 0.609375,
"learning_rate": 9.582533026051272e-05,
"loss": 0.9801,
"step": 828
},
{
"epoch": 0.2362328132791907,
"grad_norm": 0.6875,
"learning_rate": 9.581301790479646e-05,
"loss": 1.1224,
"step": 829
},
{
"epoch": 0.23651777445323074,
"grad_norm": 0.6484375,
"learning_rate": 9.58006882127601e-05,
"loss": 1.0178,
"step": 830
},
{
"epoch": 0.23680273562727078,
"grad_norm": 0.65234375,
"learning_rate": 9.578834118906936e-05,
"loss": 1.0178,
"step": 831
},
{
"epoch": 0.2370876968013108,
"grad_norm": 0.65625,
"learning_rate": 9.577597683839653e-05,
"loss": 0.8616,
"step": 832
},
{
"epoch": 0.23737265797535087,
"grad_norm": 0.640625,
"learning_rate": 9.576359516542049e-05,
"loss": 1.0821,
"step": 833
},
{
"epoch": 0.2376576191493909,
"grad_norm": 0.6484375,
"learning_rate": 9.57511961748266e-05,
"loss": 1.0931,
"step": 834
},
{
"epoch": 0.23794258032343094,
"grad_norm": 0.6171875,
"learning_rate": 9.573877987130687e-05,
"loss": 0.9808,
"step": 835
},
{
"epoch": 0.23822754149747097,
"grad_norm": 0.62890625,
"learning_rate": 9.572634625955979e-05,
"loss": 1.0453,
"step": 836
},
{
"epoch": 0.238512502671511,
"grad_norm": 0.6875,
"learning_rate": 9.571389534429042e-05,
"loss": 1.0502,
"step": 837
},
{
"epoch": 0.23879746384555103,
"grad_norm": 0.6796875,
"learning_rate": 9.570142713021038e-05,
"loss": 1.0263,
"step": 838
},
{
"epoch": 0.2390824250195911,
"grad_norm": 0.62890625,
"learning_rate": 9.56889416220378e-05,
"loss": 0.9521,
"step": 839
},
{
"epoch": 0.23936738619363113,
"grad_norm": 0.72265625,
"learning_rate": 9.567643882449741e-05,
"loss": 1.0707,
"step": 840
},
{
"epoch": 0.23965234736767116,
"grad_norm": 0.60546875,
"learning_rate": 9.566391874232043e-05,
"loss": 0.9217,
"step": 841
},
{
"epoch": 0.2399373085417112,
"grad_norm": 0.62890625,
"learning_rate": 9.565138138024467e-05,
"loss": 0.9518,
"step": 842
},
{
"epoch": 0.24022226971575122,
"grad_norm": 0.66015625,
"learning_rate": 9.563882674301442e-05,
"loss": 1.0683,
"step": 843
},
{
"epoch": 0.24050723088979126,
"grad_norm": 0.62890625,
"learning_rate": 9.562625483538056e-05,
"loss": 0.9946,
"step": 844
},
{
"epoch": 0.2407921920638313,
"grad_norm": 0.65234375,
"learning_rate": 9.561366566210048e-05,
"loss": 0.8849,
"step": 845
},
{
"epoch": 0.24107715323787135,
"grad_norm": 0.58984375,
"learning_rate": 9.560105922793811e-05,
"loss": 0.9304,
"step": 846
},
{
"epoch": 0.24136211441191138,
"grad_norm": 0.625,
"learning_rate": 9.558843553766392e-05,
"loss": 1.0837,
"step": 847
},
{
"epoch": 0.24164707558595142,
"grad_norm": 0.76953125,
"learning_rate": 9.557579459605488e-05,
"loss": 1.0149,
"step": 848
},
{
"epoch": 0.24193203675999145,
"grad_norm": 0.7109375,
"learning_rate": 9.556313640789452e-05,
"loss": 1.1387,
"step": 849
},
{
"epoch": 0.24221699793403148,
"grad_norm": 0.6171875,
"learning_rate": 9.555046097797288e-05,
"loss": 0.9633,
"step": 850
},
{
"epoch": 0.2425019591080715,
"grad_norm": 0.640625,
"learning_rate": 9.553776831108654e-05,
"loss": 0.982,
"step": 851
},
{
"epoch": 0.24278692028211157,
"grad_norm": 0.65625,
"learning_rate": 9.552505841203856e-05,
"loss": 1.0704,
"step": 852
},
{
"epoch": 0.2430718814561516,
"grad_norm": 0.6171875,
"learning_rate": 9.55123312856386e-05,
"loss": 0.9039,
"step": 853
},
{
"epoch": 0.24335684263019164,
"grad_norm": 0.6328125,
"learning_rate": 9.549958693670276e-05,
"loss": 0.9455,
"step": 854
},
{
"epoch": 0.24364180380423167,
"grad_norm": 0.640625,
"learning_rate": 9.548682537005369e-05,
"loss": 0.9679,
"step": 855
},
{
"epoch": 0.2439267649782717,
"grad_norm": 0.640625,
"learning_rate": 9.547404659052057e-05,
"loss": 1.002,
"step": 856
},
{
"epoch": 0.24421172615231174,
"grad_norm": 0.6640625,
"learning_rate": 9.546125060293906e-05,
"loss": 0.9739,
"step": 857
},
{
"epoch": 0.2444966873263518,
"grad_norm": 0.63671875,
"learning_rate": 9.544843741215137e-05,
"loss": 1.0245,
"step": 858
},
{
"epoch": 0.24478164850039183,
"grad_norm": 0.63671875,
"learning_rate": 9.54356070230062e-05,
"loss": 1.0823,
"step": 859
},
{
"epoch": 0.24506660967443186,
"grad_norm": 0.640625,
"learning_rate": 9.542275944035874e-05,
"loss": 1.0684,
"step": 860
},
{
"epoch": 0.2453515708484719,
"grad_norm": 0.59765625,
"learning_rate": 9.54098946690707e-05,
"loss": 0.962,
"step": 861
},
{
"epoch": 0.24563653202251193,
"grad_norm": 0.6015625,
"learning_rate": 9.539701271401031e-05,
"loss": 0.9923,
"step": 862
},
{
"epoch": 0.24592149319655196,
"grad_norm": 0.6484375,
"learning_rate": 9.538411358005231e-05,
"loss": 0.9796,
"step": 863
},
{
"epoch": 0.24620645437059202,
"grad_norm": 0.640625,
"learning_rate": 9.537119727207791e-05,
"loss": 0.9334,
"step": 864
},
{
"epoch": 0.24649141554463205,
"grad_norm": 0.6484375,
"learning_rate": 9.53582637949748e-05,
"loss": 1.0171,
"step": 865
},
{
"epoch": 0.24677637671867209,
"grad_norm": 0.61328125,
"learning_rate": 9.534531315363723e-05,
"loss": 0.9819,
"step": 866
},
{
"epoch": 0.24706133789271212,
"grad_norm": 0.6796875,
"learning_rate": 9.533234535296591e-05,
"loss": 1.115,
"step": 867
},
{
"epoch": 0.24734629906675215,
"grad_norm": 0.6171875,
"learning_rate": 9.531936039786806e-05,
"loss": 0.9978,
"step": 868
},
{
"epoch": 0.24763126024079218,
"grad_norm": 0.65625,
"learning_rate": 9.530635829325734e-05,
"loss": 1.0792,
"step": 869
},
{
"epoch": 0.24791622141483222,
"grad_norm": 0.58203125,
"learning_rate": 9.529333904405398e-05,
"loss": 0.8426,
"step": 870
},
{
"epoch": 0.24820118258887228,
"grad_norm": 0.60546875,
"learning_rate": 9.528030265518461e-05,
"loss": 0.9491,
"step": 871
},
{
"epoch": 0.2484861437629123,
"grad_norm": 0.57421875,
"learning_rate": 9.526724913158242e-05,
"loss": 0.9119,
"step": 872
},
{
"epoch": 0.24877110493695234,
"grad_norm": 0.60546875,
"learning_rate": 9.525417847818707e-05,
"loss": 0.9728,
"step": 873
},
{
"epoch": 0.24905606611099237,
"grad_norm": 0.62109375,
"learning_rate": 9.524109069994466e-05,
"loss": 0.9545,
"step": 874
},
{
"epoch": 0.2493410272850324,
"grad_norm": 0.6484375,
"learning_rate": 9.522798580180781e-05,
"loss": 1.0756,
"step": 875
},
{
"epoch": 0.24962598845907244,
"grad_norm": 0.578125,
"learning_rate": 9.521486378873558e-05,
"loss": 0.9064,
"step": 876
},
{
"epoch": 0.2499109496331125,
"grad_norm": 0.60546875,
"learning_rate": 9.520172466569358e-05,
"loss": 0.9686,
"step": 877
},
{
"epoch": 0.25019591080715253,
"grad_norm": 0.625,
"learning_rate": 9.518856843765382e-05,
"loss": 1.0244,
"step": 878
},
{
"epoch": 0.25048087198119257,
"grad_norm": 0.640625,
"learning_rate": 9.517539510959482e-05,
"loss": 1.0626,
"step": 879
},
{
"epoch": 0.2507658331552326,
"grad_norm": 0.6484375,
"learning_rate": 9.516220468650155e-05,
"loss": 1.0234,
"step": 880
},
{
"epoch": 0.25105079432927263,
"grad_norm": 0.63671875,
"learning_rate": 9.514899717336545e-05,
"loss": 0.978,
"step": 881
},
{
"epoch": 0.25133575550331266,
"grad_norm": 0.64453125,
"learning_rate": 9.513577257518447e-05,
"loss": 1.0112,
"step": 882
},
{
"epoch": 0.2516207166773527,
"grad_norm": 0.609375,
"learning_rate": 9.512253089696296e-05,
"loss": 0.9261,
"step": 883
},
{
"epoch": 0.25190567785139273,
"grad_norm": 0.64453125,
"learning_rate": 9.51092721437118e-05,
"loss": 0.9873,
"step": 884
},
{
"epoch": 0.25219063902543276,
"grad_norm": 0.58984375,
"learning_rate": 9.509599632044827e-05,
"loss": 0.945,
"step": 885
},
{
"epoch": 0.25247560019947285,
"grad_norm": 0.65234375,
"learning_rate": 9.508270343219614e-05,
"loss": 1.073,
"step": 886
},
{
"epoch": 0.2527605613735129,
"grad_norm": 0.61328125,
"learning_rate": 9.506939348398565e-05,
"loss": 0.9472,
"step": 887
},
{
"epoch": 0.2530455225475529,
"grad_norm": 0.625,
"learning_rate": 9.505606648085346e-05,
"loss": 0.942,
"step": 888
},
{
"epoch": 0.25333048372159295,
"grad_norm": 0.62109375,
"learning_rate": 9.504272242784272e-05,
"loss": 0.9839,
"step": 889
},
{
"epoch": 0.253615444895633,
"grad_norm": 0.6015625,
"learning_rate": 9.5029361330003e-05,
"loss": 0.9566,
"step": 890
},
{
"epoch": 0.253900406069673,
"grad_norm": 0.62109375,
"learning_rate": 9.501598319239036e-05,
"loss": 1.0334,
"step": 891
},
{
"epoch": 0.25418536724371305,
"grad_norm": 0.65625,
"learning_rate": 9.500258802006726e-05,
"loss": 1.0752,
"step": 892
},
{
"epoch": 0.2544703284177531,
"grad_norm": 0.6171875,
"learning_rate": 9.498917581810265e-05,
"loss": 1.0278,
"step": 893
},
{
"epoch": 0.2547552895917931,
"grad_norm": 0.625,
"learning_rate": 9.49757465915719e-05,
"loss": 1.0858,
"step": 894
},
{
"epoch": 0.25504025076583314,
"grad_norm": 0.60546875,
"learning_rate": 9.49623003455568e-05,
"loss": 0.9767,
"step": 895
},
{
"epoch": 0.2553252119398732,
"grad_norm": 0.61328125,
"learning_rate": 9.494883708514564e-05,
"loss": 0.9732,
"step": 896
},
{
"epoch": 0.2556101731139132,
"grad_norm": 0.63671875,
"learning_rate": 9.49353568154331e-05,
"loss": 1.0778,
"step": 897
},
{
"epoch": 0.25589513428795324,
"grad_norm": 0.62109375,
"learning_rate": 9.492185954152032e-05,
"loss": 0.9255,
"step": 898
},
{
"epoch": 0.25618009546199333,
"grad_norm": 0.6015625,
"learning_rate": 9.490834526851489e-05,
"loss": 0.9085,
"step": 899
},
{
"epoch": 0.25646505663603336,
"grad_norm": 0.58984375,
"learning_rate": 9.489481400153075e-05,
"loss": 0.971,
"step": 900
},
{
"epoch": 0.2567500178100734,
"grad_norm": 0.6796875,
"learning_rate": 9.488126574568838e-05,
"loss": 1.0312,
"step": 901
},
{
"epoch": 0.2570349789841134,
"grad_norm": 0.640625,
"learning_rate": 9.486770050611463e-05,
"loss": 1.1106,
"step": 902
},
{
"epoch": 0.25731994015815346,
"grad_norm": 0.62109375,
"learning_rate": 9.485411828794278e-05,
"loss": 0.9696,
"step": 903
},
{
"epoch": 0.2576049013321935,
"grad_norm": 0.6484375,
"learning_rate": 9.484051909631253e-05,
"loss": 0.9714,
"step": 904
},
{
"epoch": 0.2578898625062335,
"grad_norm": 0.609375,
"learning_rate": 9.482690293637004e-05,
"loss": 0.858,
"step": 905
},
{
"epoch": 0.25817482368027356,
"grad_norm": 0.62890625,
"learning_rate": 9.481326981326784e-05,
"loss": 1.0682,
"step": 906
},
{
"epoch": 0.2584597848543136,
"grad_norm": 0.63671875,
"learning_rate": 9.479961973216492e-05,
"loss": 1.0548,
"step": 907
},
{
"epoch": 0.2587447460283536,
"grad_norm": 0.671875,
"learning_rate": 9.478595269822666e-05,
"loss": 1.1323,
"step": 908
},
{
"epoch": 0.25902970720239366,
"grad_norm": 0.6484375,
"learning_rate": 9.477226871662489e-05,
"loss": 1.0222,
"step": 909
},
{
"epoch": 0.2593146683764337,
"grad_norm": 0.6796875,
"learning_rate": 9.47585677925378e-05,
"loss": 1.0132,
"step": 910
},
{
"epoch": 0.2595996295504737,
"grad_norm": 0.5625,
"learning_rate": 9.474484993115005e-05,
"loss": 0.9136,
"step": 911
},
{
"epoch": 0.2598845907245138,
"grad_norm": 0.72265625,
"learning_rate": 9.473111513765265e-05,
"loss": 1.2132,
"step": 912
},
{
"epoch": 0.26016955189855384,
"grad_norm": 0.6171875,
"learning_rate": 9.471736341724307e-05,
"loss": 0.9834,
"step": 913
},
{
"epoch": 0.2604545130725939,
"grad_norm": 0.61328125,
"learning_rate": 9.470359477512517e-05,
"loss": 0.9819,
"step": 914
},
{
"epoch": 0.2607394742466339,
"grad_norm": 0.6640625,
"learning_rate": 9.468980921650919e-05,
"loss": 1.0251,
"step": 915
},
{
"epoch": 0.26102443542067394,
"grad_norm": 0.640625,
"learning_rate": 9.467600674661177e-05,
"loss": 1.0043,
"step": 916
},
{
"epoch": 0.26130939659471397,
"grad_norm": 0.69921875,
"learning_rate": 9.466218737065601e-05,
"loss": 1.1059,
"step": 917
},
{
"epoch": 0.261594357768754,
"grad_norm": 0.671875,
"learning_rate": 9.464835109387133e-05,
"loss": 0.9952,
"step": 918
},
{
"epoch": 0.26187931894279404,
"grad_norm": 0.640625,
"learning_rate": 9.463449792149362e-05,
"loss": 1.0236,
"step": 919
},
{
"epoch": 0.26216428011683407,
"grad_norm": 0.6484375,
"learning_rate": 9.462062785876509e-05,
"loss": 0.9314,
"step": 920
},
{
"epoch": 0.2624492412908741,
"grad_norm": 0.6640625,
"learning_rate": 9.460674091093438e-05,
"loss": 1.1074,
"step": 921
},
{
"epoch": 0.26273420246491413,
"grad_norm": 0.625,
"learning_rate": 9.459283708325653e-05,
"loss": 0.9517,
"step": 922
},
{
"epoch": 0.26301916363895417,
"grad_norm": 0.66796875,
"learning_rate": 9.457891638099293e-05,
"loss": 1.0116,
"step": 923
},
{
"epoch": 0.26330412481299426,
"grad_norm": 0.76953125,
"learning_rate": 9.45649788094114e-05,
"loss": 0.9336,
"step": 924
},
{
"epoch": 0.2635890859870343,
"grad_norm": 0.64453125,
"learning_rate": 9.455102437378611e-05,
"loss": 1.0192,
"step": 925
},
{
"epoch": 0.2638740471610743,
"grad_norm": 0.62109375,
"learning_rate": 9.453705307939764e-05,
"loss": 0.981,
"step": 926
},
{
"epoch": 0.26415900833511435,
"grad_norm": 0.72265625,
"learning_rate": 9.452306493153292e-05,
"loss": 1.1055,
"step": 927
},
{
"epoch": 0.2644439695091544,
"grad_norm": 0.7109375,
"learning_rate": 9.450905993548527e-05,
"loss": 1.1894,
"step": 928
},
{
"epoch": 0.2647289306831944,
"grad_norm": 0.63671875,
"learning_rate": 9.44950380965544e-05,
"loss": 1.0545,
"step": 929
},
{
"epoch": 0.26501389185723445,
"grad_norm": 0.6484375,
"learning_rate": 9.448099942004636e-05,
"loss": 1.1089,
"step": 930
},
{
"epoch": 0.2652988530312745,
"grad_norm": 0.61328125,
"learning_rate": 9.44669439112736e-05,
"loss": 1.0321,
"step": 931
},
{
"epoch": 0.2655838142053145,
"grad_norm": 0.5859375,
"learning_rate": 9.445287157555494e-05,
"loss": 0.9055,
"step": 932
},
{
"epoch": 0.26586877537935455,
"grad_norm": 0.68359375,
"learning_rate": 9.443878241821555e-05,
"loss": 1.2172,
"step": 933
},
{
"epoch": 0.2661537365533946,
"grad_norm": 0.609375,
"learning_rate": 9.442467644458699e-05,
"loss": 0.9766,
"step": 934
},
{
"epoch": 0.2664386977274346,
"grad_norm": 0.62109375,
"learning_rate": 9.441055366000712e-05,
"loss": 1.0756,
"step": 935
},
{
"epoch": 0.26672365890147465,
"grad_norm": 0.6171875,
"learning_rate": 9.439641406982028e-05,
"loss": 1.0067,
"step": 936
},
{
"epoch": 0.26700862007551474,
"grad_norm": 0.59765625,
"learning_rate": 9.438225767937705e-05,
"loss": 0.9499,
"step": 937
},
{
"epoch": 0.26729358124955477,
"grad_norm": 0.6484375,
"learning_rate": 9.436808449403442e-05,
"loss": 1.0982,
"step": 938
},
{
"epoch": 0.2675785424235948,
"grad_norm": 0.59765625,
"learning_rate": 9.435389451915576e-05,
"loss": 0.9502,
"step": 939
},
{
"epoch": 0.26786350359763483,
"grad_norm": 0.671875,
"learning_rate": 9.433968776011074e-05,
"loss": 1.0042,
"step": 940
},
{
"epoch": 0.26814846477167487,
"grad_norm": 0.58203125,
"learning_rate": 9.432546422227542e-05,
"loss": 0.8678,
"step": 941
},
{
"epoch": 0.2684334259457149,
"grad_norm": 0.62109375,
"learning_rate": 9.431122391103217e-05,
"loss": 1.041,
"step": 942
},
{
"epoch": 0.26871838711975493,
"grad_norm": 0.58984375,
"learning_rate": 9.429696683176976e-05,
"loss": 0.9748,
"step": 943
},
{
"epoch": 0.26900334829379496,
"grad_norm": 0.59765625,
"learning_rate": 9.428269298988328e-05,
"loss": 0.9924,
"step": 944
},
{
"epoch": 0.269288309467835,
"grad_norm": 0.63671875,
"learning_rate": 9.426840239077414e-05,
"loss": 1.0842,
"step": 945
},
{
"epoch": 0.26957327064187503,
"grad_norm": 0.6171875,
"learning_rate": 9.425409503985014e-05,
"loss": 0.9269,
"step": 946
},
{
"epoch": 0.26985823181591506,
"grad_norm": 0.6328125,
"learning_rate": 9.423977094252537e-05,
"loss": 1.0251,
"step": 947
},
{
"epoch": 0.2701431929899551,
"grad_norm": 0.63671875,
"learning_rate": 9.422543010422029e-05,
"loss": 1.0265,
"step": 948
},
{
"epoch": 0.2704281541639952,
"grad_norm": 0.609375,
"learning_rate": 9.421107253036168e-05,
"loss": 0.9327,
"step": 949
},
{
"epoch": 0.2707131153380352,
"grad_norm": 0.62109375,
"learning_rate": 9.419669822638267e-05,
"loss": 0.9137,
"step": 950
},
{
"epoch": 0.27099807651207525,
"grad_norm": 0.64453125,
"learning_rate": 9.418230719772269e-05,
"loss": 1.0876,
"step": 951
},
{
"epoch": 0.2712830376861153,
"grad_norm": 0.66015625,
"learning_rate": 9.41678994498275e-05,
"loss": 1.0511,
"step": 952
},
{
"epoch": 0.2715679988601553,
"grad_norm": 0.57421875,
"learning_rate": 9.415347498814925e-05,
"loss": 0.9393,
"step": 953
},
{
"epoch": 0.27185296003419535,
"grad_norm": 0.58984375,
"learning_rate": 9.413903381814638e-05,
"loss": 0.9865,
"step": 954
},
{
"epoch": 0.2721379212082354,
"grad_norm": 0.5703125,
"learning_rate": 9.412457594528357e-05,
"loss": 0.8684,
"step": 955
},
{
"epoch": 0.2724228823822754,
"grad_norm": 0.609375,
"learning_rate": 9.411010137503192e-05,
"loss": 0.8862,
"step": 956
},
{
"epoch": 0.27270784355631544,
"grad_norm": 0.66015625,
"learning_rate": 9.409561011286885e-05,
"loss": 1.1248,
"step": 957
},
{
"epoch": 0.2729928047303555,
"grad_norm": 0.6171875,
"learning_rate": 9.408110216427804e-05,
"loss": 1.0661,
"step": 958
},
{
"epoch": 0.2732777659043955,
"grad_norm": 0.95703125,
"learning_rate": 9.406657753474952e-05,
"loss": 0.9036,
"step": 959
},
{
"epoch": 0.27356272707843554,
"grad_norm": 0.58984375,
"learning_rate": 9.405203622977963e-05,
"loss": 0.8965,
"step": 960
},
{
"epoch": 0.2738476882524756,
"grad_norm": 0.63671875,
"learning_rate": 9.403747825487099e-05,
"loss": 1.0589,
"step": 961
},
{
"epoch": 0.27413264942651566,
"grad_norm": 0.61328125,
"learning_rate": 9.402290361553257e-05,
"loss": 1.057,
"step": 962
},
{
"epoch": 0.2744176106005557,
"grad_norm": 0.6484375,
"learning_rate": 9.400831231727962e-05,
"loss": 1.049,
"step": 963
},
{
"epoch": 0.2747025717745957,
"grad_norm": 0.6015625,
"learning_rate": 9.399370436563372e-05,
"loss": 0.8457,
"step": 964
},
{
"epoch": 0.27498753294863576,
"grad_norm": 0.578125,
"learning_rate": 9.397907976612272e-05,
"loss": 0.9325,
"step": 965
},
{
"epoch": 0.2752724941226758,
"grad_norm": 0.65625,
"learning_rate": 9.396443852428078e-05,
"loss": 1.0996,
"step": 966
},
{
"epoch": 0.2755574552967158,
"grad_norm": 0.69921875,
"learning_rate": 9.394978064564836e-05,
"loss": 1.0698,
"step": 967
},
{
"epoch": 0.27584241647075586,
"grad_norm": 0.60546875,
"learning_rate": 9.393510613577225e-05,
"loss": 0.9456,
"step": 968
},
{
"epoch": 0.2761273776447959,
"grad_norm": 0.58203125,
"learning_rate": 9.392041500020545e-05,
"loss": 0.8715,
"step": 969
},
{
"epoch": 0.2764123388188359,
"grad_norm": 0.59375,
"learning_rate": 9.390570724450733e-05,
"loss": 0.9401,
"step": 970
},
{
"epoch": 0.27669729999287596,
"grad_norm": 0.69921875,
"learning_rate": 9.389098287424351e-05,
"loss": 1.0906,
"step": 971
},
{
"epoch": 0.276982261166916,
"grad_norm": 0.6484375,
"learning_rate": 9.387624189498593e-05,
"loss": 1.061,
"step": 972
},
{
"epoch": 0.277267222340956,
"grad_norm": 0.671875,
"learning_rate": 9.386148431231276e-05,
"loss": 1.0958,
"step": 973
},
{
"epoch": 0.2775521835149961,
"grad_norm": 0.6015625,
"learning_rate": 9.384671013180852e-05,
"loss": 0.9663,
"step": 974
},
{
"epoch": 0.27783714468903614,
"grad_norm": 0.66015625,
"learning_rate": 9.383191935906395e-05,
"loss": 1.011,
"step": 975
},
{
"epoch": 0.2781221058630762,
"grad_norm": 0.61328125,
"learning_rate": 9.381711199967613e-05,
"loss": 1.0356,
"step": 976
},
{
"epoch": 0.2784070670371162,
"grad_norm": 0.59375,
"learning_rate": 9.380228805924835e-05,
"loss": 0.9148,
"step": 977
},
{
"epoch": 0.27869202821115624,
"grad_norm": 0.58984375,
"learning_rate": 9.378744754339022e-05,
"loss": 0.8963,
"step": 978
},
{
"epoch": 0.2789769893851963,
"grad_norm": 0.6328125,
"learning_rate": 9.377259045771759e-05,
"loss": 1.0365,
"step": 979
},
{
"epoch": 0.2792619505592363,
"grad_norm": 0.60546875,
"learning_rate": 9.375771680785263e-05,
"loss": 1.0531,
"step": 980
},
{
"epoch": 0.27954691173327634,
"grad_norm": 0.59375,
"learning_rate": 9.374282659942374e-05,
"loss": 0.9912,
"step": 981
},
{
"epoch": 0.27983187290731637,
"grad_norm": 0.6328125,
"learning_rate": 9.372791983806559e-05,
"loss": 1.0263,
"step": 982
},
{
"epoch": 0.2801168340813564,
"grad_norm": 0.640625,
"learning_rate": 9.371299652941911e-05,
"loss": 0.9641,
"step": 983
},
{
"epoch": 0.28040179525539644,
"grad_norm": 0.64453125,
"learning_rate": 9.36980566791315e-05,
"loss": 1.0648,
"step": 984
},
{
"epoch": 0.28068675642943647,
"grad_norm": 0.6171875,
"learning_rate": 9.368310029285625e-05,
"loss": 0.949,
"step": 985
},
{
"epoch": 0.2809717176034765,
"grad_norm": 0.6171875,
"learning_rate": 9.366812737625303e-05,
"loss": 1.0348,
"step": 986
},
{
"epoch": 0.2812566787775166,
"grad_norm": 0.6328125,
"learning_rate": 9.365313793498785e-05,
"loss": 0.9918,
"step": 987
},
{
"epoch": 0.2815416399515566,
"grad_norm": 0.63671875,
"learning_rate": 9.363813197473291e-05,
"loss": 1.0178,
"step": 988
},
{
"epoch": 0.28182660112559665,
"grad_norm": 0.640625,
"learning_rate": 9.36231095011667e-05,
"loss": 1.09,
"step": 989
},
{
"epoch": 0.2821115622996367,
"grad_norm": 0.609375,
"learning_rate": 9.360807051997394e-05,
"loss": 0.9619,
"step": 990
},
{
"epoch": 0.2823965234736767,
"grad_norm": 0.62109375,
"learning_rate": 9.359301503684561e-05,
"loss": 0.9254,
"step": 991
},
{
"epoch": 0.28268148464771675,
"grad_norm": 0.59375,
"learning_rate": 9.357794305747891e-05,
"loss": 0.9638,
"step": 992
},
{
"epoch": 0.2829664458217568,
"grad_norm": 0.59765625,
"learning_rate": 9.356285458757731e-05,
"loss": 0.8641,
"step": 993
},
{
"epoch": 0.2832514069957968,
"grad_norm": 0.62890625,
"learning_rate": 9.354774963285051e-05,
"loss": 1.027,
"step": 994
},
{
"epoch": 0.28353636816983685,
"grad_norm": 0.73046875,
"learning_rate": 9.353262819901444e-05,
"loss": 0.9502,
"step": 995
},
{
"epoch": 0.2838213293438769,
"grad_norm": 0.60546875,
"learning_rate": 9.351749029179128e-05,
"loss": 0.9592,
"step": 996
},
{
"epoch": 0.2841062905179169,
"grad_norm": 0.66796875,
"learning_rate": 9.350233591690943e-05,
"loss": 0.9631,
"step": 997
},
{
"epoch": 0.28439125169195695,
"grad_norm": 0.578125,
"learning_rate": 9.348716508010354e-05,
"loss": 0.9393,
"step": 998
},
{
"epoch": 0.284676212865997,
"grad_norm": 0.65625,
"learning_rate": 9.347197778711446e-05,
"loss": 0.934,
"step": 999
},
{
"epoch": 0.28496117404003707,
"grad_norm": 0.640625,
"learning_rate": 9.345677404368931e-05,
"loss": 1.0546,
"step": 1000
},
{
"epoch": 0.2852461352140771,
"grad_norm": 0.625,
"learning_rate": 9.344155385558138e-05,
"loss": 1.0737,
"step": 1001
},
{
"epoch": 0.28553109638811713,
"grad_norm": 0.66015625,
"learning_rate": 9.342631722855024e-05,
"loss": 1.0648,
"step": 1002
},
{
"epoch": 0.28581605756215717,
"grad_norm": 0.59375,
"learning_rate": 9.341106416836165e-05,
"loss": 0.8566,
"step": 1003
},
{
"epoch": 0.2861010187361972,
"grad_norm": 0.58984375,
"learning_rate": 9.339579468078757e-05,
"loss": 0.9621,
"step": 1004
},
{
"epoch": 0.28638597991023723,
"grad_norm": 0.59375,
"learning_rate": 9.338050877160623e-05,
"loss": 0.9685,
"step": 1005
},
{
"epoch": 0.28667094108427726,
"grad_norm": 0.62109375,
"learning_rate": 9.3365206446602e-05,
"loss": 1.1444,
"step": 1006
},
{
"epoch": 0.2869559022583173,
"grad_norm": 0.5859375,
"learning_rate": 9.334988771156556e-05,
"loss": 0.9522,
"step": 1007
},
{
"epoch": 0.28724086343235733,
"grad_norm": 0.60546875,
"learning_rate": 9.333455257229369e-05,
"loss": 0.9159,
"step": 1008
},
{
"epoch": 0.28752582460639736,
"grad_norm": 0.71875,
"learning_rate": 9.33192010345895e-05,
"loss": 1.1429,
"step": 1009
},
{
"epoch": 0.2878107857804374,
"grad_norm": 0.62109375,
"learning_rate": 9.330383310426216e-05,
"loss": 0.9845,
"step": 1010
},
{
"epoch": 0.2880957469544774,
"grad_norm": 0.6640625,
"learning_rate": 9.328844878712716e-05,
"loss": 1.0455,
"step": 1011
},
{
"epoch": 0.2883807081285175,
"grad_norm": 0.6171875,
"learning_rate": 9.327304808900617e-05,
"loss": 0.9269,
"step": 1012
},
{
"epoch": 0.28866566930255755,
"grad_norm": 0.65625,
"learning_rate": 9.325763101572701e-05,
"loss": 1.0034,
"step": 1013
},
{
"epoch": 0.2889506304765976,
"grad_norm": 0.609375,
"learning_rate": 9.324219757312374e-05,
"loss": 0.9634,
"step": 1014
},
{
"epoch": 0.2892355916506376,
"grad_norm": 0.59765625,
"learning_rate": 9.32267477670366e-05,
"loss": 0.9526,
"step": 1015
},
{
"epoch": 0.28952055282467765,
"grad_norm": 0.57421875,
"learning_rate": 9.321128160331201e-05,
"loss": 0.878,
"step": 1016
},
{
"epoch": 0.2898055139987177,
"grad_norm": 0.58984375,
"learning_rate": 9.319579908780263e-05,
"loss": 0.8794,
"step": 1017
},
{
"epoch": 0.2900904751727577,
"grad_norm": 0.6171875,
"learning_rate": 9.318030022636723e-05,
"loss": 0.961,
"step": 1018
},
{
"epoch": 0.29037543634679774,
"grad_norm": 0.6328125,
"learning_rate": 9.316478502487085e-05,
"loss": 1.0243,
"step": 1019
},
{
"epoch": 0.2906603975208378,
"grad_norm": 0.63671875,
"learning_rate": 9.314925348918462e-05,
"loss": 0.9801,
"step": 1020
},
{
"epoch": 0.2909453586948778,
"grad_norm": 0.62109375,
"learning_rate": 9.313370562518596e-05,
"loss": 1.038,
"step": 1021
},
{
"epoch": 0.29123031986891784,
"grad_norm": 0.58984375,
"learning_rate": 9.311814143875834e-05,
"loss": 0.9654,
"step": 1022
},
{
"epoch": 0.2915152810429579,
"grad_norm": 0.61328125,
"learning_rate": 9.310256093579155e-05,
"loss": 0.9441,
"step": 1023
},
{
"epoch": 0.2918002422169979,
"grad_norm": 0.66015625,
"learning_rate": 9.308696412218143e-05,
"loss": 0.9697,
"step": 1024
},
{
"epoch": 0.292085203391038,
"grad_norm": 0.5625,
"learning_rate": 9.307135100383006e-05,
"loss": 0.8806,
"step": 1025
},
{
"epoch": 0.29237016456507803,
"grad_norm": 0.6328125,
"learning_rate": 9.30557215866457e-05,
"loss": 1.1131,
"step": 1026
},
{
"epoch": 0.29265512573911806,
"grad_norm": 0.640625,
"learning_rate": 9.304007587654271e-05,
"loss": 1.0907,
"step": 1027
},
{
"epoch": 0.2929400869131581,
"grad_norm": 0.58203125,
"learning_rate": 9.302441387944167e-05,
"loss": 0.8848,
"step": 1028
},
{
"epoch": 0.2932250480871981,
"grad_norm": 0.640625,
"learning_rate": 9.300873560126932e-05,
"loss": 0.969,
"step": 1029
},
{
"epoch": 0.29351000926123816,
"grad_norm": 0.59765625,
"learning_rate": 9.299304104795854e-05,
"loss": 1.0513,
"step": 1030
},
{
"epoch": 0.2937949704352782,
"grad_norm": 0.625,
"learning_rate": 9.297733022544839e-05,
"loss": 0.945,
"step": 1031
},
{
"epoch": 0.2940799316093182,
"grad_norm": 0.63671875,
"learning_rate": 9.296160313968408e-05,
"loss": 1.0124,
"step": 1032
},
{
"epoch": 0.29436489278335826,
"grad_norm": 0.62109375,
"learning_rate": 9.294585979661694e-05,
"loss": 0.9433,
"step": 1033
},
{
"epoch": 0.2946498539573983,
"grad_norm": 0.640625,
"learning_rate": 9.293010020220452e-05,
"loss": 1.0351,
"step": 1034
},
{
"epoch": 0.2949348151314383,
"grad_norm": 0.6171875,
"learning_rate": 9.291432436241048e-05,
"loss": 0.974,
"step": 1035
},
{
"epoch": 0.29521977630547835,
"grad_norm": 0.578125,
"learning_rate": 9.28985322832046e-05,
"loss": 0.9246,
"step": 1036
},
{
"epoch": 0.29550473747951844,
"grad_norm": 0.60546875,
"learning_rate": 9.288272397056286e-05,
"loss": 0.9589,
"step": 1037
},
{
"epoch": 0.2957896986535585,
"grad_norm": 0.6015625,
"learning_rate": 9.286689943046734e-05,
"loss": 0.9765,
"step": 1038
},
{
"epoch": 0.2960746598275985,
"grad_norm": 0.59375,
"learning_rate": 9.285105866890629e-05,
"loss": 0.8761,
"step": 1039
},
{
"epoch": 0.29635962100163854,
"grad_norm": 0.62890625,
"learning_rate": 9.28352016918741e-05,
"loss": 0.9729,
"step": 1040
},
{
"epoch": 0.2966445821756786,
"grad_norm": 0.5859375,
"learning_rate": 9.281932850537125e-05,
"loss": 0.8454,
"step": 1041
},
{
"epoch": 0.2969295433497186,
"grad_norm": 0.609375,
"learning_rate": 9.280343911540444e-05,
"loss": 0.939,
"step": 1042
},
{
"epoch": 0.29721450452375864,
"grad_norm": 0.66796875,
"learning_rate": 9.278753352798639e-05,
"loss": 1.1006,
"step": 1043
},
{
"epoch": 0.29749946569779867,
"grad_norm": 0.62890625,
"learning_rate": 9.277161174913605e-05,
"loss": 0.9765,
"step": 1044
},
{
"epoch": 0.2977844268718387,
"grad_norm": 0.58984375,
"learning_rate": 9.275567378487841e-05,
"loss": 0.9614,
"step": 1045
},
{
"epoch": 0.29806938804587874,
"grad_norm": 0.61328125,
"learning_rate": 9.27397196412447e-05,
"loss": 0.9823,
"step": 1046
},
{
"epoch": 0.29835434921991877,
"grad_norm": 0.64453125,
"learning_rate": 9.272374932427215e-05,
"loss": 1.0585,
"step": 1047
},
{
"epoch": 0.2986393103939588,
"grad_norm": 0.609375,
"learning_rate": 9.270776284000417e-05,
"loss": 0.9719,
"step": 1048
},
{
"epoch": 0.29892427156799883,
"grad_norm": 0.6171875,
"learning_rate": 9.269176019449028e-05,
"loss": 0.982,
"step": 1049
},
{
"epoch": 0.2992092327420389,
"grad_norm": 0.6328125,
"learning_rate": 9.267574139378614e-05,
"loss": 0.9483,
"step": 1050
},
{
"epoch": 0.29949419391607895,
"grad_norm": 0.6328125,
"learning_rate": 9.265970644395347e-05,
"loss": 0.9968,
"step": 1051
},
{
"epoch": 0.299779155090119,
"grad_norm": 0.6328125,
"learning_rate": 9.264365535106016e-05,
"loss": 1.0735,
"step": 1052
},
{
"epoch": 0.300064116264159,
"grad_norm": 0.6015625,
"learning_rate": 9.262758812118015e-05,
"loss": 0.9386,
"step": 1053
},
{
"epoch": 0.30034907743819905,
"grad_norm": 0.703125,
"learning_rate": 9.261150476039354e-05,
"loss": 1.0808,
"step": 1054
},
{
"epoch": 0.3006340386122391,
"grad_norm": 0.59765625,
"learning_rate": 9.25954052747865e-05,
"loss": 1.0207,
"step": 1055
},
{
"epoch": 0.3009189997862791,
"grad_norm": 0.61328125,
"learning_rate": 9.25792896704513e-05,
"loss": 0.9417,
"step": 1056
},
{
"epoch": 0.30120396096031915,
"grad_norm": 0.609375,
"learning_rate": 9.256315795348635e-05,
"loss": 0.9978,
"step": 1057
},
{
"epoch": 0.3014889221343592,
"grad_norm": 0.58984375,
"learning_rate": 9.254701012999612e-05,
"loss": 0.805,
"step": 1058
},
{
"epoch": 0.3017738833083992,
"grad_norm": 0.640625,
"learning_rate": 9.253084620609116e-05,
"loss": 1.0497,
"step": 1059
},
{
"epoch": 0.30205884448243925,
"grad_norm": 0.61328125,
"learning_rate": 9.251466618788815e-05,
"loss": 0.9615,
"step": 1060
},
{
"epoch": 0.3023438056564793,
"grad_norm": 0.60546875,
"learning_rate": 9.249847008150987e-05,
"loss": 0.9506,
"step": 1061
},
{
"epoch": 0.30262876683051937,
"grad_norm": 0.65625,
"learning_rate": 9.248225789308514e-05,
"loss": 1.0929,
"step": 1062
},
{
"epoch": 0.3029137280045594,
"grad_norm": 0.62890625,
"learning_rate": 9.246602962874891e-05,
"loss": 0.8992,
"step": 1063
},
{
"epoch": 0.30319868917859943,
"grad_norm": 0.65234375,
"learning_rate": 9.244978529464216e-05,
"loss": 1.1483,
"step": 1064
},
{
"epoch": 0.30348365035263947,
"grad_norm": 0.59765625,
"learning_rate": 9.243352489691201e-05,
"loss": 0.9486,
"step": 1065
},
{
"epoch": 0.3037686115266795,
"grad_norm": 0.60546875,
"learning_rate": 9.241724844171164e-05,
"loss": 1.056,
"step": 1066
},
{
"epoch": 0.30405357270071953,
"grad_norm": 0.64453125,
"learning_rate": 9.240095593520028e-05,
"loss": 1.053,
"step": 1067
},
{
"epoch": 0.30433853387475956,
"grad_norm": 0.58984375,
"learning_rate": 9.238464738354326e-05,
"loss": 1.0057,
"step": 1068
},
{
"epoch": 0.3046234950487996,
"grad_norm": 0.625,
"learning_rate": 9.2368322792912e-05,
"loss": 1.0151,
"step": 1069
},
{
"epoch": 0.30490845622283963,
"grad_norm": 0.65234375,
"learning_rate": 9.235198216948392e-05,
"loss": 0.9603,
"step": 1070
},
{
"epoch": 0.30519341739687966,
"grad_norm": 0.71875,
"learning_rate": 9.233562551944262e-05,
"loss": 1.2601,
"step": 1071
},
{
"epoch": 0.3054783785709197,
"grad_norm": 0.6328125,
"learning_rate": 9.231925284897762e-05,
"loss": 0.9887,
"step": 1072
},
{
"epoch": 0.30576333974495973,
"grad_norm": 0.65234375,
"learning_rate": 9.230286416428462e-05,
"loss": 1.0693,
"step": 1073
},
{
"epoch": 0.30604830091899976,
"grad_norm": 0.64453125,
"learning_rate": 9.228645947156533e-05,
"loss": 1.0099,
"step": 1074
},
{
"epoch": 0.30633326209303985,
"grad_norm": 0.6171875,
"learning_rate": 9.227003877702755e-05,
"loss": 0.9986,
"step": 1075
},
{
"epoch": 0.3066182232670799,
"grad_norm": 0.6484375,
"learning_rate": 9.225360208688507e-05,
"loss": 0.9211,
"step": 1076
},
{
"epoch": 0.3069031844411199,
"grad_norm": 0.58984375,
"learning_rate": 9.223714940735781e-05,
"loss": 0.9174,
"step": 1077
},
{
"epoch": 0.30718814561515995,
"grad_norm": 0.6484375,
"learning_rate": 9.22206807446717e-05,
"loss": 1.1184,
"step": 1078
},
{
"epoch": 0.3074731067892,
"grad_norm": 0.58203125,
"learning_rate": 9.220419610505873e-05,
"loss": 0.9029,
"step": 1079
},
{
"epoch": 0.30775806796324,
"grad_norm": 0.62109375,
"learning_rate": 9.21876954947569e-05,
"loss": 1.035,
"step": 1080
},
{
"epoch": 0.30804302913728004,
"grad_norm": 0.61328125,
"learning_rate": 9.21711789200103e-05,
"loss": 1.0567,
"step": 1081
},
{
"epoch": 0.3083279903113201,
"grad_norm": 0.63671875,
"learning_rate": 9.215464638706906e-05,
"loss": 1.0679,
"step": 1082
},
{
"epoch": 0.3086129514853601,
"grad_norm": 0.58203125,
"learning_rate": 9.213809790218933e-05,
"loss": 0.908,
"step": 1083
},
{
"epoch": 0.30889791265940014,
"grad_norm": 0.63671875,
"learning_rate": 9.212153347163326e-05,
"loss": 1.051,
"step": 1084
},
{
"epoch": 0.3091828738334402,
"grad_norm": 0.62890625,
"learning_rate": 9.210495310166913e-05,
"loss": 1.0482,
"step": 1085
},
{
"epoch": 0.3094678350074802,
"grad_norm": 0.6171875,
"learning_rate": 9.208835679857117e-05,
"loss": 1.0254,
"step": 1086
},
{
"epoch": 0.30975279618152024,
"grad_norm": 0.6484375,
"learning_rate": 9.207174456861967e-05,
"loss": 1.0842,
"step": 1087
},
{
"epoch": 0.31003775735556033,
"grad_norm": 0.59765625,
"learning_rate": 9.205511641810093e-05,
"loss": 0.9351,
"step": 1088
},
{
"epoch": 0.31032271852960036,
"grad_norm": 0.62890625,
"learning_rate": 9.203847235330731e-05,
"loss": 1.0185,
"step": 1089
},
{
"epoch": 0.3106076797036404,
"grad_norm": 0.62890625,
"learning_rate": 9.202181238053715e-05,
"loss": 1.0603,
"step": 1090
},
{
"epoch": 0.3108926408776804,
"grad_norm": 0.6953125,
"learning_rate": 9.200513650609484e-05,
"loss": 1.0863,
"step": 1091
},
{
"epoch": 0.31117760205172046,
"grad_norm": 0.58984375,
"learning_rate": 9.198844473629076e-05,
"loss": 0.9454,
"step": 1092
},
{
"epoch": 0.3114625632257605,
"grad_norm": 0.59375,
"learning_rate": 9.197173707744134e-05,
"loss": 1.0291,
"step": 1093
},
{
"epoch": 0.3117475243998005,
"grad_norm": 0.61328125,
"learning_rate": 9.1955013535869e-05,
"loss": 0.9479,
"step": 1094
},
{
"epoch": 0.31203248557384056,
"grad_norm": 0.578125,
"learning_rate": 9.193827411790215e-05,
"loss": 0.9679,
"step": 1095
},
{
"epoch": 0.3123174467478806,
"grad_norm": 0.6171875,
"learning_rate": 9.192151882987528e-05,
"loss": 0.9627,
"step": 1096
},
{
"epoch": 0.3126024079219206,
"grad_norm": 0.6171875,
"learning_rate": 9.19047476781288e-05,
"loss": 1.0748,
"step": 1097
},
{
"epoch": 0.31288736909596065,
"grad_norm": 0.59765625,
"learning_rate": 9.188796066900916e-05,
"loss": 0.944,
"step": 1098
},
{
"epoch": 0.3131723302700007,
"grad_norm": 0.59375,
"learning_rate": 9.187115780886883e-05,
"loss": 1.0599,
"step": 1099
},
{
"epoch": 0.3134572914440408,
"grad_norm": 0.57421875,
"learning_rate": 9.185433910406624e-05,
"loss": 0.8577,
"step": 1100
},
{
"epoch": 0.3137422526180808,
"grad_norm": 0.60546875,
"learning_rate": 9.183750456096587e-05,
"loss": 1.0378,
"step": 1101
},
{
"epoch": 0.31402721379212084,
"grad_norm": 0.61328125,
"learning_rate": 9.182065418593811e-05,
"loss": 1.0227,
"step": 1102
},
{
"epoch": 0.3143121749661609,
"grad_norm": 0.6171875,
"learning_rate": 9.180378798535943e-05,
"loss": 1.025,
"step": 1103
},
{
"epoch": 0.3145971361402009,
"grad_norm": 0.61328125,
"learning_rate": 9.178690596561224e-05,
"loss": 0.915,
"step": 1104
},
{
"epoch": 0.31488209731424094,
"grad_norm": 0.57421875,
"learning_rate": 9.177000813308493e-05,
"loss": 0.8836,
"step": 1105
},
{
"epoch": 0.31516705848828097,
"grad_norm": 0.6328125,
"learning_rate": 9.17530944941719e-05,
"loss": 0.9882,
"step": 1106
},
{
"epoch": 0.315452019662321,
"grad_norm": 0.58203125,
"learning_rate": 9.173616505527352e-05,
"loss": 0.9752,
"step": 1107
},
{
"epoch": 0.31573698083636104,
"grad_norm": 0.61328125,
"learning_rate": 9.171921982279612e-05,
"loss": 0.9841,
"step": 1108
},
{
"epoch": 0.31602194201040107,
"grad_norm": 0.5859375,
"learning_rate": 9.170225880315207e-05,
"loss": 0.8979,
"step": 1109
},
{
"epoch": 0.3163069031844411,
"grad_norm": 0.60546875,
"learning_rate": 9.16852820027596e-05,
"loss": 0.94,
"step": 1110
},
{
"epoch": 0.31659186435848113,
"grad_norm": 0.6328125,
"learning_rate": 9.166828942804306e-05,
"loss": 1.0453,
"step": 1111
},
{
"epoch": 0.31687682553252117,
"grad_norm": 0.59375,
"learning_rate": 9.165128108543263e-05,
"loss": 0.7893,
"step": 1112
},
{
"epoch": 0.31716178670656126,
"grad_norm": 0.62890625,
"learning_rate": 9.163425698136454e-05,
"loss": 1.0799,
"step": 1113
},
{
"epoch": 0.3174467478806013,
"grad_norm": 0.59375,
"learning_rate": 9.161721712228097e-05,
"loss": 0.8786,
"step": 1114
},
{
"epoch": 0.3177317090546413,
"grad_norm": 0.59765625,
"learning_rate": 9.160016151463004e-05,
"loss": 0.9087,
"step": 1115
},
{
"epoch": 0.31801667022868135,
"grad_norm": 0.59765625,
"learning_rate": 9.158309016486586e-05,
"loss": 1.0729,
"step": 1116
},
{
"epoch": 0.3183016314027214,
"grad_norm": 0.6171875,
"learning_rate": 9.156600307944845e-05,
"loss": 1.0363,
"step": 1117
},
{
"epoch": 0.3185865925767614,
"grad_norm": 0.5859375,
"learning_rate": 9.154890026484385e-05,
"loss": 0.9994,
"step": 1118
},
{
"epoch": 0.31887155375080145,
"grad_norm": 0.61328125,
"learning_rate": 9.153178172752397e-05,
"loss": 0.9361,
"step": 1119
},
{
"epoch": 0.3191565149248415,
"grad_norm": 0.578125,
"learning_rate": 9.151464747396675e-05,
"loss": 0.91,
"step": 1120
},
{
"epoch": 0.3194414760988815,
"grad_norm": 0.60546875,
"learning_rate": 9.149749751065605e-05,
"loss": 1.0182,
"step": 1121
},
{
"epoch": 0.31972643727292155,
"grad_norm": 0.578125,
"learning_rate": 9.148033184408166e-05,
"loss": 0.996,
"step": 1122
},
{
"epoch": 0.3200113984469616,
"grad_norm": 0.61328125,
"learning_rate": 9.146315048073931e-05,
"loss": 1.0218,
"step": 1123
},
{
"epoch": 0.3202963596210016,
"grad_norm": 0.671875,
"learning_rate": 9.144595342713069e-05,
"loss": 1.0173,
"step": 1124
},
{
"epoch": 0.3205813207950417,
"grad_norm": 0.625,
"learning_rate": 9.142874068976341e-05,
"loss": 0.9903,
"step": 1125
},
{
"epoch": 0.32086628196908173,
"grad_norm": 0.59765625,
"learning_rate": 9.141151227515103e-05,
"loss": 0.9324,
"step": 1126
},
{
"epoch": 0.32115124314312177,
"grad_norm": 0.59765625,
"learning_rate": 9.139426818981305e-05,
"loss": 0.9376,
"step": 1127
},
{
"epoch": 0.3214362043171618,
"grad_norm": 0.61328125,
"learning_rate": 9.137700844027486e-05,
"loss": 1.0273,
"step": 1128
},
{
"epoch": 0.32172116549120183,
"grad_norm": 0.6171875,
"learning_rate": 9.135973303306782e-05,
"loss": 1.064,
"step": 1129
},
{
"epoch": 0.32200612666524187,
"grad_norm": 0.66015625,
"learning_rate": 9.134244197472917e-05,
"loss": 1.0161,
"step": 1130
},
{
"epoch": 0.3222910878392819,
"grad_norm": 0.62109375,
"learning_rate": 9.132513527180215e-05,
"loss": 0.9926,
"step": 1131
},
{
"epoch": 0.32257604901332193,
"grad_norm": 0.67578125,
"learning_rate": 9.130781293083583e-05,
"loss": 1.0905,
"step": 1132
},
{
"epoch": 0.32286101018736196,
"grad_norm": 0.59375,
"learning_rate": 9.129047495838526e-05,
"loss": 0.9715,
"step": 1133
},
{
"epoch": 0.323145971361402,
"grad_norm": 0.62109375,
"learning_rate": 9.127312136101139e-05,
"loss": 1.0578,
"step": 1134
},
{
"epoch": 0.32343093253544203,
"grad_norm": 0.59375,
"learning_rate": 9.125575214528104e-05,
"loss": 0.9972,
"step": 1135
},
{
"epoch": 0.32371589370948206,
"grad_norm": 0.63671875,
"learning_rate": 9.1238367317767e-05,
"loss": 1.0004,
"step": 1136
},
{
"epoch": 0.3240008548835221,
"grad_norm": 0.62109375,
"learning_rate": 9.122096688504795e-05,
"loss": 1.0395,
"step": 1137
},
{
"epoch": 0.3242858160575622,
"grad_norm": 0.59765625,
"learning_rate": 9.120355085370847e-05,
"loss": 0.9833,
"step": 1138
},
{
"epoch": 0.3245707772316022,
"grad_norm": 0.59765625,
"learning_rate": 9.118611923033904e-05,
"loss": 0.8799,
"step": 1139
},
{
"epoch": 0.32485573840564225,
"grad_norm": 0.56640625,
"learning_rate": 9.116867202153603e-05,
"loss": 0.8872,
"step": 1140
},
{
"epoch": 0.3251406995796823,
"grad_norm": 0.5859375,
"learning_rate": 9.115120923390174e-05,
"loss": 0.9736,
"step": 1141
},
{
"epoch": 0.3254256607537223,
"grad_norm": 0.6328125,
"learning_rate": 9.113373087404433e-05,
"loss": 1.0867,
"step": 1142
},
{
"epoch": 0.32571062192776234,
"grad_norm": 0.5859375,
"learning_rate": 9.111623694857786e-05,
"loss": 1.0157,
"step": 1143
},
{
"epoch": 0.3259955831018024,
"grad_norm": 0.62890625,
"learning_rate": 9.109872746412233e-05,
"loss": 1.0677,
"step": 1144
},
{
"epoch": 0.3262805442758424,
"grad_norm": 0.59375,
"learning_rate": 9.108120242730356e-05,
"loss": 0.9453,
"step": 1145
},
{
"epoch": 0.32656550544988244,
"grad_norm": 0.6015625,
"learning_rate": 9.106366184475326e-05,
"loss": 0.8899,
"step": 1146
},
{
"epoch": 0.3268504666239225,
"grad_norm": 0.609375,
"learning_rate": 9.104610572310908e-05,
"loss": 1.0708,
"step": 1147
},
{
"epoch": 0.3271354277979625,
"grad_norm": 0.609375,
"learning_rate": 9.10285340690145e-05,
"loss": 1.0211,
"step": 1148
},
{
"epoch": 0.32742038897200254,
"grad_norm": 0.59765625,
"learning_rate": 9.101094688911888e-05,
"loss": 0.9897,
"step": 1149
},
{
"epoch": 0.32770535014604263,
"grad_norm": 0.58203125,
"learning_rate": 9.099334419007751e-05,
"loss": 0.9818,
"step": 1150
},
{
"epoch": 0.32799031132008266,
"grad_norm": 0.61328125,
"learning_rate": 9.097572597855146e-05,
"loss": 1.0104,
"step": 1151
},
{
"epoch": 0.3282752724941227,
"grad_norm": 0.57421875,
"learning_rate": 9.095809226120775e-05,
"loss": 0.8764,
"step": 1152
},
{
"epoch": 0.3285602336681627,
"grad_norm": 0.5703125,
"learning_rate": 9.094044304471923e-05,
"loss": 0.925,
"step": 1153
},
{
"epoch": 0.32884519484220276,
"grad_norm": 0.6171875,
"learning_rate": 9.092277833576464e-05,
"loss": 1.007,
"step": 1154
},
{
"epoch": 0.3291301560162428,
"grad_norm": 0.57421875,
"learning_rate": 9.090509814102852e-05,
"loss": 0.9182,
"step": 1155
},
{
"epoch": 0.3294151171902828,
"grad_norm": 0.62890625,
"learning_rate": 9.088740246720136e-05,
"loss": 1.0229,
"step": 1156
},
{
"epoch": 0.32970007836432286,
"grad_norm": 0.65625,
"learning_rate": 9.086969132097944e-05,
"loss": 1.1545,
"step": 1157
},
{
"epoch": 0.3299850395383629,
"grad_norm": 0.58203125,
"learning_rate": 9.085196470906494e-05,
"loss": 0.9633,
"step": 1158
},
{
"epoch": 0.3302700007124029,
"grad_norm": 0.609375,
"learning_rate": 9.083422263816586e-05,
"loss": 1.0221,
"step": 1159
},
{
"epoch": 0.33055496188644296,
"grad_norm": 0.59375,
"learning_rate": 9.081646511499604e-05,
"loss": 1.0424,
"step": 1160
},
{
"epoch": 0.330839923060483,
"grad_norm": 0.6015625,
"learning_rate": 9.079869214627522e-05,
"loss": 0.9889,
"step": 1161
},
{
"epoch": 0.331124884234523,
"grad_norm": 0.6015625,
"learning_rate": 9.078090373872893e-05,
"loss": 0.9719,
"step": 1162
},
{
"epoch": 0.3314098454085631,
"grad_norm": 0.6015625,
"learning_rate": 9.076309989908859e-05,
"loss": 1.0511,
"step": 1163
},
{
"epoch": 0.33169480658260314,
"grad_norm": 0.58203125,
"learning_rate": 9.07452806340914e-05,
"loss": 0.9553,
"step": 1164
},
{
"epoch": 0.3319797677566432,
"grad_norm": 0.60546875,
"learning_rate": 9.072744595048047e-05,
"loss": 0.9829,
"step": 1165
},
{
"epoch": 0.3322647289306832,
"grad_norm": 0.62109375,
"learning_rate": 9.070959585500468e-05,
"loss": 0.9834,
"step": 1166
},
{
"epoch": 0.33254969010472324,
"grad_norm": 0.62109375,
"learning_rate": 9.069173035441878e-05,
"loss": 1.0448,
"step": 1167
},
{
"epoch": 0.33283465127876327,
"grad_norm": 0.62109375,
"learning_rate": 9.067384945548333e-05,
"loss": 0.9865,
"step": 1168
},
{
"epoch": 0.3331196124528033,
"grad_norm": 0.62109375,
"learning_rate": 9.065595316496474e-05,
"loss": 0.9739,
"step": 1169
},
{
"epoch": 0.33340457362684334,
"grad_norm": 0.6171875,
"learning_rate": 9.063804148963522e-05,
"loss": 1.0073,
"step": 1170
},
{
"epoch": 0.33368953480088337,
"grad_norm": 0.58984375,
"learning_rate": 9.062011443627279e-05,
"loss": 0.9504,
"step": 1171
},
{
"epoch": 0.3339744959749234,
"grad_norm": 0.6484375,
"learning_rate": 9.060217201166135e-05,
"loss": 1.1431,
"step": 1172
},
{
"epoch": 0.33425945714896343,
"grad_norm": 0.578125,
"learning_rate": 9.058421422259057e-05,
"loss": 0.9542,
"step": 1173
},
{
"epoch": 0.33454441832300347,
"grad_norm": 0.5859375,
"learning_rate": 9.056624107585592e-05,
"loss": 0.9396,
"step": 1174
},
{
"epoch": 0.33482937949704356,
"grad_norm": 0.63671875,
"learning_rate": 9.054825257825872e-05,
"loss": 0.9858,
"step": 1175
},
{
"epoch": 0.3351143406710836,
"grad_norm": 0.62109375,
"learning_rate": 9.053024873660608e-05,
"loss": 1.0003,
"step": 1176
},
{
"epoch": 0.3353993018451236,
"grad_norm": 0.58203125,
"learning_rate": 9.05122295577109e-05,
"loss": 0.981,
"step": 1177
},
{
"epoch": 0.33568426301916365,
"grad_norm": 0.62109375,
"learning_rate": 9.049419504839191e-05,
"loss": 1.0826,
"step": 1178
},
{
"epoch": 0.3359692241932037,
"grad_norm": 0.578125,
"learning_rate": 9.047614521547367e-05,
"loss": 0.9341,
"step": 1179
},
{
"epoch": 0.3362541853672437,
"grad_norm": 0.60546875,
"learning_rate": 9.045808006578646e-05,
"loss": 0.9537,
"step": 1180
},
{
"epoch": 0.33653914654128375,
"grad_norm": 0.625,
"learning_rate": 9.043999960616643e-05,
"loss": 0.9899,
"step": 1181
},
{
"epoch": 0.3368241077153238,
"grad_norm": 0.6328125,
"learning_rate": 9.042190384345546e-05,
"loss": 0.9931,
"step": 1182
},
{
"epoch": 0.3371090688893638,
"grad_norm": 0.56640625,
"learning_rate": 9.040379278450128e-05,
"loss": 0.9624,
"step": 1183
},
{
"epoch": 0.33739403006340385,
"grad_norm": 0.6328125,
"learning_rate": 9.038566643615735e-05,
"loss": 1.0492,
"step": 1184
},
{
"epoch": 0.3376789912374439,
"grad_norm": 0.58984375,
"learning_rate": 9.036752480528297e-05,
"loss": 0.8723,
"step": 1185
},
{
"epoch": 0.3379639524114839,
"grad_norm": 0.58203125,
"learning_rate": 9.034936789874319e-05,
"loss": 1.0188,
"step": 1186
},
{
"epoch": 0.33824891358552395,
"grad_norm": 0.61328125,
"learning_rate": 9.033119572340887e-05,
"loss": 1.0452,
"step": 1187
},
{
"epoch": 0.33853387475956404,
"grad_norm": 0.56640625,
"learning_rate": 9.031300828615662e-05,
"loss": 0.9278,
"step": 1188
},
{
"epoch": 0.33881883593360407,
"grad_norm": 0.6015625,
"learning_rate": 9.029480559386882e-05,
"loss": 1.0037,
"step": 1189
},
{
"epoch": 0.3391037971076441,
"grad_norm": 0.5703125,
"learning_rate": 9.027658765343365e-05,
"loss": 0.9879,
"step": 1190
},
{
"epoch": 0.33938875828168413,
"grad_norm": 0.5859375,
"learning_rate": 9.025835447174505e-05,
"loss": 0.8852,
"step": 1191
},
{
"epoch": 0.33967371945572417,
"grad_norm": 0.57421875,
"learning_rate": 9.02401060557027e-05,
"loss": 0.9296,
"step": 1192
},
{
"epoch": 0.3399586806297642,
"grad_norm": 0.58203125,
"learning_rate": 9.022184241221209e-05,
"loss": 0.9042,
"step": 1193
},
{
"epoch": 0.34024364180380423,
"grad_norm": 0.59375,
"learning_rate": 9.020356354818444e-05,
"loss": 1.0113,
"step": 1194
},
{
"epoch": 0.34052860297784426,
"grad_norm": 0.5703125,
"learning_rate": 9.018526947053676e-05,
"loss": 0.9,
"step": 1195
},
{
"epoch": 0.3408135641518843,
"grad_norm": 0.60546875,
"learning_rate": 9.016696018619179e-05,
"loss": 1.04,
"step": 1196
},
{
"epoch": 0.34109852532592433,
"grad_norm": 0.59765625,
"learning_rate": 9.014863570207802e-05,
"loss": 1.0158,
"step": 1197
},
{
"epoch": 0.34138348649996436,
"grad_norm": 0.59375,
"learning_rate": 9.013029602512972e-05,
"loss": 0.9121,
"step": 1198
},
{
"epoch": 0.3416684476740044,
"grad_norm": 0.6484375,
"learning_rate": 9.011194116228689e-05,
"loss": 1.1199,
"step": 1199
},
{
"epoch": 0.3419534088480444,
"grad_norm": 0.62890625,
"learning_rate": 9.009357112049526e-05,
"loss": 1.0077,
"step": 1200
},
{
"epoch": 0.3422383700220845,
"grad_norm": 0.56640625,
"learning_rate": 9.007518590670636e-05,
"loss": 0.8883,
"step": 1201
},
{
"epoch": 0.34252333119612455,
"grad_norm": 0.5859375,
"learning_rate": 9.00567855278774e-05,
"loss": 0.9633,
"step": 1202
},
{
"epoch": 0.3428082923701646,
"grad_norm": 0.6171875,
"learning_rate": 9.003836999097135e-05,
"loss": 0.9557,
"step": 1203
},
{
"epoch": 0.3430932535442046,
"grad_norm": 0.53515625,
"learning_rate": 9.001993930295694e-05,
"loss": 0.8009,
"step": 1204
},
{
"epoch": 0.34337821471824465,
"grad_norm": 0.5625,
"learning_rate": 9.000149347080862e-05,
"loss": 0.921,
"step": 1205
},
{
"epoch": 0.3436631758922847,
"grad_norm": 0.59375,
"learning_rate": 8.998303250150653e-05,
"loss": 0.9377,
"step": 1206
},
{
"epoch": 0.3439481370663247,
"grad_norm": 0.62109375,
"learning_rate": 8.99645564020366e-05,
"loss": 1.0437,
"step": 1207
},
{
"epoch": 0.34423309824036474,
"grad_norm": 0.57421875,
"learning_rate": 8.994606517939043e-05,
"loss": 0.97,
"step": 1208
},
{
"epoch": 0.3445180594144048,
"grad_norm": 0.58984375,
"learning_rate": 8.992755884056542e-05,
"loss": 0.9603,
"step": 1209
},
{
"epoch": 0.3448030205884448,
"grad_norm": 0.6171875,
"learning_rate": 8.99090373925646e-05,
"loss": 1.036,
"step": 1210
},
{
"epoch": 0.34508798176248484,
"grad_norm": 0.5859375,
"learning_rate": 8.989050084239677e-05,
"loss": 1.0138,
"step": 1211
},
{
"epoch": 0.3453729429365249,
"grad_norm": 0.58203125,
"learning_rate": 8.987194919707643e-05,
"loss": 0.9105,
"step": 1212
},
{
"epoch": 0.34565790411056496,
"grad_norm": 0.703125,
"learning_rate": 8.985338246362381e-05,
"loss": 0.9821,
"step": 1213
},
{
"epoch": 0.345942865284605,
"grad_norm": 0.60546875,
"learning_rate": 8.98348006490648e-05,
"loss": 1.0073,
"step": 1214
},
{
"epoch": 0.346227826458645,
"grad_norm": 0.65234375,
"learning_rate": 8.981620376043108e-05,
"loss": 1.0402,
"step": 1215
},
{
"epoch": 0.34651278763268506,
"grad_norm": 0.60546875,
"learning_rate": 8.979759180475992e-05,
"loss": 0.8968,
"step": 1216
},
{
"epoch": 0.3467977488067251,
"grad_norm": 0.578125,
"learning_rate": 8.977896478909445e-05,
"loss": 0.9055,
"step": 1217
},
{
"epoch": 0.3470827099807651,
"grad_norm": 0.640625,
"learning_rate": 8.976032272048333e-05,
"loss": 1.0307,
"step": 1218
},
{
"epoch": 0.34736767115480516,
"grad_norm": 0.62109375,
"learning_rate": 8.974166560598102e-05,
"loss": 1.0401,
"step": 1219
},
{
"epoch": 0.3476526323288452,
"grad_norm": 0.62890625,
"learning_rate": 8.972299345264767e-05,
"loss": 0.9866,
"step": 1220
},
{
"epoch": 0.3479375935028852,
"grad_norm": 0.58984375,
"learning_rate": 8.970430626754905e-05,
"loss": 0.9936,
"step": 1221
},
{
"epoch": 0.34822255467692526,
"grad_norm": 0.5703125,
"learning_rate": 8.96856040577567e-05,
"loss": 0.9534,
"step": 1222
},
{
"epoch": 0.3485075158509653,
"grad_norm": 0.61328125,
"learning_rate": 8.96668868303478e-05,
"loss": 0.9965,
"step": 1223
},
{
"epoch": 0.3487924770250053,
"grad_norm": 0.62109375,
"learning_rate": 8.964815459240522e-05,
"loss": 1.1044,
"step": 1224
},
{
"epoch": 0.34907743819904535,
"grad_norm": 0.59765625,
"learning_rate": 8.962940735101752e-05,
"loss": 0.9528,
"step": 1225
},
{
"epoch": 0.34936239937308544,
"grad_norm": 0.6328125,
"learning_rate": 8.961064511327895e-05,
"loss": 1.083,
"step": 1226
},
{
"epoch": 0.3496473605471255,
"grad_norm": 0.6171875,
"learning_rate": 8.959186788628937e-05,
"loss": 0.9769,
"step": 1227
},
{
"epoch": 0.3499323217211655,
"grad_norm": 0.60546875,
"learning_rate": 8.957307567715441e-05,
"loss": 1.0168,
"step": 1228
},
{
"epoch": 0.35021728289520554,
"grad_norm": 0.57421875,
"learning_rate": 8.95542684929853e-05,
"loss": 0.9218,
"step": 1229
},
{
"epoch": 0.35050224406924557,
"grad_norm": 0.58203125,
"learning_rate": 8.953544634089894e-05,
"loss": 0.9457,
"step": 1230
},
{
"epoch": 0.3507872052432856,
"grad_norm": 0.6328125,
"learning_rate": 8.951660922801792e-05,
"loss": 1.1793,
"step": 1231
},
{
"epoch": 0.35107216641732564,
"grad_norm": 0.6015625,
"learning_rate": 8.949775716147047e-05,
"loss": 1.014,
"step": 1232
},
{
"epoch": 0.35135712759136567,
"grad_norm": 0.5546875,
"learning_rate": 8.947889014839053e-05,
"loss": 0.8847,
"step": 1233
},
{
"epoch": 0.3516420887654057,
"grad_norm": 0.57421875,
"learning_rate": 8.94600081959176e-05,
"loss": 0.9572,
"step": 1234
},
{
"epoch": 0.35192704993944574,
"grad_norm": 0.546875,
"learning_rate": 8.944111131119693e-05,
"loss": 0.8447,
"step": 1235
},
{
"epoch": 0.35221201111348577,
"grad_norm": 0.6015625,
"learning_rate": 8.942219950137935e-05,
"loss": 1.0276,
"step": 1236
},
{
"epoch": 0.3524969722875258,
"grad_norm": 0.58984375,
"learning_rate": 8.94032727736214e-05,
"loss": 0.9163,
"step": 1237
},
{
"epoch": 0.3527819334615659,
"grad_norm": 0.5859375,
"learning_rate": 8.93843311350852e-05,
"loss": 0.9324,
"step": 1238
},
{
"epoch": 0.3530668946356059,
"grad_norm": 0.578125,
"learning_rate": 8.936537459293857e-05,
"loss": 0.9592,
"step": 1239
},
{
"epoch": 0.35335185580964595,
"grad_norm": 0.578125,
"learning_rate": 8.934640315435493e-05,
"loss": 1.0246,
"step": 1240
},
{
"epoch": 0.353636816983686,
"grad_norm": 0.5703125,
"learning_rate": 8.932741682651335e-05,
"loss": 0.9345,
"step": 1241
},
{
"epoch": 0.353921778157726,
"grad_norm": 0.58203125,
"learning_rate": 8.930841561659853e-05,
"loss": 0.9938,
"step": 1242
},
{
"epoch": 0.35420673933176605,
"grad_norm": 0.59375,
"learning_rate": 8.928939953180084e-05,
"loss": 0.912,
"step": 1243
},
{
"epoch": 0.3544917005058061,
"grad_norm": 0.59765625,
"learning_rate": 8.927036857931621e-05,
"loss": 1.0043,
"step": 1244
},
{
"epoch": 0.3547766616798461,
"grad_norm": 0.625,
"learning_rate": 8.925132276634625e-05,
"loss": 1.0069,
"step": 1245
},
{
"epoch": 0.35506162285388615,
"grad_norm": 0.5625,
"learning_rate": 8.923226210009816e-05,
"loss": 0.8568,
"step": 1246
},
{
"epoch": 0.3553465840279262,
"grad_norm": 0.62890625,
"learning_rate": 8.921318658778482e-05,
"loss": 1.0455,
"step": 1247
},
{
"epoch": 0.3556315452019662,
"grad_norm": 0.609375,
"learning_rate": 8.919409623662463e-05,
"loss": 0.978,
"step": 1248
},
{
"epoch": 0.35591650637600625,
"grad_norm": 0.57421875,
"learning_rate": 8.917499105384168e-05,
"loss": 0.9836,
"step": 1249
},
{
"epoch": 0.3562014675500463,
"grad_norm": 0.58984375,
"learning_rate": 8.915587104666567e-05,
"loss": 0.9179,
"step": 1250
},
{
"epoch": 0.35648642872408637,
"grad_norm": 0.609375,
"learning_rate": 8.913673622233187e-05,
"loss": 1.0516,
"step": 1251
},
{
"epoch": 0.3567713898981264,
"grad_norm": 0.59375,
"learning_rate": 8.911758658808118e-05,
"loss": 0.9401,
"step": 1252
},
{
"epoch": 0.35705635107216643,
"grad_norm": 0.6015625,
"learning_rate": 8.909842215116013e-05,
"loss": 0.939,
"step": 1253
},
{
"epoch": 0.35734131224620647,
"grad_norm": 0.57421875,
"learning_rate": 8.907924291882079e-05,
"loss": 0.9655,
"step": 1254
},
{
"epoch": 0.3576262734202465,
"grad_norm": 0.6015625,
"learning_rate": 8.906004889832089e-05,
"loss": 1.0238,
"step": 1255
},
{
"epoch": 0.35791123459428653,
"grad_norm": 0.58984375,
"learning_rate": 8.904084009692371e-05,
"loss": 0.9008,
"step": 1256
},
{
"epoch": 0.35819619576832656,
"grad_norm": 0.61328125,
"learning_rate": 8.902161652189815e-05,
"loss": 0.9832,
"step": 1257
},
{
"epoch": 0.3584811569423666,
"grad_norm": 0.58203125,
"learning_rate": 8.900237818051869e-05,
"loss": 1.0039,
"step": 1258
},
{
"epoch": 0.35876611811640663,
"grad_norm": 0.6171875,
"learning_rate": 8.898312508006542e-05,
"loss": 1.0734,
"step": 1259
},
{
"epoch": 0.35905107929044666,
"grad_norm": 0.6328125,
"learning_rate": 8.896385722782398e-05,
"loss": 1.0488,
"step": 1260
},
{
"epoch": 0.3593360404644867,
"grad_norm": 0.59765625,
"learning_rate": 8.894457463108562e-05,
"loss": 0.9711,
"step": 1261
},
{
"epoch": 0.3596210016385267,
"grad_norm": 0.609375,
"learning_rate": 8.892527729714712e-05,
"loss": 1.0352,
"step": 1262
},
{
"epoch": 0.3599059628125668,
"grad_norm": 0.59375,
"learning_rate": 8.890596523331094e-05,
"loss": 0.9508,
"step": 1263
},
{
"epoch": 0.36019092398660685,
"grad_norm": 0.5703125,
"learning_rate": 8.8886638446885e-05,
"loss": 1.008,
"step": 1264
},
{
"epoch": 0.3604758851606469,
"grad_norm": 0.625,
"learning_rate": 8.886729694518285e-05,
"loss": 0.9686,
"step": 1265
},
{
"epoch": 0.3607608463346869,
"grad_norm": 0.58984375,
"learning_rate": 8.884794073552363e-05,
"loss": 0.9213,
"step": 1266
},
{
"epoch": 0.36104580750872695,
"grad_norm": 0.625,
"learning_rate": 8.882856982523196e-05,
"loss": 0.9974,
"step": 1267
},
{
"epoch": 0.361330768682767,
"grad_norm": 0.59765625,
"learning_rate": 8.88091842216381e-05,
"loss": 0.9662,
"step": 1268
},
{
"epoch": 0.361615729856807,
"grad_norm": 0.578125,
"learning_rate": 8.878978393207788e-05,
"loss": 0.8937,
"step": 1269
},
{
"epoch": 0.36190069103084704,
"grad_norm": 0.55859375,
"learning_rate": 8.877036896389262e-05,
"loss": 0.8857,
"step": 1270
},
{
"epoch": 0.3621856522048871,
"grad_norm": 0.59765625,
"learning_rate": 8.875093932442924e-05,
"loss": 0.941,
"step": 1271
},
{
"epoch": 0.3624706133789271,
"grad_norm": 0.63671875,
"learning_rate": 8.873149502104019e-05,
"loss": 1.0399,
"step": 1272
},
{
"epoch": 0.36275557455296714,
"grad_norm": 0.62890625,
"learning_rate": 8.871203606108351e-05,
"loss": 1.0476,
"step": 1273
},
{
"epoch": 0.3630405357270072,
"grad_norm": 0.5859375,
"learning_rate": 8.869256245192272e-05,
"loss": 0.9912,
"step": 1274
},
{
"epoch": 0.3633254969010472,
"grad_norm": 0.5546875,
"learning_rate": 8.867307420092695e-05,
"loss": 0.8887,
"step": 1275
},
{
"epoch": 0.3636104580750873,
"grad_norm": 0.609375,
"learning_rate": 8.865357131547081e-05,
"loss": 0.9155,
"step": 1276
},
{
"epoch": 0.3638954192491273,
"grad_norm": 0.59375,
"learning_rate": 8.863405380293451e-05,
"loss": 0.9796,
"step": 1277
},
{
"epoch": 0.36418038042316736,
"grad_norm": 0.609375,
"learning_rate": 8.861452167070375e-05,
"loss": 0.9555,
"step": 1278
},
{
"epoch": 0.3644653415972074,
"grad_norm": 0.58203125,
"learning_rate": 8.859497492616977e-05,
"loss": 0.8128,
"step": 1279
},
{
"epoch": 0.3647503027712474,
"grad_norm": 0.61328125,
"learning_rate": 8.857541357672937e-05,
"loss": 1.0065,
"step": 1280
},
{
"epoch": 0.36503526394528746,
"grad_norm": 0.6171875,
"learning_rate": 8.855583762978482e-05,
"loss": 1.0453,
"step": 1281
},
{
"epoch": 0.3653202251193275,
"grad_norm": 0.59375,
"learning_rate": 8.853624709274398e-05,
"loss": 0.8737,
"step": 1282
},
{
"epoch": 0.3656051862933675,
"grad_norm": 0.56640625,
"learning_rate": 8.851664197302016e-05,
"loss": 1.0193,
"step": 1283
},
{
"epoch": 0.36589014746740756,
"grad_norm": 0.55859375,
"learning_rate": 8.849702227803225e-05,
"loss": 0.9143,
"step": 1284
},
{
"epoch": 0.3661751086414476,
"grad_norm": 0.59765625,
"learning_rate": 8.847738801520463e-05,
"loss": 1.0282,
"step": 1285
},
{
"epoch": 0.3664600698154876,
"grad_norm": 0.59375,
"learning_rate": 8.84577391919672e-05,
"loss": 0.8523,
"step": 1286
},
{
"epoch": 0.36674503098952765,
"grad_norm": 0.58203125,
"learning_rate": 8.843807581575534e-05,
"loss": 0.9639,
"step": 1287
},
{
"epoch": 0.3670299921635677,
"grad_norm": 0.546875,
"learning_rate": 8.841839789400998e-05,
"loss": 0.8762,
"step": 1288
},
{
"epoch": 0.3673149533376078,
"grad_norm": 0.625,
"learning_rate": 8.839870543417752e-05,
"loss": 1.0485,
"step": 1289
},
{
"epoch": 0.3675999145116478,
"grad_norm": 0.5859375,
"learning_rate": 8.83789984437099e-05,
"loss": 0.9031,
"step": 1290
},
{
"epoch": 0.36788487568568784,
"grad_norm": 0.59765625,
"learning_rate": 8.835927693006453e-05,
"loss": 0.981,
"step": 1291
},
{
"epoch": 0.3681698368597279,
"grad_norm": 0.6328125,
"learning_rate": 8.83395409007043e-05,
"loss": 1.066,
"step": 1292
},
{
"epoch": 0.3684547980337679,
"grad_norm": 0.6171875,
"learning_rate": 8.831979036309763e-05,
"loss": 0.9907,
"step": 1293
},
{
"epoch": 0.36873975920780794,
"grad_norm": 0.578125,
"learning_rate": 8.830002532471842e-05,
"loss": 0.9663,
"step": 1294
},
{
"epoch": 0.36902472038184797,
"grad_norm": 0.60546875,
"learning_rate": 8.828024579304603e-05,
"loss": 1.0183,
"step": 1295
},
{
"epoch": 0.369309681555888,
"grad_norm": 0.73828125,
"learning_rate": 8.826045177556535e-05,
"loss": 0.8963,
"step": 1296
},
{
"epoch": 0.36959464272992804,
"grad_norm": 0.578125,
"learning_rate": 8.824064327976672e-05,
"loss": 0.9051,
"step": 1297
},
{
"epoch": 0.36987960390396807,
"grad_norm": 0.62890625,
"learning_rate": 8.822082031314593e-05,
"loss": 1.0171,
"step": 1298
},
{
"epoch": 0.3701645650780081,
"grad_norm": 0.55078125,
"learning_rate": 8.820098288320433e-05,
"loss": 0.891,
"step": 1299
},
{
"epoch": 0.37044952625204813,
"grad_norm": 0.6484375,
"learning_rate": 8.818113099744869e-05,
"loss": 1.0859,
"step": 1300
},
{
"epoch": 0.3707344874260882,
"grad_norm": 0.54296875,
"learning_rate": 8.816126466339126e-05,
"loss": 0.9437,
"step": 1301
},
{
"epoch": 0.37101944860012825,
"grad_norm": 0.6484375,
"learning_rate": 8.814138388854971e-05,
"loss": 0.9826,
"step": 1302
},
{
"epoch": 0.3713044097741683,
"grad_norm": 0.609375,
"learning_rate": 8.812148868044726e-05,
"loss": 0.9527,
"step": 1303
},
{
"epoch": 0.3715893709482083,
"grad_norm": 0.640625,
"learning_rate": 8.810157904661253e-05,
"loss": 1.0328,
"step": 1304
},
{
"epoch": 0.37187433212224835,
"grad_norm": 0.53125,
"learning_rate": 8.808165499457964e-05,
"loss": 0.8642,
"step": 1305
},
{
"epoch": 0.3721592932962884,
"grad_norm": 0.5703125,
"learning_rate": 8.80617165318881e-05,
"loss": 0.9816,
"step": 1306
},
{
"epoch": 0.3724442544703284,
"grad_norm": 0.6015625,
"learning_rate": 8.804176366608297e-05,
"loss": 0.9911,
"step": 1307
},
{
"epoch": 0.37272921564436845,
"grad_norm": 0.6328125,
"learning_rate": 8.802179640471467e-05,
"loss": 1.0987,
"step": 1308
},
{
"epoch": 0.3730141768184085,
"grad_norm": 0.625,
"learning_rate": 8.800181475533912e-05,
"loss": 1.0396,
"step": 1309
},
{
"epoch": 0.3732991379924485,
"grad_norm": 0.609375,
"learning_rate": 8.798181872551769e-05,
"loss": 1.0355,
"step": 1310
},
{
"epoch": 0.37358409916648855,
"grad_norm": 0.64453125,
"learning_rate": 8.796180832281714e-05,
"loss": 1.0007,
"step": 1311
},
{
"epoch": 0.3738690603405286,
"grad_norm": 0.58984375,
"learning_rate": 8.79417835548097e-05,
"loss": 0.9801,
"step": 1312
},
{
"epoch": 0.3741540215145686,
"grad_norm": 0.55859375,
"learning_rate": 8.792174442907307e-05,
"loss": 0.8304,
"step": 1313
},
{
"epoch": 0.3744389826886087,
"grad_norm": 0.59375,
"learning_rate": 8.790169095319032e-05,
"loss": 0.9567,
"step": 1314
},
{
"epoch": 0.37472394386264873,
"grad_norm": 0.59375,
"learning_rate": 8.788162313474998e-05,
"loss": 0.9262,
"step": 1315
},
{
"epoch": 0.37500890503668877,
"grad_norm": 0.71484375,
"learning_rate": 8.786154098134604e-05,
"loss": 0.9575,
"step": 1316
},
{
"epoch": 0.3752938662107288,
"grad_norm": 0.65234375,
"learning_rate": 8.784144450057785e-05,
"loss": 0.9661,
"step": 1317
},
{
"epoch": 0.37557882738476883,
"grad_norm": 0.58984375,
"learning_rate": 8.782133370005023e-05,
"loss": 0.9039,
"step": 1318
},
{
"epoch": 0.37586378855880886,
"grad_norm": 0.578125,
"learning_rate": 8.78012085873734e-05,
"loss": 0.8655,
"step": 1319
},
{
"epoch": 0.3761487497328489,
"grad_norm": 0.69140625,
"learning_rate": 8.778106917016298e-05,
"loss": 1.0576,
"step": 1320
},
{
"epoch": 0.37643371090688893,
"grad_norm": 0.57421875,
"learning_rate": 8.776091545604006e-05,
"loss": 0.9396,
"step": 1321
},
{
"epoch": 0.37671867208092896,
"grad_norm": 0.6796875,
"learning_rate": 8.774074745263106e-05,
"loss": 1.1204,
"step": 1322
},
{
"epoch": 0.377003633254969,
"grad_norm": 0.60546875,
"learning_rate": 8.772056516756788e-05,
"loss": 0.9742,
"step": 1323
},
{
"epoch": 0.377288594429009,
"grad_norm": 0.61328125,
"learning_rate": 8.770036860848779e-05,
"loss": 1.1384,
"step": 1324
},
{
"epoch": 0.37757355560304906,
"grad_norm": 0.60546875,
"learning_rate": 8.768015778303344e-05,
"loss": 1.0291,
"step": 1325
},
{
"epoch": 0.37785851677708915,
"grad_norm": 0.60546875,
"learning_rate": 8.765993269885293e-05,
"loss": 0.9446,
"step": 1326
},
{
"epoch": 0.3781434779511292,
"grad_norm": 0.62109375,
"learning_rate": 8.763969336359972e-05,
"loss": 0.8731,
"step": 1327
},
{
"epoch": 0.3784284391251692,
"grad_norm": 0.6015625,
"learning_rate": 8.761943978493266e-05,
"loss": 0.9028,
"step": 1328
},
{
"epoch": 0.37871340029920925,
"grad_norm": 0.5859375,
"learning_rate": 8.759917197051603e-05,
"loss": 0.8729,
"step": 1329
},
{
"epoch": 0.3789983614732493,
"grad_norm": 0.625,
"learning_rate": 8.757888992801945e-05,
"loss": 1.005,
"step": 1330
},
{
"epoch": 0.3792833226472893,
"grad_norm": 0.6015625,
"learning_rate": 8.755859366511796e-05,
"loss": 0.9928,
"step": 1331
},
{
"epoch": 0.37956828382132934,
"grad_norm": 0.578125,
"learning_rate": 8.753828318949196e-05,
"loss": 0.9139,
"step": 1332
},
{
"epoch": 0.3798532449953694,
"grad_norm": 0.5859375,
"learning_rate": 8.75179585088272e-05,
"loss": 0.9584,
"step": 1333
},
{
"epoch": 0.3801382061694094,
"grad_norm": 0.6328125,
"learning_rate": 8.749761963081488e-05,
"loss": 1.0471,
"step": 1334
},
{
"epoch": 0.38042316734344944,
"grad_norm": 0.59765625,
"learning_rate": 8.747726656315152e-05,
"loss": 0.989,
"step": 1335
},
{
"epoch": 0.3807081285174895,
"grad_norm": 0.5625,
"learning_rate": 8.7456899313539e-05,
"loss": 0.9473,
"step": 1336
},
{
"epoch": 0.3809930896915295,
"grad_norm": 0.60546875,
"learning_rate": 8.743651788968461e-05,
"loss": 1.0057,
"step": 1337
},
{
"epoch": 0.38127805086556954,
"grad_norm": 0.57421875,
"learning_rate": 8.741612229930097e-05,
"loss": 0.8847,
"step": 1338
},
{
"epoch": 0.38156301203960963,
"grad_norm": 0.6015625,
"learning_rate": 8.739571255010606e-05,
"loss": 1.0922,
"step": 1339
},
{
"epoch": 0.38184797321364966,
"grad_norm": 0.578125,
"learning_rate": 8.737528864982328e-05,
"loss": 0.9593,
"step": 1340
},
{
"epoch": 0.3821329343876897,
"grad_norm": 0.58984375,
"learning_rate": 8.735485060618127e-05,
"loss": 0.9891,
"step": 1341
},
{
"epoch": 0.3824178955617297,
"grad_norm": 0.58984375,
"learning_rate": 8.733439842691412e-05,
"loss": 0.9242,
"step": 1342
},
{
"epoch": 0.38270285673576976,
"grad_norm": 0.59765625,
"learning_rate": 8.731393211976123e-05,
"loss": 0.9275,
"step": 1343
},
{
"epoch": 0.3829878179098098,
"grad_norm": 0.55859375,
"learning_rate": 8.729345169246734e-05,
"loss": 0.916,
"step": 1344
},
{
"epoch": 0.3832727790838498,
"grad_norm": 0.59765625,
"learning_rate": 8.727295715278257e-05,
"loss": 1.0325,
"step": 1345
},
{
"epoch": 0.38355774025788986,
"grad_norm": 0.56640625,
"learning_rate": 8.725244850846233e-05,
"loss": 0.9395,
"step": 1346
},
{
"epoch": 0.3838427014319299,
"grad_norm": 0.59375,
"learning_rate": 8.723192576726742e-05,
"loss": 0.934,
"step": 1347
},
{
"epoch": 0.3841276626059699,
"grad_norm": 0.5859375,
"learning_rate": 8.721138893696392e-05,
"loss": 0.9359,
"step": 1348
},
{
"epoch": 0.38441262378000995,
"grad_norm": 0.62109375,
"learning_rate": 8.71908380253233e-05,
"loss": 0.9401,
"step": 1349
},
{
"epoch": 0.38469758495405,
"grad_norm": 0.59765625,
"learning_rate": 8.717027304012228e-05,
"loss": 0.9891,
"step": 1350
},
{
"epoch": 0.3849825461280901,
"grad_norm": 0.61328125,
"learning_rate": 8.714969398914301e-05,
"loss": 1.037,
"step": 1351
},
{
"epoch": 0.3852675073021301,
"grad_norm": 0.61328125,
"learning_rate": 8.712910088017287e-05,
"loss": 1.0466,
"step": 1352
},
{
"epoch": 0.38555246847617014,
"grad_norm": 0.59765625,
"learning_rate": 8.71084937210046e-05,
"loss": 0.9235,
"step": 1353
},
{
"epoch": 0.3858374296502102,
"grad_norm": 0.5859375,
"learning_rate": 8.708787251943625e-05,
"loss": 0.914,
"step": 1354
},
{
"epoch": 0.3861223908242502,
"grad_norm": 0.58203125,
"learning_rate": 8.70672372832712e-05,
"loss": 0.9256,
"step": 1355
},
{
"epoch": 0.38640735199829024,
"grad_norm": 0.5703125,
"learning_rate": 8.70465880203181e-05,
"loss": 0.9568,
"step": 1356
},
{
"epoch": 0.38669231317233027,
"grad_norm": 0.58984375,
"learning_rate": 8.702592473839097e-05,
"loss": 0.9254,
"step": 1357
},
{
"epoch": 0.3869772743463703,
"grad_norm": 0.5703125,
"learning_rate": 8.700524744530907e-05,
"loss": 0.9149,
"step": 1358
},
{
"epoch": 0.38726223552041034,
"grad_norm": 0.58984375,
"learning_rate": 8.698455614889703e-05,
"loss": 0.9946,
"step": 1359
},
{
"epoch": 0.38754719669445037,
"grad_norm": 0.62109375,
"learning_rate": 8.696385085698468e-05,
"loss": 1.0242,
"step": 1360
},
{
"epoch": 0.3878321578684904,
"grad_norm": 0.58984375,
"learning_rate": 8.694313157740727e-05,
"loss": 1.0245,
"step": 1361
},
{
"epoch": 0.38811711904253043,
"grad_norm": 0.65234375,
"learning_rate": 8.692239831800524e-05,
"loss": 1.0409,
"step": 1362
},
{
"epoch": 0.38840208021657047,
"grad_norm": 0.57421875,
"learning_rate": 8.690165108662438e-05,
"loss": 0.8971,
"step": 1363
},
{
"epoch": 0.38868704139061055,
"grad_norm": 0.625,
"learning_rate": 8.688088989111573e-05,
"loss": 1.0359,
"step": 1364
},
{
"epoch": 0.3889720025646506,
"grad_norm": 0.6171875,
"learning_rate": 8.686011473933566e-05,
"loss": 0.977,
"step": 1365
},
{
"epoch": 0.3892569637386906,
"grad_norm": 0.6015625,
"learning_rate": 8.683932563914576e-05,
"loss": 0.9534,
"step": 1366
},
{
"epoch": 0.38954192491273065,
"grad_norm": 0.55078125,
"learning_rate": 8.681852259841297e-05,
"loss": 0.8909,
"step": 1367
},
{
"epoch": 0.3898268860867707,
"grad_norm": 0.5859375,
"learning_rate": 8.679770562500943e-05,
"loss": 0.98,
"step": 1368
},
{
"epoch": 0.3901118472608107,
"grad_norm": 0.5859375,
"learning_rate": 8.67768747268126e-05,
"loss": 0.9297,
"step": 1369
},
{
"epoch": 0.39039680843485075,
"grad_norm": 0.58203125,
"learning_rate": 8.675602991170521e-05,
"loss": 0.898,
"step": 1370
},
{
"epoch": 0.3906817696088908,
"grad_norm": 0.60546875,
"learning_rate": 8.673517118757524e-05,
"loss": 1.0496,
"step": 1371
},
{
"epoch": 0.3909667307829308,
"grad_norm": 0.59765625,
"learning_rate": 8.671429856231593e-05,
"loss": 1.0079,
"step": 1372
},
{
"epoch": 0.39125169195697085,
"grad_norm": 0.6015625,
"learning_rate": 8.66934120438258e-05,
"loss": 0.919,
"step": 1373
},
{
"epoch": 0.3915366531310109,
"grad_norm": 0.6015625,
"learning_rate": 8.667251164000859e-05,
"loss": 1.0264,
"step": 1374
},
{
"epoch": 0.3918216143050509,
"grad_norm": 0.59375,
"learning_rate": 8.665159735877335e-05,
"loss": 1.0051,
"step": 1375
},
{
"epoch": 0.39210657547909095,
"grad_norm": 0.55078125,
"learning_rate": 8.663066920803435e-05,
"loss": 0.8353,
"step": 1376
},
{
"epoch": 0.39239153665313103,
"grad_norm": 0.5859375,
"learning_rate": 8.66097271957111e-05,
"loss": 1.0129,
"step": 1377
},
{
"epoch": 0.39267649782717107,
"grad_norm": 0.578125,
"learning_rate": 8.658877132972835e-05,
"loss": 0.9286,
"step": 1378
},
{
"epoch": 0.3929614590012111,
"grad_norm": 0.6015625,
"learning_rate": 8.656780161801615e-05,
"loss": 1.0064,
"step": 1379
},
{
"epoch": 0.39324642017525113,
"grad_norm": 0.59765625,
"learning_rate": 8.654681806850971e-05,
"loss": 1.0259,
"step": 1380
},
{
"epoch": 0.39353138134929116,
"grad_norm": 0.578125,
"learning_rate": 8.652582068914952e-05,
"loss": 1.0131,
"step": 1381
},
{
"epoch": 0.3938163425233312,
"grad_norm": 0.65625,
"learning_rate": 8.65048094878813e-05,
"loss": 1.0303,
"step": 1382
},
{
"epoch": 0.39410130369737123,
"grad_norm": 0.6171875,
"learning_rate": 8.648378447265603e-05,
"loss": 1.0413,
"step": 1383
},
{
"epoch": 0.39438626487141126,
"grad_norm": 0.5859375,
"learning_rate": 8.646274565142984e-05,
"loss": 0.9901,
"step": 1384
},
{
"epoch": 0.3946712260454513,
"grad_norm": 0.5859375,
"learning_rate": 8.644169303216414e-05,
"loss": 0.8797,
"step": 1385
},
{
"epoch": 0.39495618721949133,
"grad_norm": 0.58984375,
"learning_rate": 8.642062662282557e-05,
"loss": 0.9751,
"step": 1386
},
{
"epoch": 0.39524114839353136,
"grad_norm": 0.52734375,
"learning_rate": 8.639954643138595e-05,
"loss": 0.8173,
"step": 1387
},
{
"epoch": 0.3955261095675714,
"grad_norm": 0.5703125,
"learning_rate": 8.637845246582234e-05,
"loss": 0.9654,
"step": 1388
},
{
"epoch": 0.3958110707416115,
"grad_norm": 0.62890625,
"learning_rate": 8.635734473411701e-05,
"loss": 0.987,
"step": 1389
},
{
"epoch": 0.3960960319156515,
"grad_norm": 0.6484375,
"learning_rate": 8.633622324425745e-05,
"loss": 1.049,
"step": 1390
},
{
"epoch": 0.39638099308969155,
"grad_norm": 0.60546875,
"learning_rate": 8.63150880042363e-05,
"loss": 1.0071,
"step": 1391
},
{
"epoch": 0.3966659542637316,
"grad_norm": 0.57421875,
"learning_rate": 8.629393902205149e-05,
"loss": 0.9277,
"step": 1392
},
{
"epoch": 0.3969509154377716,
"grad_norm": 0.58984375,
"learning_rate": 8.62727763057061e-05,
"loss": 0.844,
"step": 1393
},
{
"epoch": 0.39723587661181164,
"grad_norm": 0.58984375,
"learning_rate": 8.625159986320838e-05,
"loss": 0.9436,
"step": 1394
},
{
"epoch": 0.3975208377858517,
"grad_norm": 0.59765625,
"learning_rate": 8.623040970257183e-05,
"loss": 0.9101,
"step": 1395
},
{
"epoch": 0.3978057989598917,
"grad_norm": 0.60546875,
"learning_rate": 8.620920583181513e-05,
"loss": 0.9121,
"step": 1396
},
{
"epoch": 0.39809076013393174,
"grad_norm": 0.5703125,
"learning_rate": 8.618798825896213e-05,
"loss": 0.9445,
"step": 1397
},
{
"epoch": 0.3983757213079718,
"grad_norm": 0.5859375,
"learning_rate": 8.616675699204185e-05,
"loss": 0.9364,
"step": 1398
},
{
"epoch": 0.3986606824820118,
"grad_norm": 0.625,
"learning_rate": 8.614551203908853e-05,
"loss": 1.036,
"step": 1399
},
{
"epoch": 0.39894564365605184,
"grad_norm": 0.57421875,
"learning_rate": 8.612425340814159e-05,
"loss": 0.9296,
"step": 1400
},
{
"epoch": 0.3992306048300919,
"grad_norm": 0.58203125,
"learning_rate": 8.61029811072456e-05,
"loss": 0.8705,
"step": 1401
},
{
"epoch": 0.39951556600413196,
"grad_norm": 0.58984375,
"learning_rate": 8.608169514445028e-05,
"loss": 0.9125,
"step": 1402
},
{
"epoch": 0.399800527178172,
"grad_norm": 0.5703125,
"learning_rate": 8.606039552781058e-05,
"loss": 0.9602,
"step": 1403
},
{
"epoch": 0.400085488352212,
"grad_norm": 0.5703125,
"learning_rate": 8.60390822653866e-05,
"loss": 0.9155,
"step": 1404
},
{
"epoch": 0.40037044952625206,
"grad_norm": 0.5625,
"learning_rate": 8.601775536524355e-05,
"loss": 0.9916,
"step": 1405
},
{
"epoch": 0.4006554107002921,
"grad_norm": 0.58203125,
"learning_rate": 8.599641483545189e-05,
"loss": 0.9282,
"step": 1406
},
{
"epoch": 0.4009403718743321,
"grad_norm": 0.5859375,
"learning_rate": 8.597506068408717e-05,
"loss": 1.0603,
"step": 1407
},
{
"epoch": 0.40122533304837216,
"grad_norm": 0.6015625,
"learning_rate": 8.59536929192301e-05,
"loss": 1.0229,
"step": 1408
},
{
"epoch": 0.4015102942224122,
"grad_norm": 0.58203125,
"learning_rate": 8.593231154896659e-05,
"loss": 0.894,
"step": 1409
},
{
"epoch": 0.4017952553964522,
"grad_norm": 0.5625,
"learning_rate": 8.591091658138761e-05,
"loss": 0.8648,
"step": 1410
},
{
"epoch": 0.40208021657049225,
"grad_norm": 0.57421875,
"learning_rate": 8.58895080245894e-05,
"loss": 0.9662,
"step": 1411
},
{
"epoch": 0.4023651777445323,
"grad_norm": 0.609375,
"learning_rate": 8.586808588667323e-05,
"loss": 1.0265,
"step": 1412
},
{
"epoch": 0.4026501389185723,
"grad_norm": 0.625,
"learning_rate": 8.584665017574556e-05,
"loss": 1.0178,
"step": 1413
},
{
"epoch": 0.4029351000926124,
"grad_norm": 0.59375,
"learning_rate": 8.582520089991798e-05,
"loss": 1.0322,
"step": 1414
},
{
"epoch": 0.40322006126665244,
"grad_norm": 0.58203125,
"learning_rate": 8.580373806730724e-05,
"loss": 0.9437,
"step": 1415
},
{
"epoch": 0.4035050224406925,
"grad_norm": 0.61328125,
"learning_rate": 8.578226168603513e-05,
"loss": 0.9531,
"step": 1416
},
{
"epoch": 0.4037899836147325,
"grad_norm": 0.61328125,
"learning_rate": 8.576077176422867e-05,
"loss": 0.9953,
"step": 1417
},
{
"epoch": 0.40407494478877254,
"grad_norm": 0.56640625,
"learning_rate": 8.573926831001995e-05,
"loss": 0.8791,
"step": 1418
},
{
"epoch": 0.40435990596281257,
"grad_norm": 0.58984375,
"learning_rate": 8.571775133154622e-05,
"loss": 0.9967,
"step": 1419
},
{
"epoch": 0.4046448671368526,
"grad_norm": 0.55859375,
"learning_rate": 8.569622083694977e-05,
"loss": 0.9516,
"step": 1420
},
{
"epoch": 0.40492982831089264,
"grad_norm": 0.5859375,
"learning_rate": 8.567467683437811e-05,
"loss": 1.0403,
"step": 1421
},
{
"epoch": 0.40521478948493267,
"grad_norm": 0.59375,
"learning_rate": 8.565311933198376e-05,
"loss": 0.9238,
"step": 1422
},
{
"epoch": 0.4054997506589727,
"grad_norm": 0.58203125,
"learning_rate": 8.563154833792444e-05,
"loss": 0.9464,
"step": 1423
},
{
"epoch": 0.40578471183301273,
"grad_norm": 0.59765625,
"learning_rate": 8.56099638603629e-05,
"loss": 0.9654,
"step": 1424
},
{
"epoch": 0.40606967300705277,
"grad_norm": 0.58984375,
"learning_rate": 8.558836590746705e-05,
"loss": 0.9547,
"step": 1425
},
{
"epoch": 0.4063546341810928,
"grad_norm": 0.55859375,
"learning_rate": 8.556675448740985e-05,
"loss": 0.9017,
"step": 1426
},
{
"epoch": 0.4066395953551329,
"grad_norm": 0.60546875,
"learning_rate": 8.554512960836937e-05,
"loss": 1.0015,
"step": 1427
},
{
"epoch": 0.4069245565291729,
"grad_norm": 0.6171875,
"learning_rate": 8.552349127852883e-05,
"loss": 0.9377,
"step": 1428
},
{
"epoch": 0.40720951770321295,
"grad_norm": 0.5546875,
"learning_rate": 8.550183950607644e-05,
"loss": 0.8432,
"step": 1429
},
{
"epoch": 0.407494478877253,
"grad_norm": 0.62109375,
"learning_rate": 8.548017429920559e-05,
"loss": 0.9568,
"step": 1430
},
{
"epoch": 0.407779440051293,
"grad_norm": 0.58203125,
"learning_rate": 8.545849566611467e-05,
"loss": 1.0386,
"step": 1431
},
{
"epoch": 0.40806440122533305,
"grad_norm": 0.59765625,
"learning_rate": 8.543680361500725e-05,
"loss": 0.9595,
"step": 1432
},
{
"epoch": 0.4083493623993731,
"grad_norm": 0.5703125,
"learning_rate": 8.541509815409186e-05,
"loss": 0.9505,
"step": 1433
},
{
"epoch": 0.4086343235734131,
"grad_norm": 0.59375,
"learning_rate": 8.539337929158222e-05,
"loss": 0.9916,
"step": 1434
},
{
"epoch": 0.40891928474745315,
"grad_norm": 0.58984375,
"learning_rate": 8.537164703569703e-05,
"loss": 0.9698,
"step": 1435
},
{
"epoch": 0.4092042459214932,
"grad_norm": 0.5703125,
"learning_rate": 8.53499013946601e-05,
"loss": 0.9859,
"step": 1436
},
{
"epoch": 0.4094892070955332,
"grad_norm": 0.609375,
"learning_rate": 8.532814237670032e-05,
"loss": 1.0218,
"step": 1437
},
{
"epoch": 0.40977416826957325,
"grad_norm": 0.55859375,
"learning_rate": 8.530636999005162e-05,
"loss": 0.8932,
"step": 1438
},
{
"epoch": 0.41005912944361333,
"grad_norm": 0.61328125,
"learning_rate": 8.528458424295297e-05,
"loss": 1.0003,
"step": 1439
},
{
"epoch": 0.41034409061765337,
"grad_norm": 0.64453125,
"learning_rate": 8.526278514364843e-05,
"loss": 0.9291,
"step": 1440
},
{
"epoch": 0.4106290517916934,
"grad_norm": 0.578125,
"learning_rate": 8.524097270038712e-05,
"loss": 0.8347,
"step": 1441
},
{
"epoch": 0.41091401296573343,
"grad_norm": 0.609375,
"learning_rate": 8.521914692142313e-05,
"loss": 1.0188,
"step": 1442
},
{
"epoch": 0.41119897413977347,
"grad_norm": 0.59375,
"learning_rate": 8.519730781501571e-05,
"loss": 0.9563,
"step": 1443
},
{
"epoch": 0.4114839353138135,
"grad_norm": 0.578125,
"learning_rate": 8.51754553894291e-05,
"loss": 0.9416,
"step": 1444
},
{
"epoch": 0.41176889648785353,
"grad_norm": 0.58984375,
"learning_rate": 8.515358965293256e-05,
"loss": 0.8633,
"step": 1445
},
{
"epoch": 0.41205385766189356,
"grad_norm": 0.5703125,
"learning_rate": 8.513171061380041e-05,
"loss": 0.9688,
"step": 1446
},
{
"epoch": 0.4123388188359336,
"grad_norm": 0.58203125,
"learning_rate": 8.510981828031199e-05,
"loss": 0.8366,
"step": 1447
},
{
"epoch": 0.41262378000997363,
"grad_norm": 0.609375,
"learning_rate": 8.508791266075171e-05,
"loss": 1.068,
"step": 1448
},
{
"epoch": 0.41290874118401366,
"grad_norm": 0.55859375,
"learning_rate": 8.506599376340895e-05,
"loss": 0.8938,
"step": 1449
},
{
"epoch": 0.4131937023580537,
"grad_norm": 0.5703125,
"learning_rate": 8.504406159657816e-05,
"loss": 1.0014,
"step": 1450
},
{
"epoch": 0.4134786635320937,
"grad_norm": 0.62890625,
"learning_rate": 8.502211616855878e-05,
"loss": 1.0489,
"step": 1451
},
{
"epoch": 0.4137636247061338,
"grad_norm": 0.59375,
"learning_rate": 8.50001574876553e-05,
"loss": 0.9896,
"step": 1452
},
{
"epoch": 0.41404858588017385,
"grad_norm": 0.56640625,
"learning_rate": 8.49781855621772e-05,
"loss": 0.9565,
"step": 1453
},
{
"epoch": 0.4143335470542139,
"grad_norm": 0.58203125,
"learning_rate": 8.495620040043896e-05,
"loss": 0.9884,
"step": 1454
},
{
"epoch": 0.4146185082282539,
"grad_norm": 0.5390625,
"learning_rate": 8.493420201076014e-05,
"loss": 0.8105,
"step": 1455
},
{
"epoch": 0.41490346940229395,
"grad_norm": 0.5859375,
"learning_rate": 8.49121904014652e-05,
"loss": 0.9667,
"step": 1456
},
{
"epoch": 0.415188430576334,
"grad_norm": 0.60546875,
"learning_rate": 8.48901655808837e-05,
"loss": 0.9508,
"step": 1457
},
{
"epoch": 0.415473391750374,
"grad_norm": 0.66015625,
"learning_rate": 8.486812755735011e-05,
"loss": 1.0046,
"step": 1458
},
{
"epoch": 0.41575835292441404,
"grad_norm": 0.546875,
"learning_rate": 8.484607633920398e-05,
"loss": 0.8572,
"step": 1459
},
{
"epoch": 0.4160433140984541,
"grad_norm": 0.59375,
"learning_rate": 8.48240119347898e-05,
"loss": 0.9747,
"step": 1460
},
{
"epoch": 0.4163282752724941,
"grad_norm": 0.57421875,
"learning_rate": 8.480193435245708e-05,
"loss": 0.9554,
"step": 1461
},
{
"epoch": 0.41661323644653414,
"grad_norm": 0.578125,
"learning_rate": 8.477984360056028e-05,
"loss": 0.9253,
"step": 1462
},
{
"epoch": 0.4168981976205742,
"grad_norm": 0.578125,
"learning_rate": 8.475773968745888e-05,
"loss": 0.9911,
"step": 1463
},
{
"epoch": 0.4171831587946142,
"grad_norm": 0.55859375,
"learning_rate": 8.473562262151733e-05,
"loss": 0.8795,
"step": 1464
},
{
"epoch": 0.4174681199686543,
"grad_norm": 0.58203125,
"learning_rate": 8.471349241110503e-05,
"loss": 0.9323,
"step": 1465
},
{
"epoch": 0.4177530811426943,
"grad_norm": 0.6171875,
"learning_rate": 8.469134906459642e-05,
"loss": 1.0185,
"step": 1466
},
{
"epoch": 0.41803804231673436,
"grad_norm": 0.5703125,
"learning_rate": 8.46691925903708e-05,
"loss": 1.0109,
"step": 1467
},
{
"epoch": 0.4183230034907744,
"grad_norm": 0.58203125,
"learning_rate": 8.464702299681258e-05,
"loss": 0.9631,
"step": 1468
},
{
"epoch": 0.4186079646648144,
"grad_norm": 0.56640625,
"learning_rate": 8.462484029231102e-05,
"loss": 0.9222,
"step": 1469
},
{
"epoch": 0.41889292583885446,
"grad_norm": 0.62109375,
"learning_rate": 8.460264448526038e-05,
"loss": 1.0884,
"step": 1470
},
{
"epoch": 0.4191778870128945,
"grad_norm": 0.578125,
"learning_rate": 8.458043558405989e-05,
"loss": 0.9434,
"step": 1471
},
{
"epoch": 0.4194628481869345,
"grad_norm": 0.55859375,
"learning_rate": 8.455821359711374e-05,
"loss": 0.8854,
"step": 1472
},
{
"epoch": 0.41974780936097456,
"grad_norm": 0.546875,
"learning_rate": 8.453597853283103e-05,
"loss": 0.8779,
"step": 1473
},
{
"epoch": 0.4200327705350146,
"grad_norm": 0.55859375,
"learning_rate": 8.451373039962584e-05,
"loss": 0.8777,
"step": 1474
},
{
"epoch": 0.4203177317090546,
"grad_norm": 0.60546875,
"learning_rate": 8.44914692059172e-05,
"loss": 1.0149,
"step": 1475
},
{
"epoch": 0.42060269288309465,
"grad_norm": 0.58984375,
"learning_rate": 8.446919496012909e-05,
"loss": 0.9547,
"step": 1476
},
{
"epoch": 0.42088765405713474,
"grad_norm": 0.59765625,
"learning_rate": 8.444690767069037e-05,
"loss": 0.9941,
"step": 1477
},
{
"epoch": 0.4211726152311748,
"grad_norm": 0.58203125,
"learning_rate": 8.442460734603492e-05,
"loss": 0.9185,
"step": 1478
},
{
"epoch": 0.4214575764052148,
"grad_norm": 0.578125,
"learning_rate": 8.440229399460147e-05,
"loss": 0.9171,
"step": 1479
},
{
"epoch": 0.42174253757925484,
"grad_norm": 0.5859375,
"learning_rate": 8.437996762483376e-05,
"loss": 0.846,
"step": 1480
},
{
"epoch": 0.42202749875329487,
"grad_norm": 0.59765625,
"learning_rate": 8.435762824518039e-05,
"loss": 1.0084,
"step": 1481
},
{
"epoch": 0.4223124599273349,
"grad_norm": 0.55078125,
"learning_rate": 8.433527586409493e-05,
"loss": 0.8722,
"step": 1482
},
{
"epoch": 0.42259742110137494,
"grad_norm": 0.5625,
"learning_rate": 8.431291049003584e-05,
"loss": 0.9551,
"step": 1483
},
{
"epoch": 0.42288238227541497,
"grad_norm": 0.5703125,
"learning_rate": 8.42905321314665e-05,
"loss": 0.9697,
"step": 1484
},
{
"epoch": 0.423167343449455,
"grad_norm": 0.6015625,
"learning_rate": 8.426814079685521e-05,
"loss": 1.0667,
"step": 1485
},
{
"epoch": 0.42345230462349503,
"grad_norm": 0.6015625,
"learning_rate": 8.42457364946752e-05,
"loss": 1.029,
"step": 1486
},
{
"epoch": 0.42373726579753507,
"grad_norm": 0.55078125,
"learning_rate": 8.422331923340457e-05,
"loss": 0.899,
"step": 1487
},
{
"epoch": 0.4240222269715751,
"grad_norm": 0.56640625,
"learning_rate": 8.420088902152636e-05,
"loss": 1.0318,
"step": 1488
},
{
"epoch": 0.42430718814561513,
"grad_norm": 0.546875,
"learning_rate": 8.417844586752845e-05,
"loss": 0.8648,
"step": 1489
},
{
"epoch": 0.4245921493196552,
"grad_norm": 0.59765625,
"learning_rate": 8.41559897799037e-05,
"loss": 1.043,
"step": 1490
},
{
"epoch": 0.42487711049369525,
"grad_norm": 0.578125,
"learning_rate": 8.413352076714982e-05,
"loss": 0.9173,
"step": 1491
},
{
"epoch": 0.4251620716677353,
"grad_norm": 0.59765625,
"learning_rate": 8.411103883776941e-05,
"loss": 0.9297,
"step": 1492
},
{
"epoch": 0.4254470328417753,
"grad_norm": 0.62109375,
"learning_rate": 8.408854400026996e-05,
"loss": 1.0108,
"step": 1493
},
{
"epoch": 0.42573199401581535,
"grad_norm": 0.609375,
"learning_rate": 8.406603626316382e-05,
"loss": 0.9977,
"step": 1494
},
{
"epoch": 0.4260169551898554,
"grad_norm": 0.64453125,
"learning_rate": 8.404351563496831e-05,
"loss": 1.2144,
"step": 1495
},
{
"epoch": 0.4263019163638954,
"grad_norm": 0.5625,
"learning_rate": 8.402098212420553e-05,
"loss": 0.9607,
"step": 1496
},
{
"epoch": 0.42658687753793545,
"grad_norm": 0.53125,
"learning_rate": 8.399843573940248e-05,
"loss": 0.8164,
"step": 1497
},
{
"epoch": 0.4268718387119755,
"grad_norm": 0.56640625,
"learning_rate": 8.397587648909107e-05,
"loss": 0.987,
"step": 1498
},
{
"epoch": 0.4271567998860155,
"grad_norm": 0.578125,
"learning_rate": 8.395330438180803e-05,
"loss": 0.9275,
"step": 1499
},
{
"epoch": 0.42744176106005555,
"grad_norm": 0.55859375,
"learning_rate": 8.393071942609501e-05,
"loss": 0.877,
"step": 1500
},
{
"epoch": 0.4277267222340956,
"grad_norm": 0.59375,
"learning_rate": 8.390812163049845e-05,
"loss": 1.0126,
"step": 1501
},
{
"epoch": 0.42801168340813567,
"grad_norm": 0.57421875,
"learning_rate": 8.388551100356971e-05,
"loss": 0.9069,
"step": 1502
},
{
"epoch": 0.4282966445821757,
"grad_norm": 0.72265625,
"learning_rate": 8.386288755386499e-05,
"loss": 1.0696,
"step": 1503
},
{
"epoch": 0.42858160575621573,
"grad_norm": 0.5703125,
"learning_rate": 8.384025128994532e-05,
"loss": 0.9017,
"step": 1504
},
{
"epoch": 0.42886656693025577,
"grad_norm": 0.55859375,
"learning_rate": 8.38176022203766e-05,
"loss": 0.9497,
"step": 1505
},
{
"epoch": 0.4291515281042958,
"grad_norm": 0.578125,
"learning_rate": 8.37949403537296e-05,
"loss": 0.9465,
"step": 1506
},
{
"epoch": 0.42943648927833583,
"grad_norm": 0.57421875,
"learning_rate": 8.377226569857985e-05,
"loss": 0.9455,
"step": 1507
},
{
"epoch": 0.42972145045237586,
"grad_norm": 0.5546875,
"learning_rate": 8.374957826350781e-05,
"loss": 0.9299,
"step": 1508
},
{
"epoch": 0.4300064116264159,
"grad_norm": 0.51953125,
"learning_rate": 8.372687805709873e-05,
"loss": 0.8362,
"step": 1509
},
{
"epoch": 0.43029137280045593,
"grad_norm": 0.59765625,
"learning_rate": 8.37041650879427e-05,
"loss": 1.0492,
"step": 1510
},
{
"epoch": 0.43057633397449596,
"grad_norm": 0.53515625,
"learning_rate": 8.368143936463465e-05,
"loss": 0.9604,
"step": 1511
},
{
"epoch": 0.430861295148536,
"grad_norm": 0.54296875,
"learning_rate": 8.365870089577431e-05,
"loss": 0.8321,
"step": 1512
},
{
"epoch": 0.431146256322576,
"grad_norm": 0.57421875,
"learning_rate": 8.363594968996628e-05,
"loss": 1.0465,
"step": 1513
},
{
"epoch": 0.43143121749661606,
"grad_norm": 0.640625,
"learning_rate": 8.361318575581992e-05,
"loss": 1.0719,
"step": 1514
},
{
"epoch": 0.43171617867065615,
"grad_norm": 0.5546875,
"learning_rate": 8.359040910194946e-05,
"loss": 0.9021,
"step": 1515
},
{
"epoch": 0.4320011398446962,
"grad_norm": 0.5625,
"learning_rate": 8.35676197369739e-05,
"loss": 0.9804,
"step": 1516
},
{
"epoch": 0.4322861010187362,
"grad_norm": 0.84765625,
"learning_rate": 8.354481766951712e-05,
"loss": 0.8445,
"step": 1517
},
{
"epoch": 0.43257106219277625,
"grad_norm": 0.56640625,
"learning_rate": 8.35220029082077e-05,
"loss": 0.8799,
"step": 1518
},
{
"epoch": 0.4328560233668163,
"grad_norm": 0.67578125,
"learning_rate": 8.349917546167909e-05,
"loss": 1.0454,
"step": 1519
},
{
"epoch": 0.4331409845408563,
"grad_norm": 0.59375,
"learning_rate": 8.347633533856956e-05,
"loss": 0.9651,
"step": 1520
},
{
"epoch": 0.43342594571489634,
"grad_norm": 1.0078125,
"learning_rate": 8.345348254752214e-05,
"loss": 1.0017,
"step": 1521
},
{
"epoch": 0.4337109068889364,
"grad_norm": 0.625,
"learning_rate": 8.343061709718465e-05,
"loss": 0.9982,
"step": 1522
},
{
"epoch": 0.4339958680629764,
"grad_norm": 0.58984375,
"learning_rate": 8.340773899620971e-05,
"loss": 1.0013,
"step": 1523
},
{
"epoch": 0.43428082923701644,
"grad_norm": 0.57421875,
"learning_rate": 8.338484825325476e-05,
"loss": 1.0574,
"step": 1524
},
{
"epoch": 0.4345657904110565,
"grad_norm": 0.55078125,
"learning_rate": 8.336194487698194e-05,
"loss": 0.9522,
"step": 1525
},
{
"epoch": 0.4348507515850965,
"grad_norm": 0.58203125,
"learning_rate": 8.333902887605829e-05,
"loss": 0.9004,
"step": 1526
},
{
"epoch": 0.4351357127591366,
"grad_norm": 0.5703125,
"learning_rate": 8.331610025915551e-05,
"loss": 0.9407,
"step": 1527
},
{
"epoch": 0.4354206739331766,
"grad_norm": 0.56640625,
"learning_rate": 8.329315903495016e-05,
"loss": 0.9734,
"step": 1528
},
{
"epoch": 0.43570563510721666,
"grad_norm": 0.55859375,
"learning_rate": 8.327020521212352e-05,
"loss": 0.8726,
"step": 1529
},
{
"epoch": 0.4359905962812567,
"grad_norm": 0.5703125,
"learning_rate": 8.324723879936164e-05,
"loss": 0.9743,
"step": 1530
},
{
"epoch": 0.4362755574552967,
"grad_norm": 0.58984375,
"learning_rate": 8.32242598053554e-05,
"loss": 0.9513,
"step": 1531
},
{
"epoch": 0.43656051862933676,
"grad_norm": 0.57421875,
"learning_rate": 8.320126823880032e-05,
"loss": 0.9463,
"step": 1532
},
{
"epoch": 0.4368454798033768,
"grad_norm": 0.56640625,
"learning_rate": 8.31782641083968e-05,
"loss": 0.7969,
"step": 1533
},
{
"epoch": 0.4371304409774168,
"grad_norm": 0.60546875,
"learning_rate": 8.315524742284992e-05,
"loss": 0.9063,
"step": 1534
},
{
"epoch": 0.43741540215145686,
"grad_norm": 0.5625,
"learning_rate": 8.313221819086954e-05,
"loss": 0.8677,
"step": 1535
},
{
"epoch": 0.4377003633254969,
"grad_norm": 0.609375,
"learning_rate": 8.310917642117024e-05,
"loss": 1.0184,
"step": 1536
},
{
"epoch": 0.4379853244995369,
"grad_norm": 0.57421875,
"learning_rate": 8.30861221224714e-05,
"loss": 0.8789,
"step": 1537
},
{
"epoch": 0.43827028567357695,
"grad_norm": 0.53125,
"learning_rate": 8.306305530349708e-05,
"loss": 0.8259,
"step": 1538
},
{
"epoch": 0.438555246847617,
"grad_norm": 0.59375,
"learning_rate": 8.303997597297612e-05,
"loss": 0.9158,
"step": 1539
},
{
"epoch": 0.4388402080216571,
"grad_norm": 0.59765625,
"learning_rate": 8.301688413964207e-05,
"loss": 1.0237,
"step": 1540
},
{
"epoch": 0.4391251691956971,
"grad_norm": 0.578125,
"learning_rate": 8.29937798122332e-05,
"loss": 0.9758,
"step": 1541
},
{
"epoch": 0.43941013036973714,
"grad_norm": 0.515625,
"learning_rate": 8.297066299949255e-05,
"loss": 0.8428,
"step": 1542
},
{
"epoch": 0.4396950915437772,
"grad_norm": 0.58203125,
"learning_rate": 8.294753371016786e-05,
"loss": 0.927,
"step": 1543
},
{
"epoch": 0.4399800527178172,
"grad_norm": 0.58984375,
"learning_rate": 8.29243919530116e-05,
"loss": 1.0507,
"step": 1544
},
{
"epoch": 0.44026501389185724,
"grad_norm": 0.53125,
"learning_rate": 8.290123773678093e-05,
"loss": 0.8273,
"step": 1545
},
{
"epoch": 0.44054997506589727,
"grad_norm": 0.55078125,
"learning_rate": 8.287807107023776e-05,
"loss": 0.8995,
"step": 1546
},
{
"epoch": 0.4408349362399373,
"grad_norm": 0.5859375,
"learning_rate": 8.28548919621487e-05,
"loss": 0.9847,
"step": 1547
},
{
"epoch": 0.44111989741397734,
"grad_norm": 0.59765625,
"learning_rate": 8.283170042128508e-05,
"loss": 0.9554,
"step": 1548
},
{
"epoch": 0.44140485858801737,
"grad_norm": 0.5859375,
"learning_rate": 8.280849645642288e-05,
"loss": 0.9638,
"step": 1549
},
{
"epoch": 0.4416898197620574,
"grad_norm": 0.5546875,
"learning_rate": 8.278528007634286e-05,
"loss": 0.9492,
"step": 1550
},
{
"epoch": 0.44197478093609743,
"grad_norm": 0.55859375,
"learning_rate": 8.27620512898304e-05,
"loss": 0.9208,
"step": 1551
},
{
"epoch": 0.4422597421101375,
"grad_norm": 0.578125,
"learning_rate": 8.273881010567566e-05,
"loss": 0.9669,
"step": 1552
},
{
"epoch": 0.44254470328417755,
"grad_norm": 0.55078125,
"learning_rate": 8.271555653267342e-05,
"loss": 0.9314,
"step": 1553
},
{
"epoch": 0.4428296644582176,
"grad_norm": 0.6328125,
"learning_rate": 8.269229057962318e-05,
"loss": 1.1307,
"step": 1554
},
{
"epoch": 0.4431146256322576,
"grad_norm": 0.5625,
"learning_rate": 8.266901225532911e-05,
"loss": 0.9241,
"step": 1555
},
{
"epoch": 0.44339958680629765,
"grad_norm": 0.56640625,
"learning_rate": 8.264572156860008e-05,
"loss": 0.8712,
"step": 1556
},
{
"epoch": 0.4436845479803377,
"grad_norm": 0.58984375,
"learning_rate": 8.262241852824964e-05,
"loss": 0.9752,
"step": 1557
},
{
"epoch": 0.4439695091543777,
"grad_norm": 0.5703125,
"learning_rate": 8.259910314309598e-05,
"loss": 0.9804,
"step": 1558
},
{
"epoch": 0.44425447032841775,
"grad_norm": 0.578125,
"learning_rate": 8.257577542196198e-05,
"loss": 0.948,
"step": 1559
},
{
"epoch": 0.4445394315024578,
"grad_norm": 0.5546875,
"learning_rate": 8.255243537367523e-05,
"loss": 0.9416,
"step": 1560
},
{
"epoch": 0.4448243926764978,
"grad_norm": 0.57421875,
"learning_rate": 8.252908300706792e-05,
"loss": 0.9388,
"step": 1561
},
{
"epoch": 0.44510935385053785,
"grad_norm": 0.56640625,
"learning_rate": 8.250571833097693e-05,
"loss": 0.929,
"step": 1562
},
{
"epoch": 0.4453943150245779,
"grad_norm": 0.5859375,
"learning_rate": 8.248234135424382e-05,
"loss": 0.9338,
"step": 1563
},
{
"epoch": 0.4456792761986179,
"grad_norm": 0.54296875,
"learning_rate": 8.245895208571475e-05,
"loss": 0.92,
"step": 1564
},
{
"epoch": 0.445964237372658,
"grad_norm": 0.6640625,
"learning_rate": 8.243555053424057e-05,
"loss": 0.9015,
"step": 1565
},
{
"epoch": 0.44624919854669803,
"grad_norm": 0.53125,
"learning_rate": 8.241213670867679e-05,
"loss": 0.7701,
"step": 1566
},
{
"epoch": 0.44653415972073807,
"grad_norm": 0.54296875,
"learning_rate": 8.238871061788353e-05,
"loss": 0.8525,
"step": 1567
},
{
"epoch": 0.4468191208947781,
"grad_norm": 0.5625,
"learning_rate": 8.236527227072558e-05,
"loss": 0.9254,
"step": 1568
},
{
"epoch": 0.44710408206881813,
"grad_norm": 0.57421875,
"learning_rate": 8.234182167607235e-05,
"loss": 0.8946,
"step": 1569
},
{
"epoch": 0.44738904324285816,
"grad_norm": 0.578125,
"learning_rate": 8.231835884279788e-05,
"loss": 0.8708,
"step": 1570
},
{
"epoch": 0.4476740044168982,
"grad_norm": 0.515625,
"learning_rate": 8.229488377978087e-05,
"loss": 0.7654,
"step": 1571
},
{
"epoch": 0.44795896559093823,
"grad_norm": 0.5859375,
"learning_rate": 8.22713964959046e-05,
"loss": 0.9207,
"step": 1572
},
{
"epoch": 0.44824392676497826,
"grad_norm": 0.58203125,
"learning_rate": 8.224789700005704e-05,
"loss": 0.9206,
"step": 1573
},
{
"epoch": 0.4485288879390183,
"grad_norm": 0.60546875,
"learning_rate": 8.222438530113071e-05,
"loss": 0.9539,
"step": 1574
},
{
"epoch": 0.4488138491130583,
"grad_norm": 0.59765625,
"learning_rate": 8.220086140802281e-05,
"loss": 0.9634,
"step": 1575
},
{
"epoch": 0.44909881028709836,
"grad_norm": 0.5859375,
"learning_rate": 8.217732532963513e-05,
"loss": 0.9676,
"step": 1576
},
{
"epoch": 0.4493837714611384,
"grad_norm": 0.58984375,
"learning_rate": 8.215377707487404e-05,
"loss": 0.9847,
"step": 1577
},
{
"epoch": 0.4496687326351785,
"grad_norm": 0.57421875,
"learning_rate": 8.213021665265057e-05,
"loss": 0.9436,
"step": 1578
},
{
"epoch": 0.4499536938092185,
"grad_norm": 0.625,
"learning_rate": 8.210664407188032e-05,
"loss": 0.9658,
"step": 1579
},
{
"epoch": 0.45023865498325855,
"grad_norm": 0.62109375,
"learning_rate": 8.20830593414835e-05,
"loss": 1.0291,
"step": 1580
},
{
"epoch": 0.45023865498325855,
"eval_loss": 0.9564817547798157,
"eval_model_preparation_time": 0.0565,
"eval_runtime": 300.2915,
"eval_samples_per_second": 5.138,
"eval_steps_per_second": 1.285,
"step": 1580
}
],
"logging_steps": 1,
"max_steps": 5265,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1580,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.1604940962994954e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}