lfm-cpt / trainer_state.json
Ba2han's picture
Final continued pretraining checkpoint
714fe5d verified
Raw
History Blame Contribute Delete
353 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 4205,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0004756949606065111,
"grad_norm": 17.125,
"learning_rate": 3.80952380952381e-07,
"loss": 3.0223777294158936,
"step": 2
},
{
"epoch": 0.0009513899212130222,
"grad_norm": 18.875,
"learning_rate": 1.142857142857143e-06,
"loss": 3.0833311080932617,
"step": 4
},
{
"epoch": 0.0014270848818195333,
"grad_norm": 17.5,
"learning_rate": 1.904761904761905e-06,
"loss": 3.0257577896118164,
"step": 6
},
{
"epoch": 0.0019027798424260443,
"grad_norm": 14.375,
"learning_rate": 2.666666666666667e-06,
"loss": 2.894188642501831,
"step": 8
},
{
"epoch": 0.002378474803032555,
"grad_norm": 11.6875,
"learning_rate": 3.428571428571429e-06,
"loss": 2.8602077960968018,
"step": 10
},
{
"epoch": 0.0028541697636390666,
"grad_norm": 9.1875,
"learning_rate": 4.190476190476191e-06,
"loss": 2.8444406986236572,
"step": 12
},
{
"epoch": 0.0033298647242455777,
"grad_norm": 5.75,
"learning_rate": 4.952380952380953e-06,
"loss": 2.7274436950683594,
"step": 14
},
{
"epoch": 0.0038055596848520887,
"grad_norm": 3.96875,
"learning_rate": 5.7142857142857145e-06,
"loss": 2.6735000610351562,
"step": 16
},
{
"epoch": 0.0042812546454586,
"grad_norm": 3.46875,
"learning_rate": 6.476190476190477e-06,
"loss": 2.7295286655426025,
"step": 18
},
{
"epoch": 0.00475694960606511,
"grad_norm": 3.390625,
"learning_rate": 7.238095238095239e-06,
"loss": 2.674283027648926,
"step": 20
},
{
"epoch": 0.005232644566671622,
"grad_norm": 2.375,
"learning_rate": 8.000000000000001e-06,
"loss": 2.604132652282715,
"step": 22
},
{
"epoch": 0.005708339527278133,
"grad_norm": 1.984375,
"learning_rate": 8.761904761904763e-06,
"loss": 2.5608315467834473,
"step": 24
},
{
"epoch": 0.006184034487884644,
"grad_norm": 1.6484375,
"learning_rate": 9.523809523809525e-06,
"loss": 2.4369235038757324,
"step": 26
},
{
"epoch": 0.006659729448491155,
"grad_norm": 1.3125,
"learning_rate": 1.0285714285714285e-05,
"loss": 2.526822090148926,
"step": 28
},
{
"epoch": 0.007135424409097666,
"grad_norm": 1.1015625,
"learning_rate": 1.104761904761905e-05,
"loss": 2.478888511657715,
"step": 30
},
{
"epoch": 0.007611119369704177,
"grad_norm": 1.015625,
"learning_rate": 1.180952380952381e-05,
"loss": 2.4330101013183594,
"step": 32
},
{
"epoch": 0.008086814330310688,
"grad_norm": 0.88671875,
"learning_rate": 1.2571428571428572e-05,
"loss": 2.412613868713379,
"step": 34
},
{
"epoch": 0.0085625092909172,
"grad_norm": 0.828125,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.4466609954833984,
"step": 36
},
{
"epoch": 0.009038204251523711,
"grad_norm": 0.9140625,
"learning_rate": 1.4095238095238097e-05,
"loss": 2.3658487796783447,
"step": 38
},
{
"epoch": 0.00951389921213022,
"grad_norm": 0.84375,
"learning_rate": 1.4857142857142858e-05,
"loss": 2.355985164642334,
"step": 40
},
{
"epoch": 0.009989594172736732,
"grad_norm": 0.796875,
"learning_rate": 1.5619047619047622e-05,
"loss": 2.2941904067993164,
"step": 42
},
{
"epoch": 0.010465289133343244,
"grad_norm": 0.80078125,
"learning_rate": 1.6380952380952384e-05,
"loss": 2.2010486125946045,
"step": 44
},
{
"epoch": 0.010940984093949755,
"grad_norm": 0.890625,
"learning_rate": 1.7142857142857142e-05,
"loss": 2.2753114700317383,
"step": 46
},
{
"epoch": 0.011416679054556267,
"grad_norm": 0.79296875,
"learning_rate": 1.7904761904761907e-05,
"loss": 2.248267650604248,
"step": 48
},
{
"epoch": 0.011892374015162776,
"grad_norm": 0.76953125,
"learning_rate": 1.866666666666667e-05,
"loss": 2.219675064086914,
"step": 50
},
{
"epoch": 0.012368068975769288,
"grad_norm": 0.77734375,
"learning_rate": 1.942857142857143e-05,
"loss": 2.195375919342041,
"step": 52
},
{
"epoch": 0.0128437639363758,
"grad_norm": 0.8203125,
"learning_rate": 2.0190476190476192e-05,
"loss": 2.113194227218628,
"step": 54
},
{
"epoch": 0.01331945889698231,
"grad_norm": 0.89453125,
"learning_rate": 2.0952380952380954e-05,
"loss": 2.1435773372650146,
"step": 56
},
{
"epoch": 0.01379515385758882,
"grad_norm": 0.9609375,
"learning_rate": 2.1714285714285715e-05,
"loss": 2.1617753505706787,
"step": 58
},
{
"epoch": 0.014270848818195332,
"grad_norm": 0.9140625,
"learning_rate": 2.2476190476190477e-05,
"loss": 2.0906386375427246,
"step": 60
},
{
"epoch": 0.014746543778801843,
"grad_norm": 0.890625,
"learning_rate": 2.3238095238095242e-05,
"loss": 2.13519024848938,
"step": 62
},
{
"epoch": 0.015222238739408355,
"grad_norm": 0.74609375,
"learning_rate": 2.4e-05,
"loss": 2.0451605319976807,
"step": 64
},
{
"epoch": 0.015697933700014866,
"grad_norm": 0.703125,
"learning_rate": 2.4761904761904766e-05,
"loss": 2.18241548538208,
"step": 66
},
{
"epoch": 0.016173628660621376,
"grad_norm": 0.734375,
"learning_rate": 2.5523809523809524e-05,
"loss": 2.0362777709960938,
"step": 68
},
{
"epoch": 0.01664932362122789,
"grad_norm": 0.68359375,
"learning_rate": 2.628571428571429e-05,
"loss": 2.0474891662597656,
"step": 70
},
{
"epoch": 0.0171250185818344,
"grad_norm": 0.81640625,
"learning_rate": 2.704761904761905e-05,
"loss": 2.0943374633789062,
"step": 72
},
{
"epoch": 0.01760071354244091,
"grad_norm": 0.703125,
"learning_rate": 2.780952380952381e-05,
"loss": 2.007606029510498,
"step": 74
},
{
"epoch": 0.018076408503047422,
"grad_norm": 0.79296875,
"learning_rate": 2.8571428571428574e-05,
"loss": 2.010784149169922,
"step": 76
},
{
"epoch": 0.01855210346365393,
"grad_norm": 0.859375,
"learning_rate": 2.9333333333333333e-05,
"loss": 2.0787105560302734,
"step": 78
},
{
"epoch": 0.01902779842426044,
"grad_norm": 0.78515625,
"learning_rate": 3.0095238095238098e-05,
"loss": 2.0407357215881348,
"step": 80
},
{
"epoch": 0.019503493384866954,
"grad_norm": 0.75390625,
"learning_rate": 3.085714285714286e-05,
"loss": 1.9725031852722168,
"step": 82
},
{
"epoch": 0.019979188345473464,
"grad_norm": 0.8671875,
"learning_rate": 3.161904761904762e-05,
"loss": 1.9424755573272705,
"step": 84
},
{
"epoch": 0.020454883306079977,
"grad_norm": 0.9921875,
"learning_rate": 3.2380952380952386e-05,
"loss": 2.0050058364868164,
"step": 86
},
{
"epoch": 0.020930578266686487,
"grad_norm": 0.7421875,
"learning_rate": 3.314285714285715e-05,
"loss": 1.9476414918899536,
"step": 88
},
{
"epoch": 0.021406273227292997,
"grad_norm": 0.94140625,
"learning_rate": 3.390476190476191e-05,
"loss": 1.9908151626586914,
"step": 90
},
{
"epoch": 0.02188196818789951,
"grad_norm": 0.81640625,
"learning_rate": 3.466666666666667e-05,
"loss": 1.9584457874298096,
"step": 92
},
{
"epoch": 0.02235766314850602,
"grad_norm": 0.9609375,
"learning_rate": 3.542857142857143e-05,
"loss": 1.9743244647979736,
"step": 94
},
{
"epoch": 0.022833358109112533,
"grad_norm": 1.1015625,
"learning_rate": 3.6190476190476195e-05,
"loss": 1.9396190643310547,
"step": 96
},
{
"epoch": 0.023309053069719043,
"grad_norm": 0.93359375,
"learning_rate": 3.6952380952380956e-05,
"loss": 1.8824760913848877,
"step": 98
},
{
"epoch": 0.023784748030325552,
"grad_norm": 0.859375,
"learning_rate": 3.771428571428572e-05,
"loss": 1.8970260620117188,
"step": 100
},
{
"epoch": 0.024260442990932066,
"grad_norm": 0.7890625,
"learning_rate": 3.847619047619048e-05,
"loss": 1.9105536937713623,
"step": 102
},
{
"epoch": 0.024736137951538575,
"grad_norm": 0.84765625,
"learning_rate": 3.923809523809524e-05,
"loss": 1.8593096733093262,
"step": 104
},
{
"epoch": 0.02521183291214509,
"grad_norm": 0.76953125,
"learning_rate": 4e-05,
"loss": 1.8682916164398193,
"step": 106
},
{
"epoch": 0.0256875278727516,
"grad_norm": 0.83984375,
"learning_rate": 4.0761904761904765e-05,
"loss": 1.9372856616973877,
"step": 108
},
{
"epoch": 0.026163222833358108,
"grad_norm": 1.2890625,
"learning_rate": 4.1523809523809533e-05,
"loss": 1.9114850759506226,
"step": 110
},
{
"epoch": 0.02663891779396462,
"grad_norm": 1.0625,
"learning_rate": 4.228571428571429e-05,
"loss": 1.8821630477905273,
"step": 112
},
{
"epoch": 0.02711461275457113,
"grad_norm": 1.015625,
"learning_rate": 4.304761904761905e-05,
"loss": 1.8732749223709106,
"step": 114
},
{
"epoch": 0.02759030771517764,
"grad_norm": 0.7890625,
"learning_rate": 4.380952380952382e-05,
"loss": 1.8635611534118652,
"step": 116
},
{
"epoch": 0.028066002675784154,
"grad_norm": 0.90625,
"learning_rate": 4.4571428571428574e-05,
"loss": 1.8261184692382812,
"step": 118
},
{
"epoch": 0.028541697636390664,
"grad_norm": 0.9140625,
"learning_rate": 4.5333333333333335e-05,
"loss": 1.8533995151519775,
"step": 120
},
{
"epoch": 0.029017392596997177,
"grad_norm": 1.03125,
"learning_rate": 4.60952380952381e-05,
"loss": 1.816650390625,
"step": 122
},
{
"epoch": 0.029493087557603687,
"grad_norm": 0.87890625,
"learning_rate": 4.6857142857142865e-05,
"loss": 1.823215365409851,
"step": 124
},
{
"epoch": 0.029968782518210196,
"grad_norm": 0.83984375,
"learning_rate": 4.761904761904762e-05,
"loss": 1.8113462924957275,
"step": 126
},
{
"epoch": 0.03044447747881671,
"grad_norm": 0.8046875,
"learning_rate": 4.838095238095238e-05,
"loss": 1.7880184650421143,
"step": 128
},
{
"epoch": 0.03092017243942322,
"grad_norm": 0.97265625,
"learning_rate": 4.914285714285715e-05,
"loss": 1.8012118339538574,
"step": 130
},
{
"epoch": 0.03139586740002973,
"grad_norm": 1.046875,
"learning_rate": 4.990476190476191e-05,
"loss": 1.783468246459961,
"step": 132
},
{
"epoch": 0.03187156236063624,
"grad_norm": 1.015625,
"learning_rate": 5.066666666666667e-05,
"loss": 1.7474174499511719,
"step": 134
},
{
"epoch": 0.03234725732124275,
"grad_norm": 0.95703125,
"learning_rate": 5.1428571428571436e-05,
"loss": 1.8491697311401367,
"step": 136
},
{
"epoch": 0.03282295228184926,
"grad_norm": 0.984375,
"learning_rate": 5.21904761904762e-05,
"loss": 1.77945077419281,
"step": 138
},
{
"epoch": 0.03329864724245578,
"grad_norm": 1.046875,
"learning_rate": 5.295238095238095e-05,
"loss": 1.7462689876556396,
"step": 140
},
{
"epoch": 0.03377434220306229,
"grad_norm": 1.25,
"learning_rate": 5.3714285714285714e-05,
"loss": 1.77305006980896,
"step": 142
},
{
"epoch": 0.0342500371636688,
"grad_norm": 0.91015625,
"learning_rate": 5.447619047619048e-05,
"loss": 1.7020612955093384,
"step": 144
},
{
"epoch": 0.03472573212427531,
"grad_norm": 0.921875,
"learning_rate": 5.5238095238095244e-05,
"loss": 1.8065619468688965,
"step": 146
},
{
"epoch": 0.03520142708488182,
"grad_norm": 0.8828125,
"learning_rate": 5.6e-05,
"loss": 1.6848450899124146,
"step": 148
},
{
"epoch": 0.035677122045488334,
"grad_norm": 0.80859375,
"learning_rate": 5.676190476190477e-05,
"loss": 1.747304916381836,
"step": 150
},
{
"epoch": 0.036152817006094844,
"grad_norm": 1.1015625,
"learning_rate": 5.752380952380953e-05,
"loss": 1.7690556049346924,
"step": 152
},
{
"epoch": 0.03662851196670135,
"grad_norm": 1.03125,
"learning_rate": 5.828571428571429e-05,
"loss": 1.7610713243484497,
"step": 154
},
{
"epoch": 0.03710420692730786,
"grad_norm": 1.1640625,
"learning_rate": 5.904761904761905e-05,
"loss": 1.6751768589019775,
"step": 156
},
{
"epoch": 0.03757990188791437,
"grad_norm": 0.9375,
"learning_rate": 5.9809523809523814e-05,
"loss": 1.6568293571472168,
"step": 158
},
{
"epoch": 0.03805559684852088,
"grad_norm": 0.84765625,
"learning_rate": 6.0571428571428576e-05,
"loss": 1.7163995504379272,
"step": 160
},
{
"epoch": 0.0385312918091274,
"grad_norm": 1.1875,
"learning_rate": 6.133333333333334e-05,
"loss": 1.7115540504455566,
"step": 162
},
{
"epoch": 0.03900698676973391,
"grad_norm": 0.9453125,
"learning_rate": 6.20952380952381e-05,
"loss": 1.6549824476242065,
"step": 164
},
{
"epoch": 0.03948268173034042,
"grad_norm": 0.828125,
"learning_rate": 6.285714285714286e-05,
"loss": 1.6670048236846924,
"step": 166
},
{
"epoch": 0.03995837669094693,
"grad_norm": 0.89453125,
"learning_rate": 6.361904761904762e-05,
"loss": 1.6787292957305908,
"step": 168
},
{
"epoch": 0.04043407165155344,
"grad_norm": 0.87890625,
"learning_rate": 6.438095238095238e-05,
"loss": 1.6751407384872437,
"step": 170
},
{
"epoch": 0.040909766612159955,
"grad_norm": 0.78125,
"learning_rate": 6.514285714285715e-05,
"loss": 1.679162621498108,
"step": 172
},
{
"epoch": 0.041385461572766465,
"grad_norm": 0.75,
"learning_rate": 6.590476190476191e-05,
"loss": 1.6422595977783203,
"step": 174
},
{
"epoch": 0.041861156533372974,
"grad_norm": 0.97265625,
"learning_rate": 6.666666666666667e-05,
"loss": 1.693905234336853,
"step": 176
},
{
"epoch": 0.042336851493979484,
"grad_norm": 0.99609375,
"learning_rate": 6.742857142857143e-05,
"loss": 1.7319214344024658,
"step": 178
},
{
"epoch": 0.042812546454585994,
"grad_norm": 1.1171875,
"learning_rate": 6.81904761904762e-05,
"loss": 1.7077994346618652,
"step": 180
},
{
"epoch": 0.04328824141519251,
"grad_norm": 0.87890625,
"learning_rate": 6.895238095238095e-05,
"loss": 1.6633131504058838,
"step": 182
},
{
"epoch": 0.04376393637579902,
"grad_norm": 1.3515625,
"learning_rate": 6.971428571428572e-05,
"loss": 1.6569929122924805,
"step": 184
},
{
"epoch": 0.04423963133640553,
"grad_norm": 1.078125,
"learning_rate": 7.047619047619048e-05,
"loss": 1.6756895780563354,
"step": 186
},
{
"epoch": 0.04471532629701204,
"grad_norm": 1.296875,
"learning_rate": 7.123809523809524e-05,
"loss": 1.7126249074935913,
"step": 188
},
{
"epoch": 0.04519102125761855,
"grad_norm": 0.9609375,
"learning_rate": 7.2e-05,
"loss": 1.6484733819961548,
"step": 190
},
{
"epoch": 0.045666716218225066,
"grad_norm": 1.1015625,
"learning_rate": 7.276190476190476e-05,
"loss": 1.71817147731781,
"step": 192
},
{
"epoch": 0.046142411178831576,
"grad_norm": 0.96484375,
"learning_rate": 7.352380952380953e-05,
"loss": 1.7061476707458496,
"step": 194
},
{
"epoch": 0.046618106139438086,
"grad_norm": 0.9375,
"learning_rate": 7.42857142857143e-05,
"loss": 1.654850959777832,
"step": 196
},
{
"epoch": 0.047093801100044595,
"grad_norm": 0.8984375,
"learning_rate": 7.504761904761905e-05,
"loss": 1.6332194805145264,
"step": 198
},
{
"epoch": 0.047569496060651105,
"grad_norm": 0.890625,
"learning_rate": 7.580952380952381e-05,
"loss": 1.6425645351409912,
"step": 200
},
{
"epoch": 0.04804519102125762,
"grad_norm": 0.80078125,
"learning_rate": 7.657142857142859e-05,
"loss": 1.6112370491027832,
"step": 202
},
{
"epoch": 0.04852088598186413,
"grad_norm": 0.875,
"learning_rate": 7.733333333333333e-05,
"loss": 1.6736791133880615,
"step": 204
},
{
"epoch": 0.04899658094247064,
"grad_norm": 0.90234375,
"learning_rate": 7.80952380952381e-05,
"loss": 1.5582149028778076,
"step": 206
},
{
"epoch": 0.04947227590307715,
"grad_norm": 0.859375,
"learning_rate": 7.885714285714287e-05,
"loss": 1.605231523513794,
"step": 208
},
{
"epoch": 0.04994797086368366,
"grad_norm": 0.95703125,
"learning_rate": 7.961904761904763e-05,
"loss": 1.6272740364074707,
"step": 210
},
{
"epoch": 0.05042366582429018,
"grad_norm": 1.0234375,
"learning_rate": 8e-05,
"loss": 1.6227126121520996,
"step": 212
},
{
"epoch": 0.05089936078489669,
"grad_norm": 0.99609375,
"learning_rate": 8e-05,
"loss": 1.7201282978057861,
"step": 214
},
{
"epoch": 0.0513750557455032,
"grad_norm": 0.8203125,
"learning_rate": 8e-05,
"loss": 1.671586036682129,
"step": 216
},
{
"epoch": 0.051850750706109706,
"grad_norm": 0.82421875,
"learning_rate": 8e-05,
"loss": 1.596938133239746,
"step": 218
},
{
"epoch": 0.052326445666716216,
"grad_norm": 0.71875,
"learning_rate": 8e-05,
"loss": 1.5416910648345947,
"step": 220
},
{
"epoch": 0.052802140627322726,
"grad_norm": 0.93359375,
"learning_rate": 8e-05,
"loss": 1.6337580680847168,
"step": 222
},
{
"epoch": 0.05327783558792924,
"grad_norm": 0.89453125,
"learning_rate": 8e-05,
"loss": 1.694180965423584,
"step": 224
},
{
"epoch": 0.05375353054853575,
"grad_norm": 1.1015625,
"learning_rate": 8e-05,
"loss": 1.5831806659698486,
"step": 226
},
{
"epoch": 0.05422922550914226,
"grad_norm": 1.328125,
"learning_rate": 8e-05,
"loss": 1.6646983623504639,
"step": 228
},
{
"epoch": 0.05470492046974877,
"grad_norm": 1.53125,
"learning_rate": 8e-05,
"loss": 1.632063627243042,
"step": 230
},
{
"epoch": 0.05518061543035528,
"grad_norm": 1.3515625,
"learning_rate": 8e-05,
"loss": 1.6186381578445435,
"step": 232
},
{
"epoch": 0.0556563103909618,
"grad_norm": 0.9296875,
"learning_rate": 8e-05,
"loss": 1.5822536945343018,
"step": 234
},
{
"epoch": 0.05613200535156831,
"grad_norm": 0.9765625,
"learning_rate": 8e-05,
"loss": 1.598821759223938,
"step": 236
},
{
"epoch": 0.05660770031217482,
"grad_norm": 0.8203125,
"learning_rate": 8e-05,
"loss": 1.6583571434020996,
"step": 238
},
{
"epoch": 0.05708339527278133,
"grad_norm": 0.94921875,
"learning_rate": 8e-05,
"loss": 1.6493302583694458,
"step": 240
},
{
"epoch": 0.05755909023338784,
"grad_norm": 0.9296875,
"learning_rate": 8e-05,
"loss": 1.5849549770355225,
"step": 242
},
{
"epoch": 0.058034785193994354,
"grad_norm": 0.71875,
"learning_rate": 8e-05,
"loss": 1.5187630653381348,
"step": 244
},
{
"epoch": 0.058510480154600863,
"grad_norm": 0.8046875,
"learning_rate": 8e-05,
"loss": 1.6261816024780273,
"step": 246
},
{
"epoch": 0.05898617511520737,
"grad_norm": 0.94140625,
"learning_rate": 8e-05,
"loss": 1.5440542697906494,
"step": 248
},
{
"epoch": 0.05946187007581388,
"grad_norm": 0.80859375,
"learning_rate": 8e-05,
"loss": 1.5579140186309814,
"step": 250
},
{
"epoch": 0.05993756503642039,
"grad_norm": 0.671875,
"learning_rate": 8e-05,
"loss": 1.5661745071411133,
"step": 252
},
{
"epoch": 0.06041325999702691,
"grad_norm": 0.76171875,
"learning_rate": 8e-05,
"loss": 1.5916748046875,
"step": 254
},
{
"epoch": 0.06088895495763342,
"grad_norm": 0.765625,
"learning_rate": 8e-05,
"loss": 1.582345724105835,
"step": 256
},
{
"epoch": 0.06136464991823993,
"grad_norm": 0.78125,
"learning_rate": 8e-05,
"loss": 1.6371424198150635,
"step": 258
},
{
"epoch": 0.06184034487884644,
"grad_norm": 0.7265625,
"learning_rate": 8e-05,
"loss": 1.5874426364898682,
"step": 260
},
{
"epoch": 0.06231603983945295,
"grad_norm": 0.6796875,
"learning_rate": 8e-05,
"loss": 1.5973892211914062,
"step": 262
},
{
"epoch": 0.06279173480005946,
"grad_norm": 0.859375,
"learning_rate": 8e-05,
"loss": 1.6360384225845337,
"step": 264
},
{
"epoch": 0.06326742976066597,
"grad_norm": 0.703125,
"learning_rate": 8e-05,
"loss": 1.4682174921035767,
"step": 266
},
{
"epoch": 0.06374312472127248,
"grad_norm": 0.69140625,
"learning_rate": 8e-05,
"loss": 1.5434261560440063,
"step": 268
},
{
"epoch": 0.064218819681879,
"grad_norm": 0.671875,
"learning_rate": 8e-05,
"loss": 1.5429248809814453,
"step": 270
},
{
"epoch": 0.0646945146424855,
"grad_norm": 0.6328125,
"learning_rate": 8e-05,
"loss": 1.520768404006958,
"step": 272
},
{
"epoch": 0.06517020960309201,
"grad_norm": 0.63671875,
"learning_rate": 8e-05,
"loss": 1.6287932395935059,
"step": 274
},
{
"epoch": 0.06564590456369852,
"grad_norm": 0.75,
"learning_rate": 8e-05,
"loss": 1.574143409729004,
"step": 276
},
{
"epoch": 0.06612159952430503,
"grad_norm": 0.64453125,
"learning_rate": 8e-05,
"loss": 1.5749611854553223,
"step": 278
},
{
"epoch": 0.06659729448491156,
"grad_norm": 0.66796875,
"learning_rate": 8e-05,
"loss": 1.511296033859253,
"step": 280
},
{
"epoch": 0.06707298944551807,
"grad_norm": 0.6171875,
"learning_rate": 8e-05,
"loss": 1.5300962924957275,
"step": 282
},
{
"epoch": 0.06754868440612458,
"grad_norm": 0.59375,
"learning_rate": 8e-05,
"loss": 1.5132012367248535,
"step": 284
},
{
"epoch": 0.06802437936673109,
"grad_norm": 0.59375,
"learning_rate": 8e-05,
"loss": 1.5248513221740723,
"step": 286
},
{
"epoch": 0.0685000743273376,
"grad_norm": 0.64453125,
"learning_rate": 8e-05,
"loss": 1.4714152812957764,
"step": 288
},
{
"epoch": 0.0689757692879441,
"grad_norm": 0.5859375,
"learning_rate": 8e-05,
"loss": 1.536270022392273,
"step": 290
},
{
"epoch": 0.06945146424855062,
"grad_norm": 0.66796875,
"learning_rate": 8e-05,
"loss": 1.5859988927841187,
"step": 292
},
{
"epoch": 0.06992715920915712,
"grad_norm": 0.640625,
"learning_rate": 8e-05,
"loss": 1.6076054573059082,
"step": 294
},
{
"epoch": 0.07040285416976363,
"grad_norm": 0.65234375,
"learning_rate": 8e-05,
"loss": 1.537914752960205,
"step": 296
},
{
"epoch": 0.07087854913037014,
"grad_norm": 0.68359375,
"learning_rate": 8e-05,
"loss": 1.530918002128601,
"step": 298
},
{
"epoch": 0.07135424409097667,
"grad_norm": 0.63671875,
"learning_rate": 8e-05,
"loss": 1.5722606182098389,
"step": 300
},
{
"epoch": 0.07182993905158318,
"grad_norm": 0.6328125,
"learning_rate": 8e-05,
"loss": 1.5380263328552246,
"step": 302
},
{
"epoch": 0.07230563401218969,
"grad_norm": 0.62109375,
"learning_rate": 8e-05,
"loss": 1.543945074081421,
"step": 304
},
{
"epoch": 0.0727813289727962,
"grad_norm": 0.671875,
"learning_rate": 8e-05,
"loss": 1.4990713596343994,
"step": 306
},
{
"epoch": 0.0732570239334027,
"grad_norm": 0.5703125,
"learning_rate": 8e-05,
"loss": 1.5049118995666504,
"step": 308
},
{
"epoch": 0.07373271889400922,
"grad_norm": 0.546875,
"learning_rate": 8e-05,
"loss": 1.5481094121932983,
"step": 310
},
{
"epoch": 0.07420841385461573,
"grad_norm": 0.6015625,
"learning_rate": 8e-05,
"loss": 1.5431925058364868,
"step": 312
},
{
"epoch": 0.07468410881522224,
"grad_norm": 0.59375,
"learning_rate": 8e-05,
"loss": 1.5000509023666382,
"step": 314
},
{
"epoch": 0.07515980377582875,
"grad_norm": 0.640625,
"learning_rate": 8e-05,
"loss": 1.453176498413086,
"step": 316
},
{
"epoch": 0.07563549873643526,
"grad_norm": 0.59375,
"learning_rate": 8e-05,
"loss": 1.5792964696884155,
"step": 318
},
{
"epoch": 0.07611119369704177,
"grad_norm": 0.6171875,
"learning_rate": 8e-05,
"loss": 1.5420498847961426,
"step": 320
},
{
"epoch": 0.07658688865764829,
"grad_norm": 0.578125,
"learning_rate": 8e-05,
"loss": 1.5112196207046509,
"step": 322
},
{
"epoch": 0.0770625836182548,
"grad_norm": 0.5859375,
"learning_rate": 8e-05,
"loss": 1.5195896625518799,
"step": 324
},
{
"epoch": 0.07753827857886131,
"grad_norm": 0.578125,
"learning_rate": 8e-05,
"loss": 1.4650981426239014,
"step": 326
},
{
"epoch": 0.07801397353946782,
"grad_norm": 0.6015625,
"learning_rate": 8e-05,
"loss": 1.4874310493469238,
"step": 328
},
{
"epoch": 0.07848966850007433,
"grad_norm": 0.5546875,
"learning_rate": 8e-05,
"loss": 1.4700895547866821,
"step": 330
},
{
"epoch": 0.07896536346068084,
"grad_norm": 0.59765625,
"learning_rate": 8e-05,
"loss": 1.4657363891601562,
"step": 332
},
{
"epoch": 0.07944105842128735,
"grad_norm": 0.63671875,
"learning_rate": 8e-05,
"loss": 1.5102603435516357,
"step": 334
},
{
"epoch": 0.07991675338189386,
"grad_norm": 0.63671875,
"learning_rate": 8e-05,
"loss": 1.48199462890625,
"step": 336
},
{
"epoch": 0.08039244834250037,
"grad_norm": 0.58984375,
"learning_rate": 8e-05,
"loss": 1.573965072631836,
"step": 338
},
{
"epoch": 0.08086814330310688,
"grad_norm": 0.5859375,
"learning_rate": 8e-05,
"loss": 1.552716851234436,
"step": 340
},
{
"epoch": 0.0813438382637134,
"grad_norm": 0.59765625,
"learning_rate": 8e-05,
"loss": 1.5109150409698486,
"step": 342
},
{
"epoch": 0.08181953322431991,
"grad_norm": 0.62890625,
"learning_rate": 8e-05,
"loss": 1.478667974472046,
"step": 344
},
{
"epoch": 0.08229522818492642,
"grad_norm": 0.55859375,
"learning_rate": 8e-05,
"loss": 1.4951369762420654,
"step": 346
},
{
"epoch": 0.08277092314553293,
"grad_norm": 0.5234375,
"learning_rate": 8e-05,
"loss": 1.4743764400482178,
"step": 348
},
{
"epoch": 0.08324661810613944,
"grad_norm": 0.59375,
"learning_rate": 8e-05,
"loss": 1.5435149669647217,
"step": 350
},
{
"epoch": 0.08372231306674595,
"grad_norm": 0.5546875,
"learning_rate": 8e-05,
"loss": 1.4307265281677246,
"step": 352
},
{
"epoch": 0.08419800802735246,
"grad_norm": 0.60546875,
"learning_rate": 8e-05,
"loss": 1.5382444858551025,
"step": 354
},
{
"epoch": 0.08467370298795897,
"grad_norm": 0.56640625,
"learning_rate": 8e-05,
"loss": 1.4578557014465332,
"step": 356
},
{
"epoch": 0.08514939794856548,
"grad_norm": 0.5625,
"learning_rate": 8e-05,
"loss": 1.5319006443023682,
"step": 358
},
{
"epoch": 0.08562509290917199,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.511313796043396,
"step": 360
},
{
"epoch": 0.08610078786977851,
"grad_norm": 0.55078125,
"learning_rate": 8e-05,
"loss": 1.4577925205230713,
"step": 362
},
{
"epoch": 0.08657648283038502,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.5221188068389893,
"step": 364
},
{
"epoch": 0.08705217779099153,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.4162304401397705,
"step": 366
},
{
"epoch": 0.08752787275159804,
"grad_norm": 0.53515625,
"learning_rate": 8e-05,
"loss": 1.4972211122512817,
"step": 368
},
{
"epoch": 0.08800356771220455,
"grad_norm": 0.53515625,
"learning_rate": 8e-05,
"loss": 1.5569958686828613,
"step": 370
},
{
"epoch": 0.08847926267281106,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.467348337173462,
"step": 372
},
{
"epoch": 0.08895495763341757,
"grad_norm": 0.53515625,
"learning_rate": 8e-05,
"loss": 1.4739539623260498,
"step": 374
},
{
"epoch": 0.08943065259402408,
"grad_norm": 0.546875,
"learning_rate": 8e-05,
"loss": 1.478308916091919,
"step": 376
},
{
"epoch": 0.08990634755463059,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.4767718315124512,
"step": 378
},
{
"epoch": 0.0903820425152371,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.51509690284729,
"step": 380
},
{
"epoch": 0.09085773747584361,
"grad_norm": 0.5234375,
"learning_rate": 8e-05,
"loss": 1.4501855373382568,
"step": 382
},
{
"epoch": 0.09133343243645013,
"grad_norm": 0.5390625,
"learning_rate": 8e-05,
"loss": 1.45088529586792,
"step": 384
},
{
"epoch": 0.09180912739705664,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.4980132579803467,
"step": 386
},
{
"epoch": 0.09228482235766315,
"grad_norm": 0.51953125,
"learning_rate": 8e-05,
"loss": 1.5103974342346191,
"step": 388
},
{
"epoch": 0.09276051731826966,
"grad_norm": 0.58203125,
"learning_rate": 8e-05,
"loss": 1.4751368761062622,
"step": 390
},
{
"epoch": 0.09323621227887617,
"grad_norm": 0.62109375,
"learning_rate": 8e-05,
"loss": 1.494489073753357,
"step": 392
},
{
"epoch": 0.09371190723948268,
"grad_norm": 0.61328125,
"learning_rate": 8e-05,
"loss": 1.497837781906128,
"step": 394
},
{
"epoch": 0.09418760220008919,
"grad_norm": 0.55859375,
"learning_rate": 8e-05,
"loss": 1.5514724254608154,
"step": 396
},
{
"epoch": 0.0946632971606957,
"grad_norm": 0.86328125,
"learning_rate": 8e-05,
"loss": 1.5110323429107666,
"step": 398
},
{
"epoch": 0.09513899212130221,
"grad_norm": 0.63671875,
"learning_rate": 8e-05,
"loss": 1.4153172969818115,
"step": 400
},
{
"epoch": 0.09561468708190872,
"grad_norm": 0.51953125,
"learning_rate": 8e-05,
"loss": 1.4518225193023682,
"step": 402
},
{
"epoch": 0.09609038204251524,
"grad_norm": 0.578125,
"learning_rate": 8e-05,
"loss": 1.4477956295013428,
"step": 404
},
{
"epoch": 0.09656607700312175,
"grad_norm": 0.62109375,
"learning_rate": 8e-05,
"loss": 1.4921960830688477,
"step": 406
},
{
"epoch": 0.09704177196372826,
"grad_norm": 0.6328125,
"learning_rate": 8e-05,
"loss": 1.4914698600769043,
"step": 408
},
{
"epoch": 0.09751746692433477,
"grad_norm": 0.59375,
"learning_rate": 8e-05,
"loss": 1.4477533102035522,
"step": 410
},
{
"epoch": 0.09799316188494128,
"grad_norm": 0.546875,
"learning_rate": 8e-05,
"loss": 1.499894142150879,
"step": 412
},
{
"epoch": 0.09846885684554779,
"grad_norm": 0.5859375,
"learning_rate": 8e-05,
"loss": 1.5634784698486328,
"step": 414
},
{
"epoch": 0.0989445518061543,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.4383997917175293,
"step": 416
},
{
"epoch": 0.09942024676676081,
"grad_norm": 0.55859375,
"learning_rate": 8e-05,
"loss": 1.4373674392700195,
"step": 418
},
{
"epoch": 0.09989594172736732,
"grad_norm": 0.5703125,
"learning_rate": 8e-05,
"loss": 1.475003719329834,
"step": 420
},
{
"epoch": 0.10037163668797383,
"grad_norm": 0.5859375,
"learning_rate": 8e-05,
"loss": 1.4678364992141724,
"step": 422
},
{
"epoch": 0.10084733164858035,
"grad_norm": 0.5390625,
"learning_rate": 8e-05,
"loss": 1.45807945728302,
"step": 424
},
{
"epoch": 0.10132302660918686,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.5057690143585205,
"step": 426
},
{
"epoch": 0.10179872156979337,
"grad_norm": 0.50390625,
"learning_rate": 8e-05,
"loss": 1.4296057224273682,
"step": 428
},
{
"epoch": 0.10227441653039988,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.5102698802947998,
"step": 430
},
{
"epoch": 0.1027501114910064,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.37981116771698,
"step": 432
},
{
"epoch": 0.1032258064516129,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.4619908332824707,
"step": 434
},
{
"epoch": 0.10370150141221941,
"grad_norm": 0.55859375,
"learning_rate": 8e-05,
"loss": 1.4256863594055176,
"step": 436
},
{
"epoch": 0.10417719637282592,
"grad_norm": 0.6171875,
"learning_rate": 8e-05,
"loss": 1.4363038539886475,
"step": 438
},
{
"epoch": 0.10465289133343243,
"grad_norm": 0.58203125,
"learning_rate": 8e-05,
"loss": 1.3884978294372559,
"step": 440
},
{
"epoch": 0.10512858629403894,
"grad_norm": 0.62109375,
"learning_rate": 8e-05,
"loss": 1.4545469284057617,
"step": 442
},
{
"epoch": 0.10560428125464545,
"grad_norm": 0.5703125,
"learning_rate": 8e-05,
"loss": 1.4439201354980469,
"step": 444
},
{
"epoch": 0.10607997621525198,
"grad_norm": 0.58984375,
"learning_rate": 8e-05,
"loss": 1.5349268913269043,
"step": 446
},
{
"epoch": 0.10655567117585849,
"grad_norm": 0.609375,
"learning_rate": 8e-05,
"loss": 1.591422438621521,
"step": 448
},
{
"epoch": 0.107031366136465,
"grad_norm": 0.6640625,
"learning_rate": 8e-05,
"loss": 1.4320346117019653,
"step": 450
},
{
"epoch": 0.1075070610970715,
"grad_norm": 0.62109375,
"learning_rate": 8e-05,
"loss": 1.4518539905548096,
"step": 452
},
{
"epoch": 0.10798275605767801,
"grad_norm": 0.5859375,
"learning_rate": 8e-05,
"loss": 1.5056893825531006,
"step": 454
},
{
"epoch": 0.10845845101828452,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.3535287380218506,
"step": 456
},
{
"epoch": 0.10893414597889103,
"grad_norm": 0.57421875,
"learning_rate": 8e-05,
"loss": 1.4125394821166992,
"step": 458
},
{
"epoch": 0.10940984093949754,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.3990814685821533,
"step": 460
},
{
"epoch": 0.10988553590010405,
"grad_norm": 0.5546875,
"learning_rate": 8e-05,
"loss": 1.4865885972976685,
"step": 462
},
{
"epoch": 0.11036123086071056,
"grad_norm": 0.5234375,
"learning_rate": 8e-05,
"loss": 1.415689468383789,
"step": 464
},
{
"epoch": 0.11083692582131709,
"grad_norm": 0.53515625,
"learning_rate": 8e-05,
"loss": 1.453460931777954,
"step": 466
},
{
"epoch": 0.1113126207819236,
"grad_norm": 0.56640625,
"learning_rate": 8e-05,
"loss": 1.4493913650512695,
"step": 468
},
{
"epoch": 0.1117883157425301,
"grad_norm": 0.578125,
"learning_rate": 8e-05,
"loss": 1.4510160684585571,
"step": 470
},
{
"epoch": 0.11226401070313662,
"grad_norm": 0.5859375,
"learning_rate": 8e-05,
"loss": 1.5191359519958496,
"step": 472
},
{
"epoch": 0.11273970566374313,
"grad_norm": 0.59375,
"learning_rate": 8e-05,
"loss": 1.5257389545440674,
"step": 474
},
{
"epoch": 0.11321540062434964,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.4620857238769531,
"step": 476
},
{
"epoch": 0.11369109558495614,
"grad_norm": 0.5390625,
"learning_rate": 8e-05,
"loss": 1.4049038887023926,
"step": 478
},
{
"epoch": 0.11416679054556265,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.387476921081543,
"step": 480
},
{
"epoch": 0.11464248550616916,
"grad_norm": 0.53125,
"learning_rate": 8e-05,
"loss": 1.4375786781311035,
"step": 482
},
{
"epoch": 0.11511818046677567,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.4502665996551514,
"step": 484
},
{
"epoch": 0.1155938754273822,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.4172968864440918,
"step": 486
},
{
"epoch": 0.11606957038798871,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.3814518451690674,
"step": 488
},
{
"epoch": 0.11654526534859522,
"grad_norm": 0.546875,
"learning_rate": 8e-05,
"loss": 1.4727611541748047,
"step": 490
},
{
"epoch": 0.11702096030920173,
"grad_norm": 0.55859375,
"learning_rate": 8e-05,
"loss": 1.4043948650360107,
"step": 492
},
{
"epoch": 0.11749665526980824,
"grad_norm": 0.53125,
"learning_rate": 8e-05,
"loss": 1.4327163696289062,
"step": 494
},
{
"epoch": 0.11797235023041475,
"grad_norm": 0.578125,
"learning_rate": 8e-05,
"loss": 1.4427610635757446,
"step": 496
},
{
"epoch": 0.11844804519102126,
"grad_norm": 0.5234375,
"learning_rate": 8e-05,
"loss": 1.4240474700927734,
"step": 498
},
{
"epoch": 0.11892374015162777,
"grad_norm": 0.578125,
"learning_rate": 8e-05,
"loss": 1.471658706665039,
"step": 500
},
{
"epoch": 0.11939943511223428,
"grad_norm": 0.63671875,
"learning_rate": 8e-05,
"loss": 1.4233098030090332,
"step": 502
},
{
"epoch": 0.11987513007284079,
"grad_norm": 0.5859375,
"learning_rate": 8e-05,
"loss": 1.3341891765594482,
"step": 504
},
{
"epoch": 0.1203508250334473,
"grad_norm": 0.5390625,
"learning_rate": 8e-05,
"loss": 1.3719563484191895,
"step": 506
},
{
"epoch": 0.12082651999405382,
"grad_norm": 0.6171875,
"learning_rate": 8e-05,
"loss": 1.4592832326889038,
"step": 508
},
{
"epoch": 0.12130221495466033,
"grad_norm": 0.6015625,
"learning_rate": 8e-05,
"loss": 1.4251080751419067,
"step": 510
},
{
"epoch": 0.12177790991526684,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.4636266231536865,
"step": 512
},
{
"epoch": 0.12225360487587335,
"grad_norm": 0.5703125,
"learning_rate": 8e-05,
"loss": 1.3918344974517822,
"step": 514
},
{
"epoch": 0.12272929983647986,
"grad_norm": 0.5625,
"learning_rate": 8e-05,
"loss": 1.4410995244979858,
"step": 516
},
{
"epoch": 0.12320499479708637,
"grad_norm": 0.53125,
"learning_rate": 8e-05,
"loss": 1.4553332328796387,
"step": 518
},
{
"epoch": 0.12368068975769288,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.4932277202606201,
"step": 520
},
{
"epoch": 0.12415638471829939,
"grad_norm": 0.5234375,
"learning_rate": 8e-05,
"loss": 1.4398219585418701,
"step": 522
},
{
"epoch": 0.1246320796789059,
"grad_norm": 0.5234375,
"learning_rate": 8e-05,
"loss": 1.4100382328033447,
"step": 524
},
{
"epoch": 0.12510777463951242,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.4194281101226807,
"step": 526
},
{
"epoch": 0.12558346960011893,
"grad_norm": 0.50390625,
"learning_rate": 8e-05,
"loss": 1.4007759094238281,
"step": 528
},
{
"epoch": 0.12605916456072544,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.3943548202514648,
"step": 530
},
{
"epoch": 0.12653485952133195,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.3672170639038086,
"step": 532
},
{
"epoch": 0.12701055448193846,
"grad_norm": 0.58203125,
"learning_rate": 8e-05,
"loss": 1.3937108516693115,
"step": 534
},
{
"epoch": 0.12748624944254497,
"grad_norm": 0.59375,
"learning_rate": 8e-05,
"loss": 1.4582862854003906,
"step": 536
},
{
"epoch": 0.12796194440315148,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.417186975479126,
"step": 538
},
{
"epoch": 0.128437639363758,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.3944048881530762,
"step": 540
},
{
"epoch": 0.1289133343243645,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.4348058700561523,
"step": 542
},
{
"epoch": 0.129389029284971,
"grad_norm": 0.56640625,
"learning_rate": 8e-05,
"loss": 1.4025098085403442,
"step": 544
},
{
"epoch": 0.12986472424557752,
"grad_norm": 0.5859375,
"learning_rate": 8e-05,
"loss": 1.4666318893432617,
"step": 546
},
{
"epoch": 0.13034041920618403,
"grad_norm": 0.50390625,
"learning_rate": 8e-05,
"loss": 1.3959743976593018,
"step": 548
},
{
"epoch": 0.13081611416679054,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.435917615890503,
"step": 550
},
{
"epoch": 0.13129180912739705,
"grad_norm": 0.5234375,
"learning_rate": 8e-05,
"loss": 1.4515659809112549,
"step": 552
},
{
"epoch": 0.13176750408800356,
"grad_norm": 0.51953125,
"learning_rate": 8e-05,
"loss": 1.4431695938110352,
"step": 554
},
{
"epoch": 0.13224319904861007,
"grad_norm": 0.5703125,
"learning_rate": 8e-05,
"loss": 1.3696998357772827,
"step": 556
},
{
"epoch": 0.1327188940092166,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.4764920473098755,
"step": 558
},
{
"epoch": 0.1331945889698231,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.3894143104553223,
"step": 560
},
{
"epoch": 0.13367028393042962,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.4079980850219727,
"step": 562
},
{
"epoch": 0.13414597889103613,
"grad_norm": 0.49609375,
"learning_rate": 8e-05,
"loss": 1.3896784782409668,
"step": 564
},
{
"epoch": 0.13462167385164264,
"grad_norm": 0.498046875,
"learning_rate": 8e-05,
"loss": 1.4342916011810303,
"step": 566
},
{
"epoch": 0.13509736881224915,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.4243568181991577,
"step": 568
},
{
"epoch": 0.13557306377285566,
"grad_norm": 0.47265625,
"learning_rate": 8e-05,
"loss": 1.4043259620666504,
"step": 570
},
{
"epoch": 0.13604875873346217,
"grad_norm": 0.474609375,
"learning_rate": 8e-05,
"loss": 1.4198546409606934,
"step": 572
},
{
"epoch": 0.13652445369406868,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.3509985208511353,
"step": 574
},
{
"epoch": 0.1370001486546752,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.3983509540557861,
"step": 576
},
{
"epoch": 0.1374758436152817,
"grad_norm": 0.4921875,
"learning_rate": 8e-05,
"loss": 1.4067437648773193,
"step": 578
},
{
"epoch": 0.1379515385758882,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.4863321781158447,
"step": 580
},
{
"epoch": 0.13842723353649472,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.4052914381027222,
"step": 582
},
{
"epoch": 0.13890292849710123,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.408928394317627,
"step": 584
},
{
"epoch": 0.13937862345770774,
"grad_norm": 0.55859375,
"learning_rate": 8e-05,
"loss": 1.4460136890411377,
"step": 586
},
{
"epoch": 0.13985431841831425,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.4335639476776123,
"step": 588
},
{
"epoch": 0.14033001337892076,
"grad_norm": 0.53125,
"learning_rate": 8e-05,
"loss": 1.3965034484863281,
"step": 590
},
{
"epoch": 0.14080570833952727,
"grad_norm": 0.53125,
"learning_rate": 8e-05,
"loss": 1.4012255668640137,
"step": 592
},
{
"epoch": 0.14128140330013378,
"grad_norm": 0.53515625,
"learning_rate": 8e-05,
"loss": 1.4261143207550049,
"step": 594
},
{
"epoch": 0.1417570982607403,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.4008715152740479,
"step": 596
},
{
"epoch": 0.1422327932213468,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.4034451246261597,
"step": 598
},
{
"epoch": 0.14270848818195334,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.4082181453704834,
"step": 600
},
{
"epoch": 0.14318418314255985,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.3725682497024536,
"step": 602
},
{
"epoch": 0.14365987810316636,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.4293782711029053,
"step": 604
},
{
"epoch": 0.14413557306377286,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.4520360231399536,
"step": 606
},
{
"epoch": 0.14461126802437937,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.3525224924087524,
"step": 608
},
{
"epoch": 0.14508696298498588,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.4208955764770508,
"step": 610
},
{
"epoch": 0.1455626579455924,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.3334312438964844,
"step": 612
},
{
"epoch": 0.1460383529061989,
"grad_norm": 0.53125,
"learning_rate": 8e-05,
"loss": 1.3503882884979248,
"step": 614
},
{
"epoch": 0.1465140478668054,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.4191619157791138,
"step": 616
},
{
"epoch": 0.14698974282741192,
"grad_norm": 0.470703125,
"learning_rate": 8e-05,
"loss": 1.3381874561309814,
"step": 618
},
{
"epoch": 0.14746543778801843,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.3882290124893188,
"step": 620
},
{
"epoch": 0.14794113274862494,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.4173054695129395,
"step": 622
},
{
"epoch": 0.14841682770923145,
"grad_norm": 0.49609375,
"learning_rate": 8e-05,
"loss": 1.3113012313842773,
"step": 624
},
{
"epoch": 0.14889252266983796,
"grad_norm": 0.5390625,
"learning_rate": 8e-05,
"loss": 1.407628059387207,
"step": 626
},
{
"epoch": 0.14936821763044447,
"grad_norm": 0.5390625,
"learning_rate": 8e-05,
"loss": 1.447212815284729,
"step": 628
},
{
"epoch": 0.14984391259105098,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.3606481552124023,
"step": 630
},
{
"epoch": 0.1503196075516575,
"grad_norm": 0.55078125,
"learning_rate": 8e-05,
"loss": 1.4575624465942383,
"step": 632
},
{
"epoch": 0.150795302512264,
"grad_norm": 0.56640625,
"learning_rate": 8e-05,
"loss": 1.355147123336792,
"step": 634
},
{
"epoch": 0.1512709974728705,
"grad_norm": 0.498046875,
"learning_rate": 8e-05,
"loss": 1.37825345993042,
"step": 636
},
{
"epoch": 0.15174669243347702,
"grad_norm": 0.50390625,
"learning_rate": 8e-05,
"loss": 1.4053802490234375,
"step": 638
},
{
"epoch": 0.15222238739408353,
"grad_norm": 0.478515625,
"learning_rate": 8e-05,
"loss": 1.3817956447601318,
"step": 640
},
{
"epoch": 0.15269808235469007,
"grad_norm": 0.498046875,
"learning_rate": 8e-05,
"loss": 1.3938934803009033,
"step": 642
},
{
"epoch": 0.15317377731529658,
"grad_norm": 0.53125,
"learning_rate": 8e-05,
"loss": 1.35261869430542,
"step": 644
},
{
"epoch": 0.1536494722759031,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.3819756507873535,
"step": 646
},
{
"epoch": 0.1541251672365096,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.4077363014221191,
"step": 648
},
{
"epoch": 0.1546008621971161,
"grad_norm": 0.50390625,
"learning_rate": 8e-05,
"loss": 1.4303150177001953,
"step": 650
},
{
"epoch": 0.15507655715772262,
"grad_norm": 0.49609375,
"learning_rate": 8e-05,
"loss": 1.3727548122406006,
"step": 652
},
{
"epoch": 0.15555225211832913,
"grad_norm": 0.4921875,
"learning_rate": 8e-05,
"loss": 1.4012013673782349,
"step": 654
},
{
"epoch": 0.15602794707893564,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.3778249025344849,
"step": 656
},
{
"epoch": 0.15650364203954215,
"grad_norm": 0.470703125,
"learning_rate": 8e-05,
"loss": 1.384866714477539,
"step": 658
},
{
"epoch": 0.15697933700014866,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.4081860780715942,
"step": 660
},
{
"epoch": 0.15745503196075517,
"grad_norm": 0.482421875,
"learning_rate": 8e-05,
"loss": 1.3876349925994873,
"step": 662
},
{
"epoch": 0.15793072692136167,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.420417070388794,
"step": 664
},
{
"epoch": 0.15840642188196818,
"grad_norm": 0.50390625,
"learning_rate": 8e-05,
"loss": 1.427546501159668,
"step": 666
},
{
"epoch": 0.1588821168425747,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.404707431793213,
"step": 668
},
{
"epoch": 0.1593578118031812,
"grad_norm": 0.458984375,
"learning_rate": 8e-05,
"loss": 1.4167988300323486,
"step": 670
},
{
"epoch": 0.1598335067637877,
"grad_norm": 0.482421875,
"learning_rate": 8e-05,
"loss": 1.36492919921875,
"step": 672
},
{
"epoch": 0.16030920172439422,
"grad_norm": 0.546875,
"learning_rate": 8e-05,
"loss": 1.4290658235549927,
"step": 674
},
{
"epoch": 0.16078489668500073,
"grad_norm": 0.50390625,
"learning_rate": 8e-05,
"loss": 1.3939204216003418,
"step": 676
},
{
"epoch": 0.16126059164560724,
"grad_norm": 0.47265625,
"learning_rate": 8e-05,
"loss": 1.368532419204712,
"step": 678
},
{
"epoch": 0.16173628660621375,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.4039356708526611,
"step": 680
},
{
"epoch": 0.1622119815668203,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.4631690979003906,
"step": 682
},
{
"epoch": 0.1626876765274268,
"grad_norm": 0.458984375,
"learning_rate": 8e-05,
"loss": 1.3834668397903442,
"step": 684
},
{
"epoch": 0.1631633714880333,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.373947262763977,
"step": 686
},
{
"epoch": 0.16363906644863982,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.4164583683013916,
"step": 688
},
{
"epoch": 0.16411476140924633,
"grad_norm": 0.474609375,
"learning_rate": 8e-05,
"loss": 1.4322106838226318,
"step": 690
},
{
"epoch": 0.16459045636985284,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.3455379009246826,
"step": 692
},
{
"epoch": 0.16506615133045935,
"grad_norm": 0.482421875,
"learning_rate": 8e-05,
"loss": 1.34842050075531,
"step": 694
},
{
"epoch": 0.16554184629106586,
"grad_norm": 0.490234375,
"learning_rate": 8e-05,
"loss": 1.428257942199707,
"step": 696
},
{
"epoch": 0.16601754125167237,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.417719841003418,
"step": 698
},
{
"epoch": 0.16649323621227888,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.4131088256835938,
"step": 700
},
{
"epoch": 0.1669689311728854,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.356804370880127,
"step": 702
},
{
"epoch": 0.1674446261334919,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.355417251586914,
"step": 704
},
{
"epoch": 0.1679203210940984,
"grad_norm": 0.54296875,
"learning_rate": 8e-05,
"loss": 1.4380789995193481,
"step": 706
},
{
"epoch": 0.16839601605470492,
"grad_norm": 0.4921875,
"learning_rate": 8e-05,
"loss": 1.3789442777633667,
"step": 708
},
{
"epoch": 0.16887171101531143,
"grad_norm": 0.474609375,
"learning_rate": 8e-05,
"loss": 1.3488481044769287,
"step": 710
},
{
"epoch": 0.16934740597591794,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.3990561962127686,
"step": 712
},
{
"epoch": 0.16982310093652445,
"grad_norm": 0.490234375,
"learning_rate": 8e-05,
"loss": 1.3976104259490967,
"step": 714
},
{
"epoch": 0.17029879589713096,
"grad_norm": 0.494140625,
"learning_rate": 8e-05,
"loss": 1.430433750152588,
"step": 716
},
{
"epoch": 0.17077449085773747,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.359434723854065,
"step": 718
},
{
"epoch": 0.17125018581834398,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.4045766592025757,
"step": 720
},
{
"epoch": 0.17172588077895048,
"grad_norm": 0.49609375,
"learning_rate": 8e-05,
"loss": 1.3606858253479004,
"step": 722
},
{
"epoch": 0.17220157573955702,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.4614171981811523,
"step": 724
},
{
"epoch": 0.17267727070016353,
"grad_norm": 0.47265625,
"learning_rate": 8e-05,
"loss": 1.416619062423706,
"step": 726
},
{
"epoch": 0.17315296566077004,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.2924635410308838,
"step": 728
},
{
"epoch": 0.17362866062137655,
"grad_norm": 0.47265625,
"learning_rate": 8e-05,
"loss": 1.3354673385620117,
"step": 730
},
{
"epoch": 0.17410435558198306,
"grad_norm": 0.4765625,
"learning_rate": 8e-05,
"loss": 1.3578845262527466,
"step": 732
},
{
"epoch": 0.17458005054258957,
"grad_norm": 0.49609375,
"learning_rate": 8e-05,
"loss": 1.4009724855422974,
"step": 734
},
{
"epoch": 0.17505574550319608,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.4139372110366821,
"step": 736
},
{
"epoch": 0.1755314404638026,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.3427128791809082,
"step": 738
},
{
"epoch": 0.1760071354244091,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.3915586471557617,
"step": 740
},
{
"epoch": 0.1764828303850156,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.3710131645202637,
"step": 742
},
{
"epoch": 0.17695852534562212,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.3700971603393555,
"step": 744
},
{
"epoch": 0.17743422030622863,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.3621227741241455,
"step": 746
},
{
"epoch": 0.17790991526683514,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.38692307472229,
"step": 748
},
{
"epoch": 0.17838561022744165,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.352332592010498,
"step": 750
},
{
"epoch": 0.17886130518804816,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.4046599864959717,
"step": 752
},
{
"epoch": 0.17933700014865467,
"grad_norm": 0.5390625,
"learning_rate": 8e-05,
"loss": 1.3857762813568115,
"step": 754
},
{
"epoch": 0.17981269510926118,
"grad_norm": 0.546875,
"learning_rate": 8e-05,
"loss": 1.3184947967529297,
"step": 756
},
{
"epoch": 0.1802883900698677,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.3482776880264282,
"step": 758
},
{
"epoch": 0.1807640850304742,
"grad_norm": 0.51953125,
"learning_rate": 8e-05,
"loss": 1.434415340423584,
"step": 760
},
{
"epoch": 0.1812397799910807,
"grad_norm": 0.490234375,
"learning_rate": 8e-05,
"loss": 1.3801504373550415,
"step": 762
},
{
"epoch": 0.18171547495168722,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.3628723621368408,
"step": 764
},
{
"epoch": 0.18219116991229375,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.3805229663848877,
"step": 766
},
{
"epoch": 0.18266686487290026,
"grad_norm": 0.494140625,
"learning_rate": 8e-05,
"loss": 1.3568819761276245,
"step": 768
},
{
"epoch": 0.18314255983350677,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.3049235343933105,
"step": 770
},
{
"epoch": 0.18361825479411328,
"grad_norm": 0.46875,
"learning_rate": 8e-05,
"loss": 1.3783180713653564,
"step": 772
},
{
"epoch": 0.1840939497547198,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.3785371780395508,
"step": 774
},
{
"epoch": 0.1845696447153263,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.4072458744049072,
"step": 776
},
{
"epoch": 0.1850453396759328,
"grad_norm": 0.47265625,
"learning_rate": 8e-05,
"loss": 1.3426545858383179,
"step": 778
},
{
"epoch": 0.18552103463653932,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.352428674697876,
"step": 780
},
{
"epoch": 0.18599672959714583,
"grad_norm": 0.46875,
"learning_rate": 8e-05,
"loss": 1.3136948347091675,
"step": 782
},
{
"epoch": 0.18647242455775234,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.3649238348007202,
"step": 784
},
{
"epoch": 0.18694811951835885,
"grad_norm": 0.50390625,
"learning_rate": 8e-05,
"loss": 1.4003939628601074,
"step": 786
},
{
"epoch": 0.18742381447896536,
"grad_norm": 0.470703125,
"learning_rate": 8e-05,
"loss": 1.3522775173187256,
"step": 788
},
{
"epoch": 0.18789950943957187,
"grad_norm": 0.478515625,
"learning_rate": 8e-05,
"loss": 1.353920578956604,
"step": 790
},
{
"epoch": 0.18837520440017838,
"grad_norm": 0.458984375,
"learning_rate": 8e-05,
"loss": 1.4120471477508545,
"step": 792
},
{
"epoch": 0.1888508993607849,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.425047755241394,
"step": 794
},
{
"epoch": 0.1893265943213914,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.3698722124099731,
"step": 796
},
{
"epoch": 0.1898022892819979,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.3825695514678955,
"step": 798
},
{
"epoch": 0.19027798424260442,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.384330153465271,
"step": 800
},
{
"epoch": 0.19075367920321093,
"grad_norm": 0.4765625,
"learning_rate": 8e-05,
"loss": 1.365710735321045,
"step": 802
},
{
"epoch": 0.19122937416381744,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.351928472518921,
"step": 804
},
{
"epoch": 0.19170506912442398,
"grad_norm": 0.458984375,
"learning_rate": 8e-05,
"loss": 1.364558458328247,
"step": 806
},
{
"epoch": 0.1921807640850305,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.4033458232879639,
"step": 808
},
{
"epoch": 0.192656459045637,
"grad_norm": 0.494140625,
"learning_rate": 8e-05,
"loss": 1.378347635269165,
"step": 810
},
{
"epoch": 0.1931321540062435,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.364283561706543,
"step": 812
},
{
"epoch": 0.19360784896685002,
"grad_norm": 0.53125,
"learning_rate": 8e-05,
"loss": 1.414649248123169,
"step": 814
},
{
"epoch": 0.19408354392745653,
"grad_norm": 0.4453125,
"learning_rate": 8e-05,
"loss": 1.3634638786315918,
"step": 816
},
{
"epoch": 0.19455923888806304,
"grad_norm": 0.55859375,
"learning_rate": 8e-05,
"loss": 1.4743528366088867,
"step": 818
},
{
"epoch": 0.19503493384866954,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.3400163650512695,
"step": 820
},
{
"epoch": 0.19551062880927605,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.3880252838134766,
"step": 822
},
{
"epoch": 0.19598632376988256,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.395135521888733,
"step": 824
},
{
"epoch": 0.19646201873048907,
"grad_norm": 0.4609375,
"learning_rate": 8e-05,
"loss": 1.3433012962341309,
"step": 826
},
{
"epoch": 0.19693771369109558,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.317229986190796,
"step": 828
},
{
"epoch": 0.1974134086517021,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.3179906606674194,
"step": 830
},
{
"epoch": 0.1978891036123086,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.3207850456237793,
"step": 832
},
{
"epoch": 0.1983647985729151,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.4160897731781006,
"step": 834
},
{
"epoch": 0.19884049353352162,
"grad_norm": 0.51953125,
"learning_rate": 8e-05,
"loss": 1.34122633934021,
"step": 836
},
{
"epoch": 0.19931618849412813,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.3531912565231323,
"step": 838
},
{
"epoch": 0.19979188345473464,
"grad_norm": 0.482421875,
"learning_rate": 8e-05,
"loss": 1.3703558444976807,
"step": 840
},
{
"epoch": 0.20026757841534115,
"grad_norm": 0.46875,
"learning_rate": 8e-05,
"loss": 1.3876454830169678,
"step": 842
},
{
"epoch": 0.20074327337594766,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.3795206546783447,
"step": 844
},
{
"epoch": 0.20121896833655417,
"grad_norm": 0.482421875,
"learning_rate": 8e-05,
"loss": 1.3170604705810547,
"step": 846
},
{
"epoch": 0.2016946632971607,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.3548598289489746,
"step": 848
},
{
"epoch": 0.20217035825776722,
"grad_norm": 0.5546875,
"learning_rate": 8e-05,
"loss": 1.359254002571106,
"step": 850
},
{
"epoch": 0.20264605321837373,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.389087438583374,
"step": 852
},
{
"epoch": 0.20312174817898024,
"grad_norm": 0.447265625,
"learning_rate": 8e-05,
"loss": 1.3296732902526855,
"step": 854
},
{
"epoch": 0.20359744313958675,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.3714617490768433,
"step": 856
},
{
"epoch": 0.20407313810019326,
"grad_norm": 0.4765625,
"learning_rate": 8e-05,
"loss": 1.3371829986572266,
"step": 858
},
{
"epoch": 0.20454883306079977,
"grad_norm": 0.498046875,
"learning_rate": 8e-05,
"loss": 1.386389970779419,
"step": 860
},
{
"epoch": 0.20502452802140628,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.338608741760254,
"step": 862
},
{
"epoch": 0.2055002229820128,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.328315019607544,
"step": 864
},
{
"epoch": 0.2059759179426193,
"grad_norm": 0.4921875,
"learning_rate": 8e-05,
"loss": 1.3264660835266113,
"step": 866
},
{
"epoch": 0.2064516129032258,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.4002896547317505,
"step": 868
},
{
"epoch": 0.20692730786383232,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.3713188171386719,
"step": 870
},
{
"epoch": 0.20740300282443883,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.350874900817871,
"step": 872
},
{
"epoch": 0.20787869778504534,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.3784689903259277,
"step": 874
},
{
"epoch": 0.20835439274565185,
"grad_norm": 0.59765625,
"learning_rate": 8e-05,
"loss": 1.3428910970687866,
"step": 876
},
{
"epoch": 0.20883008770625835,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.3714317083358765,
"step": 878
},
{
"epoch": 0.20930578266686486,
"grad_norm": 0.494140625,
"learning_rate": 8e-05,
"loss": 1.3759992122650146,
"step": 880
},
{
"epoch": 0.20978147762747137,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.2990326881408691,
"step": 882
},
{
"epoch": 0.21025717258807788,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.3624963760375977,
"step": 884
},
{
"epoch": 0.2107328675486844,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.3324933052062988,
"step": 886
},
{
"epoch": 0.2112085625092909,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.4013808965682983,
"step": 888
},
{
"epoch": 0.21168425746989744,
"grad_norm": 0.4765625,
"learning_rate": 8e-05,
"loss": 1.338510274887085,
"step": 890
},
{
"epoch": 0.21215995243050395,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.4126381874084473,
"step": 892
},
{
"epoch": 0.21263564739111046,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.371493935585022,
"step": 894
},
{
"epoch": 0.21311134235171697,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.383441686630249,
"step": 896
},
{
"epoch": 0.21358703731232348,
"grad_norm": 0.51953125,
"learning_rate": 8e-05,
"loss": 1.3659964799880981,
"step": 898
},
{
"epoch": 0.21406273227293,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.2908456325531006,
"step": 900
},
{
"epoch": 0.2145384272335365,
"grad_norm": 0.498046875,
"learning_rate": 8e-05,
"loss": 1.3610074520111084,
"step": 902
},
{
"epoch": 0.215014122194143,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.3580766916275024,
"step": 904
},
{
"epoch": 0.21548981715474952,
"grad_norm": 0.4921875,
"learning_rate": 8e-05,
"loss": 1.458742618560791,
"step": 906
},
{
"epoch": 0.21596551211535603,
"grad_norm": 0.4921875,
"learning_rate": 8e-05,
"loss": 1.2720565795898438,
"step": 908
},
{
"epoch": 0.21644120707596254,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.3226542472839355,
"step": 910
},
{
"epoch": 0.21691690203656905,
"grad_norm": 0.46875,
"learning_rate": 8e-05,
"loss": 1.3100987672805786,
"step": 912
},
{
"epoch": 0.21739259699717556,
"grad_norm": 0.482421875,
"learning_rate": 8e-05,
"loss": 1.3754308223724365,
"step": 914
},
{
"epoch": 0.21786829195778207,
"grad_norm": 0.53515625,
"learning_rate": 8e-05,
"loss": 1.3694303035736084,
"step": 916
},
{
"epoch": 0.21834398691838858,
"grad_norm": 0.515625,
"learning_rate": 8e-05,
"loss": 1.394423007965088,
"step": 918
},
{
"epoch": 0.2188196818789951,
"grad_norm": 0.4609375,
"learning_rate": 8e-05,
"loss": 1.3577532768249512,
"step": 920
},
{
"epoch": 0.2192953768396016,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.2522318363189697,
"step": 922
},
{
"epoch": 0.2197710718002081,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.3532583713531494,
"step": 924
},
{
"epoch": 0.22024676676081462,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.3816845417022705,
"step": 926
},
{
"epoch": 0.22072246172142113,
"grad_norm": 0.494140625,
"learning_rate": 8e-05,
"loss": 1.362253189086914,
"step": 928
},
{
"epoch": 0.22119815668202766,
"grad_norm": 0.494140625,
"learning_rate": 8e-05,
"loss": 1.3231050968170166,
"step": 930
},
{
"epoch": 0.22167385164263417,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.3791143894195557,
"step": 932
},
{
"epoch": 0.22214954660324068,
"grad_norm": 0.470703125,
"learning_rate": 8e-05,
"loss": 1.3647040128707886,
"step": 934
},
{
"epoch": 0.2226252415638472,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.3556348085403442,
"step": 936
},
{
"epoch": 0.2231009365244537,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.3153495788574219,
"step": 938
},
{
"epoch": 0.2235766314850602,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2898294925689697,
"step": 940
},
{
"epoch": 0.22405232644566672,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.3297260999679565,
"step": 942
},
{
"epoch": 0.22452802140627323,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.3505053520202637,
"step": 944
},
{
"epoch": 0.22500371636687974,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3983497619628906,
"step": 946
},
{
"epoch": 0.22547941132748625,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.3593679666519165,
"step": 948
},
{
"epoch": 0.22595510628809276,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.336477518081665,
"step": 950
},
{
"epoch": 0.22643080124869927,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.3708462715148926,
"step": 952
},
{
"epoch": 0.22690649620930578,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.4065918922424316,
"step": 954
},
{
"epoch": 0.2273821911699123,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.361413836479187,
"step": 956
},
{
"epoch": 0.2278578861305188,
"grad_norm": 0.49609375,
"learning_rate": 8e-05,
"loss": 1.3337655067443848,
"step": 958
},
{
"epoch": 0.2283335810911253,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.2802634239196777,
"step": 960
},
{
"epoch": 0.22880927605173182,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.3333477973937988,
"step": 962
},
{
"epoch": 0.22928497101233833,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.3998594284057617,
"step": 964
},
{
"epoch": 0.22976066597294484,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3377106189727783,
"step": 966
},
{
"epoch": 0.23023636093355135,
"grad_norm": 0.458984375,
"learning_rate": 8e-05,
"loss": 1.2901934385299683,
"step": 968
},
{
"epoch": 0.23071205589415786,
"grad_norm": 0.498046875,
"learning_rate": 8e-05,
"loss": 1.3435245752334595,
"step": 970
},
{
"epoch": 0.2311877508547644,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.3464173078536987,
"step": 972
},
{
"epoch": 0.2316634458153709,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.3196808099746704,
"step": 974
},
{
"epoch": 0.23213914077597742,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.3490209579467773,
"step": 976
},
{
"epoch": 0.23261483573658392,
"grad_norm": 0.474609375,
"learning_rate": 8e-05,
"loss": 1.3755543231964111,
"step": 978
},
{
"epoch": 0.23309053069719043,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.3209686279296875,
"step": 980
},
{
"epoch": 0.23356622565779694,
"grad_norm": 0.474609375,
"learning_rate": 8e-05,
"loss": 1.3944644927978516,
"step": 982
},
{
"epoch": 0.23404192061840345,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.3737695217132568,
"step": 984
},
{
"epoch": 0.23451761557900996,
"grad_norm": 0.447265625,
"learning_rate": 8e-05,
"loss": 1.352348804473877,
"step": 986
},
{
"epoch": 0.23499331053961647,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.2491270303726196,
"step": 988
},
{
"epoch": 0.23546900550022298,
"grad_norm": 0.478515625,
"learning_rate": 8e-05,
"loss": 1.4017226696014404,
"step": 990
},
{
"epoch": 0.2359447004608295,
"grad_norm": 0.46875,
"learning_rate": 8e-05,
"loss": 1.3329687118530273,
"step": 992
},
{
"epoch": 0.236420395421436,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.2956058979034424,
"step": 994
},
{
"epoch": 0.2368960903820425,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.376798391342163,
"step": 996
},
{
"epoch": 0.23737178534264902,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.3533029556274414,
"step": 998
},
{
"epoch": 0.23784748030325553,
"grad_norm": 0.4453125,
"learning_rate": 8e-05,
"loss": 1.3347084522247314,
"step": 1000
},
{
"epoch": 0.23832317526386204,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.3693647384643555,
"step": 1002
},
{
"epoch": 0.23879887022446855,
"grad_norm": 0.4453125,
"learning_rate": 8e-05,
"loss": 1.3653826713562012,
"step": 1004
},
{
"epoch": 0.23927456518507506,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.345597505569458,
"step": 1006
},
{
"epoch": 0.23975026014568157,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.2825236320495605,
"step": 1008
},
{
"epoch": 0.24022595510628808,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.3502631187438965,
"step": 1010
},
{
"epoch": 0.2407016500668946,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.3079496622085571,
"step": 1012
},
{
"epoch": 0.24117734502750113,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.3474180698394775,
"step": 1014
},
{
"epoch": 0.24165303998810764,
"grad_norm": 0.4765625,
"learning_rate": 8e-05,
"loss": 1.3570088148117065,
"step": 1016
},
{
"epoch": 0.24212873494871415,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.3702566623687744,
"step": 1018
},
{
"epoch": 0.24260442990932066,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.3773030042648315,
"step": 1020
},
{
"epoch": 0.24308012486992717,
"grad_norm": 0.470703125,
"learning_rate": 8e-05,
"loss": 1.333245873451233,
"step": 1022
},
{
"epoch": 0.24355581983053368,
"grad_norm": 0.4609375,
"learning_rate": 8e-05,
"loss": 1.3305965662002563,
"step": 1024
},
{
"epoch": 0.24403151479114019,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.3149254322052002,
"step": 1026
},
{
"epoch": 0.2445072097517467,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.34800124168396,
"step": 1028
},
{
"epoch": 0.2449829047123532,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.3632348775863647,
"step": 1030
},
{
"epoch": 0.24545859967295972,
"grad_norm": 0.482421875,
"learning_rate": 8e-05,
"loss": 1.3612074851989746,
"step": 1032
},
{
"epoch": 0.24593429463356622,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3488757610321045,
"step": 1034
},
{
"epoch": 0.24640998959417273,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.3559046983718872,
"step": 1036
},
{
"epoch": 0.24688568455477924,
"grad_norm": 0.453125,
"learning_rate": 8e-05,
"loss": 1.3708908557891846,
"step": 1038
},
{
"epoch": 0.24736137951538575,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.33760666847229,
"step": 1040
},
{
"epoch": 0.24783707447599226,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2863125801086426,
"step": 1042
},
{
"epoch": 0.24831276943659877,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.3174580335617065,
"step": 1044
},
{
"epoch": 0.24878846439720528,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.3641953468322754,
"step": 1046
},
{
"epoch": 0.2492641593578118,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.3469069004058838,
"step": 1048
},
{
"epoch": 0.2497398543184183,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.3538458347320557,
"step": 1050
},
{
"epoch": 0.25021554927902484,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.354750633239746,
"step": 1052
},
{
"epoch": 0.25069124423963135,
"grad_norm": 0.4609375,
"learning_rate": 8e-05,
"loss": 1.3567293882369995,
"step": 1054
},
{
"epoch": 0.25116693920023786,
"grad_norm": 0.4921875,
"learning_rate": 8e-05,
"loss": 1.3444650173187256,
"step": 1056
},
{
"epoch": 0.25164263416084437,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.3146984577178955,
"step": 1058
},
{
"epoch": 0.2521183291214509,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.3101708889007568,
"step": 1060
},
{
"epoch": 0.2525940240820574,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.3136630058288574,
"step": 1062
},
{
"epoch": 0.2530697190426639,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.35286545753479,
"step": 1064
},
{
"epoch": 0.2535454140032704,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.319314956665039,
"step": 1066
},
{
"epoch": 0.2540211089638769,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.3844151496887207,
"step": 1068
},
{
"epoch": 0.25449680392448343,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.2962524890899658,
"step": 1070
},
{
"epoch": 0.25497249888508994,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.3922219276428223,
"step": 1072
},
{
"epoch": 0.25544819384569645,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.3131260871887207,
"step": 1074
},
{
"epoch": 0.25592388880630296,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.322244644165039,
"step": 1076
},
{
"epoch": 0.25639958376690947,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.3516499996185303,
"step": 1078
},
{
"epoch": 0.256875278727516,
"grad_norm": 0.470703125,
"learning_rate": 8e-05,
"loss": 1.3630871772766113,
"step": 1080
},
{
"epoch": 0.2573509736881225,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.3094751834869385,
"step": 1082
},
{
"epoch": 0.257826668648729,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.306774616241455,
"step": 1084
},
{
"epoch": 0.2583023636093355,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.3254430294036865,
"step": 1086
},
{
"epoch": 0.258778058569942,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.306220293045044,
"step": 1088
},
{
"epoch": 0.2592537535305485,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.337794303894043,
"step": 1090
},
{
"epoch": 0.25972944849115503,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.324033260345459,
"step": 1092
},
{
"epoch": 0.26020514345176154,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.3060619831085205,
"step": 1094
},
{
"epoch": 0.26068083841236805,
"grad_norm": 0.46875,
"learning_rate": 8e-05,
"loss": 1.411613941192627,
"step": 1096
},
{
"epoch": 0.26115653337297456,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.3333206176757812,
"step": 1098
},
{
"epoch": 0.2616322283335811,
"grad_norm": 0.494140625,
"learning_rate": 8e-05,
"loss": 1.3340492248535156,
"step": 1100
},
{
"epoch": 0.2621079232941876,
"grad_norm": 0.5234375,
"learning_rate": 8e-05,
"loss": 1.3538923263549805,
"step": 1102
},
{
"epoch": 0.2625836182547941,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.2844221591949463,
"step": 1104
},
{
"epoch": 0.2630593132154006,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2786577939987183,
"step": 1106
},
{
"epoch": 0.2635350081760071,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.3393871784210205,
"step": 1108
},
{
"epoch": 0.2640107031366136,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.3317300081253052,
"step": 1110
},
{
"epoch": 0.26448639809722013,
"grad_norm": 0.470703125,
"learning_rate": 8e-05,
"loss": 1.329606056213379,
"step": 1112
},
{
"epoch": 0.26496209305782664,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.3412857055664062,
"step": 1114
},
{
"epoch": 0.2654377880184332,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.3141382932662964,
"step": 1116
},
{
"epoch": 0.2659134829790397,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.3066372871398926,
"step": 1118
},
{
"epoch": 0.2663891779396462,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.336663007736206,
"step": 1120
},
{
"epoch": 0.26686487290025274,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.3393672704696655,
"step": 1122
},
{
"epoch": 0.26734056786085925,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.338028907775879,
"step": 1124
},
{
"epoch": 0.26781626282146576,
"grad_norm": 0.447265625,
"learning_rate": 8e-05,
"loss": 1.25938880443573,
"step": 1126
},
{
"epoch": 0.26829195778207227,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.3836978673934937,
"step": 1128
},
{
"epoch": 0.2687676527426788,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.3326656818389893,
"step": 1130
},
{
"epoch": 0.2692433477032853,
"grad_norm": 0.470703125,
"learning_rate": 8e-05,
"loss": 1.2927348613739014,
"step": 1132
},
{
"epoch": 0.2697190426638918,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.4277849197387695,
"step": 1134
},
{
"epoch": 0.2701947376244983,
"grad_norm": 0.498046875,
"learning_rate": 8e-05,
"loss": 1.3989144563674927,
"step": 1136
},
{
"epoch": 0.2706704325851048,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.3444643020629883,
"step": 1138
},
{
"epoch": 0.2711461275457113,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.2833266258239746,
"step": 1140
},
{
"epoch": 0.27162182250631783,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.358147382736206,
"step": 1142
},
{
"epoch": 0.27209751746692434,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.3105173110961914,
"step": 1144
},
{
"epoch": 0.27257321242753085,
"grad_norm": 0.478515625,
"learning_rate": 8e-05,
"loss": 1.3114371299743652,
"step": 1146
},
{
"epoch": 0.27304890738813736,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.2814993858337402,
"step": 1148
},
{
"epoch": 0.2735246023487439,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.3203294277191162,
"step": 1150
},
{
"epoch": 0.2740002973093504,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.2979755401611328,
"step": 1152
},
{
"epoch": 0.2744759922699569,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.3081634044647217,
"step": 1154
},
{
"epoch": 0.2749516872305634,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.3281807899475098,
"step": 1156
},
{
"epoch": 0.2754273821911699,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.3202593326568604,
"step": 1158
},
{
"epoch": 0.2759030771517764,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.3806310892105103,
"step": 1160
},
{
"epoch": 0.27637877211238293,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.3173789978027344,
"step": 1162
},
{
"epoch": 0.27685446707298944,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.3223962783813477,
"step": 1164
},
{
"epoch": 0.27733016203359595,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.3117542266845703,
"step": 1166
},
{
"epoch": 0.27780585699420246,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.3533828258514404,
"step": 1168
},
{
"epoch": 0.27828155195480897,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.3214187622070312,
"step": 1170
},
{
"epoch": 0.2787572469154155,
"grad_norm": 0.458984375,
"learning_rate": 8e-05,
"loss": 1.3294625282287598,
"step": 1172
},
{
"epoch": 0.279232941876022,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.3171840906143188,
"step": 1174
},
{
"epoch": 0.2797086368366285,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3028992414474487,
"step": 1176
},
{
"epoch": 0.280184331797235,
"grad_norm": 0.47265625,
"learning_rate": 8e-05,
"loss": 1.3446723222732544,
"step": 1178
},
{
"epoch": 0.2806600267578415,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.296749472618103,
"step": 1180
},
{
"epoch": 0.28113572171844803,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.3563461303710938,
"step": 1182
},
{
"epoch": 0.28161141667905454,
"grad_norm": 0.5,
"learning_rate": 8e-05,
"loss": 1.3181467056274414,
"step": 1184
},
{
"epoch": 0.28208711163966105,
"grad_norm": 0.48046875,
"learning_rate": 8e-05,
"loss": 1.3786540031433105,
"step": 1186
},
{
"epoch": 0.28256280660026756,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3328609466552734,
"step": 1188
},
{
"epoch": 0.28303850156087407,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3462462425231934,
"step": 1190
},
{
"epoch": 0.2835141965214806,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.3369724750518799,
"step": 1192
},
{
"epoch": 0.2839898914820871,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.3191611766815186,
"step": 1194
},
{
"epoch": 0.2844655864426936,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.3197510242462158,
"step": 1196
},
{
"epoch": 0.2849412814033001,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.3486484289169312,
"step": 1198
},
{
"epoch": 0.28541697636390667,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.3362209796905518,
"step": 1200
},
{
"epoch": 0.2858926713245132,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.322070837020874,
"step": 1202
},
{
"epoch": 0.2863683662851197,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.3231661319732666,
"step": 1204
},
{
"epoch": 0.2868440612457262,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.2671769857406616,
"step": 1206
},
{
"epoch": 0.2873197562063327,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.3193705081939697,
"step": 1208
},
{
"epoch": 0.2877954511669392,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.3454172611236572,
"step": 1210
},
{
"epoch": 0.28827114612754573,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.351203441619873,
"step": 1212
},
{
"epoch": 0.28874684108815224,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.3364512920379639,
"step": 1214
},
{
"epoch": 0.28922253604875875,
"grad_norm": 0.46875,
"learning_rate": 8e-05,
"loss": 1.3189178705215454,
"step": 1216
},
{
"epoch": 0.28969823100936526,
"grad_norm": 0.48828125,
"learning_rate": 8e-05,
"loss": 1.2867472171783447,
"step": 1218
},
{
"epoch": 0.29017392596997177,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.3088388442993164,
"step": 1220
},
{
"epoch": 0.2906496209305783,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.2755553722381592,
"step": 1222
},
{
"epoch": 0.2911253158911848,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.3116247653961182,
"step": 1224
},
{
"epoch": 0.2916010108517913,
"grad_norm": 0.4453125,
"learning_rate": 8e-05,
"loss": 1.2760411500930786,
"step": 1226
},
{
"epoch": 0.2920767058123978,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.3090481758117676,
"step": 1228
},
{
"epoch": 0.2925524007730043,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3445281982421875,
"step": 1230
},
{
"epoch": 0.2930280957336108,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.3193836212158203,
"step": 1232
},
{
"epoch": 0.29350379069421734,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.20814847946167,
"step": 1234
},
{
"epoch": 0.29397948565482385,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.266977310180664,
"step": 1236
},
{
"epoch": 0.29445518061543036,
"grad_norm": 0.486328125,
"learning_rate": 8e-05,
"loss": 1.3388676643371582,
"step": 1238
},
{
"epoch": 0.29493087557603687,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.3216158151626587,
"step": 1240
},
{
"epoch": 0.2954065705366434,
"grad_norm": 0.4609375,
"learning_rate": 8e-05,
"loss": 1.3377256393432617,
"step": 1242
},
{
"epoch": 0.2958822654972499,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.3120627403259277,
"step": 1244
},
{
"epoch": 0.2963579604578564,
"grad_norm": 0.482421875,
"learning_rate": 8e-05,
"loss": 1.38155198097229,
"step": 1246
},
{
"epoch": 0.2968336554184629,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.3631592988967896,
"step": 1248
},
{
"epoch": 0.2973093503790694,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.342321515083313,
"step": 1250
},
{
"epoch": 0.2977850453396759,
"grad_norm": 0.478515625,
"learning_rate": 8e-05,
"loss": 1.391056776046753,
"step": 1252
},
{
"epoch": 0.29826074030028243,
"grad_norm": 0.447265625,
"learning_rate": 8e-05,
"loss": 1.275477409362793,
"step": 1254
},
{
"epoch": 0.29873643526088894,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.3247549533843994,
"step": 1256
},
{
"epoch": 0.29921213022149545,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3454852104187012,
"step": 1258
},
{
"epoch": 0.29968782518210196,
"grad_norm": 0.46875,
"learning_rate": 8e-05,
"loss": 1.3124552965164185,
"step": 1260
},
{
"epoch": 0.3001635201427085,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.309496521949768,
"step": 1262
},
{
"epoch": 0.300639215103315,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.3047943115234375,
"step": 1264
},
{
"epoch": 0.3011149100639215,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.348783016204834,
"step": 1266
},
{
"epoch": 0.301590605024528,
"grad_norm": 0.4453125,
"learning_rate": 8e-05,
"loss": 1.2833664417266846,
"step": 1268
},
{
"epoch": 0.3020662999851345,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.3276420831680298,
"step": 1270
},
{
"epoch": 0.302541994945741,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.3440744876861572,
"step": 1272
},
{
"epoch": 0.30301768990634753,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.2924749851226807,
"step": 1274
},
{
"epoch": 0.30349338486695404,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.3171639442443848,
"step": 1276
},
{
"epoch": 0.30396907982756055,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.3555333614349365,
"step": 1278
},
{
"epoch": 0.30444477478816706,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.2927653789520264,
"step": 1280
},
{
"epoch": 0.3049204697487736,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.34412682056427,
"step": 1282
},
{
"epoch": 0.30539616470938014,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.3178520202636719,
"step": 1284
},
{
"epoch": 0.30587185966998665,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.278599739074707,
"step": 1286
},
{
"epoch": 0.30634755463059316,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.254746437072754,
"step": 1288
},
{
"epoch": 0.30682324959119966,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.3239991664886475,
"step": 1290
},
{
"epoch": 0.3072989445518062,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2205564975738525,
"step": 1292
},
{
"epoch": 0.3077746395124127,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.301189661026001,
"step": 1294
},
{
"epoch": 0.3082503344730192,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.3148789405822754,
"step": 1296
},
{
"epoch": 0.3087260294336257,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.3096203804016113,
"step": 1298
},
{
"epoch": 0.3092017243942322,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.3105592727661133,
"step": 1300
},
{
"epoch": 0.3096774193548387,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.312976360321045,
"step": 1302
},
{
"epoch": 0.31015311431544523,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.2987452745437622,
"step": 1304
},
{
"epoch": 0.31062880927605174,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.3321504592895508,
"step": 1306
},
{
"epoch": 0.31110450423665825,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2754004001617432,
"step": 1308
},
{
"epoch": 0.31158019919726476,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.3137989044189453,
"step": 1310
},
{
"epoch": 0.31205589415787127,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.311619520187378,
"step": 1312
},
{
"epoch": 0.3125315891184778,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.3559669256210327,
"step": 1314
},
{
"epoch": 0.3130072840790843,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.3143202066421509,
"step": 1316
},
{
"epoch": 0.3134829790396908,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.27866530418396,
"step": 1318
},
{
"epoch": 0.3139586740002973,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.3592901229858398,
"step": 1320
},
{
"epoch": 0.3144343689609038,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2884161472320557,
"step": 1322
},
{
"epoch": 0.31491006392151033,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.3565433025360107,
"step": 1324
},
{
"epoch": 0.31538575888211684,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.237494945526123,
"step": 1326
},
{
"epoch": 0.31586145384272335,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.2647333145141602,
"step": 1328
},
{
"epoch": 0.31633714880332986,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.303452968597412,
"step": 1330
},
{
"epoch": 0.31681284376393637,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.297559142112732,
"step": 1332
},
{
"epoch": 0.3172885387245429,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.2743000984191895,
"step": 1334
},
{
"epoch": 0.3177642336851494,
"grad_norm": 0.4453125,
"learning_rate": 8e-05,
"loss": 1.3135099411010742,
"step": 1336
},
{
"epoch": 0.3182399286457559,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.3139266967773438,
"step": 1338
},
{
"epoch": 0.3187156236063624,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.2548094987869263,
"step": 1340
},
{
"epoch": 0.3191913185669689,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.324897289276123,
"step": 1342
},
{
"epoch": 0.3196670135275754,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.256618618965149,
"step": 1344
},
{
"epoch": 0.32014270848818194,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.3173911571502686,
"step": 1346
},
{
"epoch": 0.32061840344878845,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.3058103322982788,
"step": 1348
},
{
"epoch": 0.32109409840939496,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.2872593402862549,
"step": 1350
},
{
"epoch": 0.32156979337000147,
"grad_norm": 0.451171875,
"learning_rate": 8e-05,
"loss": 1.3403403759002686,
"step": 1352
},
{
"epoch": 0.322045488330608,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.320298433303833,
"step": 1354
},
{
"epoch": 0.3225211832912145,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2507272958755493,
"step": 1356
},
{
"epoch": 0.322996878251821,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2136597633361816,
"step": 1358
},
{
"epoch": 0.3234725732124275,
"grad_norm": 0.447265625,
"learning_rate": 8e-05,
"loss": 1.302070140838623,
"step": 1360
},
{
"epoch": 0.323948268173034,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.3372619152069092,
"step": 1362
},
{
"epoch": 0.3244239631336406,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.2919752597808838,
"step": 1364
},
{
"epoch": 0.3248996580942471,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.3035356998443604,
"step": 1366
},
{
"epoch": 0.3253753530548536,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.3196332454681396,
"step": 1368
},
{
"epoch": 0.3258510480154601,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.2436224222183228,
"step": 1370
},
{
"epoch": 0.3263267429760666,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.3428776264190674,
"step": 1372
},
{
"epoch": 0.32680243793667313,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.2375438213348389,
"step": 1374
},
{
"epoch": 0.32727813289727964,
"grad_norm": 0.4609375,
"learning_rate": 8e-05,
"loss": 1.2973229885101318,
"step": 1376
},
{
"epoch": 0.32775382785788615,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.327859878540039,
"step": 1378
},
{
"epoch": 0.32822952281849266,
"grad_norm": 0.47265625,
"learning_rate": 8e-05,
"loss": 1.286755919456482,
"step": 1380
},
{
"epoch": 0.32870521777909917,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.2941248416900635,
"step": 1382
},
{
"epoch": 0.3291809127397057,
"grad_norm": 0.4609375,
"learning_rate": 8e-05,
"loss": 1.304626226425171,
"step": 1384
},
{
"epoch": 0.3296566077003122,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.2754319906234741,
"step": 1386
},
{
"epoch": 0.3301323026609187,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.2561947107315063,
"step": 1388
},
{
"epoch": 0.3306079976215252,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.2895267009735107,
"step": 1390
},
{
"epoch": 0.3310836925821317,
"grad_norm": 0.474609375,
"learning_rate": 8e-05,
"loss": 1.386023759841919,
"step": 1392
},
{
"epoch": 0.3315593875427382,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.2892621755599976,
"step": 1394
},
{
"epoch": 0.33203508250334474,
"grad_norm": 0.466796875,
"learning_rate": 8e-05,
"loss": 1.2891567945480347,
"step": 1396
},
{
"epoch": 0.33251077746395125,
"grad_norm": 0.5078125,
"learning_rate": 8e-05,
"loss": 1.322417140007019,
"step": 1398
},
{
"epoch": 0.33298647242455776,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3004422187805176,
"step": 1400
},
{
"epoch": 0.33346216738516427,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.2988290786743164,
"step": 1402
},
{
"epoch": 0.3339378623457708,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.3143279552459717,
"step": 1404
},
{
"epoch": 0.3344135573063773,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.281175136566162,
"step": 1406
},
{
"epoch": 0.3348892522669838,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.289182424545288,
"step": 1408
},
{
"epoch": 0.3353649472275903,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.2937148809432983,
"step": 1410
},
{
"epoch": 0.3358406421881968,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2338749170303345,
"step": 1412
},
{
"epoch": 0.3363163371488033,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.2975019216537476,
"step": 1414
},
{
"epoch": 0.33679203210940983,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2488545179367065,
"step": 1416
},
{
"epoch": 0.33726772707001634,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.289847493171692,
"step": 1418
},
{
"epoch": 0.33774342203062285,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.2936429977416992,
"step": 1420
},
{
"epoch": 0.33821911699122936,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.2761449813842773,
"step": 1422
},
{
"epoch": 0.3386948119518359,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.2870736122131348,
"step": 1424
},
{
"epoch": 0.3391705069124424,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.33278489112854,
"step": 1426
},
{
"epoch": 0.3396462018730489,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2804453372955322,
"step": 1428
},
{
"epoch": 0.3401218968336554,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.303015112876892,
"step": 1430
},
{
"epoch": 0.3405975917942619,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.333322286605835,
"step": 1432
},
{
"epoch": 0.3410732867548684,
"grad_norm": 0.447265625,
"learning_rate": 8e-05,
"loss": 1.320211410522461,
"step": 1434
},
{
"epoch": 0.34154898171547493,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.3331108093261719,
"step": 1436
},
{
"epoch": 0.34202467667608144,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.26707923412323,
"step": 1438
},
{
"epoch": 0.34250037163668795,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.3269259929656982,
"step": 1440
},
{
"epoch": 0.34297606659729446,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.3051103353500366,
"step": 1442
},
{
"epoch": 0.34345176155790097,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.307328701019287,
"step": 1444
},
{
"epoch": 0.3439274565185075,
"grad_norm": 0.6171875,
"learning_rate": 8e-05,
"loss": 1.3046774864196777,
"step": 1446
},
{
"epoch": 0.34440315147911404,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.3187592029571533,
"step": 1448
},
{
"epoch": 0.34487884643972055,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.289937973022461,
"step": 1450
},
{
"epoch": 0.34535454140032706,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.300656795501709,
"step": 1452
},
{
"epoch": 0.3458302363609336,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.328467607498169,
"step": 1454
},
{
"epoch": 0.3463059313215401,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.3533457517623901,
"step": 1456
},
{
"epoch": 0.3467816262821466,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2739849090576172,
"step": 1458
},
{
"epoch": 0.3472573212427531,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.3364741802215576,
"step": 1460
},
{
"epoch": 0.3477330162033596,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2574357986450195,
"step": 1462
},
{
"epoch": 0.3482087111639661,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.3151860237121582,
"step": 1464
},
{
"epoch": 0.34868440612457263,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.3361979722976685,
"step": 1466
},
{
"epoch": 0.34916010108517914,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.3134095668792725,
"step": 1468
},
{
"epoch": 0.34963579604578565,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.2602635622024536,
"step": 1470
},
{
"epoch": 0.35011149100639216,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.2942792177200317,
"step": 1472
},
{
"epoch": 0.35058718596699867,
"grad_norm": 0.4453125,
"learning_rate": 8e-05,
"loss": 1.3020391464233398,
"step": 1474
},
{
"epoch": 0.3510628809276052,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.2818697690963745,
"step": 1476
},
{
"epoch": 0.3515385758882117,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.357285976409912,
"step": 1478
},
{
"epoch": 0.3520142708488182,
"grad_norm": 0.4453125,
"learning_rate": 8e-05,
"loss": 1.256792664527893,
"step": 1480
},
{
"epoch": 0.3524899658094247,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2922112941741943,
"step": 1482
},
{
"epoch": 0.3529656607700312,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.313403606414795,
"step": 1484
},
{
"epoch": 0.35344135573063773,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.3349361419677734,
"step": 1486
},
{
"epoch": 0.35391705069124424,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2362300157546997,
"step": 1488
},
{
"epoch": 0.35439274565185075,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.2810988426208496,
"step": 1490
},
{
"epoch": 0.35486844061245726,
"grad_norm": 0.4765625,
"learning_rate": 8e-05,
"loss": 1.3440229892730713,
"step": 1492
},
{
"epoch": 0.35534413557306377,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.3103101253509521,
"step": 1494
},
{
"epoch": 0.3558198305336703,
"grad_norm": 0.447265625,
"learning_rate": 8e-05,
"loss": 1.2558884620666504,
"step": 1496
},
{
"epoch": 0.3562955254942768,
"grad_norm": 0.447265625,
"learning_rate": 8e-05,
"loss": 1.3156042098999023,
"step": 1498
},
{
"epoch": 0.3567712204548833,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.294625997543335,
"step": 1500
},
{
"epoch": 0.3572469154154898,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2535991668701172,
"step": 1502
},
{
"epoch": 0.3577226103760963,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.281435489654541,
"step": 1504
},
{
"epoch": 0.3581983053367028,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.3105072975158691,
"step": 1506
},
{
"epoch": 0.35867400029730934,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.3389477729797363,
"step": 1508
},
{
"epoch": 0.35914969525791585,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.3460373878479004,
"step": 1510
},
{
"epoch": 0.35962539021852236,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.2376234531402588,
"step": 1512
},
{
"epoch": 0.36010108517912887,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.3217864036560059,
"step": 1514
},
{
"epoch": 0.3605767801397354,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2773277759552002,
"step": 1516
},
{
"epoch": 0.3610524751003419,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2766404151916504,
"step": 1518
},
{
"epoch": 0.3615281700609484,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2760717868804932,
"step": 1520
},
{
"epoch": 0.3620038650215549,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.3625264167785645,
"step": 1522
},
{
"epoch": 0.3624795599821614,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.247987985610962,
"step": 1524
},
{
"epoch": 0.3629552549427679,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2980639934539795,
"step": 1526
},
{
"epoch": 0.36343094990337443,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2997593879699707,
"step": 1528
},
{
"epoch": 0.363906644863981,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2603178024291992,
"step": 1530
},
{
"epoch": 0.3643823398245875,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.302708625793457,
"step": 1532
},
{
"epoch": 0.364858034785194,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2671213150024414,
"step": 1534
},
{
"epoch": 0.36533372974580053,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.3167269229888916,
"step": 1536
},
{
"epoch": 0.36580942470640704,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.3100299835205078,
"step": 1538
},
{
"epoch": 0.36628511966701355,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2652174234390259,
"step": 1540
},
{
"epoch": 0.36676081462762006,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.3316433429718018,
"step": 1542
},
{
"epoch": 0.36723650958822657,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.2940750122070312,
"step": 1544
},
{
"epoch": 0.3677122045488331,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.3008698225021362,
"step": 1546
},
{
"epoch": 0.3681878995094396,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.2859610319137573,
"step": 1548
},
{
"epoch": 0.3686635944700461,
"grad_norm": 0.4609375,
"learning_rate": 8e-05,
"loss": 1.2531521320343018,
"step": 1550
},
{
"epoch": 0.3691392894306526,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.2496728897094727,
"step": 1552
},
{
"epoch": 0.3696149843912591,
"grad_norm": 0.462890625,
"learning_rate": 8e-05,
"loss": 1.33748459815979,
"step": 1554
},
{
"epoch": 0.3700906793518656,
"grad_norm": 0.47265625,
"learning_rate": 8e-05,
"loss": 1.2866451740264893,
"step": 1556
},
{
"epoch": 0.37056637431247214,
"grad_norm": 0.46484375,
"learning_rate": 8e-05,
"loss": 1.34792160987854,
"step": 1558
},
{
"epoch": 0.37104206927307865,
"grad_norm": 0.52734375,
"learning_rate": 8e-05,
"loss": 1.2783215045928955,
"step": 1560
},
{
"epoch": 0.37151776423368515,
"grad_norm": 0.50390625,
"learning_rate": 8e-05,
"loss": 1.2765138149261475,
"step": 1562
},
{
"epoch": 0.37199345919429166,
"grad_norm": 0.458984375,
"learning_rate": 8e-05,
"loss": 1.3496522903442383,
"step": 1564
},
{
"epoch": 0.3724691541548982,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.3075356483459473,
"step": 1566
},
{
"epoch": 0.3729448491155047,
"grad_norm": 0.494140625,
"learning_rate": 8e-05,
"loss": 1.2998372316360474,
"step": 1568
},
{
"epoch": 0.3734205440761112,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.2793023586273193,
"step": 1570
},
{
"epoch": 0.3738962390367177,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.2992515563964844,
"step": 1572
},
{
"epoch": 0.3743719339973242,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.2910690307617188,
"step": 1574
},
{
"epoch": 0.3748476289579307,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.2138452529907227,
"step": 1576
},
{
"epoch": 0.37532332391853723,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.2589681148529053,
"step": 1578
},
{
"epoch": 0.37579901887914374,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.3131399154663086,
"step": 1580
},
{
"epoch": 0.37627471383975025,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.2620333433151245,
"step": 1582
},
{
"epoch": 0.37675040880035676,
"grad_norm": 0.4453125,
"learning_rate": 8e-05,
"loss": 1.2692234516143799,
"step": 1584
},
{
"epoch": 0.37722610376096327,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.3276705741882324,
"step": 1586
},
{
"epoch": 0.3777017987215698,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.2581058740615845,
"step": 1588
},
{
"epoch": 0.3781774936821763,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.3049333095550537,
"step": 1590
},
{
"epoch": 0.3786531886427828,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.2860021591186523,
"step": 1592
},
{
"epoch": 0.3791288836033893,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.3235461711883545,
"step": 1594
},
{
"epoch": 0.3796045785639958,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.2726843357086182,
"step": 1596
},
{
"epoch": 0.38008027352460233,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2600030899047852,
"step": 1598
},
{
"epoch": 0.38055596848520884,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.3035047054290771,
"step": 1600
},
{
"epoch": 0.38103166344581535,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.2810437679290771,
"step": 1602
},
{
"epoch": 0.38150735840642186,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.3053010702133179,
"step": 1604
},
{
"epoch": 0.38198305336702837,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.285233497619629,
"step": 1606
},
{
"epoch": 0.3824587483276349,
"grad_norm": 0.474609375,
"learning_rate": 8e-05,
"loss": 1.328747034072876,
"step": 1608
},
{
"epoch": 0.3829344432882414,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.3264154195785522,
"step": 1610
},
{
"epoch": 0.38341013824884795,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2244741916656494,
"step": 1612
},
{
"epoch": 0.38388583320945446,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.247675895690918,
"step": 1614
},
{
"epoch": 0.384361528170061,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.289712905883789,
"step": 1616
},
{
"epoch": 0.3848372231306675,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.299727439880371,
"step": 1618
},
{
"epoch": 0.385312918091274,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.229569911956787,
"step": 1620
},
{
"epoch": 0.3857886130518805,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.322393774986267,
"step": 1622
},
{
"epoch": 0.386264308012487,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2661751508712769,
"step": 1624
},
{
"epoch": 0.3867400029730935,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2912821769714355,
"step": 1626
},
{
"epoch": 0.38721569793370003,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2871348857879639,
"step": 1628
},
{
"epoch": 0.38769139289430654,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2939109802246094,
"step": 1630
},
{
"epoch": 0.38816708785491305,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2797859907150269,
"step": 1632
},
{
"epoch": 0.38864278281551956,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2935690879821777,
"step": 1634
},
{
"epoch": 0.38911847777612607,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2815215587615967,
"step": 1636
},
{
"epoch": 0.3895941727367326,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2466614246368408,
"step": 1638
},
{
"epoch": 0.3900698676973391,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.3364429473876953,
"step": 1640
},
{
"epoch": 0.3905455626579456,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2810378074645996,
"step": 1642
},
{
"epoch": 0.3910212576185521,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2879207134246826,
"step": 1644
},
{
"epoch": 0.3914969525791586,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.258134365081787,
"step": 1646
},
{
"epoch": 0.39197264753976513,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2918128967285156,
"step": 1648
},
{
"epoch": 0.39244834250037164,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2425835132598877,
"step": 1650
},
{
"epoch": 0.39292403746097815,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.213120698928833,
"step": 1652
},
{
"epoch": 0.39339973242158466,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.2344098091125488,
"step": 1654
},
{
"epoch": 0.39387542738219117,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2660351991653442,
"step": 1656
},
{
"epoch": 0.3943511223427977,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.330662488937378,
"step": 1658
},
{
"epoch": 0.3948268173034042,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2738041877746582,
"step": 1660
},
{
"epoch": 0.3953025122640107,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2888221740722656,
"step": 1662
},
{
"epoch": 0.3957782072246172,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.288313865661621,
"step": 1664
},
{
"epoch": 0.3962539021852237,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.290661096572876,
"step": 1666
},
{
"epoch": 0.3967295971458302,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.249962568283081,
"step": 1668
},
{
"epoch": 0.39720529210643674,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2480006217956543,
"step": 1670
},
{
"epoch": 0.39768098706704325,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.2179031372070312,
"step": 1672
},
{
"epoch": 0.39815668202764976,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.3344948291778564,
"step": 1674
},
{
"epoch": 0.39863237698825627,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2962273359298706,
"step": 1676
},
{
"epoch": 0.3991080719488628,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.266413688659668,
"step": 1678
},
{
"epoch": 0.3995837669094693,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2850923538208008,
"step": 1680
},
{
"epoch": 0.4000594618700758,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2746737003326416,
"step": 1682
},
{
"epoch": 0.4005351568306823,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.2807002067565918,
"step": 1684
},
{
"epoch": 0.4010108517912888,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2683579921722412,
"step": 1686
},
{
"epoch": 0.4014865467518953,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2930378913879395,
"step": 1688
},
{
"epoch": 0.40196224171250183,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.2729506492614746,
"step": 1690
},
{
"epoch": 0.40243793667310834,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.2865461111068726,
"step": 1692
},
{
"epoch": 0.40291363163371485,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.249645709991455,
"step": 1694
},
{
"epoch": 0.4033893265943214,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2383712530136108,
"step": 1696
},
{
"epoch": 0.4038650215549279,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.246313452720642,
"step": 1698
},
{
"epoch": 0.40434071651553444,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2288974523544312,
"step": 1700
},
{
"epoch": 0.40481641147614095,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2907836437225342,
"step": 1702
},
{
"epoch": 0.40529210643674746,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2730671167373657,
"step": 1704
},
{
"epoch": 0.40576780139735397,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.3130565881729126,
"step": 1706
},
{
"epoch": 0.4062434963579605,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2681382894515991,
"step": 1708
},
{
"epoch": 0.406719191318567,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.296158790588379,
"step": 1710
},
{
"epoch": 0.4071948862791735,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.247192144393921,
"step": 1712
},
{
"epoch": 0.40767058123978,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2639563083648682,
"step": 1714
},
{
"epoch": 0.4081462762003865,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.2656540870666504,
"step": 1716
},
{
"epoch": 0.408621971160993,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.205491542816162,
"step": 1718
},
{
"epoch": 0.40909766612159953,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.277217984199524,
"step": 1720
},
{
"epoch": 0.40957336108220604,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.3079639673233032,
"step": 1722
},
{
"epoch": 0.41004905604281255,
"grad_norm": 0.458984375,
"learning_rate": 8e-05,
"loss": 1.2672646045684814,
"step": 1724
},
{
"epoch": 0.41052475100341906,
"grad_norm": 0.474609375,
"learning_rate": 8e-05,
"loss": 1.2784157991409302,
"step": 1726
},
{
"epoch": 0.4110004459640256,
"grad_norm": 0.484375,
"learning_rate": 8e-05,
"loss": 1.2682194709777832,
"step": 1728
},
{
"epoch": 0.4114761409246321,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.2786941528320312,
"step": 1730
},
{
"epoch": 0.4119518358852386,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.281358003616333,
"step": 1732
},
{
"epoch": 0.4124275308458451,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.246321678161621,
"step": 1734
},
{
"epoch": 0.4129032258064516,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2259719371795654,
"step": 1736
},
{
"epoch": 0.4133789207670581,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.3222472667694092,
"step": 1738
},
{
"epoch": 0.41385461572766463,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.216452956199646,
"step": 1740
},
{
"epoch": 0.41433031068827114,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.3299109935760498,
"step": 1742
},
{
"epoch": 0.41480600564887765,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.250302791595459,
"step": 1744
},
{
"epoch": 0.41528170060948416,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2744085788726807,
"step": 1746
},
{
"epoch": 0.41575739557009067,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2700858116149902,
"step": 1748
},
{
"epoch": 0.4162330905306972,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2627267837524414,
"step": 1750
},
{
"epoch": 0.4167087854913037,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.269378423690796,
"step": 1752
},
{
"epoch": 0.4171844804519102,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.3320337533950806,
"step": 1754
},
{
"epoch": 0.4176601754125167,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2717125415802002,
"step": 1756
},
{
"epoch": 0.4181358703731232,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2660107612609863,
"step": 1758
},
{
"epoch": 0.41861156533372973,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2718021869659424,
"step": 1760
},
{
"epoch": 0.41908726029433624,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2778680324554443,
"step": 1762
},
{
"epoch": 0.41956295525494275,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.247718334197998,
"step": 1764
},
{
"epoch": 0.42003865021554926,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2594034671783447,
"step": 1766
},
{
"epoch": 0.42051434517615577,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2796156406402588,
"step": 1768
},
{
"epoch": 0.4209900401367623,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2971086502075195,
"step": 1770
},
{
"epoch": 0.4214657350973688,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.2875339984893799,
"step": 1772
},
{
"epoch": 0.4219414300579753,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2802563905715942,
"step": 1774
},
{
"epoch": 0.4224171250185818,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.297211766242981,
"step": 1776
},
{
"epoch": 0.4228928199791884,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.1946825981140137,
"step": 1778
},
{
"epoch": 0.4233685149397949,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.256436824798584,
"step": 1780
},
{
"epoch": 0.4238442099004014,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2787179946899414,
"step": 1782
},
{
"epoch": 0.4243199048610079,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2537882328033447,
"step": 1784
},
{
"epoch": 0.4247955998216144,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2897108793258667,
"step": 1786
},
{
"epoch": 0.4252712947822209,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2501720190048218,
"step": 1788
},
{
"epoch": 0.42574698974282743,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2753134965896606,
"step": 1790
},
{
"epoch": 0.42622268470343394,
"grad_norm": 0.46875,
"learning_rate": 8e-05,
"loss": 1.2809417247772217,
"step": 1792
},
{
"epoch": 0.42669837966404045,
"grad_norm": 0.51171875,
"learning_rate": 8e-05,
"loss": 1.2722747325897217,
"step": 1794
},
{
"epoch": 0.42717407462464696,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.294141411781311,
"step": 1796
},
{
"epoch": 0.42764976958525347,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.2701992988586426,
"step": 1798
},
{
"epoch": 0.42812546454586,
"grad_norm": 0.44140625,
"learning_rate": 8e-05,
"loss": 1.2379639148712158,
"step": 1800
},
{
"epoch": 0.4286011595064665,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.262845516204834,
"step": 1802
},
{
"epoch": 0.429076854467073,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.213247299194336,
"step": 1804
},
{
"epoch": 0.4295525494276795,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.270270824432373,
"step": 1806
},
{
"epoch": 0.430028244388286,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.1981269121170044,
"step": 1808
},
{
"epoch": 0.43050393934889253,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.2772140502929688,
"step": 1810
},
{
"epoch": 0.43097963430949904,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.2868304252624512,
"step": 1812
},
{
"epoch": 0.43145532927010555,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2759490013122559,
"step": 1814
},
{
"epoch": 0.43193102423071206,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.292006015777588,
"step": 1816
},
{
"epoch": 0.43240671919131857,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.2290836572647095,
"step": 1818
},
{
"epoch": 0.4328824141519251,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2799795866012573,
"step": 1820
},
{
"epoch": 0.4333581091125316,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2700903415679932,
"step": 1822
},
{
"epoch": 0.4338338040731381,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.276972770690918,
"step": 1824
},
{
"epoch": 0.4343094990337446,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2546138763427734,
"step": 1826
},
{
"epoch": 0.4347851939943511,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.2350144386291504,
"step": 1828
},
{
"epoch": 0.4352608889549576,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.255623459815979,
"step": 1830
},
{
"epoch": 0.43573658391556414,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.3046503067016602,
"step": 1832
},
{
"epoch": 0.43621227887617064,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2888526916503906,
"step": 1834
},
{
"epoch": 0.43668797383677715,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2555067539215088,
"step": 1836
},
{
"epoch": 0.43716366879738366,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.3039183616638184,
"step": 1838
},
{
"epoch": 0.4376393637579902,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2199631929397583,
"step": 1840
},
{
"epoch": 0.4381150587185967,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2994630336761475,
"step": 1842
},
{
"epoch": 0.4385907536792032,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.241573452949524,
"step": 1844
},
{
"epoch": 0.4390664486398097,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2570286989212036,
"step": 1846
},
{
"epoch": 0.4395421436004162,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2594881057739258,
"step": 1848
},
{
"epoch": 0.4400178385610227,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.2928112745285034,
"step": 1850
},
{
"epoch": 0.44049353352162923,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2726936340332031,
"step": 1852
},
{
"epoch": 0.44096922848223574,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.32316255569458,
"step": 1854
},
{
"epoch": 0.44144492344284225,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2438604831695557,
"step": 1856
},
{
"epoch": 0.44192061840344876,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.293677568435669,
"step": 1858
},
{
"epoch": 0.4423963133640553,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.3002068996429443,
"step": 1860
},
{
"epoch": 0.44287200832466184,
"grad_norm": 0.45703125,
"learning_rate": 8e-05,
"loss": 1.294407844543457,
"step": 1862
},
{
"epoch": 0.44334770328526835,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.242573857307434,
"step": 1864
},
{
"epoch": 0.44382339824587486,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2922019958496094,
"step": 1866
},
{
"epoch": 0.44429909320648137,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.2213215827941895,
"step": 1868
},
{
"epoch": 0.4447747881670879,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2707006931304932,
"step": 1870
},
{
"epoch": 0.4452504831276944,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2314999103546143,
"step": 1872
},
{
"epoch": 0.4457261780883009,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.3218889236450195,
"step": 1874
},
{
"epoch": 0.4462018730489074,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2159051895141602,
"step": 1876
},
{
"epoch": 0.4466775680095139,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.3076913356781006,
"step": 1878
},
{
"epoch": 0.4471532629701204,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2779921293258667,
"step": 1880
},
{
"epoch": 0.44762895793072693,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2604446411132812,
"step": 1882
},
{
"epoch": 0.44810465289133344,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2382320165634155,
"step": 1884
},
{
"epoch": 0.44858034785193995,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.3830101490020752,
"step": 1886
},
{
"epoch": 0.44905604281254646,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.228447675704956,
"step": 1888
},
{
"epoch": 0.449531737773153,
"grad_norm": 0.43359375,
"learning_rate": 8e-05,
"loss": 1.2446924448013306,
"step": 1890
},
{
"epoch": 0.4500074327337595,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.2444430589675903,
"step": 1892
},
{
"epoch": 0.450483127694366,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.1985334157943726,
"step": 1894
},
{
"epoch": 0.4509588226549725,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2600021362304688,
"step": 1896
},
{
"epoch": 0.451434517615579,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.2708806991577148,
"step": 1898
},
{
"epoch": 0.4519102125761855,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.273937702178955,
"step": 1900
},
{
"epoch": 0.45238590753679203,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2232747077941895,
"step": 1902
},
{
"epoch": 0.45286160249739854,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2665836811065674,
"step": 1904
},
{
"epoch": 0.45333729745800505,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2973511219024658,
"step": 1906
},
{
"epoch": 0.45381299241861156,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.239840030670166,
"step": 1908
},
{
"epoch": 0.45428868737921807,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2163124084472656,
"step": 1910
},
{
"epoch": 0.4547643823398246,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.245084285736084,
"step": 1912
},
{
"epoch": 0.4552400773004311,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.3053221702575684,
"step": 1914
},
{
"epoch": 0.4557157722610376,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2788605690002441,
"step": 1916
},
{
"epoch": 0.4561914672216441,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2649834156036377,
"step": 1918
},
{
"epoch": 0.4566671621822506,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2921392917633057,
"step": 1920
},
{
"epoch": 0.45714285714285713,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2440087795257568,
"step": 1922
},
{
"epoch": 0.45761855210346364,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.2340590953826904,
"step": 1924
},
{
"epoch": 0.45809424706407015,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2697665691375732,
"step": 1926
},
{
"epoch": 0.45856994202467666,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2789232730865479,
"step": 1928
},
{
"epoch": 0.45904563698528317,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.262975811958313,
"step": 1930
},
{
"epoch": 0.4595213319458897,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.286508321762085,
"step": 1932
},
{
"epoch": 0.4599970269064962,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2848749160766602,
"step": 1934
},
{
"epoch": 0.4604727218671027,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.253462314605713,
"step": 1936
},
{
"epoch": 0.4609484168277092,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2408068180084229,
"step": 1938
},
{
"epoch": 0.4614241117883157,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2684673070907593,
"step": 1940
},
{
"epoch": 0.4618998067489222,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2076165676116943,
"step": 1942
},
{
"epoch": 0.4623755017095288,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2525750398635864,
"step": 1944
},
{
"epoch": 0.4628511966701353,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2482715845108032,
"step": 1946
},
{
"epoch": 0.4633268916307418,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2954301834106445,
"step": 1948
},
{
"epoch": 0.4638025865913483,
"grad_norm": 0.4375,
"learning_rate": 8e-05,
"loss": 1.2526676654815674,
"step": 1950
},
{
"epoch": 0.46427828155195483,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.270956039428711,
"step": 1952
},
{
"epoch": 0.46475397651256134,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2305779457092285,
"step": 1954
},
{
"epoch": 0.46522967147316785,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.3026628494262695,
"step": 1956
},
{
"epoch": 0.46570536643377436,
"grad_norm": 0.42578125,
"learning_rate": 8e-05,
"loss": 1.276360273361206,
"step": 1958
},
{
"epoch": 0.46618106139438087,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2271491289138794,
"step": 1960
},
{
"epoch": 0.4666567563549874,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.27445650100708,
"step": 1962
},
{
"epoch": 0.4671324513155939,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2661042213439941,
"step": 1964
},
{
"epoch": 0.4676081462762004,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2653608322143555,
"step": 1966
},
{
"epoch": 0.4680838412368069,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2979223728179932,
"step": 1968
},
{
"epoch": 0.4685595361974134,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2312049865722656,
"step": 1970
},
{
"epoch": 0.4690352311580199,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.3057024478912354,
"step": 1972
},
{
"epoch": 0.46951092611862644,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2951257228851318,
"step": 1974
},
{
"epoch": 0.46998662107923295,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2718441486358643,
"step": 1976
},
{
"epoch": 0.47046231603983946,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2305831909179688,
"step": 1978
},
{
"epoch": 0.47093801100044597,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2023491859436035,
"step": 1980
},
{
"epoch": 0.4714137059610525,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.219580888748169,
"step": 1982
},
{
"epoch": 0.471889400921659,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.247983455657959,
"step": 1984
},
{
"epoch": 0.4723650958822655,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2529908418655396,
"step": 1986
},
{
"epoch": 0.472840790842872,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2776696681976318,
"step": 1988
},
{
"epoch": 0.4733164858034785,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.249483585357666,
"step": 1990
},
{
"epoch": 0.473792180764085,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.3180161714553833,
"step": 1992
},
{
"epoch": 0.47426787572469153,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.2910526990890503,
"step": 1994
},
{
"epoch": 0.47474357068529804,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2458434104919434,
"step": 1996
},
{
"epoch": 0.47521926564590455,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.3033870458602905,
"step": 1998
},
{
"epoch": 0.47569496060651106,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2500510215759277,
"step": 2000
},
{
"epoch": 0.4761706555671176,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2625623941421509,
"step": 2002
},
{
"epoch": 0.4766463505277241,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2688162326812744,
"step": 2004
},
{
"epoch": 0.4771220454883306,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2744543552398682,
"step": 2006
},
{
"epoch": 0.4775977404489371,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.28806471824646,
"step": 2008
},
{
"epoch": 0.4780734354095436,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2513093948364258,
"step": 2010
},
{
"epoch": 0.4785491303701501,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2578754425048828,
"step": 2012
},
{
"epoch": 0.47902482533075663,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2690682411193848,
"step": 2014
},
{
"epoch": 0.47950052029136314,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.244710922241211,
"step": 2016
},
{
"epoch": 0.47997621525196965,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2703763246536255,
"step": 2018
},
{
"epoch": 0.48045191021257616,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.289364218711853,
"step": 2020
},
{
"epoch": 0.48092760517318267,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2618646621704102,
"step": 2022
},
{
"epoch": 0.4814033001337892,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2179932594299316,
"step": 2024
},
{
"epoch": 0.48187899509439575,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2760300636291504,
"step": 2026
},
{
"epoch": 0.48235469005500226,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2909011840820312,
"step": 2028
},
{
"epoch": 0.48283038501560877,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2532756328582764,
"step": 2030
},
{
"epoch": 0.4833060799762153,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2610164880752563,
"step": 2032
},
{
"epoch": 0.4837817749368218,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2327613830566406,
"step": 2034
},
{
"epoch": 0.4842574698974283,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2339394092559814,
"step": 2036
},
{
"epoch": 0.4847331648580348,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2110052108764648,
"step": 2038
},
{
"epoch": 0.4852088598186413,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2191238403320312,
"step": 2040
},
{
"epoch": 0.4856845547792478,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.2192617654800415,
"step": 2042
},
{
"epoch": 0.48616024973985433,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2636759281158447,
"step": 2044
},
{
"epoch": 0.48663594470046084,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.207831621170044,
"step": 2046
},
{
"epoch": 0.48711163966106735,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.1997129917144775,
"step": 2048
},
{
"epoch": 0.48758733462167386,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.2564668655395508,
"step": 2050
},
{
"epoch": 0.48806302958228037,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2735317945480347,
"step": 2052
},
{
"epoch": 0.4885387245428869,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2732312679290771,
"step": 2054
},
{
"epoch": 0.4890144195034934,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.2548601627349854,
"step": 2056
},
{
"epoch": 0.4894901144640999,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2576682567596436,
"step": 2058
},
{
"epoch": 0.4899658094247064,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2264012098312378,
"step": 2060
},
{
"epoch": 0.4904415043853129,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2854325771331787,
"step": 2062
},
{
"epoch": 0.49091719934591943,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.3236126899719238,
"step": 2064
},
{
"epoch": 0.49139289430652594,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2489556074142456,
"step": 2066
},
{
"epoch": 0.49186858926713245,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2574775218963623,
"step": 2068
},
{
"epoch": 0.49234428422773896,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2095019817352295,
"step": 2070
},
{
"epoch": 0.49281997918834547,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.2634193897247314,
"step": 2072
},
{
"epoch": 0.493295674148952,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2667688131332397,
"step": 2074
},
{
"epoch": 0.4937713691095585,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2418756484985352,
"step": 2076
},
{
"epoch": 0.494247064070165,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2592451572418213,
"step": 2078
},
{
"epoch": 0.4947227590307715,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2803057432174683,
"step": 2080
},
{
"epoch": 0.495198453991378,
"grad_norm": 0.44921875,
"learning_rate": 8e-05,
"loss": 1.2541866302490234,
"step": 2082
},
{
"epoch": 0.49567414895198453,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2359161376953125,
"step": 2084
},
{
"epoch": 0.49614984391259104,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2950963973999023,
"step": 2086
},
{
"epoch": 0.49662553887319755,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.283148169517517,
"step": 2088
},
{
"epoch": 0.49710123383380406,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2806365489959717,
"step": 2090
},
{
"epoch": 0.49757692879441057,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.21940279006958,
"step": 2092
},
{
"epoch": 0.4980526237550171,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.243825912475586,
"step": 2094
},
{
"epoch": 0.4985283187156236,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.271630048751831,
"step": 2096
},
{
"epoch": 0.4990040136762301,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2829158306121826,
"step": 2098
},
{
"epoch": 0.4994797086368366,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.244653582572937,
"step": 2100
},
{
"epoch": 0.4999554035974431,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2149741649627686,
"step": 2102
},
{
"epoch": 0.5004310985580497,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2326526641845703,
"step": 2104
},
{
"epoch": 0.5009067935186562,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2960628271102905,
"step": 2106
},
{
"epoch": 0.5013824884792627,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2571380138397217,
"step": 2108
},
{
"epoch": 0.5018581834398692,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.1806836128234863,
"step": 2110
},
{
"epoch": 0.5023338784004757,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2098257541656494,
"step": 2112
},
{
"epoch": 0.5028095733610822,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.245821237564087,
"step": 2114
},
{
"epoch": 0.5032852683216887,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.2121808528900146,
"step": 2116
},
{
"epoch": 0.5037609632822952,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.271606683731079,
"step": 2118
},
{
"epoch": 0.5042366582429018,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.2480086088180542,
"step": 2120
},
{
"epoch": 0.5047123532035083,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2532334327697754,
"step": 2122
},
{
"epoch": 0.5051880481641148,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2482119798660278,
"step": 2124
},
{
"epoch": 0.5056637431247213,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2053478956222534,
"step": 2126
},
{
"epoch": 0.5061394380853278,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2333269119262695,
"step": 2128
},
{
"epoch": 0.5066151330459343,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2575441598892212,
"step": 2130
},
{
"epoch": 0.5070908280065408,
"grad_norm": 0.443359375,
"learning_rate": 8e-05,
"loss": 1.239387035369873,
"step": 2132
},
{
"epoch": 0.5075665229671473,
"grad_norm": 0.435546875,
"learning_rate": 8e-05,
"loss": 1.2529371976852417,
"step": 2134
},
{
"epoch": 0.5080422179277538,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.2657462358474731,
"step": 2136
},
{
"epoch": 0.5085179128883603,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2482880353927612,
"step": 2138
},
{
"epoch": 0.5089936078489669,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2640414237976074,
"step": 2140
},
{
"epoch": 0.5094693028095734,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2489564418792725,
"step": 2142
},
{
"epoch": 0.5099449977701799,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.1927671432495117,
"step": 2144
},
{
"epoch": 0.5104206927307864,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2743709087371826,
"step": 2146
},
{
"epoch": 0.5108963876913929,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2421848773956299,
"step": 2148
},
{
"epoch": 0.5113720826519994,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2825573682785034,
"step": 2150
},
{
"epoch": 0.5118477776126059,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2068567276000977,
"step": 2152
},
{
"epoch": 0.5123234725732124,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2677295207977295,
"step": 2154
},
{
"epoch": 0.5127991675338189,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.3184689283370972,
"step": 2156
},
{
"epoch": 0.5132748624944254,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2172642946243286,
"step": 2158
},
{
"epoch": 0.513750557455032,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.2620975971221924,
"step": 2160
},
{
"epoch": 0.5142262524156385,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.1976842880249023,
"step": 2162
},
{
"epoch": 0.514701947376245,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.2858420610427856,
"step": 2164
},
{
"epoch": 0.5151776423368515,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.1963508129119873,
"step": 2166
},
{
"epoch": 0.515653337297458,
"grad_norm": 0.439453125,
"learning_rate": 8e-05,
"loss": 1.2777037620544434,
"step": 2168
},
{
"epoch": 0.5161290322580645,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2379100322723389,
"step": 2170
},
{
"epoch": 0.516604727218671,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.3012006282806396,
"step": 2172
},
{
"epoch": 0.5170804221792775,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2644760608673096,
"step": 2174
},
{
"epoch": 0.517556117139884,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2243112325668335,
"step": 2176
},
{
"epoch": 0.5180318121004905,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.282224416732788,
"step": 2178
},
{
"epoch": 0.518507507061097,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2459665536880493,
"step": 2180
},
{
"epoch": 0.5189832020217036,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2327321767807007,
"step": 2182
},
{
"epoch": 0.5194588969823101,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2502856254577637,
"step": 2184
},
{
"epoch": 0.5199345919429166,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2643475532531738,
"step": 2186
},
{
"epoch": 0.5204102869035231,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2353267669677734,
"step": 2188
},
{
"epoch": 0.5208859818641296,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2246006727218628,
"step": 2190
},
{
"epoch": 0.5213616768247361,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2746915817260742,
"step": 2192
},
{
"epoch": 0.5218373717853426,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2547008991241455,
"step": 2194
},
{
"epoch": 0.5223130667459491,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2143683433532715,
"step": 2196
},
{
"epoch": 0.5227887617065556,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2223198413848877,
"step": 2198
},
{
"epoch": 0.5232644566671621,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2719308137893677,
"step": 2200
},
{
"epoch": 0.5237401516277687,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.1987100839614868,
"step": 2202
},
{
"epoch": 0.5242158465883752,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2595856189727783,
"step": 2204
},
{
"epoch": 0.5246915415489817,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2550652027130127,
"step": 2206
},
{
"epoch": 0.5251672365095882,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2664339542388916,
"step": 2208
},
{
"epoch": 0.5256429314701947,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2030470371246338,
"step": 2210
},
{
"epoch": 0.5261186264308012,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.257197618484497,
"step": 2212
},
{
"epoch": 0.5265943213914077,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2173645496368408,
"step": 2214
},
{
"epoch": 0.5270700163520142,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.22458815574646,
"step": 2216
},
{
"epoch": 0.5275457113126207,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.2425655126571655,
"step": 2218
},
{
"epoch": 0.5280214062732272,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2677123546600342,
"step": 2220
},
{
"epoch": 0.5284971012338338,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.203295111656189,
"step": 2222
},
{
"epoch": 0.5289727961944403,
"grad_norm": 0.4296875,
"learning_rate": 8e-05,
"loss": 1.260411262512207,
"step": 2224
},
{
"epoch": 0.5294484911550468,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.270648717880249,
"step": 2226
},
{
"epoch": 0.5299241861156533,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2358521223068237,
"step": 2228
},
{
"epoch": 0.5303998810762598,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2879903316497803,
"step": 2230
},
{
"epoch": 0.5308755760368664,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2541499137878418,
"step": 2232
},
{
"epoch": 0.5313512709974729,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.1964361667633057,
"step": 2234
},
{
"epoch": 0.5318269659580794,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2229571342468262,
"step": 2236
},
{
"epoch": 0.5323026609186859,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.173935890197754,
"step": 2238
},
{
"epoch": 0.5327783558792925,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2346327304840088,
"step": 2240
},
{
"epoch": 0.533254050839899,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2555508613586426,
"step": 2242
},
{
"epoch": 0.5337297458005055,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2254197597503662,
"step": 2244
},
{
"epoch": 0.534205440761112,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2302241325378418,
"step": 2246
},
{
"epoch": 0.5346811357217185,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2882341146469116,
"step": 2248
},
{
"epoch": 0.535156830682325,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2494275569915771,
"step": 2250
},
{
"epoch": 0.5356325256429315,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2567485570907593,
"step": 2252
},
{
"epoch": 0.536108220603538,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2737491130828857,
"step": 2254
},
{
"epoch": 0.5365839155641445,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2650439739227295,
"step": 2256
},
{
"epoch": 0.537059610524751,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2500125169754028,
"step": 2258
},
{
"epoch": 0.5375353054853576,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.2558541297912598,
"step": 2260
},
{
"epoch": 0.5380110004459641,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2639573812484741,
"step": 2262
},
{
"epoch": 0.5384866954065706,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2046759128570557,
"step": 2264
},
{
"epoch": 0.5389623903671771,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.3005247116088867,
"step": 2266
},
{
"epoch": 0.5394380853277836,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2678768634796143,
"step": 2268
},
{
"epoch": 0.5399137802883901,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2596511840820312,
"step": 2270
},
{
"epoch": 0.5403894752489966,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2320735454559326,
"step": 2272
},
{
"epoch": 0.5408651702096031,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.2533533573150635,
"step": 2274
},
{
"epoch": 0.5413408651702096,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2232404947280884,
"step": 2276
},
{
"epoch": 0.5418165601308161,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.2465150356292725,
"step": 2278
},
{
"epoch": 0.5422922550914226,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2359066009521484,
"step": 2280
},
{
"epoch": 0.5427679500520292,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.179032325744629,
"step": 2282
},
{
"epoch": 0.5432436450126357,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2295031547546387,
"step": 2284
},
{
"epoch": 0.5437193399732422,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2600789070129395,
"step": 2286
},
{
"epoch": 0.5441950349338487,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2444316148757935,
"step": 2288
},
{
"epoch": 0.5446707298944552,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.1947736740112305,
"step": 2290
},
{
"epoch": 0.5451464248550617,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2450361251831055,
"step": 2292
},
{
"epoch": 0.5456221198156682,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.189820647239685,
"step": 2294
},
{
"epoch": 0.5460978147762747,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2700152397155762,
"step": 2296
},
{
"epoch": 0.5465735097368812,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.1997301578521729,
"step": 2298
},
{
"epoch": 0.5470492046974877,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.2718980312347412,
"step": 2300
},
{
"epoch": 0.5475248996580943,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.2360846996307373,
"step": 2302
},
{
"epoch": 0.5480005946187008,
"grad_norm": 0.419921875,
"learning_rate": 8e-05,
"loss": 1.2985812425613403,
"step": 2304
},
{
"epoch": 0.5484762895793073,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2672054767608643,
"step": 2306
},
{
"epoch": 0.5489519845399138,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.1932951211929321,
"step": 2308
},
{
"epoch": 0.5494276795005203,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2733159065246582,
"step": 2310
},
{
"epoch": 0.5499033744611268,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.3332585096359253,
"step": 2312
},
{
"epoch": 0.5503790694217333,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.267357587814331,
"step": 2314
},
{
"epoch": 0.5508547643823398,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2183654308319092,
"step": 2316
},
{
"epoch": 0.5513304593429463,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.244970440864563,
"step": 2318
},
{
"epoch": 0.5518061543035528,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2315990924835205,
"step": 2320
},
{
"epoch": 0.5522818492641594,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.251598834991455,
"step": 2322
},
{
"epoch": 0.5527575442247659,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.277734637260437,
"step": 2324
},
{
"epoch": 0.5532332391853724,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.27321195602417,
"step": 2326
},
{
"epoch": 0.5537089341459789,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2716056108474731,
"step": 2328
},
{
"epoch": 0.5541846291065854,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2450883388519287,
"step": 2330
},
{
"epoch": 0.5546603240671919,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2274010181427002,
"step": 2332
},
{
"epoch": 0.5551360190277984,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2627809047698975,
"step": 2334
},
{
"epoch": 0.5556117139884049,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.1710472106933594,
"step": 2336
},
{
"epoch": 0.5560874089490114,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2715725898742676,
"step": 2338
},
{
"epoch": 0.5565631039096179,
"grad_norm": 0.369140625,
"learning_rate": 8e-05,
"loss": 1.2005977630615234,
"step": 2340
},
{
"epoch": 0.5570387988702244,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.254575252532959,
"step": 2342
},
{
"epoch": 0.557514493830831,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.2813735008239746,
"step": 2344
},
{
"epoch": 0.5579901887914375,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2456145286560059,
"step": 2346
},
{
"epoch": 0.558465883752044,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2311184406280518,
"step": 2348
},
{
"epoch": 0.5589415787126505,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2404234409332275,
"step": 2350
},
{
"epoch": 0.559417273673257,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.280110239982605,
"step": 2352
},
{
"epoch": 0.5598929686338635,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.250441551208496,
"step": 2354
},
{
"epoch": 0.56036866359447,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2462382316589355,
"step": 2356
},
{
"epoch": 0.5608443585550765,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2737480401992798,
"step": 2358
},
{
"epoch": 0.561320053515683,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.276233434677124,
"step": 2360
},
{
"epoch": 0.5617957484762895,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2580044269561768,
"step": 2362
},
{
"epoch": 0.5622714434368961,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.247071385383606,
"step": 2364
},
{
"epoch": 0.5627471383975026,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2175970077514648,
"step": 2366
},
{
"epoch": 0.5632228333581091,
"grad_norm": 0.359375,
"learning_rate": 8e-05,
"loss": 1.217498540878296,
"step": 2368
},
{
"epoch": 0.5636985283187156,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.242640495300293,
"step": 2370
},
{
"epoch": 0.5641742232793221,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2354379892349243,
"step": 2372
},
{
"epoch": 0.5646499182399286,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.3069782257080078,
"step": 2374
},
{
"epoch": 0.5651256132005351,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2084176540374756,
"step": 2376
},
{
"epoch": 0.5656013081611416,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2104275226593018,
"step": 2378
},
{
"epoch": 0.5660770031217481,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2741985321044922,
"step": 2380
},
{
"epoch": 0.5665526980823546,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.230583906173706,
"step": 2382
},
{
"epoch": 0.5670283930429612,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.1865020990371704,
"step": 2384
},
{
"epoch": 0.5675040880035677,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2901579141616821,
"step": 2386
},
{
"epoch": 0.5679797829641742,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2829887866973877,
"step": 2388
},
{
"epoch": 0.5684554779247807,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.2711780071258545,
"step": 2390
},
{
"epoch": 0.5689311728853872,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2897326946258545,
"step": 2392
},
{
"epoch": 0.5694068678459937,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2700235843658447,
"step": 2394
},
{
"epoch": 0.5698825628066002,
"grad_norm": 0.35546875,
"learning_rate": 8e-05,
"loss": 1.2746386528015137,
"step": 2396
},
{
"epoch": 0.5703582577672068,
"grad_norm": 0.349609375,
"learning_rate": 8e-05,
"loss": 1.2726595401763916,
"step": 2398
},
{
"epoch": 0.5708339527278133,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2205541133880615,
"step": 2400
},
{
"epoch": 0.5713096476884199,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2636919021606445,
"step": 2402
},
{
"epoch": 0.5717853426490264,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2315757274627686,
"step": 2404
},
{
"epoch": 0.5722610376096329,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2510037422180176,
"step": 2406
},
{
"epoch": 0.5727367325702394,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2190053462982178,
"step": 2408
},
{
"epoch": 0.5732124275308459,
"grad_norm": 0.357421875,
"learning_rate": 8e-05,
"loss": 1.2241978645324707,
"step": 2410
},
{
"epoch": 0.5736881224914524,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2733025550842285,
"step": 2412
},
{
"epoch": 0.5741638174520589,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.1712183952331543,
"step": 2414
},
{
"epoch": 0.5746395124126654,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2777495384216309,
"step": 2416
},
{
"epoch": 0.5751152073732719,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.272843837738037,
"step": 2418
},
{
"epoch": 0.5755909023338784,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2414473295211792,
"step": 2420
},
{
"epoch": 0.576066597294485,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2244441509246826,
"step": 2422
},
{
"epoch": 0.5765422922550915,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2712140083312988,
"step": 2424
},
{
"epoch": 0.577017987215698,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2499234676361084,
"step": 2426
},
{
"epoch": 0.5774936821763045,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.286064624786377,
"step": 2428
},
{
"epoch": 0.577969377136911,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2061920166015625,
"step": 2430
},
{
"epoch": 0.5784450720975175,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2993005514144897,
"step": 2432
},
{
"epoch": 0.578920767058124,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.26462721824646,
"step": 2434
},
{
"epoch": 0.5793964620187305,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2716201543807983,
"step": 2436
},
{
"epoch": 0.579872156979337,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2104942798614502,
"step": 2438
},
{
"epoch": 0.5803478519399435,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2860119342803955,
"step": 2440
},
{
"epoch": 0.58082354690055,
"grad_norm": 0.421875,
"learning_rate": 8e-05,
"loss": 1.3008224964141846,
"step": 2442
},
{
"epoch": 0.5812992418611566,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2540391683578491,
"step": 2444
},
{
"epoch": 0.5817749368217631,
"grad_norm": 0.427734375,
"learning_rate": 8e-05,
"loss": 1.2667243480682373,
"step": 2446
},
{
"epoch": 0.5822506317823696,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2277895212173462,
"step": 2448
},
{
"epoch": 0.5827263267429761,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2290725708007812,
"step": 2450
},
{
"epoch": 0.5832020217035826,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.227077841758728,
"step": 2452
},
{
"epoch": 0.5836777166641891,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2680089473724365,
"step": 2454
},
{
"epoch": 0.5841534116247956,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.236987590789795,
"step": 2456
},
{
"epoch": 0.5846291065854021,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.255210518836975,
"step": 2458
},
{
"epoch": 0.5851048015460086,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.26492440700531,
"step": 2460
},
{
"epoch": 0.5855804965066151,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2221426963806152,
"step": 2462
},
{
"epoch": 0.5860561914672217,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2715880870819092,
"step": 2464
},
{
"epoch": 0.5865318864278282,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2061941623687744,
"step": 2466
},
{
"epoch": 0.5870075813884347,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.1762068271636963,
"step": 2468
},
{
"epoch": 0.5874832763490412,
"grad_norm": 0.41015625,
"learning_rate": 8e-05,
"loss": 1.2648086547851562,
"step": 2470
},
{
"epoch": 0.5879589713096477,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2263320684432983,
"step": 2472
},
{
"epoch": 0.5884346662702542,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2949507236480713,
"step": 2474
},
{
"epoch": 0.5889103612308607,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2580509185791016,
"step": 2476
},
{
"epoch": 0.5893860561914672,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2583489418029785,
"step": 2478
},
{
"epoch": 0.5898617511520737,
"grad_norm": 0.369140625,
"learning_rate": 8e-05,
"loss": 1.2290489673614502,
"step": 2480
},
{
"epoch": 0.5903374461126802,
"grad_norm": 0.35546875,
"learning_rate": 8e-05,
"loss": 1.1821609735488892,
"step": 2482
},
{
"epoch": 0.5908131410732868,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.265315294265747,
"step": 2484
},
{
"epoch": 0.5912888360338933,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2531765699386597,
"step": 2486
},
{
"epoch": 0.5917645309944998,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2487919330596924,
"step": 2488
},
{
"epoch": 0.5922402259551063,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2386280298233032,
"step": 2490
},
{
"epoch": 0.5927159209157128,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2352405786514282,
"step": 2492
},
{
"epoch": 0.5931916158763193,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.2241952419281006,
"step": 2494
},
{
"epoch": 0.5936673108369258,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2149953842163086,
"step": 2496
},
{
"epoch": 0.5941430057975323,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2526779174804688,
"step": 2498
},
{
"epoch": 0.5946187007581388,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.2221908569335938,
"step": 2500
},
{
"epoch": 0.5950943957187453,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2482354640960693,
"step": 2502
},
{
"epoch": 0.5955700906793518,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.189880609512329,
"step": 2504
},
{
"epoch": 0.5960457856399584,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2685422897338867,
"step": 2506
},
{
"epoch": 0.5965214806005649,
"grad_norm": 0.365234375,
"learning_rate": 8e-05,
"loss": 1.2144076824188232,
"step": 2508
},
{
"epoch": 0.5969971755611714,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.1419060230255127,
"step": 2510
},
{
"epoch": 0.5974728705217779,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2288837432861328,
"step": 2512
},
{
"epoch": 0.5979485654823844,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2173268795013428,
"step": 2514
},
{
"epoch": 0.5984242604429909,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.2456581592559814,
"step": 2516
},
{
"epoch": 0.5988999554035974,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.310453176498413,
"step": 2518
},
{
"epoch": 0.5993756503642039,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.271226406097412,
"step": 2520
},
{
"epoch": 0.5998513453248104,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2474400997161865,
"step": 2522
},
{
"epoch": 0.600327040285417,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2638752460479736,
"step": 2524
},
{
"epoch": 0.6008027352460235,
"grad_norm": 0.404296875,
"learning_rate": 8e-05,
"loss": 1.2449238300323486,
"step": 2526
},
{
"epoch": 0.60127843020663,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2357442378997803,
"step": 2528
},
{
"epoch": 0.6017541251672365,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2108559608459473,
"step": 2530
},
{
"epoch": 0.602229820127843,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2793264389038086,
"step": 2532
},
{
"epoch": 0.6027055150884495,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2260792255401611,
"step": 2534
},
{
"epoch": 0.603181210049056,
"grad_norm": 0.369140625,
"learning_rate": 8e-05,
"loss": 1.2476468086242676,
"step": 2536
},
{
"epoch": 0.6036569050096625,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2396502494812012,
"step": 2538
},
{
"epoch": 0.604132599970269,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2091515064239502,
"step": 2540
},
{
"epoch": 0.6046082949308755,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2249176502227783,
"step": 2542
},
{
"epoch": 0.605083989891482,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.274390459060669,
"step": 2544
},
{
"epoch": 0.6055596848520886,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2077170610427856,
"step": 2546
},
{
"epoch": 0.6060353798126951,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2330358028411865,
"step": 2548
},
{
"epoch": 0.6065110747733016,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2728490829467773,
"step": 2550
},
{
"epoch": 0.6069867697339081,
"grad_norm": 0.35546875,
"learning_rate": 8e-05,
"loss": 1.1577916145324707,
"step": 2552
},
{
"epoch": 0.6074624646945146,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2705035209655762,
"step": 2554
},
{
"epoch": 0.6079381596551211,
"grad_norm": 0.369140625,
"learning_rate": 8e-05,
"loss": 1.2194724082946777,
"step": 2556
},
{
"epoch": 0.6084138546157276,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.223502278327942,
"step": 2558
},
{
"epoch": 0.6088895495763341,
"grad_norm": 0.365234375,
"learning_rate": 8e-05,
"loss": 1.2284711599349976,
"step": 2560
},
{
"epoch": 0.6093652445369407,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2612671852111816,
"step": 2562
},
{
"epoch": 0.6098409394975473,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2475411891937256,
"step": 2564
},
{
"epoch": 0.6103166344581538,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2346876859664917,
"step": 2566
},
{
"epoch": 0.6107923294187603,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.1875958442687988,
"step": 2568
},
{
"epoch": 0.6112680243793668,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2490816116333008,
"step": 2570
},
{
"epoch": 0.6117437193399733,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.2314293384552002,
"step": 2572
},
{
"epoch": 0.6122194143005798,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2150312662124634,
"step": 2574
},
{
"epoch": 0.6126951092611863,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2333028316497803,
"step": 2576
},
{
"epoch": 0.6131708042217928,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2211978435516357,
"step": 2578
},
{
"epoch": 0.6136464991823993,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2382583618164062,
"step": 2580
},
{
"epoch": 0.6141221941430058,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2812304496765137,
"step": 2582
},
{
"epoch": 0.6145978891036123,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2513656616210938,
"step": 2584
},
{
"epoch": 0.6150735840642189,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2187235355377197,
"step": 2586
},
{
"epoch": 0.6155492790248254,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2611520290374756,
"step": 2588
},
{
"epoch": 0.6160249739854319,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2477173805236816,
"step": 2590
},
{
"epoch": 0.6165006689460384,
"grad_norm": 0.35546875,
"learning_rate": 8e-05,
"loss": 1.1624467372894287,
"step": 2592
},
{
"epoch": 0.6169763639066449,
"grad_norm": 0.359375,
"learning_rate": 8e-05,
"loss": 1.2253656387329102,
"step": 2594
},
{
"epoch": 0.6174520588672514,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2614085674285889,
"step": 2596
},
{
"epoch": 0.6179277538278579,
"grad_norm": 0.369140625,
"learning_rate": 8e-05,
"loss": 1.1892552375793457,
"step": 2598
},
{
"epoch": 0.6184034487884644,
"grad_norm": 0.365234375,
"learning_rate": 8e-05,
"loss": 1.304673671722412,
"step": 2600
},
{
"epoch": 0.6188791437490709,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.1897804737091064,
"step": 2602
},
{
"epoch": 0.6193548387096774,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2602784633636475,
"step": 2604
},
{
"epoch": 0.619830533670284,
"grad_norm": 0.35546875,
"learning_rate": 8e-05,
"loss": 1.1673520803451538,
"step": 2606
},
{
"epoch": 0.6203062286308905,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.2243266105651855,
"step": 2608
},
{
"epoch": 0.620781923591497,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2490243911743164,
"step": 2610
},
{
"epoch": 0.6212576185521035,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.1751642227172852,
"step": 2612
},
{
"epoch": 0.62173331351271,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2147471904754639,
"step": 2614
},
{
"epoch": 0.6222090084733165,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2540574073791504,
"step": 2616
},
{
"epoch": 0.622684703433923,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2662967443466187,
"step": 2618
},
{
"epoch": 0.6231603983945295,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2161445617675781,
"step": 2620
},
{
"epoch": 0.623636093355136,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2706882953643799,
"step": 2622
},
{
"epoch": 0.6241117883157425,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2533507347106934,
"step": 2624
},
{
"epoch": 0.624587483276349,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2325465679168701,
"step": 2626
},
{
"epoch": 0.6250631782369556,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2847120761871338,
"step": 2628
},
{
"epoch": 0.6255388731975621,
"grad_norm": 0.359375,
"learning_rate": 8e-05,
"loss": 1.2086182832717896,
"step": 2630
},
{
"epoch": 0.6260145681581686,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2454938888549805,
"step": 2632
},
{
"epoch": 0.6264902631187751,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.253279685974121,
"step": 2634
},
{
"epoch": 0.6269659580793816,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.200972318649292,
"step": 2636
},
{
"epoch": 0.6274416530399881,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2435922622680664,
"step": 2638
},
{
"epoch": 0.6279173480005946,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2706129550933838,
"step": 2640
},
{
"epoch": 0.6283930429612011,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2585011720657349,
"step": 2642
},
{
"epoch": 0.6288687379218076,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2510229349136353,
"step": 2644
},
{
"epoch": 0.6293444328824142,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2418980598449707,
"step": 2646
},
{
"epoch": 0.6298201278430207,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2494986057281494,
"step": 2648
},
{
"epoch": 0.6302958228036272,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2714817523956299,
"step": 2650
},
{
"epoch": 0.6307715177642337,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2182557582855225,
"step": 2652
},
{
"epoch": 0.6312472127248402,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2318391799926758,
"step": 2654
},
{
"epoch": 0.6317229076854467,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2475109100341797,
"step": 2656
},
{
"epoch": 0.6321986026460532,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2712761163711548,
"step": 2658
},
{
"epoch": 0.6326742976066597,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2394472360610962,
"step": 2660
},
{
"epoch": 0.6331499925672662,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2201759815216064,
"step": 2662
},
{
"epoch": 0.6336256875278727,
"grad_norm": 0.369140625,
"learning_rate": 8e-05,
"loss": 1.1588757038116455,
"step": 2664
},
{
"epoch": 0.6341013824884792,
"grad_norm": 0.349609375,
"learning_rate": 8e-05,
"loss": 1.1701884269714355,
"step": 2666
},
{
"epoch": 0.6345770774490858,
"grad_norm": 0.359375,
"learning_rate": 8e-05,
"loss": 1.2547426223754883,
"step": 2668
},
{
"epoch": 0.6350527724096923,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2501137256622314,
"step": 2670
},
{
"epoch": 0.6355284673702988,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2706825733184814,
"step": 2672
},
{
"epoch": 0.6360041623309053,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2127528190612793,
"step": 2674
},
{
"epoch": 0.6364798572915118,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2191646099090576,
"step": 2676
},
{
"epoch": 0.6369555522521183,
"grad_norm": 0.369140625,
"learning_rate": 8e-05,
"loss": 1.2211954593658447,
"step": 2678
},
{
"epoch": 0.6374312472127248,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2068610191345215,
"step": 2680
},
{
"epoch": 0.6379069421733313,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2012649774551392,
"step": 2682
},
{
"epoch": 0.6383826371339378,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2034168243408203,
"step": 2684
},
{
"epoch": 0.6388583320945443,
"grad_norm": 0.359375,
"learning_rate": 8e-05,
"loss": 1.1512229442596436,
"step": 2686
},
{
"epoch": 0.6393340270551509,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.273275375366211,
"step": 2688
},
{
"epoch": 0.6398097220157574,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2607433795928955,
"step": 2690
},
{
"epoch": 0.6402854169763639,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2025877237319946,
"step": 2692
},
{
"epoch": 0.6407611119369704,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.237597942352295,
"step": 2694
},
{
"epoch": 0.6412368068975769,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2055954933166504,
"step": 2696
},
{
"epoch": 0.6417125018581834,
"grad_norm": 0.359375,
"learning_rate": 8e-05,
"loss": 1.2079732418060303,
"step": 2698
},
{
"epoch": 0.6421881968187899,
"grad_norm": 0.3515625,
"learning_rate": 8e-05,
"loss": 1.2707421779632568,
"step": 2700
},
{
"epoch": 0.6426638917793964,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.229077696800232,
"step": 2702
},
{
"epoch": 0.6431395867400029,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.171201229095459,
"step": 2704
},
{
"epoch": 0.6436152817006094,
"grad_norm": 0.349609375,
"learning_rate": 8e-05,
"loss": 1.2386970520019531,
"step": 2706
},
{
"epoch": 0.644090976661216,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2508089542388916,
"step": 2708
},
{
"epoch": 0.6445666716218225,
"grad_norm": 0.361328125,
"learning_rate": 8e-05,
"loss": 1.2166051864624023,
"step": 2710
},
{
"epoch": 0.645042366582429,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.1718792915344238,
"step": 2712
},
{
"epoch": 0.6455180615430355,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.208460807800293,
"step": 2714
},
{
"epoch": 0.645993756503642,
"grad_norm": 0.357421875,
"learning_rate": 8e-05,
"loss": 1.2658112049102783,
"step": 2716
},
{
"epoch": 0.6464694514642485,
"grad_norm": 0.359375,
"learning_rate": 8e-05,
"loss": 1.2218315601348877,
"step": 2718
},
{
"epoch": 0.646945146424855,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.2742373943328857,
"step": 2720
},
{
"epoch": 0.6474208413854615,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2009773254394531,
"step": 2722
},
{
"epoch": 0.647896536346068,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2288341522216797,
"step": 2724
},
{
"epoch": 0.6483722313066745,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2737244367599487,
"step": 2726
},
{
"epoch": 0.6488479262672812,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.289430856704712,
"step": 2728
},
{
"epoch": 0.6493236212278877,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2465755939483643,
"step": 2730
},
{
"epoch": 0.6497993161884942,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2071137428283691,
"step": 2732
},
{
"epoch": 0.6502750111491007,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.194615125656128,
"step": 2734
},
{
"epoch": 0.6507507061097072,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2709908485412598,
"step": 2736
},
{
"epoch": 0.6512264010703137,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2470756769180298,
"step": 2738
},
{
"epoch": 0.6517020960309202,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2120921611785889,
"step": 2740
},
{
"epoch": 0.6521777909915267,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2401468753814697,
"step": 2742
},
{
"epoch": 0.6526534859521332,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.2416322231292725,
"step": 2744
},
{
"epoch": 0.6531291809127397,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.1909356117248535,
"step": 2746
},
{
"epoch": 0.6536048758733463,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.2629019021987915,
"step": 2748
},
{
"epoch": 0.6540805708339528,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2401649951934814,
"step": 2750
},
{
"epoch": 0.6545562657945593,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2681682109832764,
"step": 2752
},
{
"epoch": 0.6550319607551658,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2728466987609863,
"step": 2754
},
{
"epoch": 0.6555076557157723,
"grad_norm": 0.361328125,
"learning_rate": 8e-05,
"loss": 1.223940372467041,
"step": 2756
},
{
"epoch": 0.6559833506763788,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2075097560882568,
"step": 2758
},
{
"epoch": 0.6564590456369853,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.266689419746399,
"step": 2760
},
{
"epoch": 0.6569347405975918,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.2576415538787842,
"step": 2762
},
{
"epoch": 0.6574104355581983,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2286550998687744,
"step": 2764
},
{
"epoch": 0.6578861305188048,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2462449073791504,
"step": 2766
},
{
"epoch": 0.6583618254794114,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2215523719787598,
"step": 2768
},
{
"epoch": 0.6588375204400179,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.200398325920105,
"step": 2770
},
{
"epoch": 0.6593132154006244,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2600317001342773,
"step": 2772
},
{
"epoch": 0.6597889103612309,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.2789270877838135,
"step": 2774
},
{
"epoch": 0.6602646053218374,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.252886176109314,
"step": 2776
},
{
"epoch": 0.6607403002824439,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.1967723369598389,
"step": 2778
},
{
"epoch": 0.6612159952430504,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.171466588973999,
"step": 2780
},
{
"epoch": 0.6616916902036569,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2854139804840088,
"step": 2782
},
{
"epoch": 0.6621673851642634,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.259742021560669,
"step": 2784
},
{
"epoch": 0.6626430801248699,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2996937036514282,
"step": 2786
},
{
"epoch": 0.6631187750854765,
"grad_norm": 0.369140625,
"learning_rate": 8e-05,
"loss": 1.2012677192687988,
"step": 2788
},
{
"epoch": 0.663594470046083,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2383891344070435,
"step": 2790
},
{
"epoch": 0.6640701650066895,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2412991523742676,
"step": 2792
},
{
"epoch": 0.664545859967296,
"grad_norm": 0.431640625,
"learning_rate": 8e-05,
"loss": 1.2173049449920654,
"step": 2794
},
{
"epoch": 0.6650215549279025,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2093093395233154,
"step": 2796
},
{
"epoch": 0.665497249888509,
"grad_norm": 0.37109375,
"learning_rate": 8e-05,
"loss": 1.2188637256622314,
"step": 2798
},
{
"epoch": 0.6659729448491155,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2203969955444336,
"step": 2800
},
{
"epoch": 0.666448639809722,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2513853311538696,
"step": 2802
},
{
"epoch": 0.6669243347703285,
"grad_norm": 0.40625,
"learning_rate": 8e-05,
"loss": 1.1890287399291992,
"step": 2804
},
{
"epoch": 0.667400029730935,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2430322170257568,
"step": 2806
},
{
"epoch": 0.6678757246915416,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2834746837615967,
"step": 2808
},
{
"epoch": 0.6683514196521481,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.2333581447601318,
"step": 2810
},
{
"epoch": 0.6688271146127546,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2029738426208496,
"step": 2812
},
{
"epoch": 0.6693028095733611,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2190194129943848,
"step": 2814
},
{
"epoch": 0.6697785045339676,
"grad_norm": 0.4140625,
"learning_rate": 8e-05,
"loss": 1.2467260360717773,
"step": 2816
},
{
"epoch": 0.6702541994945741,
"grad_norm": 0.412109375,
"learning_rate": 8e-05,
"loss": 1.2383447885513306,
"step": 2818
},
{
"epoch": 0.6707298944551806,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.2235246896743774,
"step": 2820
},
{
"epoch": 0.6712055894157871,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2301299571990967,
"step": 2822
},
{
"epoch": 0.6716812843763936,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2388970851898193,
"step": 2824
},
{
"epoch": 0.6721569793370001,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2599682807922363,
"step": 2826
},
{
"epoch": 0.6726326742976066,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2594590187072754,
"step": 2828
},
{
"epoch": 0.6731083692582132,
"grad_norm": 0.3828125,
"learning_rate": 8e-05,
"loss": 1.2377604246139526,
"step": 2830
},
{
"epoch": 0.6735840642188197,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.2168340682983398,
"step": 2832
},
{
"epoch": 0.6740597591794262,
"grad_norm": 0.365234375,
"learning_rate": 8e-05,
"loss": 1.1372761726379395,
"step": 2834
},
{
"epoch": 0.6745354541400327,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.17765212059021,
"step": 2836
},
{
"epoch": 0.6750111491006392,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.235781192779541,
"step": 2838
},
{
"epoch": 0.6754868440612457,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.243680715560913,
"step": 2840
},
{
"epoch": 0.6759625390218522,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2039899826049805,
"step": 2842
},
{
"epoch": 0.6764382339824587,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.273780345916748,
"step": 2844
},
{
"epoch": 0.6769139289430652,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.1936399936676025,
"step": 2846
},
{
"epoch": 0.6773896239036717,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.1913855075836182,
"step": 2848
},
{
"epoch": 0.6778653188642783,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2655634880065918,
"step": 2850
},
{
"epoch": 0.6783410138248848,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.229090690612793,
"step": 2852
},
{
"epoch": 0.6788167087854913,
"grad_norm": 0.388671875,
"learning_rate": 8e-05,
"loss": 1.229933738708496,
"step": 2854
},
{
"epoch": 0.6792924037460978,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.2448334693908691,
"step": 2856
},
{
"epoch": 0.6797680987067043,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.1777703762054443,
"step": 2858
},
{
"epoch": 0.6802437936673108,
"grad_norm": 0.390625,
"learning_rate": 8e-05,
"loss": 1.1988234519958496,
"step": 2860
},
{
"epoch": 0.6807194886279173,
"grad_norm": 0.357421875,
"learning_rate": 8e-05,
"loss": 1.2633662223815918,
"step": 2862
},
{
"epoch": 0.6811951835885238,
"grad_norm": 0.396484375,
"learning_rate": 8e-05,
"loss": 1.1914260387420654,
"step": 2864
},
{
"epoch": 0.6816708785491303,
"grad_norm": 0.357421875,
"learning_rate": 8e-05,
"loss": 1.1301052570343018,
"step": 2866
},
{
"epoch": 0.6821465735097368,
"grad_norm": 0.37890625,
"learning_rate": 8e-05,
"loss": 1.2647099494934082,
"step": 2868
},
{
"epoch": 0.6826222684703434,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2434825897216797,
"step": 2870
},
{
"epoch": 0.6830979634309499,
"grad_norm": 0.369140625,
"learning_rate": 8e-05,
"loss": 1.1619213819503784,
"step": 2872
},
{
"epoch": 0.6835736583915564,
"grad_norm": 0.361328125,
"learning_rate": 8e-05,
"loss": 1.272236943244934,
"step": 2874
},
{
"epoch": 0.6840493533521629,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2712485790252686,
"step": 2876
},
{
"epoch": 0.6845250483127694,
"grad_norm": 0.380859375,
"learning_rate": 8e-05,
"loss": 1.2562975883483887,
"step": 2878
},
{
"epoch": 0.6850007432733759,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2557085752487183,
"step": 2880
},
{
"epoch": 0.6854764382339824,
"grad_norm": 0.400390625,
"learning_rate": 8e-05,
"loss": 1.2001773118972778,
"step": 2882
},
{
"epoch": 0.6859521331945889,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.2661209106445312,
"step": 2884
},
{
"epoch": 0.6864278281551954,
"grad_norm": 0.392578125,
"learning_rate": 8e-05,
"loss": 1.2700567245483398,
"step": 2886
},
{
"epoch": 0.6869035231158019,
"grad_norm": 0.40234375,
"learning_rate": 8e-05,
"loss": 1.201700210571289,
"step": 2888
},
{
"epoch": 0.6873792180764084,
"grad_norm": 0.423828125,
"learning_rate": 8e-05,
"loss": 1.2309627532958984,
"step": 2890
},
{
"epoch": 0.687854913037015,
"grad_norm": 0.455078125,
"learning_rate": 8e-05,
"loss": 1.2442858219146729,
"step": 2892
},
{
"epoch": 0.6883306079976216,
"grad_norm": 0.416015625,
"learning_rate": 8e-05,
"loss": 1.2312313318252563,
"step": 2894
},
{
"epoch": 0.6888063029582281,
"grad_norm": 0.384765625,
"learning_rate": 8e-05,
"loss": 1.168707013130188,
"step": 2896
},
{
"epoch": 0.6892819979188346,
"grad_norm": 0.39453125,
"learning_rate": 8e-05,
"loss": 1.2480907440185547,
"step": 2898
},
{
"epoch": 0.6897576928794411,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.2292897701263428,
"step": 2900
},
{
"epoch": 0.6902333878400476,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2023284435272217,
"step": 2902
},
{
"epoch": 0.6907090828006541,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2282081842422485,
"step": 2904
},
{
"epoch": 0.6911847777612606,
"grad_norm": 0.408203125,
"learning_rate": 8e-05,
"loss": 1.2390121221542358,
"step": 2906
},
{
"epoch": 0.6916604727218671,
"grad_norm": 0.38671875,
"learning_rate": 8e-05,
"loss": 1.255518913269043,
"step": 2908
},
{
"epoch": 0.6921361676824737,
"grad_norm": 0.365234375,
"learning_rate": 8e-05,
"loss": 1.1897988319396973,
"step": 2910
},
{
"epoch": 0.6926118626430802,
"grad_norm": 0.376953125,
"learning_rate": 8e-05,
"loss": 1.1889443397521973,
"step": 2912
},
{
"epoch": 0.6930875576036867,
"grad_norm": 0.373046875,
"learning_rate": 8e-05,
"loss": 1.2725920677185059,
"step": 2914
},
{
"epoch": 0.6935632525642932,
"grad_norm": 0.345703125,
"learning_rate": 8e-05,
"loss": 1.2496650218963623,
"step": 2916
},
{
"epoch": 0.6940389475248997,
"grad_norm": 0.35546875,
"learning_rate": 8e-05,
"loss": 1.1894876956939697,
"step": 2918
},
{
"epoch": 0.6945146424855062,
"grad_norm": 0.361328125,
"learning_rate": 8e-05,
"loss": 1.2089958190917969,
"step": 2920
},
{
"epoch": 0.6949903374461127,
"grad_norm": 0.3671875,
"learning_rate": 8e-05,
"loss": 1.2501626014709473,
"step": 2922
},
{
"epoch": 0.6954660324067192,
"grad_norm": 0.365234375,
"learning_rate": 8e-05,
"loss": 1.221423625946045,
"step": 2924
},
{
"epoch": 0.6959417273673257,
"grad_norm": 0.375,
"learning_rate": 8e-05,
"loss": 1.2127522230148315,
"step": 2926
},
{
"epoch": 0.6964174223279322,
"grad_norm": 0.3984375,
"learning_rate": 8e-05,
"loss": 1.2586814165115356,
"step": 2928
},
{
"epoch": 0.6968931172885388,
"grad_norm": 0.365234375,
"learning_rate": 8e-05,
"loss": 1.2253403663635254,
"step": 2930
},
{
"epoch": 0.6973688122491453,
"grad_norm": 0.357421875,
"learning_rate": 8e-05,
"loss": 1.2009187936782837,
"step": 2932
},
{
"epoch": 0.6978445072097518,
"grad_norm": 0.345703125,
"learning_rate": 8e-05,
"loss": 1.179222583770752,
"step": 2934
},
{
"epoch": 0.6983202021703583,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.1949589252471924,
"step": 2936
},
{
"epoch": 0.6987958971309648,
"grad_norm": 0.365234375,
"learning_rate": 8e-05,
"loss": 1.2387232780456543,
"step": 2938
},
{
"epoch": 0.6992715920915713,
"grad_norm": 0.36328125,
"learning_rate": 8e-05,
"loss": 1.184262990951538,
"step": 2940
},
{
"epoch": 0.6997472870521778,
"grad_norm": 0.35546875,
"learning_rate": 8e-05,
"loss": 1.1451635360717773,
"step": 2942
},
{
"epoch": 0.7002229820127843,
"grad_norm": 0.41796875,
"learning_rate": 8e-05,
"loss": 1.2731480598449707,
"step": 2944
},
{
"epoch": 0.7006986769733908,
"grad_norm": 0.431640625,
"learning_rate": 7.999950424154985e-05,
"loss": 1.2006233930587769,
"step": 2946
},
{
"epoch": 0.7011743719339973,
"grad_norm": 0.408203125,
"learning_rate": 7.999801697848817e-05,
"loss": 1.2164214849472046,
"step": 2948
},
{
"epoch": 0.7016500668946039,
"grad_norm": 0.390625,
"learning_rate": 7.999553824768115e-05,
"loss": 1.2032701969146729,
"step": 2950
},
{
"epoch": 0.7021257618552104,
"grad_norm": 0.3671875,
"learning_rate": 7.999206811057136e-05,
"loss": 1.184319257736206,
"step": 2952
},
{
"epoch": 0.7026014568158169,
"grad_norm": 0.388671875,
"learning_rate": 7.998760665317632e-05,
"loss": 1.1767771244049072,
"step": 2954
},
{
"epoch": 0.7030771517764234,
"grad_norm": 0.365234375,
"learning_rate": 7.998215398608625e-05,
"loss": 1.1959552764892578,
"step": 2956
},
{
"epoch": 0.7035528467370299,
"grad_norm": 0.353515625,
"learning_rate": 7.997571024446146e-05,
"loss": 1.1779606342315674,
"step": 2958
},
{
"epoch": 0.7040285416976364,
"grad_norm": 0.37109375,
"learning_rate": 7.996827558802894e-05,
"loss": 1.1682159900665283,
"step": 2960
},
{
"epoch": 0.7045042366582429,
"grad_norm": 0.375,
"learning_rate": 7.995985020107833e-05,
"loss": 1.181810736656189,
"step": 2962
},
{
"epoch": 0.7049799316188494,
"grad_norm": 0.345703125,
"learning_rate": 7.995043429245751e-05,
"loss": 1.2362987995147705,
"step": 2964
},
{
"epoch": 0.7054556265794559,
"grad_norm": 0.353515625,
"learning_rate": 7.994002809556727e-05,
"loss": 1.1899755001068115,
"step": 2966
},
{
"epoch": 0.7059313215400624,
"grad_norm": 0.369140625,
"learning_rate": 7.992863186835562e-05,
"loss": 1.223832607269287,
"step": 2968
},
{
"epoch": 0.706407016500669,
"grad_norm": 0.375,
"learning_rate": 7.991624589331135e-05,
"loss": 1.2033984661102295,
"step": 2970
},
{
"epoch": 0.7068827114612755,
"grad_norm": 0.3984375,
"learning_rate": 7.990287047745706e-05,
"loss": 1.2263352870941162,
"step": 2972
},
{
"epoch": 0.707358406421882,
"grad_norm": 0.36328125,
"learning_rate": 7.988850595234152e-05,
"loss": 1.204215168952942,
"step": 2974
},
{
"epoch": 0.7078341013824885,
"grad_norm": 0.359375,
"learning_rate": 7.987315267403146e-05,
"loss": 1.2107601165771484,
"step": 2976
},
{
"epoch": 0.708309796343095,
"grad_norm": 0.376953125,
"learning_rate": 7.985681102310276e-05,
"loss": 1.2664358615875244,
"step": 2978
},
{
"epoch": 0.7087854913037015,
"grad_norm": 0.33984375,
"learning_rate": 7.983948140463098e-05,
"loss": 1.1956796646118164,
"step": 2980
},
{
"epoch": 0.709261186264308,
"grad_norm": 0.369140625,
"learning_rate": 7.982116424818139e-05,
"loss": 1.2163138389587402,
"step": 2982
},
{
"epoch": 0.7097368812249145,
"grad_norm": 0.34375,
"learning_rate": 7.980186000779822e-05,
"loss": 1.1702892780303955,
"step": 2984
},
{
"epoch": 0.710212576185521,
"grad_norm": 0.3671875,
"learning_rate": 7.978156916199348e-05,
"loss": 1.2452645301818848,
"step": 2986
},
{
"epoch": 0.7106882711461275,
"grad_norm": 0.376953125,
"learning_rate": 7.976029221373511e-05,
"loss": 1.1621694564819336,
"step": 2988
},
{
"epoch": 0.711163966106734,
"grad_norm": 0.353515625,
"learning_rate": 7.973802969043444e-05,
"loss": 1.2300595045089722,
"step": 2990
},
{
"epoch": 0.7116396610673406,
"grad_norm": 0.3515625,
"learning_rate": 7.971478214393316e-05,
"loss": 1.1861531734466553,
"step": 2992
},
{
"epoch": 0.7121153560279471,
"grad_norm": 0.359375,
"learning_rate": 7.969055015048968e-05,
"loss": 1.2321807146072388,
"step": 2994
},
{
"epoch": 0.7125910509885536,
"grad_norm": 0.3828125,
"learning_rate": 7.966533431076474e-05,
"loss": 1.197440266609192,
"step": 2996
},
{
"epoch": 0.7130667459491601,
"grad_norm": 0.353515625,
"learning_rate": 7.963913524980666e-05,
"loss": 1.1787972450256348,
"step": 2998
},
{
"epoch": 0.7135424409097666,
"grad_norm": 0.353515625,
"learning_rate": 7.961195361703569e-05,
"loss": 1.2083191871643066,
"step": 3000
},
{
"epoch": 0.7140181358703731,
"grad_norm": 0.38671875,
"learning_rate": 7.958379008622808e-05,
"loss": 1.178969144821167,
"step": 3002
},
{
"epoch": 0.7144938308309796,
"grad_norm": 0.37890625,
"learning_rate": 7.955464535549922e-05,
"loss": 1.2047157287597656,
"step": 3004
},
{
"epoch": 0.7149695257915861,
"grad_norm": 0.375,
"learning_rate": 7.952452014728645e-05,
"loss": 1.1746503114700317,
"step": 3006
},
{
"epoch": 0.7154452207521926,
"grad_norm": 0.38671875,
"learning_rate": 7.949341520833109e-05,
"loss": 1.1968495845794678,
"step": 3008
},
{
"epoch": 0.7159209157127991,
"grad_norm": 0.34765625,
"learning_rate": 7.946133130965995e-05,
"loss": 1.1814994812011719,
"step": 3010
},
{
"epoch": 0.7163966106734057,
"grad_norm": 0.373046875,
"learning_rate": 7.942826924656624e-05,
"loss": 1.2259728908538818,
"step": 3012
},
{
"epoch": 0.7168723056340122,
"grad_norm": 0.3828125,
"learning_rate": 7.939422983858982e-05,
"loss": 1.2128264904022217,
"step": 3014
},
{
"epoch": 0.7173480005946187,
"grad_norm": 0.353515625,
"learning_rate": 7.935921392949688e-05,
"loss": 1.1720407009124756,
"step": 3016
},
{
"epoch": 0.7178236955552252,
"grad_norm": 0.357421875,
"learning_rate": 7.932322238725907e-05,
"loss": 1.187741994857788,
"step": 3018
},
{
"epoch": 0.7182993905158317,
"grad_norm": 0.359375,
"learning_rate": 7.928625610403196e-05,
"loss": 1.2031012773513794,
"step": 3020
},
{
"epoch": 0.7187750854764382,
"grad_norm": 0.36328125,
"learning_rate": 7.924831599613289e-05,
"loss": 1.2213904857635498,
"step": 3022
},
{
"epoch": 0.7192507804370447,
"grad_norm": 0.384765625,
"learning_rate": 7.920940300401832e-05,
"loss": 1.2365423440933228,
"step": 3024
},
{
"epoch": 0.7197264753976512,
"grad_norm": 0.40234375,
"learning_rate": 7.91695180922605e-05,
"loss": 1.2173717021942139,
"step": 3026
},
{
"epoch": 0.7202021703582577,
"grad_norm": 0.3828125,
"learning_rate": 7.912866224952352e-05,
"loss": 1.1911011934280396,
"step": 3028
},
{
"epoch": 0.7206778653188642,
"grad_norm": 0.38671875,
"learning_rate": 7.908683648853886e-05,
"loss": 1.1721656322479248,
"step": 3030
},
{
"epoch": 0.7211535602794708,
"grad_norm": 0.384765625,
"learning_rate": 7.904404184608021e-05,
"loss": 1.2273123264312744,
"step": 3032
},
{
"epoch": 0.7216292552400773,
"grad_norm": 0.3671875,
"learning_rate": 7.900027938293788e-05,
"loss": 1.1623331308364868,
"step": 3034
},
{
"epoch": 0.7221049502006838,
"grad_norm": 0.365234375,
"learning_rate": 7.895555018389241e-05,
"loss": 1.1802709102630615,
"step": 3036
},
{
"epoch": 0.7225806451612903,
"grad_norm": 0.369140625,
"learning_rate": 7.890985535768771e-05,
"loss": 1.2304480075836182,
"step": 3038
},
{
"epoch": 0.7230563401218968,
"grad_norm": 0.35546875,
"learning_rate": 7.88631960370036e-05,
"loss": 1.182260513305664,
"step": 3040
},
{
"epoch": 0.7235320350825033,
"grad_norm": 0.361328125,
"learning_rate": 7.881557337842769e-05,
"loss": 1.2020962238311768,
"step": 3042
},
{
"epoch": 0.7240077300431098,
"grad_norm": 0.38671875,
"learning_rate": 7.876698856242677e-05,
"loss": 1.1832443475723267,
"step": 3044
},
{
"epoch": 0.7244834250037163,
"grad_norm": 0.392578125,
"learning_rate": 7.871744279331747e-05,
"loss": 1.223937749862671,
"step": 3046
},
{
"epoch": 0.7249591199643228,
"grad_norm": 0.412109375,
"learning_rate": 7.866693729923651e-05,
"loss": 1.2505052089691162,
"step": 3048
},
{
"epoch": 0.7254348149249293,
"grad_norm": 0.39453125,
"learning_rate": 7.861547333211014e-05,
"loss": 1.2611567974090576,
"step": 3050
},
{
"epoch": 0.7259105098855358,
"grad_norm": 0.41015625,
"learning_rate": 7.85630521676232e-05,
"loss": 1.2265489101409912,
"step": 3052
},
{
"epoch": 0.7263862048461424,
"grad_norm": 0.37109375,
"learning_rate": 7.850967510518743e-05,
"loss": 1.2124598026275635,
"step": 3054
},
{
"epoch": 0.7268618998067489,
"grad_norm": 0.3828125,
"learning_rate": 7.845534346790934e-05,
"loss": 1.1696916818618774,
"step": 3056
},
{
"epoch": 0.7273375947673555,
"grad_norm": 0.390625,
"learning_rate": 7.840005860255733e-05,
"loss": 1.2019386291503906,
"step": 3058
},
{
"epoch": 0.727813289727962,
"grad_norm": 0.375,
"learning_rate": 7.834382187952839e-05,
"loss": 1.2763334512710571,
"step": 3060
},
{
"epoch": 0.7282889846885685,
"grad_norm": 0.37890625,
"learning_rate": 7.828663469281401e-05,
"loss": 1.2215170860290527,
"step": 3062
},
{
"epoch": 0.728764679649175,
"grad_norm": 0.3828125,
"learning_rate": 7.822849845996578e-05,
"loss": 1.2576022148132324,
"step": 3064
},
{
"epoch": 0.7292403746097815,
"grad_norm": 0.369140625,
"learning_rate": 7.81694146220601e-05,
"loss": 1.2104671001434326,
"step": 3066
},
{
"epoch": 0.729716069570388,
"grad_norm": 0.369140625,
"learning_rate": 7.810938464366258e-05,
"loss": 1.2412121295928955,
"step": 3068
},
{
"epoch": 0.7301917645309945,
"grad_norm": 0.365234375,
"learning_rate": 7.804841001279169e-05,
"loss": 1.2783949375152588,
"step": 3070
},
{
"epoch": 0.7306674594916011,
"grad_norm": 0.359375,
"learning_rate": 7.798649224088184e-05,
"loss": 1.3144667148590088,
"step": 3072
},
{
"epoch": 0.7311431544522076,
"grad_norm": 0.37109375,
"learning_rate": 7.792363286274595e-05,
"loss": 1.1726528406143188,
"step": 3074
},
{
"epoch": 0.7316188494128141,
"grad_norm": 0.38671875,
"learning_rate": 7.785983343653742e-05,
"loss": 1.2941590547561646,
"step": 3076
},
{
"epoch": 0.7320945443734206,
"grad_norm": 0.36328125,
"learning_rate": 7.779509554371152e-05,
"loss": 1.184098482131958,
"step": 3078
},
{
"epoch": 0.7325702393340271,
"grad_norm": 0.359375,
"learning_rate": 7.772942078898607e-05,
"loss": 1.202735424041748,
"step": 3080
},
{
"epoch": 0.7330459342946336,
"grad_norm": 0.36328125,
"learning_rate": 7.766281080030182e-05,
"loss": 1.2427330017089844,
"step": 3082
},
{
"epoch": 0.7335216292552401,
"grad_norm": 0.3828125,
"learning_rate": 7.7595267228782e-05,
"loss": 1.2848570346832275,
"step": 3084
},
{
"epoch": 0.7339973242158466,
"grad_norm": 0.376953125,
"learning_rate": 7.752679174869145e-05,
"loss": 1.2101168632507324,
"step": 3086
},
{
"epoch": 0.7344730191764531,
"grad_norm": 0.373046875,
"learning_rate": 7.745738605739504e-05,
"loss": 1.2171400785446167,
"step": 3088
},
{
"epoch": 0.7349487141370596,
"grad_norm": 0.40234375,
"learning_rate": 7.738705187531568e-05,
"loss": 1.2718677520751953,
"step": 3090
},
{
"epoch": 0.7354244090976662,
"grad_norm": 0.4140625,
"learning_rate": 7.731579094589161e-05,
"loss": 1.219995141029358,
"step": 3092
},
{
"epoch": 0.7359001040582727,
"grad_norm": 0.41015625,
"learning_rate": 7.724360503553326e-05,
"loss": 1.2589280605316162,
"step": 3094
},
{
"epoch": 0.7363757990188792,
"grad_norm": 0.400390625,
"learning_rate": 7.717049593357937e-05,
"loss": 1.2229852676391602,
"step": 3096
},
{
"epoch": 0.7368514939794857,
"grad_norm": 0.375,
"learning_rate": 7.709646545225266e-05,
"loss": 1.2284798622131348,
"step": 3098
},
{
"epoch": 0.7373271889400922,
"grad_norm": 0.384765625,
"learning_rate": 7.7021515426615e-05,
"loss": 1.2732608318328857,
"step": 3100
},
{
"epoch": 0.7378028839006987,
"grad_norm": 0.404296875,
"learning_rate": 7.694564771452179e-05,
"loss": 1.215606927871704,
"step": 3102
},
{
"epoch": 0.7382785788613052,
"grad_norm": 0.416015625,
"learning_rate": 7.686886419657603e-05,
"loss": 1.194861650466919,
"step": 3104
},
{
"epoch": 0.7387542738219117,
"grad_norm": 0.3984375,
"learning_rate": 7.67911667760816e-05,
"loss": 1.1924793720245361,
"step": 3106
},
{
"epoch": 0.7392299687825182,
"grad_norm": 0.3984375,
"learning_rate": 7.671255737899613e-05,
"loss": 1.196773648262024,
"step": 3108
},
{
"epoch": 0.7397056637431247,
"grad_norm": 0.39453125,
"learning_rate": 7.663303795388326e-05,
"loss": 1.2454726696014404,
"step": 3110
},
{
"epoch": 0.7401813587037313,
"grad_norm": 0.384765625,
"learning_rate": 7.655261047186437e-05,
"loss": 1.2148265838623047,
"step": 3112
},
{
"epoch": 0.7406570536643378,
"grad_norm": 0.40234375,
"learning_rate": 7.647127692656961e-05,
"loss": 1.2220816612243652,
"step": 3114
},
{
"epoch": 0.7411327486249443,
"grad_norm": 0.404296875,
"learning_rate": 7.638903933408862e-05,
"loss": 1.186138391494751,
"step": 3116
},
{
"epoch": 0.7416084435855508,
"grad_norm": 0.392578125,
"learning_rate": 7.630589973292046e-05,
"loss": 1.1910457611083984,
"step": 3118
},
{
"epoch": 0.7420841385461573,
"grad_norm": 0.427734375,
"learning_rate": 7.622186018392313e-05,
"loss": 1.182339072227478,
"step": 3120
},
{
"epoch": 0.7425598335067638,
"grad_norm": 0.39453125,
"learning_rate": 7.613692277026247e-05,
"loss": 1.2056699991226196,
"step": 3122
},
{
"epoch": 0.7430355284673703,
"grad_norm": 0.3828125,
"learning_rate": 7.605108959736048e-05,
"loss": 1.228093147277832,
"step": 3124
},
{
"epoch": 0.7435112234279768,
"grad_norm": 0.380859375,
"learning_rate": 7.596436279284322e-05,
"loss": 1.2399944067001343,
"step": 3126
},
{
"epoch": 0.7439869183885833,
"grad_norm": 0.373046875,
"learning_rate": 7.587674450648798e-05,
"loss": 1.2229018211364746,
"step": 3128
},
{
"epoch": 0.7444626133491898,
"grad_norm": 0.34375,
"learning_rate": 7.578823691017007e-05,
"loss": 1.2147870063781738,
"step": 3130
},
{
"epoch": 0.7449383083097963,
"grad_norm": 0.349609375,
"learning_rate": 7.569884219780893e-05,
"loss": 1.18184494972229,
"step": 3132
},
{
"epoch": 0.7454140032704029,
"grad_norm": 0.36328125,
"learning_rate": 7.560856258531374e-05,
"loss": 1.2729527950286865,
"step": 3134
},
{
"epoch": 0.7458896982310094,
"grad_norm": 0.369140625,
"learning_rate": 7.551740031052857e-05,
"loss": 1.2199832201004028,
"step": 3136
},
{
"epoch": 0.7463653931916159,
"grad_norm": 0.384765625,
"learning_rate": 7.54253576331768e-05,
"loss": 1.2424662113189697,
"step": 3138
},
{
"epoch": 0.7468410881522224,
"grad_norm": 0.3515625,
"learning_rate": 7.53324368348052e-05,
"loss": 1.1974238157272339,
"step": 3140
},
{
"epoch": 0.7473167831128289,
"grad_norm": 0.365234375,
"learning_rate": 7.52386402187273e-05,
"loss": 1.2078831195831299,
"step": 3142
},
{
"epoch": 0.7477924780734354,
"grad_norm": 0.36328125,
"learning_rate": 7.514397010996637e-05,
"loss": 1.2366812229156494,
"step": 3144
},
{
"epoch": 0.7482681730340419,
"grad_norm": 0.37109375,
"learning_rate": 7.504842885519771e-05,
"loss": 1.2229359149932861,
"step": 3146
},
{
"epoch": 0.7487438679946484,
"grad_norm": 0.37109375,
"learning_rate": 7.495201882269055e-05,
"loss": 1.2356886863708496,
"step": 3148
},
{
"epoch": 0.7492195629552549,
"grad_norm": 0.36328125,
"learning_rate": 7.485474240224932e-05,
"loss": 1.2112306356430054,
"step": 3150
},
{
"epoch": 0.7496952579158614,
"grad_norm": 0.375,
"learning_rate": 7.475660200515437e-05,
"loss": 1.1738417148590088,
"step": 3152
},
{
"epoch": 0.750170952876468,
"grad_norm": 0.376953125,
"learning_rate": 7.465760006410228e-05,
"loss": 1.197131633758545,
"step": 3154
},
{
"epoch": 0.7506466478370745,
"grad_norm": 0.376953125,
"learning_rate": 7.455773903314544e-05,
"loss": 1.1941673755645752,
"step": 3156
},
{
"epoch": 0.751122342797681,
"grad_norm": 0.375,
"learning_rate": 7.445702138763142e-05,
"loss": 1.2553668022155762,
"step": 3158
},
{
"epoch": 0.7515980377582875,
"grad_norm": 0.361328125,
"learning_rate": 7.435544962414136e-05,
"loss": 1.1946885585784912,
"step": 3160
},
{
"epoch": 0.752073732718894,
"grad_norm": 0.37890625,
"learning_rate": 7.425302626042829e-05,
"loss": 1.2392586469650269,
"step": 3162
},
{
"epoch": 0.7525494276795005,
"grad_norm": 0.369140625,
"learning_rate": 7.41497538353546e-05,
"loss": 1.1681256294250488,
"step": 3164
},
{
"epoch": 0.753025122640107,
"grad_norm": 0.3515625,
"learning_rate": 7.404563490882917e-05,
"loss": 1.1748747825622559,
"step": 3166
},
{
"epoch": 0.7535008176007135,
"grad_norm": 0.369140625,
"learning_rate": 7.394067206174386e-05,
"loss": 1.1887366771697998,
"step": 3168
},
{
"epoch": 0.75397651256132,
"grad_norm": 0.36328125,
"learning_rate": 7.383486789590961e-05,
"loss": 1.1796954870224,
"step": 3170
},
{
"epoch": 0.7544522075219265,
"grad_norm": 0.375,
"learning_rate": 7.372822503399188e-05,
"loss": 1.1664338111877441,
"step": 3172
},
{
"epoch": 0.754927902482533,
"grad_norm": 0.37890625,
"learning_rate": 7.362074611944566e-05,
"loss": 1.235155463218689,
"step": 3174
},
{
"epoch": 0.7554035974431396,
"grad_norm": 0.353515625,
"learning_rate": 7.351243381644998e-05,
"loss": 1.1678838729858398,
"step": 3176
},
{
"epoch": 0.7558792924037461,
"grad_norm": 0.3671875,
"learning_rate": 7.340329080984177e-05,
"loss": 1.2551286220550537,
"step": 3178
},
{
"epoch": 0.7563549873643526,
"grad_norm": 0.361328125,
"learning_rate": 7.329331980504947e-05,
"loss": 1.200148105621338,
"step": 3180
},
{
"epoch": 0.7568306823249591,
"grad_norm": 0.353515625,
"learning_rate": 7.318252352802579e-05,
"loss": 1.255072832107544,
"step": 3182
},
{
"epoch": 0.7573063772855656,
"grad_norm": 0.34375,
"learning_rate": 7.307090472518026e-05,
"loss": 1.1907069683074951,
"step": 3184
},
{
"epoch": 0.7577820722461721,
"grad_norm": 0.3671875,
"learning_rate": 7.295846616331113e-05,
"loss": 1.202185034751892,
"step": 3186
},
{
"epoch": 0.7582577672067786,
"grad_norm": 0.341796875,
"learning_rate": 7.284521062953675e-05,
"loss": 1.169918179512024,
"step": 3188
},
{
"epoch": 0.7587334621673851,
"grad_norm": 0.34765625,
"learning_rate": 7.27311409312265e-05,
"loss": 1.1812635660171509,
"step": 3190
},
{
"epoch": 0.7592091571279916,
"grad_norm": 0.341796875,
"learning_rate": 7.261625989593127e-05,
"loss": 1.184064507484436,
"step": 3192
},
{
"epoch": 0.7596848520885982,
"grad_norm": 0.359375,
"learning_rate": 7.250057037131322e-05,
"loss": 1.1607537269592285,
"step": 3194
},
{
"epoch": 0.7601605470492047,
"grad_norm": 0.359375,
"learning_rate": 7.238407522507533e-05,
"loss": 1.2583791017532349,
"step": 3196
},
{
"epoch": 0.7606362420098112,
"grad_norm": 0.353515625,
"learning_rate": 7.226677734489026e-05,
"loss": 1.2102004289627075,
"step": 3198
},
{
"epoch": 0.7611119369704177,
"grad_norm": 0.357421875,
"learning_rate": 7.214867963832877e-05,
"loss": 1.2008968591690063,
"step": 3200
},
{
"epoch": 0.7615876319310242,
"grad_norm": 0.37890625,
"learning_rate": 7.202978503278766e-05,
"loss": 1.1674326658248901,
"step": 3202
},
{
"epoch": 0.7620633268916307,
"grad_norm": 0.373046875,
"learning_rate": 7.191009647541721e-05,
"loss": 1.168144941329956,
"step": 3204
},
{
"epoch": 0.7625390218522372,
"grad_norm": 0.345703125,
"learning_rate": 7.178961693304809e-05,
"loss": 1.1678907871246338,
"step": 3206
},
{
"epoch": 0.7630147168128437,
"grad_norm": 0.345703125,
"learning_rate": 7.166834939211786e-05,
"loss": 1.1986507177352905,
"step": 3208
},
{
"epoch": 0.7634904117734502,
"grad_norm": 0.32421875,
"learning_rate": 7.154629685859694e-05,
"loss": 1.1866064071655273,
"step": 3210
},
{
"epoch": 0.7639661067340567,
"grad_norm": 0.34375,
"learning_rate": 7.142346235791406e-05,
"loss": 1.1903237104415894,
"step": 3212
},
{
"epoch": 0.7644418016946632,
"grad_norm": 0.365234375,
"learning_rate": 7.129984893488132e-05,
"loss": 1.177189826965332,
"step": 3214
},
{
"epoch": 0.7649174966552698,
"grad_norm": 0.365234375,
"learning_rate": 7.117545965361866e-05,
"loss": 1.1988158226013184,
"step": 3216
},
{
"epoch": 0.7653931916158763,
"grad_norm": 0.34375,
"learning_rate": 7.105029759747794e-05,
"loss": 1.1733431816101074,
"step": 3218
},
{
"epoch": 0.7658688865764828,
"grad_norm": 0.3671875,
"learning_rate": 7.092436586896653e-05,
"loss": 1.287745714187622,
"step": 3220
},
{
"epoch": 0.7663445815370893,
"grad_norm": 0.359375,
"learning_rate": 7.079766758967032e-05,
"loss": 1.1643383502960205,
"step": 3222
},
{
"epoch": 0.7668202764976959,
"grad_norm": 0.34375,
"learning_rate": 7.067020590017648e-05,
"loss": 1.1338480710983276,
"step": 3224
},
{
"epoch": 0.7672959714583024,
"grad_norm": 0.359375,
"learning_rate": 7.054198395999546e-05,
"loss": 1.1828383207321167,
"step": 3226
},
{
"epoch": 0.7677716664189089,
"grad_norm": 0.333984375,
"learning_rate": 7.04130049474828e-05,
"loss": 1.215213656425476,
"step": 3228
},
{
"epoch": 0.7682473613795154,
"grad_norm": 0.341796875,
"learning_rate": 7.028327205976026e-05,
"loss": 1.2250659465789795,
"step": 3230
},
{
"epoch": 0.768723056340122,
"grad_norm": 0.3515625,
"learning_rate": 7.01527885126366e-05,
"loss": 1.2371430397033691,
"step": 3232
},
{
"epoch": 0.7691987513007285,
"grad_norm": 0.349609375,
"learning_rate": 7.002155754052789e-05,
"loss": 1.202965497970581,
"step": 3234
},
{
"epoch": 0.769674446261335,
"grad_norm": 0.3515625,
"learning_rate": 6.988958239637727e-05,
"loss": 1.1786177158355713,
"step": 3236
},
{
"epoch": 0.7701501412219415,
"grad_norm": 0.33203125,
"learning_rate": 6.975686635157441e-05,
"loss": 1.1610124111175537,
"step": 3238
},
{
"epoch": 0.770625836182548,
"grad_norm": 0.33984375,
"learning_rate": 6.962341269587436e-05,
"loss": 1.2252613306045532,
"step": 3240
},
{
"epoch": 0.7711015311431545,
"grad_norm": 0.365234375,
"learning_rate": 6.948922473731594e-05,
"loss": 1.2469508647918701,
"step": 3242
},
{
"epoch": 0.771577226103761,
"grad_norm": 0.35546875,
"learning_rate": 6.935430580213993e-05,
"loss": 1.1859698295593262,
"step": 3244
},
{
"epoch": 0.7720529210643675,
"grad_norm": 0.373046875,
"learning_rate": 6.92186592347064e-05,
"loss": 1.21319580078125,
"step": 3246
},
{
"epoch": 0.772528616024974,
"grad_norm": 0.359375,
"learning_rate": 6.908228839741198e-05,
"loss": 1.145960807800293,
"step": 3248
},
{
"epoch": 0.7730043109855805,
"grad_norm": 0.333984375,
"learning_rate": 6.894519667060638e-05,
"loss": 1.2456450462341309,
"step": 3250
},
{
"epoch": 0.773480005946187,
"grad_norm": 0.359375,
"learning_rate": 6.880738745250872e-05,
"loss": 1.186368703842163,
"step": 3252
},
{
"epoch": 0.7739557009067936,
"grad_norm": 0.359375,
"learning_rate": 6.866886415912325e-05,
"loss": 1.185645580291748,
"step": 3254
},
{
"epoch": 0.7744313958674001,
"grad_norm": 0.34765625,
"learning_rate": 6.852963022415458e-05,
"loss": 1.2109339237213135,
"step": 3256
},
{
"epoch": 0.7749070908280066,
"grad_norm": 0.34375,
"learning_rate": 6.838968909892272e-05,
"loss": 1.2080646753311157,
"step": 3258
},
{
"epoch": 0.7753827857886131,
"grad_norm": 0.35546875,
"learning_rate": 6.824904425227746e-05,
"loss": 1.23634934425354,
"step": 3260
},
{
"epoch": 0.7758584807492196,
"grad_norm": 0.33984375,
"learning_rate": 6.810769917051233e-05,
"loss": 1.1664297580718994,
"step": 3262
},
{
"epoch": 0.7763341757098261,
"grad_norm": 0.341796875,
"learning_rate": 6.796565735727829e-05,
"loss": 1.176924467086792,
"step": 3264
},
{
"epoch": 0.7768098706704326,
"grad_norm": 0.341796875,
"learning_rate": 6.782292233349676e-05,
"loss": 1.2261974811553955,
"step": 3266
},
{
"epoch": 0.7772855656310391,
"grad_norm": 0.33203125,
"learning_rate": 6.767949763727251e-05,
"loss": 1.2133498191833496,
"step": 3268
},
{
"epoch": 0.7777612605916456,
"grad_norm": 0.349609375,
"learning_rate": 6.753538682380573e-05,
"loss": 1.2278404235839844,
"step": 3270
},
{
"epoch": 0.7782369555522521,
"grad_norm": 0.34375,
"learning_rate": 6.739059346530412e-05,
"loss": 1.176490306854248,
"step": 3272
},
{
"epoch": 0.7787126505128587,
"grad_norm": 0.33203125,
"learning_rate": 6.724512115089426e-05,
"loss": 1.223867654800415,
"step": 3274
},
{
"epoch": 0.7791883454734652,
"grad_norm": 0.337890625,
"learning_rate": 6.709897348653258e-05,
"loss": 1.1769992113113403,
"step": 3276
},
{
"epoch": 0.7796640404340717,
"grad_norm": 0.359375,
"learning_rate": 6.695215409491605e-05,
"loss": 1.1771578788757324,
"step": 3278
},
{
"epoch": 0.7801397353946782,
"grad_norm": 0.345703125,
"learning_rate": 6.68046666153924e-05,
"loss": 1.2103866338729858,
"step": 3280
},
{
"epoch": 0.7806154303552847,
"grad_norm": 0.33203125,
"learning_rate": 6.66565147038698e-05,
"loss": 1.1617302894592285,
"step": 3282
},
{
"epoch": 0.7810911253158912,
"grad_norm": 0.333984375,
"learning_rate": 6.65077020327264e-05,
"loss": 1.2142266035079956,
"step": 3284
},
{
"epoch": 0.7815668202764977,
"grad_norm": 0.359375,
"learning_rate": 6.635823229071915e-05,
"loss": 1.2032921314239502,
"step": 3286
},
{
"epoch": 0.7820425152371042,
"grad_norm": 0.361328125,
"learning_rate": 6.620810918289241e-05,
"loss": 1.1510361433029175,
"step": 3288
},
{
"epoch": 0.7825182101977107,
"grad_norm": 0.3515625,
"learning_rate": 6.605733643048615e-05,
"loss": 1.209721326828003,
"step": 3290
},
{
"epoch": 0.7829939051583172,
"grad_norm": 0.359375,
"learning_rate": 6.590591777084368e-05,
"loss": 1.1635715961456299,
"step": 3292
},
{
"epoch": 0.7834696001189237,
"grad_norm": 0.33203125,
"learning_rate": 6.575385695731902e-05,
"loss": 1.1776684522628784,
"step": 3294
},
{
"epoch": 0.7839452950795303,
"grad_norm": 0.32421875,
"learning_rate": 6.560115775918379e-05,
"loss": 1.1247327327728271,
"step": 3296
},
{
"epoch": 0.7844209900401368,
"grad_norm": 0.33984375,
"learning_rate": 6.544782396153392e-05,
"loss": 1.270646572113037,
"step": 3298
},
{
"epoch": 0.7848966850007433,
"grad_norm": 0.345703125,
"learning_rate": 6.529385936519568e-05,
"loss": 1.1621270179748535,
"step": 3300
},
{
"epoch": 0.7853723799613498,
"grad_norm": 0.341796875,
"learning_rate": 6.513926778663156e-05,
"loss": 1.1540793180465698,
"step": 3302
},
{
"epoch": 0.7858480749219563,
"grad_norm": 0.337890625,
"learning_rate": 6.498405305784562e-05,
"loss": 1.1824688911437988,
"step": 3304
},
{
"epoch": 0.7863237698825628,
"grad_norm": 0.34375,
"learning_rate": 6.482821902628857e-05,
"loss": 1.182361125946045,
"step": 3306
},
{
"epoch": 0.7867994648431693,
"grad_norm": 0.353515625,
"learning_rate": 6.467176955476224e-05,
"loss": 1.2419183254241943,
"step": 3308
},
{
"epoch": 0.7872751598037758,
"grad_norm": 0.326171875,
"learning_rate": 6.451470852132409e-05,
"loss": 1.198357105255127,
"step": 3310
},
{
"epoch": 0.7877508547643823,
"grad_norm": 0.36328125,
"learning_rate": 6.435703981919077e-05,
"loss": 1.1796178817749023,
"step": 3312
},
{
"epoch": 0.7882265497249888,
"grad_norm": 0.3359375,
"learning_rate": 6.419876735664188e-05,
"loss": 1.1940312385559082,
"step": 3314
},
{
"epoch": 0.7887022446855954,
"grad_norm": 0.3359375,
"learning_rate": 6.403989505692296e-05,
"loss": 1.1873643398284912,
"step": 3316
},
{
"epoch": 0.7891779396462019,
"grad_norm": 0.341796875,
"learning_rate": 6.388042685814827e-05,
"loss": 1.1884150505065918,
"step": 3318
},
{
"epoch": 0.7896536346068084,
"grad_norm": 0.345703125,
"learning_rate": 6.372036671320315e-05,
"loss": 1.1984798908233643,
"step": 3320
},
{
"epoch": 0.7901293295674149,
"grad_norm": 0.326171875,
"learning_rate": 6.355971858964607e-05,
"loss": 1.191229224205017,
"step": 3322
},
{
"epoch": 0.7906050245280214,
"grad_norm": 0.34375,
"learning_rate": 6.339848646961029e-05,
"loss": 1.1361331939697266,
"step": 3324
},
{
"epoch": 0.7910807194886279,
"grad_norm": 0.349609375,
"learning_rate": 6.323667434970508e-05,
"loss": 1.2309892177581787,
"step": 3326
},
{
"epoch": 0.7915564144492344,
"grad_norm": 0.349609375,
"learning_rate": 6.307428624091674e-05,
"loss": 1.1435422897338867,
"step": 3328
},
{
"epoch": 0.7920321094098409,
"grad_norm": 0.373046875,
"learning_rate": 6.291132616850912e-05,
"loss": 1.181205153465271,
"step": 3330
},
{
"epoch": 0.7925078043704474,
"grad_norm": 0.37109375,
"learning_rate": 6.274779817192389e-05,
"loss": 1.1939911842346191,
"step": 3332
},
{
"epoch": 0.7929834993310539,
"grad_norm": 0.341796875,
"learning_rate": 6.258370630468032e-05,
"loss": 1.2611286640167236,
"step": 3334
},
{
"epoch": 0.7934591942916605,
"grad_norm": 0.33984375,
"learning_rate": 6.241905463427493e-05,
"loss": 1.1541907787322998,
"step": 3336
},
{
"epoch": 0.793934889252267,
"grad_norm": 0.33984375,
"learning_rate": 6.225384724208056e-05,
"loss": 1.2033154964447021,
"step": 3338
},
{
"epoch": 0.7944105842128735,
"grad_norm": 0.349609375,
"learning_rate": 6.208808822324524e-05,
"loss": 1.191408634185791,
"step": 3340
},
{
"epoch": 0.79488627917348,
"grad_norm": 0.36328125,
"learning_rate": 6.192178168659069e-05,
"loss": 1.1633325815200806,
"step": 3342
},
{
"epoch": 0.7953619741340865,
"grad_norm": 0.3359375,
"learning_rate": 6.175493175451045e-05,
"loss": 1.130890965461731,
"step": 3344
},
{
"epoch": 0.795837669094693,
"grad_norm": 0.34765625,
"learning_rate": 6.15875425628677e-05,
"loss": 1.2087476253509521,
"step": 3346
},
{
"epoch": 0.7963133640552995,
"grad_norm": 0.34765625,
"learning_rate": 6.141961826089276e-05,
"loss": 1.2083730697631836,
"step": 3348
},
{
"epoch": 0.796789059015906,
"grad_norm": 0.341796875,
"learning_rate": 6.125116301108021e-05,
"loss": 1.1795260906219482,
"step": 3350
},
{
"epoch": 0.7972647539765125,
"grad_norm": 0.37109375,
"learning_rate": 6.108218098908573e-05,
"loss": 1.160348892211914,
"step": 3352
},
{
"epoch": 0.797740448937119,
"grad_norm": 0.404296875,
"learning_rate": 6.0912676383622595e-05,
"loss": 1.2218070030212402,
"step": 3354
},
{
"epoch": 0.7982161438977255,
"grad_norm": 0.380859375,
"learning_rate": 6.074265339635782e-05,
"loss": 1.2201728820800781,
"step": 3356
},
{
"epoch": 0.7986918388583321,
"grad_norm": 0.345703125,
"learning_rate": 6.057211624180803e-05,
"loss": 1.2353184223175049,
"step": 3358
},
{
"epoch": 0.7991675338189386,
"grad_norm": 0.330078125,
"learning_rate": 6.0401069147235016e-05,
"loss": 1.199735403060913,
"step": 3360
},
{
"epoch": 0.7996432287795451,
"grad_norm": 0.33984375,
"learning_rate": 6.02295163525409e-05,
"loss": 1.1990015506744385,
"step": 3362
},
{
"epoch": 0.8001189237401516,
"grad_norm": 0.36328125,
"learning_rate": 6.0057462110163054e-05,
"loss": 1.2302005290985107,
"step": 3364
},
{
"epoch": 0.8005946187007581,
"grad_norm": 0.345703125,
"learning_rate": 5.9884910684968704e-05,
"loss": 1.1892058849334717,
"step": 3366
},
{
"epoch": 0.8010703136613646,
"grad_norm": 0.384765625,
"learning_rate": 5.9711866354149205e-05,
"loss": 1.1621990203857422,
"step": 3368
},
{
"epoch": 0.8015460086219711,
"grad_norm": 0.3515625,
"learning_rate": 5.953833340711404e-05,
"loss": 1.191482663154602,
"step": 3370
},
{
"epoch": 0.8020217035825776,
"grad_norm": 0.337890625,
"learning_rate": 5.9364316145384424e-05,
"loss": 1.2096929550170898,
"step": 3372
},
{
"epoch": 0.8024973985431841,
"grad_norm": 0.34375,
"learning_rate": 5.918981888248679e-05,
"loss": 1.1668099164962769,
"step": 3374
},
{
"epoch": 0.8029730935037906,
"grad_norm": 0.318359375,
"learning_rate": 5.901484594384574e-05,
"loss": 1.2103668451309204,
"step": 3376
},
{
"epoch": 0.8034487884643972,
"grad_norm": 0.349609375,
"learning_rate": 5.883940166667692e-05,
"loss": 1.208052396774292,
"step": 3378
},
{
"epoch": 0.8039244834250037,
"grad_norm": 0.349609375,
"learning_rate": 5.866349039987949e-05,
"loss": 1.1915090084075928,
"step": 3380
},
{
"epoch": 0.8044001783856102,
"grad_norm": 0.3359375,
"learning_rate": 5.8487116503928294e-05,
"loss": 1.1815118789672852,
"step": 3382
},
{
"epoch": 0.8048758733462167,
"grad_norm": 0.337890625,
"learning_rate": 5.8310284350765796e-05,
"loss": 1.1728663444519043,
"step": 3384
},
{
"epoch": 0.8053515683068232,
"grad_norm": 0.330078125,
"learning_rate": 5.813299832369371e-05,
"loss": 1.1404354572296143,
"step": 3386
},
{
"epoch": 0.8058272632674297,
"grad_norm": 0.3515625,
"learning_rate": 5.7955262817264333e-05,
"loss": 1.2187399864196777,
"step": 3388
},
{
"epoch": 0.8063029582280363,
"grad_norm": 0.35546875,
"learning_rate": 5.777708223717162e-05,
"loss": 1.1979572772979736,
"step": 3390
},
{
"epoch": 0.8067786531886428,
"grad_norm": 0.333984375,
"learning_rate": 5.7598461000142e-05,
"loss": 1.181311011314392,
"step": 3392
},
{
"epoch": 0.8072543481492493,
"grad_norm": 0.333984375,
"learning_rate": 5.7419403533824825e-05,
"loss": 1.1990816593170166,
"step": 3394
},
{
"epoch": 0.8077300431098559,
"grad_norm": 0.333984375,
"learning_rate": 5.7239914276682735e-05,
"loss": 1.176539659500122,
"step": 3396
},
{
"epoch": 0.8082057380704624,
"grad_norm": 0.326171875,
"learning_rate": 5.7059997677881495e-05,
"loss": 1.1944094896316528,
"step": 3398
},
{
"epoch": 0.8086814330310689,
"grad_norm": 0.322265625,
"learning_rate": 5.687965819717982e-05,
"loss": 1.1964213848114014,
"step": 3400
},
{
"epoch": 0.8091571279916754,
"grad_norm": 0.3359375,
"learning_rate": 5.66989003048188e-05,
"loss": 1.1884610652923584,
"step": 3402
},
{
"epoch": 0.8096328229522819,
"grad_norm": 0.337890625,
"learning_rate": 5.651772848141104e-05,
"loss": 1.2424553632736206,
"step": 3404
},
{
"epoch": 0.8101085179128884,
"grad_norm": 0.32421875,
"learning_rate": 5.633614721782968e-05,
"loss": 1.1997463703155518,
"step": 3406
},
{
"epoch": 0.8105842128734949,
"grad_norm": 0.3203125,
"learning_rate": 5.6154161015096985e-05,
"loss": 1.1804287433624268,
"step": 3408
},
{
"epoch": 0.8110599078341014,
"grad_norm": 0.333984375,
"learning_rate": 5.5971774384272875e-05,
"loss": 1.2427394390106201,
"step": 3410
},
{
"epoch": 0.8115356027947079,
"grad_norm": 0.326171875,
"learning_rate": 5.5788991846343e-05,
"loss": 1.2132554054260254,
"step": 3412
},
{
"epoch": 0.8120112977553144,
"grad_norm": 0.337890625,
"learning_rate": 5.5605817932106757e-05,
"loss": 1.2068378925323486,
"step": 3414
},
{
"epoch": 0.812486992715921,
"grad_norm": 0.3203125,
"learning_rate": 5.542225718206494e-05,
"loss": 1.2010424137115479,
"step": 3416
},
{
"epoch": 0.8129626876765275,
"grad_norm": 0.32421875,
"learning_rate": 5.523831414630719e-05,
"loss": 1.1713800430297852,
"step": 3418
},
{
"epoch": 0.813438382637134,
"grad_norm": 0.330078125,
"learning_rate": 5.505399338439922e-05,
"loss": 1.1795239448547363,
"step": 3420
},
{
"epoch": 0.8139140775977405,
"grad_norm": 0.337890625,
"learning_rate": 5.48692994652698e-05,
"loss": 1.2366602420806885,
"step": 3422
},
{
"epoch": 0.814389772558347,
"grad_norm": 0.3515625,
"learning_rate": 5.4684236967097475e-05,
"loss": 1.178973913192749,
"step": 3424
},
{
"epoch": 0.8148654675189535,
"grad_norm": 0.341796875,
"learning_rate": 5.449881047719713e-05,
"loss": 1.186044454574585,
"step": 3426
},
{
"epoch": 0.81534116247956,
"grad_norm": 0.3359375,
"learning_rate": 5.431302459190621e-05,
"loss": 1.2068400382995605,
"step": 3428
},
{
"epoch": 0.8158168574401665,
"grad_norm": 0.32421875,
"learning_rate": 5.412688391647084e-05,
"loss": 1.155308723449707,
"step": 3430
},
{
"epoch": 0.816292552400773,
"grad_norm": 0.3203125,
"learning_rate": 5.394039306493167e-05,
"loss": 1.1697208881378174,
"step": 3432
},
{
"epoch": 0.8167682473613795,
"grad_norm": 0.337890625,
"learning_rate": 5.3753556660009475e-05,
"loss": 1.160557746887207,
"step": 3434
},
{
"epoch": 0.817243942321986,
"grad_norm": 0.34375,
"learning_rate": 5.356637933299057e-05,
"loss": 1.1798973083496094,
"step": 3436
},
{
"epoch": 0.8177196372825926,
"grad_norm": 0.330078125,
"learning_rate": 5.337886572361205e-05,
"loss": 1.1533775329589844,
"step": 3438
},
{
"epoch": 0.8181953322431991,
"grad_norm": 0.322265625,
"learning_rate": 5.319102047994672e-05,
"loss": 1.1831254959106445,
"step": 3440
},
{
"epoch": 0.8186710272038056,
"grad_norm": 0.322265625,
"learning_rate": 5.300284825828793e-05,
"loss": 1.1955242156982422,
"step": 3442
},
{
"epoch": 0.8191467221644121,
"grad_norm": 0.326171875,
"learning_rate": 5.2814353723034126e-05,
"loss": 1.188542127609253,
"step": 3444
},
{
"epoch": 0.8196224171250186,
"grad_norm": 0.326171875,
"learning_rate": 5.262554154657324e-05,
"loss": 1.2146074771881104,
"step": 3446
},
{
"epoch": 0.8200981120856251,
"grad_norm": 0.3046875,
"learning_rate": 5.2436416409166884e-05,
"loss": 1.1553959846496582,
"step": 3448
},
{
"epoch": 0.8205738070462316,
"grad_norm": 0.3359375,
"learning_rate": 5.2246982998834276e-05,
"loss": 1.1827256679534912,
"step": 3450
},
{
"epoch": 0.8210495020068381,
"grad_norm": 0.3125,
"learning_rate": 5.205724601123614e-05,
"loss": 1.1618741750717163,
"step": 3452
},
{
"epoch": 0.8215251969674446,
"grad_norm": 0.337890625,
"learning_rate": 5.186721014955822e-05,
"loss": 1.2132587432861328,
"step": 3454
},
{
"epoch": 0.8220008919280511,
"grad_norm": 0.33203125,
"learning_rate": 5.167688012439472e-05,
"loss": 1.1444640159606934,
"step": 3456
},
{
"epoch": 0.8224765868886577,
"grad_norm": 0.328125,
"learning_rate": 5.1486260653631554e-05,
"loss": 1.1991591453552246,
"step": 3458
},
{
"epoch": 0.8229522818492642,
"grad_norm": 0.33203125,
"learning_rate": 5.129535646232941e-05,
"loss": 1.1526660919189453,
"step": 3460
},
{
"epoch": 0.8234279768098707,
"grad_norm": 0.3359375,
"learning_rate": 5.110417228260657e-05,
"loss": 1.1603717803955078,
"step": 3462
},
{
"epoch": 0.8239036717704772,
"grad_norm": 0.333984375,
"learning_rate": 5.091271285352167e-05,
"loss": 1.154017448425293,
"step": 3464
},
{
"epoch": 0.8243793667310837,
"grad_norm": 0.3359375,
"learning_rate": 5.07209829209562e-05,
"loss": 1.1853911876678467,
"step": 3466
},
{
"epoch": 0.8248550616916902,
"grad_norm": 0.357421875,
"learning_rate": 5.0528987237496866e-05,
"loss": 1.2097725868225098,
"step": 3468
},
{
"epoch": 0.8253307566522967,
"grad_norm": 0.3515625,
"learning_rate": 5.033673056231781e-05,
"loss": 1.200005054473877,
"step": 3470
},
{
"epoch": 0.8258064516129032,
"grad_norm": 0.326171875,
"learning_rate": 5.0144217661062574e-05,
"loss": 1.2073945999145508,
"step": 3472
},
{
"epoch": 0.8262821465735097,
"grad_norm": 0.32421875,
"learning_rate": 4.9951453305726055e-05,
"loss": 1.1431573629379272,
"step": 3474
},
{
"epoch": 0.8267578415341162,
"grad_norm": 0.33984375,
"learning_rate": 4.975844227453615e-05,
"loss": 1.2093596458435059,
"step": 3476
},
{
"epoch": 0.8272335364947228,
"grad_norm": 0.328125,
"learning_rate": 4.9565189351835336e-05,
"loss": 1.1971302032470703,
"step": 3478
},
{
"epoch": 0.8277092314553293,
"grad_norm": 0.326171875,
"learning_rate": 4.93716993279621e-05,
"loss": 1.1951606273651123,
"step": 3480
},
{
"epoch": 0.8281849264159358,
"grad_norm": 0.3359375,
"learning_rate": 4.917797699913215e-05,
"loss": 1.1961910724639893,
"step": 3482
},
{
"epoch": 0.8286606213765423,
"grad_norm": 0.33984375,
"learning_rate": 4.8984027167319566e-05,
"loss": 1.1884233951568604,
"step": 3484
},
{
"epoch": 0.8291363163371488,
"grad_norm": 0.3203125,
"learning_rate": 4.8789854640137736e-05,
"loss": 1.1898481845855713,
"step": 3486
},
{
"epoch": 0.8296120112977553,
"grad_norm": 0.314453125,
"learning_rate": 4.859546423072023e-05,
"loss": 1.1624311208724976,
"step": 3488
},
{
"epoch": 0.8300877062583618,
"grad_norm": 0.3359375,
"learning_rate": 4.840086075760146e-05,
"loss": 1.1634624004364014,
"step": 3490
},
{
"epoch": 0.8305634012189683,
"grad_norm": 0.306640625,
"learning_rate": 4.820604904459722e-05,
"loss": 1.1898113489151,
"step": 3492
},
{
"epoch": 0.8310390961795748,
"grad_norm": 0.318359375,
"learning_rate": 4.801103392068516e-05,
"loss": 1.2224345207214355,
"step": 3494
},
{
"epoch": 0.8315147911401813,
"grad_norm": 0.30859375,
"learning_rate": 4.781582021988507e-05,
"loss": 1.1299514770507812,
"step": 3496
},
{
"epoch": 0.8319904861007879,
"grad_norm": 0.322265625,
"learning_rate": 4.762041278113902e-05,
"loss": 1.2070683240890503,
"step": 3498
},
{
"epoch": 0.8324661810613944,
"grad_norm": 0.32421875,
"learning_rate": 4.742481644819148e-05,
"loss": 1.1668651103973389,
"step": 3500
},
{
"epoch": 0.8329418760220009,
"grad_norm": 0.314453125,
"learning_rate": 4.7229036069469193e-05,
"loss": 1.1788852214813232,
"step": 3502
},
{
"epoch": 0.8334175709826074,
"grad_norm": 0.310546875,
"learning_rate": 4.703307649796099e-05,
"loss": 1.128293752670288,
"step": 3504
},
{
"epoch": 0.8338932659432139,
"grad_norm": 0.33203125,
"learning_rate": 4.683694259109757e-05,
"loss": 1.1507880687713623,
"step": 3506
},
{
"epoch": 0.8343689609038204,
"grad_norm": 0.314453125,
"learning_rate": 4.664063921063101e-05,
"loss": 1.1574411392211914,
"step": 3508
},
{
"epoch": 0.8348446558644269,
"grad_norm": 0.330078125,
"learning_rate": 4.644417122251428e-05,
"loss": 1.1994435787200928,
"step": 3510
},
{
"epoch": 0.8353203508250334,
"grad_norm": 0.322265625,
"learning_rate": 4.6247543496780675e-05,
"loss": 1.1481845378875732,
"step": 3512
},
{
"epoch": 0.8357960457856399,
"grad_norm": 0.31640625,
"learning_rate": 4.605076090742299e-05,
"loss": 1.184557557106018,
"step": 3514
},
{
"epoch": 0.8362717407462464,
"grad_norm": 0.310546875,
"learning_rate": 4.585382833227281e-05,
"loss": 1.1902873516082764,
"step": 3516
},
{
"epoch": 0.836747435706853,
"grad_norm": 0.32421875,
"learning_rate": 4.565675065287956e-05,
"loss": 1.2748725414276123,
"step": 3518
},
{
"epoch": 0.8372231306674595,
"grad_norm": 0.322265625,
"learning_rate": 4.545953275438947e-05,
"loss": 1.1273387670516968,
"step": 3520
},
{
"epoch": 0.837698825628066,
"grad_norm": 0.31640625,
"learning_rate": 4.526217952542456e-05,
"loss": 1.1241960525512695,
"step": 3522
},
{
"epoch": 0.8381745205886725,
"grad_norm": 0.326171875,
"learning_rate": 4.506469585796133e-05,
"loss": 1.1555461883544922,
"step": 3524
},
{
"epoch": 0.838650215549279,
"grad_norm": 0.3359375,
"learning_rate": 4.486708664720965e-05,
"loss": 1.2142927646636963,
"step": 3526
},
{
"epoch": 0.8391259105098855,
"grad_norm": 0.326171875,
"learning_rate": 4.466935679149131e-05,
"loss": 1.1009758710861206,
"step": 3528
},
{
"epoch": 0.839601605470492,
"grad_norm": 0.3125,
"learning_rate": 4.4471511192118666e-05,
"loss": 1.1688785552978516,
"step": 3530
},
{
"epoch": 0.8400773004310985,
"grad_norm": 0.33203125,
"learning_rate": 4.427355475327309e-05,
"loss": 1.1534974575042725,
"step": 3532
},
{
"epoch": 0.840552995391705,
"grad_norm": 0.30859375,
"learning_rate": 4.407549238188346e-05,
"loss": 1.150222659111023,
"step": 3534
},
{
"epoch": 0.8410286903523115,
"grad_norm": 0.314453125,
"learning_rate": 4.387732898750448e-05,
"loss": 1.207751750946045,
"step": 3536
},
{
"epoch": 0.841504385312918,
"grad_norm": 0.318359375,
"learning_rate": 4.367906948219502e-05,
"loss": 1.1927155256271362,
"step": 3538
},
{
"epoch": 0.8419800802735246,
"grad_norm": 0.33984375,
"learning_rate": 4.348071878039633e-05,
"loss": 1.1655819416046143,
"step": 3540
},
{
"epoch": 0.8424557752341311,
"grad_norm": 0.345703125,
"learning_rate": 4.3282281798810256e-05,
"loss": 1.1812100410461426,
"step": 3542
},
{
"epoch": 0.8429314701947376,
"grad_norm": 0.3359375,
"learning_rate": 4.308376345627728e-05,
"loss": 1.2032802104949951,
"step": 3544
},
{
"epoch": 0.8434071651553441,
"grad_norm": 0.322265625,
"learning_rate": 4.288516867365474e-05,
"loss": 1.1608192920684814,
"step": 3546
},
{
"epoch": 0.8438828601159506,
"grad_norm": 0.322265625,
"learning_rate": 4.2686502373694684e-05,
"loss": 1.2154037952423096,
"step": 3548
},
{
"epoch": 0.8443585550765571,
"grad_norm": 0.333984375,
"learning_rate": 4.248776948092197e-05,
"loss": 1.152782917022705,
"step": 3550
},
{
"epoch": 0.8448342500371636,
"grad_norm": 0.318359375,
"learning_rate": 4.228897492151213e-05,
"loss": 1.176882028579712,
"step": 3552
},
{
"epoch": 0.8453099449977702,
"grad_norm": 0.31640625,
"learning_rate": 4.209012362316934e-05,
"loss": 1.1599602699279785,
"step": 3554
},
{
"epoch": 0.8457856399583767,
"grad_norm": 0.318359375,
"learning_rate": 4.1891220515004114e-05,
"loss": 1.2112061977386475,
"step": 3556
},
{
"epoch": 0.8462613349189833,
"grad_norm": 0.322265625,
"learning_rate": 4.169227052741134e-05,
"loss": 1.1296908855438232,
"step": 3558
},
{
"epoch": 0.8467370298795898,
"grad_norm": 0.318359375,
"learning_rate": 4.1493278591947855e-05,
"loss": 1.1762603521347046,
"step": 3560
},
{
"epoch": 0.8472127248401963,
"grad_norm": 0.3125,
"learning_rate": 4.1294249641210354e-05,
"loss": 1.2208728790283203,
"step": 3562
},
{
"epoch": 0.8476884198008028,
"grad_norm": 0.326171875,
"learning_rate": 4.109518860871305e-05,
"loss": 1.2221901416778564,
"step": 3564
},
{
"epoch": 0.8481641147614093,
"grad_norm": 0.32421875,
"learning_rate": 4.089610042876537e-05,
"loss": 1.1988012790679932,
"step": 3566
},
{
"epoch": 0.8486398097220158,
"grad_norm": 0.310546875,
"learning_rate": 4.069699003634972e-05,
"loss": 1.1596108675003052,
"step": 3568
},
{
"epoch": 0.8491155046826223,
"grad_norm": 0.3125,
"learning_rate": 4.0497862366999034e-05,
"loss": 1.1585445404052734,
"step": 3570
},
{
"epoch": 0.8495911996432288,
"grad_norm": 0.3125,
"learning_rate": 4.0298722356674584e-05,
"loss": 1.1766672134399414,
"step": 3572
},
{
"epoch": 0.8500668946038353,
"grad_norm": 0.32421875,
"learning_rate": 4.0099574941643506e-05,
"loss": 1.1228039264678955,
"step": 3574
},
{
"epoch": 0.8505425895644418,
"grad_norm": 0.31640625,
"learning_rate": 3.990042505835651e-05,
"loss": 1.1494994163513184,
"step": 3576
},
{
"epoch": 0.8510182845250484,
"grad_norm": 0.328125,
"learning_rate": 3.9701277643325416e-05,
"loss": 1.202513575553894,
"step": 3578
},
{
"epoch": 0.8514939794856549,
"grad_norm": 0.314453125,
"learning_rate": 3.950213763300097e-05,
"loss": 1.179110050201416,
"step": 3580
},
{
"epoch": 0.8519696744462614,
"grad_norm": 0.328125,
"learning_rate": 3.9303009963650306e-05,
"loss": 1.1852927207946777,
"step": 3582
},
{
"epoch": 0.8524453694068679,
"grad_norm": 0.326171875,
"learning_rate": 3.910389957123464e-05,
"loss": 1.1301989555358887,
"step": 3584
},
{
"epoch": 0.8529210643674744,
"grad_norm": 0.32421875,
"learning_rate": 3.890481139128696e-05,
"loss": 1.2232120037078857,
"step": 3586
},
{
"epoch": 0.8533967593280809,
"grad_norm": 0.310546875,
"learning_rate": 3.8705750358789646e-05,
"loss": 1.1268978118896484,
"step": 3588
},
{
"epoch": 0.8538724542886874,
"grad_norm": 0.32421875,
"learning_rate": 3.850672140805216e-05,
"loss": 1.2016334533691406,
"step": 3590
},
{
"epoch": 0.8543481492492939,
"grad_norm": 0.310546875,
"learning_rate": 3.830772947258869e-05,
"loss": 1.2152290344238281,
"step": 3592
},
{
"epoch": 0.8548238442099004,
"grad_norm": 0.32421875,
"learning_rate": 3.810877948499589e-05,
"loss": 1.209730625152588,
"step": 3594
},
{
"epoch": 0.8552995391705069,
"grad_norm": 0.314453125,
"learning_rate": 3.790987637683069e-05,
"loss": 1.1957197189331055,
"step": 3596
},
{
"epoch": 0.8557752341311134,
"grad_norm": 0.306640625,
"learning_rate": 3.7711025078487876e-05,
"loss": 1.1268858909606934,
"step": 3598
},
{
"epoch": 0.85625092909172,
"grad_norm": 0.310546875,
"learning_rate": 3.751223051907805e-05,
"loss": 1.2362475395202637,
"step": 3600
},
{
"epoch": 0.8567266240523265,
"grad_norm": 0.30859375,
"learning_rate": 3.731349762630534e-05,
"loss": 1.172964096069336,
"step": 3602
},
{
"epoch": 0.857202319012933,
"grad_norm": 0.310546875,
"learning_rate": 3.711483132634527e-05,
"loss": 1.2133592367172241,
"step": 3604
},
{
"epoch": 0.8576780139735395,
"grad_norm": 0.30078125,
"learning_rate": 3.691623654372272e-05,
"loss": 1.1895489692687988,
"step": 3606
},
{
"epoch": 0.858153708934146,
"grad_norm": 0.31640625,
"learning_rate": 3.671771820118975e-05,
"loss": 1.1736524105072021,
"step": 3608
},
{
"epoch": 0.8586294038947525,
"grad_norm": 0.306640625,
"learning_rate": 3.6519281219603675e-05,
"loss": 1.1844290494918823,
"step": 3610
},
{
"epoch": 0.859105098855359,
"grad_norm": 0.306640625,
"learning_rate": 3.632093051780498e-05,
"loss": 1.1735870838165283,
"step": 3612
},
{
"epoch": 0.8595807938159655,
"grad_norm": 0.30078125,
"learning_rate": 3.6122671012495524e-05,
"loss": 1.1634467840194702,
"step": 3614
},
{
"epoch": 0.860056488776572,
"grad_norm": 0.310546875,
"learning_rate": 3.592450761811656e-05,
"loss": 1.178370714187622,
"step": 3616
},
{
"epoch": 0.8605321837371785,
"grad_norm": 0.310546875,
"learning_rate": 3.5726445246726915e-05,
"loss": 1.153395414352417,
"step": 3618
},
{
"epoch": 0.8610078786977851,
"grad_norm": 0.298828125,
"learning_rate": 3.5528488807881354e-05,
"loss": 1.1781080961227417,
"step": 3620
},
{
"epoch": 0.8614835736583916,
"grad_norm": 0.330078125,
"learning_rate": 3.53306432085087e-05,
"loss": 1.1583459377288818,
"step": 3622
},
{
"epoch": 0.8619592686189981,
"grad_norm": 0.314453125,
"learning_rate": 3.513291335279036e-05,
"loss": 1.2509000301361084,
"step": 3624
},
{
"epoch": 0.8624349635796046,
"grad_norm": 0.314453125,
"learning_rate": 3.4935304142038686e-05,
"loss": 1.1476457118988037,
"step": 3626
},
{
"epoch": 0.8629106585402111,
"grad_norm": 0.3046875,
"learning_rate": 3.4737820474575456e-05,
"loss": 1.1432411670684814,
"step": 3628
},
{
"epoch": 0.8633863535008176,
"grad_norm": 0.314453125,
"learning_rate": 3.4540467245610534e-05,
"loss": 1.1552605628967285,
"step": 3630
},
{
"epoch": 0.8638620484614241,
"grad_norm": 0.3203125,
"learning_rate": 3.4343249347120445e-05,
"loss": 1.2122418880462646,
"step": 3632
},
{
"epoch": 0.8643377434220306,
"grad_norm": 0.302734375,
"learning_rate": 3.41461716677272e-05,
"loss": 1.1323740482330322,
"step": 3634
},
{
"epoch": 0.8648134383826371,
"grad_norm": 0.328125,
"learning_rate": 3.394923909257704e-05,
"loss": 1.2014985084533691,
"step": 3636
},
{
"epoch": 0.8652891333432436,
"grad_norm": 0.3125,
"learning_rate": 3.375245650321934e-05,
"loss": 1.188545823097229,
"step": 3638
},
{
"epoch": 0.8657648283038502,
"grad_norm": 0.318359375,
"learning_rate": 3.3555828777485726e-05,
"loss": 1.178330898284912,
"step": 3640
},
{
"epoch": 0.8662405232644567,
"grad_norm": 0.326171875,
"learning_rate": 3.335936078936899e-05,
"loss": 1.1636848449707031,
"step": 3642
},
{
"epoch": 0.8667162182250632,
"grad_norm": 0.310546875,
"learning_rate": 3.3163057408902435e-05,
"loss": 1.1589958667755127,
"step": 3644
},
{
"epoch": 0.8671919131856697,
"grad_norm": 0.310546875,
"learning_rate": 3.296692350203902e-05,
"loss": 1.1450896263122559,
"step": 3646
},
{
"epoch": 0.8676676081462762,
"grad_norm": 0.310546875,
"learning_rate": 3.277096393053082e-05,
"loss": 1.123741626739502,
"step": 3648
},
{
"epoch": 0.8681433031068827,
"grad_norm": 0.3203125,
"learning_rate": 3.257518355180853e-05,
"loss": 1.187320351600647,
"step": 3650
},
{
"epoch": 0.8686189980674892,
"grad_norm": 0.3125,
"learning_rate": 3.2379587218860976e-05,
"loss": 1.16719651222229,
"step": 3652
},
{
"epoch": 0.8690946930280957,
"grad_norm": 0.318359375,
"learning_rate": 3.2184179780114944e-05,
"loss": 1.196395993232727,
"step": 3654
},
{
"epoch": 0.8695703879887022,
"grad_norm": 0.31640625,
"learning_rate": 3.198896607931485e-05,
"loss": 1.2043986320495605,
"step": 3656
},
{
"epoch": 0.8700460829493087,
"grad_norm": 0.306640625,
"learning_rate": 3.179395095540279e-05,
"loss": 1.1552737951278687,
"step": 3658
},
{
"epoch": 0.8705217779099153,
"grad_norm": 0.3046875,
"learning_rate": 3.1599139242398556e-05,
"loss": 1.180349588394165,
"step": 3660
},
{
"epoch": 0.8709974728705218,
"grad_norm": 0.30078125,
"learning_rate": 3.1404535769279764e-05,
"loss": 1.1361455917358398,
"step": 3662
},
{
"epoch": 0.8714731678311283,
"grad_norm": 0.3046875,
"learning_rate": 3.121014535986227e-05,
"loss": 1.1576318740844727,
"step": 3664
},
{
"epoch": 0.8719488627917348,
"grad_norm": 0.29296875,
"learning_rate": 3.1015972832680454e-05,
"loss": 1.083686113357544,
"step": 3666
},
{
"epoch": 0.8724245577523413,
"grad_norm": 0.30859375,
"learning_rate": 3.0822023000867863e-05,
"loss": 1.1516526937484741,
"step": 3668
},
{
"epoch": 0.8729002527129478,
"grad_norm": 0.296875,
"learning_rate": 3.062830067203792e-05,
"loss": 1.1149940490722656,
"step": 3670
},
{
"epoch": 0.8733759476735543,
"grad_norm": 0.3125,
"learning_rate": 3.043481064816467e-05,
"loss": 1.1872518062591553,
"step": 3672
},
{
"epoch": 0.8738516426341608,
"grad_norm": 0.32421875,
"learning_rate": 3.0241557725463866e-05,
"loss": 1.133741021156311,
"step": 3674
},
{
"epoch": 0.8743273375947673,
"grad_norm": 0.306640625,
"learning_rate": 3.0048546694273965e-05,
"loss": 1.1402521133422852,
"step": 3676
},
{
"epoch": 0.8748030325553738,
"grad_norm": 0.3125,
"learning_rate": 2.9855782338937432e-05,
"loss": 1.2263612747192383,
"step": 3678
},
{
"epoch": 0.8752787275159803,
"grad_norm": 0.3125,
"learning_rate": 2.9663269437682208e-05,
"loss": 1.1547777652740479,
"step": 3680
},
{
"epoch": 0.8757544224765869,
"grad_norm": 0.40625,
"learning_rate": 2.9471012762503134e-05,
"loss": 1.1414549350738525,
"step": 3682
},
{
"epoch": 0.8762301174371934,
"grad_norm": 0.30859375,
"learning_rate": 2.9279017079043816e-05,
"loss": 1.206810474395752,
"step": 3684
},
{
"epoch": 0.8767058123977999,
"grad_norm": 0.3046875,
"learning_rate": 2.908728714647834e-05,
"loss": 1.1493148803710938,
"step": 3686
},
{
"epoch": 0.8771815073584064,
"grad_norm": 0.310546875,
"learning_rate": 2.8895827717393446e-05,
"loss": 1.1840794086456299,
"step": 3688
},
{
"epoch": 0.8776572023190129,
"grad_norm": 0.310546875,
"learning_rate": 2.8704643537670603e-05,
"loss": 1.1903091669082642,
"step": 3690
},
{
"epoch": 0.8781328972796194,
"grad_norm": 0.30078125,
"learning_rate": 2.8513739346368443e-05,
"loss": 1.1483159065246582,
"step": 3692
},
{
"epoch": 0.8786085922402259,
"grad_norm": 0.296875,
"learning_rate": 2.8323119875605288e-05,
"loss": 1.1400749683380127,
"step": 3694
},
{
"epoch": 0.8790842872008324,
"grad_norm": 0.302734375,
"learning_rate": 2.813278985044178e-05,
"loss": 1.1304882764816284,
"step": 3696
},
{
"epoch": 0.8795599821614389,
"grad_norm": 0.30078125,
"learning_rate": 2.794275398876386e-05,
"loss": 1.1478686332702637,
"step": 3698
},
{
"epoch": 0.8800356771220454,
"grad_norm": 0.302734375,
"learning_rate": 2.7753017001165737e-05,
"loss": 1.1680241823196411,
"step": 3700
},
{
"epoch": 0.880511372082652,
"grad_norm": 0.306640625,
"learning_rate": 2.7563583590833133e-05,
"loss": 1.1892788410186768,
"step": 3702
},
{
"epoch": 0.8809870670432585,
"grad_norm": 0.306640625,
"learning_rate": 2.737445845342677e-05,
"loss": 1.1995958089828491,
"step": 3704
},
{
"epoch": 0.881462762003865,
"grad_norm": 0.30078125,
"learning_rate": 2.718564627696588e-05,
"loss": 1.1075689792633057,
"step": 3706
},
{
"epoch": 0.8819384569644715,
"grad_norm": 0.296875,
"learning_rate": 2.6997151741712087e-05,
"loss": 1.1438966989517212,
"step": 3708
},
{
"epoch": 0.882414151925078,
"grad_norm": 0.310546875,
"learning_rate": 2.680897952005329e-05,
"loss": 1.209947109222412,
"step": 3710
},
{
"epoch": 0.8828898468856845,
"grad_norm": 0.29296875,
"learning_rate": 2.662113427638796e-05,
"loss": 1.116198182106018,
"step": 3712
},
{
"epoch": 0.883365541846291,
"grad_norm": 0.30859375,
"learning_rate": 2.6433620667009442e-05,
"loss": 1.1661490201950073,
"step": 3714
},
{
"epoch": 0.8838412368068975,
"grad_norm": 0.310546875,
"learning_rate": 2.6246443339990532e-05,
"loss": 1.1473069190979004,
"step": 3716
},
{
"epoch": 0.884316931767504,
"grad_norm": 0.302734375,
"learning_rate": 2.605960693506834e-05,
"loss": 1.1723562479019165,
"step": 3718
},
{
"epoch": 0.8847926267281107,
"grad_norm": 0.296875,
"learning_rate": 2.5873116083529173e-05,
"loss": 1.1769287586212158,
"step": 3720
},
{
"epoch": 0.8852683216887172,
"grad_norm": 0.30859375,
"learning_rate": 2.56869754080938e-05,
"loss": 1.1576387882232666,
"step": 3722
},
{
"epoch": 0.8857440166493237,
"grad_norm": 0.302734375,
"learning_rate": 2.550118952280288e-05,
"loss": 1.0645157098770142,
"step": 3724
},
{
"epoch": 0.8862197116099302,
"grad_norm": 0.298828125,
"learning_rate": 2.531576303290253e-05,
"loss": 1.1478241682052612,
"step": 3726
},
{
"epoch": 0.8866954065705367,
"grad_norm": 0.310546875,
"learning_rate": 2.5130700534730215e-05,
"loss": 1.1812896728515625,
"step": 3728
},
{
"epoch": 0.8871711015311432,
"grad_norm": 0.314453125,
"learning_rate": 2.494600661560079e-05,
"loss": 1.223722219467163,
"step": 3730
},
{
"epoch": 0.8876467964917497,
"grad_norm": 0.294921875,
"learning_rate": 2.4761685853692825e-05,
"loss": 1.1464184522628784,
"step": 3732
},
{
"epoch": 0.8881224914523562,
"grad_norm": 0.302734375,
"learning_rate": 2.4577742817935077e-05,
"loss": 1.167757511138916,
"step": 3734
},
{
"epoch": 0.8885981864129627,
"grad_norm": 0.30078125,
"learning_rate": 2.4394182067893243e-05,
"loss": 1.1267993450164795,
"step": 3736
},
{
"epoch": 0.8890738813735692,
"grad_norm": 0.302734375,
"learning_rate": 2.421100815365701e-05,
"loss": 1.1455817222595215,
"step": 3738
},
{
"epoch": 0.8895495763341758,
"grad_norm": 0.30078125,
"learning_rate": 2.4028225615727145e-05,
"loss": 1.1717948913574219,
"step": 3740
},
{
"epoch": 0.8900252712947823,
"grad_norm": 0.298828125,
"learning_rate": 2.384583898490302e-05,
"loss": 1.1518162488937378,
"step": 3742
},
{
"epoch": 0.8905009662553888,
"grad_norm": 0.294921875,
"learning_rate": 2.3663852782170336e-05,
"loss": 1.147728443145752,
"step": 3744
},
{
"epoch": 0.8909766612159953,
"grad_norm": 0.3125,
"learning_rate": 2.3482271518588967e-05,
"loss": 1.1500670909881592,
"step": 3746
},
{
"epoch": 0.8914523561766018,
"grad_norm": 0.3046875,
"learning_rate": 2.330109969518122e-05,
"loss": 1.1796722412109375,
"step": 3748
},
{
"epoch": 0.8919280511372083,
"grad_norm": 0.296875,
"learning_rate": 2.3120341802820197e-05,
"loss": 1.1131136417388916,
"step": 3750
},
{
"epoch": 0.8924037460978148,
"grad_norm": 0.296875,
"learning_rate": 2.2940002322118518e-05,
"loss": 1.1635349988937378,
"step": 3752
},
{
"epoch": 0.8928794410584213,
"grad_norm": 0.294921875,
"learning_rate": 2.2760085723317285e-05,
"loss": 1.1256214380264282,
"step": 3754
},
{
"epoch": 0.8933551360190278,
"grad_norm": 0.302734375,
"learning_rate": 2.258059646617517e-05,
"loss": 1.1560603380203247,
"step": 3756
},
{
"epoch": 0.8938308309796343,
"grad_norm": 0.294921875,
"learning_rate": 2.240153899985802e-05,
"loss": 1.186435580253601,
"step": 3758
},
{
"epoch": 0.8943065259402408,
"grad_norm": 0.30078125,
"learning_rate": 2.222291776282838e-05,
"loss": 1.2056632041931152,
"step": 3760
},
{
"epoch": 0.8947822209008474,
"grad_norm": 0.306640625,
"learning_rate": 2.204473718273568e-05,
"loss": 1.1999526023864746,
"step": 3762
},
{
"epoch": 0.8952579158614539,
"grad_norm": 0.306640625,
"learning_rate": 2.1867001676306306e-05,
"loss": 1.209770917892456,
"step": 3764
},
{
"epoch": 0.8957336108220604,
"grad_norm": 0.296875,
"learning_rate": 2.1689715649234208e-05,
"loss": 1.110062599182129,
"step": 3766
},
{
"epoch": 0.8962093057826669,
"grad_norm": 0.302734375,
"learning_rate": 2.1512883496071715e-05,
"loss": 1.195483922958374,
"step": 3768
},
{
"epoch": 0.8966850007432734,
"grad_norm": 0.310546875,
"learning_rate": 2.1336509600120508e-05,
"loss": 1.226474642753601,
"step": 3770
},
{
"epoch": 0.8971606957038799,
"grad_norm": 0.294921875,
"learning_rate": 2.1160598333323087e-05,
"loss": 1.1339728832244873,
"step": 3772
},
{
"epoch": 0.8976363906644864,
"grad_norm": 0.294921875,
"learning_rate": 2.0985154056154274e-05,
"loss": 1.2141457796096802,
"step": 3774
},
{
"epoch": 0.8981120856250929,
"grad_norm": 0.3046875,
"learning_rate": 2.0810181117513215e-05,
"loss": 1.1662113666534424,
"step": 3776
},
{
"epoch": 0.8985877805856994,
"grad_norm": 0.298828125,
"learning_rate": 2.0635683854615576e-05,
"loss": 1.0973902940750122,
"step": 3778
},
{
"epoch": 0.899063475546306,
"grad_norm": 0.2890625,
"learning_rate": 2.0461666592885974e-05,
"loss": 1.1171178817749023,
"step": 3780
},
{
"epoch": 0.8995391705069125,
"grad_norm": 0.296875,
"learning_rate": 2.0288133645850808e-05,
"loss": 1.1062219142913818,
"step": 3782
},
{
"epoch": 0.900014865467519,
"grad_norm": 0.322265625,
"learning_rate": 2.0115089315031323e-05,
"loss": 1.1549062728881836,
"step": 3784
},
{
"epoch": 0.9004905604281255,
"grad_norm": 0.294921875,
"learning_rate": 1.9942537889836963e-05,
"loss": 1.1845629215240479,
"step": 3786
},
{
"epoch": 0.900966255388732,
"grad_norm": 0.291015625,
"learning_rate": 1.9770483647459117e-05,
"loss": 1.1162179708480835,
"step": 3788
},
{
"epoch": 0.9014419503493385,
"grad_norm": 0.302734375,
"learning_rate": 1.9598930852764987e-05,
"loss": 1.1066762208938599,
"step": 3790
},
{
"epoch": 0.901917645309945,
"grad_norm": 0.28515625,
"learning_rate": 1.942788375819198e-05,
"loss": 1.1973916292190552,
"step": 3792
},
{
"epoch": 0.9023933402705515,
"grad_norm": 0.302734375,
"learning_rate": 1.9257346603642203e-05,
"loss": 1.1700313091278076,
"step": 3794
},
{
"epoch": 0.902869035231158,
"grad_norm": 0.298828125,
"learning_rate": 1.9087323616377414e-05,
"loss": 1.1440091133117676,
"step": 3796
},
{
"epoch": 0.9033447301917645,
"grad_norm": 0.296875,
"learning_rate": 1.8917819010914283e-05,
"loss": 1.1456643342971802,
"step": 3798
},
{
"epoch": 0.903820425152371,
"grad_norm": 0.30078125,
"learning_rate": 1.8748836988919793e-05,
"loss": 1.1695044040679932,
"step": 3800
},
{
"epoch": 0.9042961201129776,
"grad_norm": 0.291015625,
"learning_rate": 1.8580381739107252e-05,
"loss": 1.1730451583862305,
"step": 3802
},
{
"epoch": 0.9047718150735841,
"grad_norm": 0.296875,
"learning_rate": 1.8412457437132318e-05,
"loss": 1.1789326667785645,
"step": 3804
},
{
"epoch": 0.9052475100341906,
"grad_norm": 0.291015625,
"learning_rate": 1.824506824548956e-05,
"loss": 1.1460459232330322,
"step": 3806
},
{
"epoch": 0.9057232049947971,
"grad_norm": 0.30859375,
"learning_rate": 1.8078218313409324e-05,
"loss": 1.1638338565826416,
"step": 3808
},
{
"epoch": 0.9061988999554036,
"grad_norm": 0.291015625,
"learning_rate": 1.7911911776754756e-05,
"loss": 1.1171094179153442,
"step": 3810
},
{
"epoch": 0.9066745949160101,
"grad_norm": 0.306640625,
"learning_rate": 1.7746152757919445e-05,
"loss": 1.2183301448822021,
"step": 3812
},
{
"epoch": 0.9071502898766166,
"grad_norm": 0.294921875,
"learning_rate": 1.758094536572508e-05,
"loss": 1.141022801399231,
"step": 3814
},
{
"epoch": 0.9076259848372231,
"grad_norm": 0.296875,
"learning_rate": 1.741629369531968e-05,
"loss": 1.1439030170440674,
"step": 3816
},
{
"epoch": 0.9081016797978296,
"grad_norm": 0.2890625,
"learning_rate": 1.7252201828076126e-05,
"loss": 1.1290979385375977,
"step": 3818
},
{
"epoch": 0.9085773747584361,
"grad_norm": 0.294921875,
"learning_rate": 1.7088673831490893e-05,
"loss": 1.1221880912780762,
"step": 3820
},
{
"epoch": 0.9090530697190427,
"grad_norm": 0.298828125,
"learning_rate": 1.6925713759083282e-05,
"loss": 1.1449179649353027,
"step": 3822
},
{
"epoch": 0.9095287646796492,
"grad_norm": 0.294921875,
"learning_rate": 1.6763325650294933e-05,
"loss": 1.148937702178955,
"step": 3824
},
{
"epoch": 0.9100044596402557,
"grad_norm": 0.29296875,
"learning_rate": 1.6601513530389727e-05,
"loss": 1.12366783618927,
"step": 3826
},
{
"epoch": 0.9104801546008622,
"grad_norm": 0.298828125,
"learning_rate": 1.644028141035394e-05,
"loss": 1.12631356716156,
"step": 3828
},
{
"epoch": 0.9109558495614687,
"grad_norm": 0.287109375,
"learning_rate": 1.627963328679686e-05,
"loss": 1.1116429567337036,
"step": 3830
},
{
"epoch": 0.9114315445220752,
"grad_norm": 0.298828125,
"learning_rate": 1.6119573141851747e-05,
"loss": 1.1646809577941895,
"step": 3832
},
{
"epoch": 0.9119072394826817,
"grad_norm": 0.29296875,
"learning_rate": 1.5960104943077045e-05,
"loss": 1.0913721323013306,
"step": 3834
},
{
"epoch": 0.9123829344432882,
"grad_norm": 0.296875,
"learning_rate": 1.5801232643358134e-05,
"loss": 1.1654855012893677,
"step": 3836
},
{
"epoch": 0.9128586294038947,
"grad_norm": 0.294921875,
"learning_rate": 1.5642960180809255e-05,
"loss": 1.1685070991516113,
"step": 3838
},
{
"epoch": 0.9133343243645012,
"grad_norm": 0.302734375,
"learning_rate": 1.5485291478675928e-05,
"loss": 1.1893408298492432,
"step": 3840
},
{
"epoch": 0.9138100193251077,
"grad_norm": 0.298828125,
"learning_rate": 1.5328230445237758e-05,
"loss": 1.1577904224395752,
"step": 3842
},
{
"epoch": 0.9142857142857143,
"grad_norm": 0.2890625,
"learning_rate": 1.517178097371144e-05,
"loss": 1.1701260805130005,
"step": 3844
},
{
"epoch": 0.9147614092463208,
"grad_norm": 0.296875,
"learning_rate": 1.5015946942154375e-05,
"loss": 1.1752269268035889,
"step": 3846
},
{
"epoch": 0.9152371042069273,
"grad_norm": 0.294921875,
"learning_rate": 1.4860732213368452e-05,
"loss": 1.158857822418213,
"step": 3848
},
{
"epoch": 0.9157127991675338,
"grad_norm": 0.30078125,
"learning_rate": 1.4706140634804325e-05,
"loss": 1.163185954093933,
"step": 3850
},
{
"epoch": 0.9161884941281403,
"grad_norm": 0.287109375,
"learning_rate": 1.455217603846609e-05,
"loss": 1.1203261613845825,
"step": 3852
},
{
"epoch": 0.9166641890887468,
"grad_norm": 0.296875,
"learning_rate": 1.4398842240816207e-05,
"loss": 1.128927230834961,
"step": 3854
},
{
"epoch": 0.9171398840493533,
"grad_norm": 0.291015625,
"learning_rate": 1.4246143042680989e-05,
"loss": 1.1380681991577148,
"step": 3856
},
{
"epoch": 0.9176155790099598,
"grad_norm": 0.298828125,
"learning_rate": 1.4094082229156323e-05,
"loss": 1.1902419328689575,
"step": 3858
},
{
"epoch": 0.9180912739705663,
"grad_norm": 0.29296875,
"learning_rate": 1.3942663569513864e-05,
"loss": 1.1731154918670654,
"step": 3860
},
{
"epoch": 0.9185669689311728,
"grad_norm": 0.294921875,
"learning_rate": 1.3791890817107616e-05,
"loss": 1.167722225189209,
"step": 3862
},
{
"epoch": 0.9190426638917794,
"grad_norm": 0.29296875,
"learning_rate": 1.3641767709280869e-05,
"loss": 1.1482999324798584,
"step": 3864
},
{
"epoch": 0.9195183588523859,
"grad_norm": 0.294921875,
"learning_rate": 1.3492297967273609e-05,
"loss": 1.1329618692398071,
"step": 3866
},
{
"epoch": 0.9199940538129924,
"grad_norm": 0.291015625,
"learning_rate": 1.3343485296130214e-05,
"loss": 1.2048474550247192,
"step": 3868
},
{
"epoch": 0.9204697487735989,
"grad_norm": 0.291015625,
"learning_rate": 1.319533338460762e-05,
"loss": 1.1382906436920166,
"step": 3870
},
{
"epoch": 0.9209454437342054,
"grad_norm": 0.306640625,
"learning_rate": 1.3047845905083966e-05,
"loss": 1.1446309089660645,
"step": 3872
},
{
"epoch": 0.9214211386948119,
"grad_norm": 0.296875,
"learning_rate": 1.2901026513467434e-05,
"loss": 1.1889190673828125,
"step": 3874
},
{
"epoch": 0.9218968336554184,
"grad_norm": 0.29296875,
"learning_rate": 1.2754878849105752e-05,
"loss": 1.1595823764801025,
"step": 3876
},
{
"epoch": 0.9223725286160249,
"grad_norm": 0.294921875,
"learning_rate": 1.260940653469589e-05,
"loss": 1.1825573444366455,
"step": 3878
},
{
"epoch": 0.9228482235766314,
"grad_norm": 0.2890625,
"learning_rate": 1.2464613176194283e-05,
"loss": 1.113194465637207,
"step": 3880
},
{
"epoch": 0.9233239185372379,
"grad_norm": 0.2890625,
"learning_rate": 1.2320502362727518e-05,
"loss": 1.0969769954681396,
"step": 3882
},
{
"epoch": 0.9237996134978445,
"grad_norm": 0.283203125,
"learning_rate": 1.2177077666503236e-05,
"loss": 1.1694114208221436,
"step": 3884
},
{
"epoch": 0.9242753084584511,
"grad_norm": 0.298828125,
"learning_rate": 1.2034342642721723e-05,
"loss": 1.190758228302002,
"step": 3886
},
{
"epoch": 0.9247510034190576,
"grad_norm": 0.291015625,
"learning_rate": 1.1892300829487678e-05,
"loss": 1.136456847190857,
"step": 3888
},
{
"epoch": 0.9252266983796641,
"grad_norm": 0.294921875,
"learning_rate": 1.1750955747722546e-05,
"loss": 1.1714725494384766,
"step": 3890
},
{
"epoch": 0.9257023933402706,
"grad_norm": 0.3046875,
"learning_rate": 1.161031090107728e-05,
"loss": 1.1840903759002686,
"step": 3892
},
{
"epoch": 0.9261780883008771,
"grad_norm": 0.291015625,
"learning_rate": 1.1470369775845423e-05,
"loss": 1.204842209815979,
"step": 3894
},
{
"epoch": 0.9266537832614836,
"grad_norm": 0.296875,
"learning_rate": 1.1331135840876764e-05,
"loss": 1.1758289337158203,
"step": 3896
},
{
"epoch": 0.9271294782220901,
"grad_norm": 0.30859375,
"learning_rate": 1.119261254749128e-05,
"loss": 1.1592724323272705,
"step": 3898
},
{
"epoch": 0.9276051731826966,
"grad_norm": 0.29296875,
"learning_rate": 1.1054803329393625e-05,
"loss": 1.1884357929229736,
"step": 3900
},
{
"epoch": 0.9280808681433032,
"grad_norm": 0.2890625,
"learning_rate": 1.0917711602588037e-05,
"loss": 1.1424968242645264,
"step": 3902
},
{
"epoch": 0.9285565631039097,
"grad_norm": 0.291015625,
"learning_rate": 1.0781340765293606e-05,
"loss": 1.1715056896209717,
"step": 3904
},
{
"epoch": 0.9290322580645162,
"grad_norm": 0.29296875,
"learning_rate": 1.0645694197860084e-05,
"loss": 1.1644243001937866,
"step": 3906
},
{
"epoch": 0.9295079530251227,
"grad_norm": 0.28125,
"learning_rate": 1.0510775262684056e-05,
"loss": 1.1605405807495117,
"step": 3908
},
{
"epoch": 0.9299836479857292,
"grad_norm": 0.28515625,
"learning_rate": 1.0376587304125656e-05,
"loss": 1.1060264110565186,
"step": 3910
},
{
"epoch": 0.9304593429463357,
"grad_norm": 0.29296875,
"learning_rate": 1.0243133648425595e-05,
"loss": 1.0869121551513672,
"step": 3912
},
{
"epoch": 0.9309350379069422,
"grad_norm": 0.2890625,
"learning_rate": 1.0110417603622733e-05,
"loss": 1.1413328647613525,
"step": 3914
},
{
"epoch": 0.9314107328675487,
"grad_norm": 0.2890625,
"learning_rate": 9.978442459472127e-06,
"loss": 1.1426079273223877,
"step": 3916
},
{
"epoch": 0.9318864278281552,
"grad_norm": 0.2890625,
"learning_rate": 9.847211487363401e-06,
"loss": 1.1142783164978027,
"step": 3918
},
{
"epoch": 0.9323621227887617,
"grad_norm": 0.2890625,
"learning_rate": 9.71672794023975e-06,
"loss": 1.158155083656311,
"step": 3920
},
{
"epoch": 0.9328378177493682,
"grad_norm": 0.298828125,
"learning_rate": 9.586995052517208e-06,
"loss": 1.2047823667526245,
"step": 3922
},
{
"epoch": 0.9333135127099748,
"grad_norm": 0.29296875,
"learning_rate": 9.458016040004541e-06,
"loss": 1.1312339305877686,
"step": 3924
},
{
"epoch": 0.9337892076705813,
"grad_norm": 0.287109375,
"learning_rate": 9.329794099823531e-06,
"loss": 1.1283931732177734,
"step": 3926
},
{
"epoch": 0.9342649026311878,
"grad_norm": 0.28515625,
"learning_rate": 9.202332410329676e-06,
"loss": 1.1590964794158936,
"step": 3928
},
{
"epoch": 0.9347405975917943,
"grad_norm": 0.296875,
"learning_rate": 9.075634131033481e-06,
"loss": 1.196352243423462,
"step": 3930
},
{
"epoch": 0.9352162925524008,
"grad_norm": 0.29296875,
"learning_rate": 8.949702402522065e-06,
"loss": 1.1239594221115112,
"step": 3932
},
{
"epoch": 0.9356919875130073,
"grad_norm": 0.310546875,
"learning_rate": 8.824540346381343e-06,
"loss": 1.1666662693023682,
"step": 3934
},
{
"epoch": 0.9361676824736138,
"grad_norm": 0.28515625,
"learning_rate": 8.700151065118683e-06,
"loss": 1.2102231979370117,
"step": 3936
},
{
"epoch": 0.9366433774342203,
"grad_norm": 0.279296875,
"learning_rate": 8.576537642085934e-06,
"loss": 1.1497886180877686,
"step": 3938
},
{
"epoch": 0.9371190723948268,
"grad_norm": 0.29296875,
"learning_rate": 8.453703141403062e-06,
"loss": 1.1418395042419434,
"step": 3940
},
{
"epoch": 0.9375947673554333,
"grad_norm": 0.29296875,
"learning_rate": 8.331650607882146e-06,
"loss": 1.1689965724945068,
"step": 3942
},
{
"epoch": 0.9380704623160399,
"grad_norm": 0.283203125,
"learning_rate": 8.210383066951926e-06,
"loss": 1.1347894668579102,
"step": 3944
},
{
"epoch": 0.9385461572766464,
"grad_norm": 0.30078125,
"learning_rate": 8.08990352458281e-06,
"loss": 1.1696358919143677,
"step": 3946
},
{
"epoch": 0.9390218522372529,
"grad_norm": 0.2890625,
"learning_rate": 7.970214967212349e-06,
"loss": 1.2054082155227661,
"step": 3948
},
{
"epoch": 0.9394975471978594,
"grad_norm": 0.291015625,
"learning_rate": 7.851320361671244e-06,
"loss": 1.2238609790802002,
"step": 3950
},
{
"epoch": 0.9399732421584659,
"grad_norm": 0.2890625,
"learning_rate": 7.733222655109758e-06,
"loss": 1.1731221675872803,
"step": 3952
},
{
"epoch": 0.9404489371190724,
"grad_norm": 0.28515625,
"learning_rate": 7.615924774924681e-06,
"loss": 1.1514570713043213,
"step": 3954
},
{
"epoch": 0.9409246320796789,
"grad_norm": 0.28515625,
"learning_rate": 7.499429628686794e-06,
"loss": 1.1528222560882568,
"step": 3956
},
{
"epoch": 0.9414003270402854,
"grad_norm": 0.302734375,
"learning_rate": 7.383740104068735e-06,
"loss": 1.119846224784851,
"step": 3958
},
{
"epoch": 0.9418760220008919,
"grad_norm": 0.2890625,
"learning_rate": 7.268859068773495e-06,
"loss": 1.1658766269683838,
"step": 3960
},
{
"epoch": 0.9423517169614984,
"grad_norm": 0.29296875,
"learning_rate": 7.154789370463256e-06,
"loss": 1.1100010871887207,
"step": 3962
},
{
"epoch": 0.942827411922105,
"grad_norm": 0.30859375,
"learning_rate": 7.041533836688881e-06,
"loss": 1.1952953338623047,
"step": 3964
},
{
"epoch": 0.9433031068827115,
"grad_norm": 0.287109375,
"learning_rate": 6.9290952748197524e-06,
"loss": 1.1750929355621338,
"step": 3966
},
{
"epoch": 0.943778801843318,
"grad_norm": 0.294921875,
"learning_rate": 6.81747647197422e-06,
"loss": 1.143003225326538,
"step": 3968
},
{
"epoch": 0.9442544968039245,
"grad_norm": 0.27734375,
"learning_rate": 6.706680194950541e-06,
"loss": 1.1299149990081787,
"step": 3970
},
{
"epoch": 0.944730191764531,
"grad_norm": 0.291015625,
"learning_rate": 6.596709190158224e-06,
"loss": 1.157487154006958,
"step": 3972
},
{
"epoch": 0.9452058867251375,
"grad_norm": 0.294921875,
"learning_rate": 6.4875661835500295e-06,
"loss": 1.124016523361206,
"step": 3974
},
{
"epoch": 0.945681581685744,
"grad_norm": 0.2890625,
"learning_rate": 6.379253880554337e-06,
"loss": 1.1457756757736206,
"step": 3976
},
{
"epoch": 0.9461572766463505,
"grad_norm": 0.2890625,
"learning_rate": 6.271774966008117e-06,
"loss": 1.1654269695281982,
"step": 3978
},
{
"epoch": 0.946632971606957,
"grad_norm": 0.294921875,
"learning_rate": 6.1651321040903946e-06,
"loss": 1.2042397260665894,
"step": 3980
},
{
"epoch": 0.9471086665675635,
"grad_norm": 0.296875,
"learning_rate": 6.059327938256148e-06,
"loss": 1.1625417470932007,
"step": 3982
},
{
"epoch": 0.94758436152817,
"grad_norm": 0.2890625,
"learning_rate": 5.954365091170848e-06,
"loss": 1.1616830825805664,
"step": 3984
},
{
"epoch": 0.9480600564887766,
"grad_norm": 0.30078125,
"learning_rate": 5.850246164645414e-06,
"loss": 1.2000601291656494,
"step": 3986
},
{
"epoch": 0.9485357514493831,
"grad_norm": 0.296875,
"learning_rate": 5.746973739571719e-06,
"loss": 1.1334123611450195,
"step": 3988
},
{
"epoch": 0.9490114464099896,
"grad_norm": 0.283203125,
"learning_rate": 5.6445503758586485e-06,
"loss": 1.129727840423584,
"step": 3990
},
{
"epoch": 0.9494871413705961,
"grad_norm": 0.30078125,
"learning_rate": 5.542978612368588e-06,
"loss": 1.142544150352478,
"step": 3992
},
{
"epoch": 0.9499628363312026,
"grad_norm": 0.310546875,
"learning_rate": 5.442260966854563e-06,
"loss": 1.1486105918884277,
"step": 3994
},
{
"epoch": 0.9504385312918091,
"grad_norm": 0.283203125,
"learning_rate": 5.342399935897748e-06,
"loss": 1.0392706394195557,
"step": 3996
},
{
"epoch": 0.9509142262524156,
"grad_norm": 0.294921875,
"learning_rate": 5.2433979948456385e-06,
"loss": 1.1802358627319336,
"step": 3998
},
{
"epoch": 0.9513899212130221,
"grad_norm": 0.296875,
"learning_rate": 5.1452575977506905e-06,
"loss": 1.1869316101074219,
"step": 4000
},
{
"epoch": 0.9518656161736286,
"grad_norm": 0.287109375,
"learning_rate": 5.047981177309447e-06,
"loss": 1.1039962768554688,
"step": 4002
},
{
"epoch": 0.9523413111342351,
"grad_norm": 0.28125,
"learning_rate": 4.9515711448022966e-06,
"loss": 1.108412504196167,
"step": 4004
},
{
"epoch": 0.9528170060948417,
"grad_norm": 0.298828125,
"learning_rate": 4.856029890033647e-06,
"loss": 1.1982967853546143,
"step": 4006
},
{
"epoch": 0.9532927010554482,
"grad_norm": 0.298828125,
"learning_rate": 4.761359781272705e-06,
"loss": 1.1908378601074219,
"step": 4008
},
{
"epoch": 0.9537683960160547,
"grad_norm": 0.3125,
"learning_rate": 4.667563165194815e-06,
"loss": 1.2247347831726074,
"step": 4010
},
{
"epoch": 0.9542440909766612,
"grad_norm": 0.296875,
"learning_rate": 4.574642366823199e-06,
"loss": 1.174034595489502,
"step": 4012
},
{
"epoch": 0.9547197859372677,
"grad_norm": 0.29296875,
"learning_rate": 4.482599689471437e-06,
"loss": 1.1458334922790527,
"step": 4014
},
{
"epoch": 0.9551954808978742,
"grad_norm": 0.287109375,
"learning_rate": 4.391437414686261e-06,
"loss": 1.1437745094299316,
"step": 4016
},
{
"epoch": 0.9556711758584807,
"grad_norm": 0.294921875,
"learning_rate": 4.301157802191078e-06,
"loss": 1.1791338920593262,
"step": 4018
},
{
"epoch": 0.9561468708190872,
"grad_norm": 0.30078125,
"learning_rate": 4.211763089829934e-06,
"loss": 1.2103009223937988,
"step": 4020
},
{
"epoch": 0.9566225657796937,
"grad_norm": 0.28515625,
"learning_rate": 4.123255493512028e-06,
"loss": 1.1193060874938965,
"step": 4022
},
{
"epoch": 0.9570982607403002,
"grad_norm": 0.29296875,
"learning_rate": 4.035637207156798e-06,
"loss": 1.1846659183502197,
"step": 4024
},
{
"epoch": 0.9575739557009068,
"grad_norm": 0.2890625,
"learning_rate": 3.94891040263953e-06,
"loss": 1.1607009172439575,
"step": 4026
},
{
"epoch": 0.9580496506615133,
"grad_norm": 0.29296875,
"learning_rate": 3.863077229737546e-06,
"loss": 1.1519575119018555,
"step": 4028
},
{
"epoch": 0.9585253456221198,
"grad_norm": 0.29296875,
"learning_rate": 3.778139816076878e-06,
"loss": 1.1820671558380127,
"step": 4030
},
{
"epoch": 0.9590010405827263,
"grad_norm": 0.30078125,
"learning_rate": 3.694100267079548e-06,
"loss": 1.1689975261688232,
"step": 4032
},
{
"epoch": 0.9594767355433328,
"grad_norm": 0.306640625,
"learning_rate": 3.610960665911396e-06,
"loss": 1.187016248703003,
"step": 4034
},
{
"epoch": 0.9599524305039393,
"grad_norm": 0.291015625,
"learning_rate": 3.5287230734304002e-06,
"loss": 1.1339020729064941,
"step": 4036
},
{
"epoch": 0.9604281254645458,
"grad_norm": 0.28515625,
"learning_rate": 3.4473895281356497e-06,
"loss": 1.1432700157165527,
"step": 4038
},
{
"epoch": 0.9609038204251523,
"grad_norm": 0.302734375,
"learning_rate": 3.3669620461167464e-06,
"loss": 1.1758100986480713,
"step": 4040
},
{
"epoch": 0.9613795153857588,
"grad_norm": 0.298828125,
"learning_rate": 3.2874426210038802e-06,
"loss": 1.1896083354949951,
"step": 4042
},
{
"epoch": 0.9618552103463653,
"grad_norm": 0.287109375,
"learning_rate": 3.208833223918415e-06,
"loss": 1.169938564300537,
"step": 4044
},
{
"epoch": 0.9623309053069719,
"grad_norm": 0.29296875,
"learning_rate": 3.1311358034239725e-06,
"loss": 1.24098539352417,
"step": 4046
},
{
"epoch": 0.9628066002675784,
"grad_norm": 0.287109375,
"learning_rate": 3.0543522854782127e-06,
"loss": 1.1295160055160522,
"step": 4048
},
{
"epoch": 0.963282295228185,
"grad_norm": 0.29296875,
"learning_rate": 2.9784845733850144e-06,
"loss": 1.193390130996704,
"step": 4050
},
{
"epoch": 0.9637579901887915,
"grad_norm": 0.29296875,
"learning_rate": 2.9035345477473485e-06,
"loss": 1.1334125995635986,
"step": 4052
},
{
"epoch": 0.964233685149398,
"grad_norm": 0.283203125,
"learning_rate": 2.8295040664206454e-06,
"loss": 1.156846284866333,
"step": 4054
},
{
"epoch": 0.9647093801100045,
"grad_norm": 0.29296875,
"learning_rate": 2.7563949644667354e-06,
"loss": 1.1609504222869873,
"step": 4056
},
{
"epoch": 0.965185075070611,
"grad_norm": 0.29296875,
"learning_rate": 2.6842090541083775e-06,
"loss": 1.1681158542633057,
"step": 4058
},
{
"epoch": 0.9656607700312175,
"grad_norm": 0.28515625,
"learning_rate": 2.6129481246843248e-06,
"loss": 1.1730051040649414,
"step": 4060
},
{
"epoch": 0.966136464991824,
"grad_norm": 0.29296875,
"learning_rate": 2.542613942604968e-06,
"loss": 1.2059528827667236,
"step": 4062
},
{
"epoch": 0.9666121599524305,
"grad_norm": 0.302734375,
"learning_rate": 2.4732082513085587e-06,
"loss": 1.1665153503417969,
"step": 4064
},
{
"epoch": 0.9670878549130371,
"grad_norm": 0.287109375,
"learning_rate": 2.404732771218008e-06,
"loss": 1.146468162536621,
"step": 4066
},
{
"epoch": 0.9675635498736436,
"grad_norm": 0.291015625,
"learning_rate": 2.3371891996982e-06,
"loss": 1.1147561073303223,
"step": 4068
},
{
"epoch": 0.9680392448342501,
"grad_norm": 0.296875,
"learning_rate": 2.27057921101395e-06,
"loss": 1.1539335250854492,
"step": 4070
},
{
"epoch": 0.9685149397948566,
"grad_norm": 0.30078125,
"learning_rate": 2.204904456288497e-06,
"loss": 1.1748045682907104,
"step": 4072
},
{
"epoch": 0.9689906347554631,
"grad_norm": 0.29296875,
"learning_rate": 2.1401665634625823e-06,
"loss": 1.141796588897705,
"step": 4074
},
{
"epoch": 0.9694663297160696,
"grad_norm": 0.279296875,
"learning_rate": 2.0763671372540585e-06,
"loss": 1.0855543613433838,
"step": 4076
},
{
"epoch": 0.9699420246766761,
"grad_norm": 0.28515625,
"learning_rate": 2.013507759118176e-06,
"loss": 1.103421688079834,
"step": 4078
},
{
"epoch": 0.9704177196372826,
"grad_norm": 0.29296875,
"learning_rate": 1.95158998720832e-06,
"loss": 1.1640735864639282,
"step": 4080
},
{
"epoch": 0.9708934145978891,
"grad_norm": 0.283203125,
"learning_rate": 1.8906153563374196e-06,
"loss": 1.1282706260681152,
"step": 4082
},
{
"epoch": 0.9713691095584956,
"grad_norm": 0.2890625,
"learning_rate": 1.8305853779399108e-06,
"loss": 1.0961542129516602,
"step": 4084
},
{
"epoch": 0.9718448045191022,
"grad_norm": 0.28515625,
"learning_rate": 1.7715015400342305e-06,
"loss": 1.1879502534866333,
"step": 4086
},
{
"epoch": 0.9723204994797087,
"grad_norm": 0.291015625,
"learning_rate": 1.7133653071859947e-06,
"loss": 1.1628968715667725,
"step": 4088
},
{
"epoch": 0.9727961944403152,
"grad_norm": 0.30078125,
"learning_rate": 1.656178120471621e-06,
"loss": 1.1832327842712402,
"step": 4090
},
{
"epoch": 0.9732718894009217,
"grad_norm": 0.291015625,
"learning_rate": 1.5999413974426658e-06,
"loss": 1.2111151218414307,
"step": 4092
},
{
"epoch": 0.9737475843615282,
"grad_norm": 0.306640625,
"learning_rate": 1.5446565320906692e-06,
"loss": 1.1401962041854858,
"step": 4094
},
{
"epoch": 0.9742232793221347,
"grad_norm": 0.294921875,
"learning_rate": 1.4903248948125782e-06,
"loss": 1.1747379302978516,
"step": 4096
},
{
"epoch": 0.9746989742827412,
"grad_norm": 0.287109375,
"learning_rate": 1.4369478323768183e-06,
"loss": 1.2249683141708374,
"step": 4098
},
{
"epoch": 0.9751746692433477,
"grad_norm": 0.28125,
"learning_rate": 1.3845266678898673e-06,
"loss": 1.1771612167358398,
"step": 4100
},
{
"epoch": 0.9756503642039542,
"grad_norm": 0.291015625,
"learning_rate": 1.3330627007634943e-06,
"loss": 1.1556856632232666,
"step": 4102
},
{
"epoch": 0.9761260591645607,
"grad_norm": 0.28515625,
"learning_rate": 1.2825572066825288e-06,
"loss": 1.1458361148834229,
"step": 4104
},
{
"epoch": 0.9766017541251673,
"grad_norm": 0.287109375,
"learning_rate": 1.233011437573244e-06,
"loss": 1.1212427616119385,
"step": 4106
},
{
"epoch": 0.9770774490857738,
"grad_norm": 0.287109375,
"learning_rate": 1.184426621572321e-06,
"loss": 1.1551880836486816,
"step": 4108
},
{
"epoch": 0.9775531440463803,
"grad_norm": 0.29296875,
"learning_rate": 1.1368039629964155e-06,
"loss": 1.1765400171279907,
"step": 4110
},
{
"epoch": 0.9780288390069868,
"grad_norm": 0.283203125,
"learning_rate": 1.0901446423123007e-06,
"loss": 1.1351805925369263,
"step": 4112
},
{
"epoch": 0.9785045339675933,
"grad_norm": 0.28125,
"learning_rate": 1.0444498161075977e-06,
"loss": 1.1993989944458008,
"step": 4114
},
{
"epoch": 0.9789802289281998,
"grad_norm": 0.291015625,
"learning_rate": 9.997206170621187e-07,
"loss": 1.148155689239502,
"step": 4116
},
{
"epoch": 0.9794559238888063,
"grad_norm": 0.279296875,
"learning_rate": 9.559581539197916e-07,
"loss": 1.0902024507522583,
"step": 4118
},
{
"epoch": 0.9799316188494128,
"grad_norm": 0.291015625,
"learning_rate": 9.131635114611481e-07,
"loss": 1.1051156520843506,
"step": 4120
},
{
"epoch": 0.9804073138100193,
"grad_norm": 0.302734375,
"learning_rate": 8.713377504764797e-07,
"loss": 1.170903205871582,
"step": 4122
},
{
"epoch": 0.9808830087706258,
"grad_norm": 0.294921875,
"learning_rate": 8.304819077395065e-07,
"loss": 1.185584545135498,
"step": 4124
},
{
"epoch": 0.9813587037312324,
"grad_norm": 0.302734375,
"learning_rate": 7.905969959816828e-07,
"loss": 1.1473748683929443,
"step": 4126
},
{
"epoch": 0.9818343986918389,
"grad_norm": 0.296875,
"learning_rate": 7.51684003867128e-07,
"loss": 1.1639072895050049,
"step": 4128
},
{
"epoch": 0.9823100936524454,
"grad_norm": 0.29296875,
"learning_rate": 7.137438959680554e-07,
"loss": 1.234483003616333,
"step": 4130
},
{
"epoch": 0.9827857886130519,
"grad_norm": 0.3046875,
"learning_rate": 6.767776127409375e-07,
"loss": 1.1430094242095947,
"step": 4132
},
{
"epoch": 0.9832614835736584,
"grad_norm": 0.287109375,
"learning_rate": 6.407860705031299e-07,
"loss": 1.1307320594787598,
"step": 4134
},
{
"epoch": 0.9837371785342649,
"grad_norm": 0.279296875,
"learning_rate": 6.057701614101862e-07,
"loss": 1.2102608680725098,
"step": 4136
},
{
"epoch": 0.9842128734948714,
"grad_norm": 0.291015625,
"learning_rate": 5.717307534337613e-07,
"loss": 1.1357035636901855,
"step": 4138
},
{
"epoch": 0.9846885684554779,
"grad_norm": 0.29296875,
"learning_rate": 5.386686903400496e-07,
"loss": 1.1917630434036255,
"step": 4140
},
{
"epoch": 0.9851642634160844,
"grad_norm": 0.37109375,
"learning_rate": 5.065847916689226e-07,
"loss": 1.145763635635376,
"step": 4142
},
{
"epoch": 0.9856399583766909,
"grad_norm": 0.28515625,
"learning_rate": 4.754798527135629e-07,
"loss": 1.123291015625,
"step": 4144
},
{
"epoch": 0.9861156533372974,
"grad_norm": 0.287109375,
"learning_rate": 4.4535464450079056e-07,
"loss": 1.19578218460083,
"step": 4146
},
{
"epoch": 0.986591348297904,
"grad_norm": 0.287109375,
"learning_rate": 4.162099137719322e-07,
"loss": 1.1768969297409058,
"step": 4148
},
{
"epoch": 0.9870670432585105,
"grad_norm": 0.28515625,
"learning_rate": 3.880463829643155e-07,
"loss": 1.1089352369308472,
"step": 4150
},
{
"epoch": 0.987542738219117,
"grad_norm": 0.296875,
"learning_rate": 3.608647501933549e-07,
"loss": 1.1268953084945679,
"step": 4152
},
{
"epoch": 0.9880184331797235,
"grad_norm": 0.298828125,
"learning_rate": 3.346656892352673e-07,
"loss": 1.2365374565124512,
"step": 4154
},
{
"epoch": 0.98849412814033,
"grad_norm": 0.29296875,
"learning_rate": 3.0944984951033485e-07,
"loss": 1.1548500061035156,
"step": 4156
},
{
"epoch": 0.9889698231009365,
"grad_norm": 0.29296875,
"learning_rate": 2.8521785606684616e-07,
"loss": 1.1455793380737305,
"step": 4158
},
{
"epoch": 0.989445518061543,
"grad_norm": 0.298828125,
"learning_rate": 2.619703095655712e-07,
"loss": 1.156882882118225,
"step": 4160
},
{
"epoch": 0.9899212130221495,
"grad_norm": 0.294921875,
"learning_rate": 2.397077862648978e-07,
"loss": 1.1094558238983154,
"step": 4162
},
{
"epoch": 0.990396907982756,
"grad_norm": 0.283203125,
"learning_rate": 2.1843083800652255e-07,
"loss": 1.1157076358795166,
"step": 4164
},
{
"epoch": 0.9908726029433625,
"grad_norm": 0.2890625,
"learning_rate": 1.9813999220179125e-07,
"loss": 1.1705288887023926,
"step": 4166
},
{
"epoch": 0.9913482979039691,
"grad_norm": 0.30078125,
"learning_rate": 1.7883575181862012e-07,
"loss": 1.1335409879684448,
"step": 4168
},
{
"epoch": 0.9918239928645756,
"grad_norm": 0.294921875,
"learning_rate": 1.6051859536902136e-07,
"loss": 1.1639494895935059,
"step": 4170
},
{
"epoch": 0.9922996878251821,
"grad_norm": 0.29296875,
"learning_rate": 1.4318897689725053e-07,
"loss": 1.145524024963379,
"step": 4172
},
{
"epoch": 0.9927753827857886,
"grad_norm": 0.283203125,
"learning_rate": 1.2684732596854876e-07,
"loss": 1.1588659286499023,
"step": 4174
},
{
"epoch": 0.9932510777463951,
"grad_norm": 0.28515625,
"learning_rate": 1.1149404765848915e-07,
"loss": 1.138121485710144,
"step": 4176
},
{
"epoch": 0.9937267727070016,
"grad_norm": 0.296875,
"learning_rate": 9.712952254294471e-08,
"loss": 1.1140878200531006,
"step": 4178
},
{
"epoch": 0.9942024676676081,
"grad_norm": 0.30078125,
"learning_rate": 8.375410668865602e-08,
"loss": 1.1625972986221313,
"step": 4180
},
{
"epoch": 0.9946781626282146,
"grad_norm": 0.2890625,
"learning_rate": 7.136813164438927e-08,
"loss": 1.140109896659851,
"step": 4182
},
{
"epoch": 0.9951538575888211,
"grad_norm": 0.29296875,
"learning_rate": 5.997190443274292e-08,
"loss": 1.1538417339324951,
"step": 4184
},
{
"epoch": 0.9956295525494276,
"grad_norm": 0.291015625,
"learning_rate": 4.9565707542500454e-08,
"loss": 1.1804558038711548,
"step": 4186
},
{
"epoch": 0.9961052475100342,
"grad_norm": 0.2890625,
"learning_rate": 4.014979892167592e-08,
"loss": 1.1386924982070923,
"step": 4188
},
{
"epoch": 0.9965809424706407,
"grad_norm": 0.287109375,
"learning_rate": 3.172441197107468e-08,
"loss": 1.1561048030853271,
"step": 4190
},
{
"epoch": 0.9970566374312472,
"grad_norm": 0.294921875,
"learning_rate": 2.4289755538537962e-08,
"loss": 1.165192723274231,
"step": 4192
},
{
"epoch": 0.9975323323918537,
"grad_norm": 0.294921875,
"learning_rate": 1.7846013913755957e-08,
"loss": 1.1334145069122314,
"step": 4194
},
{
"epoch": 0.9980080273524602,
"grad_norm": 0.294921875,
"learning_rate": 1.2393346823693641e-08,
"loss": 1.139329195022583,
"step": 4196
},
{
"epoch": 0.9984837223130667,
"grad_norm": 0.29296875,
"learning_rate": 7.93188942864287e-09,
"loss": 1.1700010299682617,
"step": 4198
},
{
"epoch": 0.9989594172736732,
"grad_norm": 0.283203125,
"learning_rate": 4.461752318860591e-09,
"loss": 1.1264121532440186,
"step": 4200
},
{
"epoch": 0.9994351122342797,
"grad_norm": 0.3046875,
"learning_rate": 1.9830215118377128e-09,
"loss": 1.1346487998962402,
"step": 4202
},
{
"epoch": 0.9999108071948862,
"grad_norm": 0.287109375,
"learning_rate": 4.957584501674717e-10,
"loss": 1.121924877166748,
"step": 4204
},
{
"epoch": 1.0,
"step": 4205,
"total_flos": 1.6807134688362627e+19,
"train_loss": 1.3080670000681838,
"train_runtime": 67307.8983,
"train_samples_per_second": 7.995,
"train_steps_per_second": 0.062
}
],
"logging_steps": 2,
"max_steps": 4205,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1051,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6807134688362627e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}