{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997538764459759,
"eval_steps": 500,
"global_step": 2708,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00036918533103618014,
"grad_norm": 2.4412319660186768,
"learning_rate": 5.0000000000000004e-08,
"loss": 1.1388,
"step": 1
},
{
"epoch": 0.0007383706620723603,
"grad_norm": 2.275918483734131,
"learning_rate": 1.0000000000000001e-07,
"loss": 1.1206,
"step": 2
},
{
"epoch": 0.0011075559931085406,
"grad_norm": 2.2667036056518555,
"learning_rate": 1.5000000000000002e-07,
"loss": 1.0982,
"step": 3
},
{
"epoch": 0.0014767413241447206,
"grad_norm": 2.4527230262756348,
"learning_rate": 2.0000000000000002e-07,
"loss": 1.1072,
"step": 4
},
{
"epoch": 0.0018459266551809008,
"grad_norm": 2.393638849258423,
"learning_rate": 2.5000000000000004e-07,
"loss": 1.0996,
"step": 5
},
{
"epoch": 0.002215111986217081,
"grad_norm": 2.389622449874878,
"learning_rate": 3.0000000000000004e-07,
"loss": 1.0901,
"step": 6
},
{
"epoch": 0.002584297317253261,
"grad_norm": 2.34468150138855,
"learning_rate": 3.5000000000000004e-07,
"loss": 1.1209,
"step": 7
},
{
"epoch": 0.002953482648289441,
"grad_norm": 2.48166561126709,
"learning_rate": 4.0000000000000003e-07,
"loss": 1.1034,
"step": 8
},
{
"epoch": 0.0033226679793256215,
"grad_norm": 2.1924567222595215,
"learning_rate": 4.5000000000000003e-07,
"loss": 1.1516,
"step": 9
},
{
"epoch": 0.0036918533103618015,
"grad_norm": 2.408474922180176,
"learning_rate": 5.000000000000001e-07,
"loss": 1.1405,
"step": 10
},
{
"epoch": 0.004061038641397982,
"grad_norm": 2.1132235527038574,
"learning_rate": 5.5e-07,
"loss": 1.1031,
"step": 11
},
{
"epoch": 0.004430223972434162,
"grad_norm": 2.1916306018829346,
"learning_rate": 6.000000000000001e-07,
"loss": 1.1238,
"step": 12
},
{
"epoch": 0.004799409303470342,
"grad_norm": 2.040755033493042,
"learning_rate": 6.5e-07,
"loss": 1.1197,
"step": 13
},
{
"epoch": 0.005168594634506522,
"grad_norm": 1.9937278032302856,
"learning_rate": 7.000000000000001e-07,
"loss": 1.0578,
"step": 14
},
{
"epoch": 0.005537779965542703,
"grad_norm": 2.0224194526672363,
"learning_rate": 7.5e-07,
"loss": 1.0846,
"step": 15
},
{
"epoch": 0.005906965296578882,
"grad_norm": 1.80266273021698,
"learning_rate": 8.000000000000001e-07,
"loss": 1.0908,
"step": 16
},
{
"epoch": 0.006276150627615063,
"grad_norm": 1.8800767660140991,
"learning_rate": 8.500000000000001e-07,
"loss": 1.0928,
"step": 17
},
{
"epoch": 0.006645335958651243,
"grad_norm": 1.6004233360290527,
"learning_rate": 9.000000000000001e-07,
"loss": 1.0828,
"step": 18
},
{
"epoch": 0.0070145212896874235,
"grad_norm": 1.653378963470459,
"learning_rate": 9.500000000000001e-07,
"loss": 1.1113,
"step": 19
},
{
"epoch": 0.007383706620723603,
"grad_norm": 1.6406723260879517,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.0682,
"step": 20
},
{
"epoch": 0.0077528919517597834,
"grad_norm": 1.6181585788726807,
"learning_rate": 1.0500000000000001e-06,
"loss": 1.1055,
"step": 21
},
{
"epoch": 0.008122077282795964,
"grad_norm": 1.551200270652771,
"learning_rate": 1.1e-06,
"loss": 1.0795,
"step": 22
},
{
"epoch": 0.008491262613832144,
"grad_norm": 1.3663794994354248,
"learning_rate": 1.1500000000000002e-06,
"loss": 1.0283,
"step": 23
},
{
"epoch": 0.008860447944868325,
"grad_norm": 1.3748595714569092,
"learning_rate": 1.2000000000000002e-06,
"loss": 1.0784,
"step": 24
},
{
"epoch": 0.009229633275904503,
"grad_norm": 1.2911611795425415,
"learning_rate": 1.25e-06,
"loss": 0.9854,
"step": 25
},
{
"epoch": 0.009598818606940684,
"grad_norm": 1.3047049045562744,
"learning_rate": 1.3e-06,
"loss": 1.0503,
"step": 26
},
{
"epoch": 0.009968003937976864,
"grad_norm": 1.17142653465271,
"learning_rate": 1.3500000000000002e-06,
"loss": 1.0588,
"step": 27
},
{
"epoch": 0.010337189269013045,
"grad_norm": 1.2030054330825806,
"learning_rate": 1.4000000000000001e-06,
"loss": 1.0328,
"step": 28
},
{
"epoch": 0.010706374600049225,
"grad_norm": 1.131135106086731,
"learning_rate": 1.45e-06,
"loss": 1.0273,
"step": 29
},
{
"epoch": 0.011075559931085405,
"grad_norm": 1.0866118669509888,
"learning_rate": 1.5e-06,
"loss": 0.9883,
"step": 30
},
{
"epoch": 0.011444745262121586,
"grad_norm": 1.0986360311508179,
"learning_rate": 1.5500000000000002e-06,
"loss": 1.0138,
"step": 31
},
{
"epoch": 0.011813930593157764,
"grad_norm": 0.9595009088516235,
"learning_rate": 1.6000000000000001e-06,
"loss": 0.988,
"step": 32
},
{
"epoch": 0.012183115924193945,
"grad_norm": 1.054680347442627,
"learning_rate": 1.6500000000000003e-06,
"loss": 1.0522,
"step": 33
},
{
"epoch": 0.012552301255230125,
"grad_norm": 0.9745041131973267,
"learning_rate": 1.7000000000000002e-06,
"loss": 0.9978,
"step": 34
},
{
"epoch": 0.012921486586266306,
"grad_norm": 0.9892019629478455,
"learning_rate": 1.75e-06,
"loss": 1.046,
"step": 35
},
{
"epoch": 0.013290671917302486,
"grad_norm": 0.9731583595275879,
"learning_rate": 1.8000000000000001e-06,
"loss": 0.9907,
"step": 36
},
{
"epoch": 0.013659857248338667,
"grad_norm": 0.9275212287902832,
"learning_rate": 1.85e-06,
"loss": 0.9293,
"step": 37
},
{
"epoch": 0.014029042579374847,
"grad_norm": 0.9423267245292664,
"learning_rate": 1.9000000000000002e-06,
"loss": 0.9377,
"step": 38
},
{
"epoch": 0.014398227910411026,
"grad_norm": 0.9474686980247498,
"learning_rate": 1.9500000000000004e-06,
"loss": 0.9594,
"step": 39
},
{
"epoch": 0.014767413241447206,
"grad_norm": 0.9592716693878174,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.9871,
"step": 40
},
{
"epoch": 0.015136598572483386,
"grad_norm": 0.9387710094451904,
"learning_rate": 2.05e-06,
"loss": 0.9823,
"step": 41
},
{
"epoch": 0.015505783903519567,
"grad_norm": 0.9290558695793152,
"learning_rate": 2.1000000000000002e-06,
"loss": 0.9354,
"step": 42
},
{
"epoch": 0.015874969234555746,
"grad_norm": 0.9229576587677002,
"learning_rate": 2.15e-06,
"loss": 0.9828,
"step": 43
},
{
"epoch": 0.016244154565591928,
"grad_norm": 0.9243917465209961,
"learning_rate": 2.2e-06,
"loss": 1.0032,
"step": 44
},
{
"epoch": 0.016613339896628106,
"grad_norm": 0.9277251958847046,
"learning_rate": 2.25e-06,
"loss": 0.9702,
"step": 45
},
{
"epoch": 0.01698252522766429,
"grad_norm": 0.9161118865013123,
"learning_rate": 2.3000000000000004e-06,
"loss": 0.9846,
"step": 46
},
{
"epoch": 0.017351710558700467,
"grad_norm": 0.8675879240036011,
"learning_rate": 2.35e-06,
"loss": 0.9333,
"step": 47
},
{
"epoch": 0.01772089588973665,
"grad_norm": 0.8792003393173218,
"learning_rate": 2.4000000000000003e-06,
"loss": 1.0106,
"step": 48
},
{
"epoch": 0.018090081220772828,
"grad_norm": 0.894873857498169,
"learning_rate": 2.4500000000000003e-06,
"loss": 0.9514,
"step": 49
},
{
"epoch": 0.018459266551809007,
"grad_norm": 0.8866007328033447,
"learning_rate": 2.5e-06,
"loss": 1.0057,
"step": 50
},
{
"epoch": 0.01882845188284519,
"grad_norm": 0.8863010406494141,
"learning_rate": 2.55e-06,
"loss": 0.9586,
"step": 51
},
{
"epoch": 0.019197637213881368,
"grad_norm": 0.912958025932312,
"learning_rate": 2.6e-06,
"loss": 0.9121,
"step": 52
},
{
"epoch": 0.01956682254491755,
"grad_norm": 0.8827121257781982,
"learning_rate": 2.6500000000000005e-06,
"loss": 0.9517,
"step": 53
},
{
"epoch": 0.01993600787595373,
"grad_norm": 0.8558551669120789,
"learning_rate": 2.7000000000000004e-06,
"loss": 0.9365,
"step": 54
},
{
"epoch": 0.02030519320698991,
"grad_norm": 0.8515662550926208,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.9204,
"step": 55
},
{
"epoch": 0.02067437853802609,
"grad_norm": 0.844958484172821,
"learning_rate": 2.8000000000000003e-06,
"loss": 0.9801,
"step": 56
},
{
"epoch": 0.021043563869062268,
"grad_norm": 0.8569675087928772,
"learning_rate": 2.85e-06,
"loss": 0.9336,
"step": 57
},
{
"epoch": 0.02141274920009845,
"grad_norm": 0.8839316964149475,
"learning_rate": 2.9e-06,
"loss": 0.9296,
"step": 58
},
{
"epoch": 0.02178193453113463,
"grad_norm": 1.0445549488067627,
"learning_rate": 2.95e-06,
"loss": 0.9134,
"step": 59
},
{
"epoch": 0.02215111986217081,
"grad_norm": 0.8282931447029114,
"learning_rate": 3e-06,
"loss": 0.9421,
"step": 60
},
{
"epoch": 0.02252030519320699,
"grad_norm": 0.8246078491210938,
"learning_rate": 3.05e-06,
"loss": 0.8743,
"step": 61
},
{
"epoch": 0.02288949052424317,
"grad_norm": 0.8587180376052856,
"learning_rate": 3.1000000000000004e-06,
"loss": 0.9303,
"step": 62
},
{
"epoch": 0.02325867585527935,
"grad_norm": 0.8575277924537659,
"learning_rate": 3.1500000000000003e-06,
"loss": 0.9503,
"step": 63
},
{
"epoch": 0.02362786118631553,
"grad_norm": 0.8518301844596863,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.9225,
"step": 64
},
{
"epoch": 0.02399704651735171,
"grad_norm": 0.8324997425079346,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.9158,
"step": 65
},
{
"epoch": 0.02436623184838789,
"grad_norm": 0.8463263511657715,
"learning_rate": 3.3000000000000006e-06,
"loss": 0.9311,
"step": 66
},
{
"epoch": 0.024735417179424072,
"grad_norm": 0.8261412978172302,
"learning_rate": 3.3500000000000005e-06,
"loss": 0.9121,
"step": 67
},
{
"epoch": 0.02510460251046025,
"grad_norm": 0.8466128706932068,
"learning_rate": 3.4000000000000005e-06,
"loss": 0.9587,
"step": 68
},
{
"epoch": 0.025473787841496433,
"grad_norm": 0.8264670372009277,
"learning_rate": 3.45e-06,
"loss": 0.9301,
"step": 69
},
{
"epoch": 0.02584297317253261,
"grad_norm": 0.8121640086174011,
"learning_rate": 3.5e-06,
"loss": 0.9116,
"step": 70
},
{
"epoch": 0.02621215850356879,
"grad_norm": 0.8689830303192139,
"learning_rate": 3.5500000000000003e-06,
"loss": 0.9224,
"step": 71
},
{
"epoch": 0.026581343834604972,
"grad_norm": 0.8256193399429321,
"learning_rate": 3.6000000000000003e-06,
"loss": 0.8696,
"step": 72
},
{
"epoch": 0.02695052916564115,
"grad_norm": 0.8338184952735901,
"learning_rate": 3.65e-06,
"loss": 0.934,
"step": 73
},
{
"epoch": 0.027319714496677333,
"grad_norm": 0.8366256356239319,
"learning_rate": 3.7e-06,
"loss": 0.9109,
"step": 74
},
{
"epoch": 0.027688899827713512,
"grad_norm": 0.7981867790222168,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.8785,
"step": 75
},
{
"epoch": 0.028058085158749694,
"grad_norm": 0.8150340914726257,
"learning_rate": 3.8000000000000005e-06,
"loss": 0.9343,
"step": 76
},
{
"epoch": 0.028427270489785873,
"grad_norm": 0.820603609085083,
"learning_rate": 3.85e-06,
"loss": 0.9195,
"step": 77
},
{
"epoch": 0.02879645582082205,
"grad_norm": 0.8418338894844055,
"learning_rate": 3.900000000000001e-06,
"loss": 0.9318,
"step": 78
},
{
"epoch": 0.029165641151858233,
"grad_norm": 0.8316344022750854,
"learning_rate": 3.95e-06,
"loss": 0.9175,
"step": 79
},
{
"epoch": 0.029534826482894412,
"grad_norm": 0.8164108991622925,
"learning_rate": 4.000000000000001e-06,
"loss": 0.914,
"step": 80
},
{
"epoch": 0.029904011813930594,
"grad_norm": 0.7849715948104858,
"learning_rate": 4.05e-06,
"loss": 0.8593,
"step": 81
},
{
"epoch": 0.030273197144966773,
"grad_norm": 0.859346866607666,
"learning_rate": 4.1e-06,
"loss": 0.8864,
"step": 82
},
{
"epoch": 0.030642382476002955,
"grad_norm": 0.8121856451034546,
"learning_rate": 4.15e-06,
"loss": 0.8686,
"step": 83
},
{
"epoch": 0.031011567807039134,
"grad_norm": 0.8445794582366943,
"learning_rate": 4.2000000000000004e-06,
"loss": 0.9037,
"step": 84
},
{
"epoch": 0.03138075313807531,
"grad_norm": 0.8285534381866455,
"learning_rate": 4.25e-06,
"loss": 0.9309,
"step": 85
},
{
"epoch": 0.03174993846911149,
"grad_norm": 0.794826090335846,
"learning_rate": 4.3e-06,
"loss": 0.8836,
"step": 86
},
{
"epoch": 0.03211912380014768,
"grad_norm": 0.853547990322113,
"learning_rate": 4.350000000000001e-06,
"loss": 0.8828,
"step": 87
},
{
"epoch": 0.032488309131183855,
"grad_norm": 0.8319276571273804,
"learning_rate": 4.4e-06,
"loss": 0.8678,
"step": 88
},
{
"epoch": 0.032857494462220034,
"grad_norm": 0.8368034958839417,
"learning_rate": 4.450000000000001e-06,
"loss": 0.9213,
"step": 89
},
{
"epoch": 0.03322667979325621,
"grad_norm": 0.7604875564575195,
"learning_rate": 4.5e-06,
"loss": 0.8462,
"step": 90
},
{
"epoch": 0.03359586512429239,
"grad_norm": 0.8023838400840759,
"learning_rate": 4.5500000000000005e-06,
"loss": 0.8957,
"step": 91
},
{
"epoch": 0.03396505045532858,
"grad_norm": 0.8296010494232178,
"learning_rate": 4.600000000000001e-06,
"loss": 0.8578,
"step": 92
},
{
"epoch": 0.034334235786364756,
"grad_norm": 0.802651047706604,
"learning_rate": 4.65e-06,
"loss": 0.8946,
"step": 93
},
{
"epoch": 0.034703421117400934,
"grad_norm": 0.8469492793083191,
"learning_rate": 4.7e-06,
"loss": 0.8709,
"step": 94
},
{
"epoch": 0.03507260644843711,
"grad_norm": 0.8248879313468933,
"learning_rate": 4.75e-06,
"loss": 0.9127,
"step": 95
},
{
"epoch": 0.0354417917794733,
"grad_norm": 0.8265485167503357,
"learning_rate": 4.800000000000001e-06,
"loss": 0.9046,
"step": 96
},
{
"epoch": 0.03581097711050948,
"grad_norm": 0.9016802310943604,
"learning_rate": 4.85e-06,
"loss": 0.8448,
"step": 97
},
{
"epoch": 0.036180162441545656,
"grad_norm": 0.8030735850334167,
"learning_rate": 4.9000000000000005e-06,
"loss": 0.8666,
"step": 98
},
{
"epoch": 0.036549347772581835,
"grad_norm": 0.8260457515716553,
"learning_rate": 4.95e-06,
"loss": 0.87,
"step": 99
},
{
"epoch": 0.03691853310361801,
"grad_norm": 0.8534119725227356,
"learning_rate": 5e-06,
"loss": 0.8781,
"step": 100
},
{
"epoch": 0.0372877184346542,
"grad_norm": 0.820518970489502,
"learning_rate": 4.999999952687895e-06,
"loss": 0.8719,
"step": 101
},
{
"epoch": 0.03765690376569038,
"grad_norm": 0.8288585543632507,
"learning_rate": 4.99999981075158e-06,
"loss": 0.8646,
"step": 102
},
{
"epoch": 0.038026089096726556,
"grad_norm": 0.8551903963088989,
"learning_rate": 4.999999574191062e-06,
"loss": 0.8824,
"step": 103
},
{
"epoch": 0.038395274427762735,
"grad_norm": 0.8003144860267639,
"learning_rate": 4.999999243006348e-06,
"loss": 0.8534,
"step": 104
},
{
"epoch": 0.038764459758798914,
"grad_norm": 0.8019240498542786,
"learning_rate": 4.9999988171974525e-06,
"loss": 0.8696,
"step": 105
},
{
"epoch": 0.0391336450898351,
"grad_norm": 0.8382614254951477,
"learning_rate": 4.999998296764391e-06,
"loss": 0.8746,
"step": 106
},
{
"epoch": 0.03950283042087128,
"grad_norm": 0.8664233088493347,
"learning_rate": 4.999997681707182e-06,
"loss": 0.9053,
"step": 107
},
{
"epoch": 0.03987201575190746,
"grad_norm": 0.8756380677223206,
"learning_rate": 4.99999697202585e-06,
"loss": 0.8714,
"step": 108
},
{
"epoch": 0.040241201082943635,
"grad_norm": 1.0035847425460815,
"learning_rate": 4.9999961677204224e-06,
"loss": 0.8111,
"step": 109
},
{
"epoch": 0.04061038641397982,
"grad_norm": 0.8640075922012329,
"learning_rate": 4.999995268790928e-06,
"loss": 0.9003,
"step": 110
},
{
"epoch": 0.040979571745016,
"grad_norm": 0.8350078463554382,
"learning_rate": 4.999994275237402e-06,
"loss": 0.8816,
"step": 111
},
{
"epoch": 0.04134875707605218,
"grad_norm": 0.8058568835258484,
"learning_rate": 4.999993187059882e-06,
"loss": 0.8243,
"step": 112
},
{
"epoch": 0.04171794240708836,
"grad_norm": 0.8728750348091125,
"learning_rate": 4.999992004258409e-06,
"loss": 0.8457,
"step": 113
},
{
"epoch": 0.042087127738124536,
"grad_norm": 0.8918057084083557,
"learning_rate": 4.999990726833027e-06,
"loss": 0.8412,
"step": 114
},
{
"epoch": 0.04245631306916072,
"grad_norm": 0.8393918871879578,
"learning_rate": 4.9999893547837855e-06,
"loss": 0.8806,
"step": 115
},
{
"epoch": 0.0428254984001969,
"grad_norm": 0.8215784430503845,
"learning_rate": 4.999987888110736e-06,
"loss": 0.8957,
"step": 116
},
{
"epoch": 0.04319468373123308,
"grad_norm": 0.8378515243530273,
"learning_rate": 4.999986326813933e-06,
"loss": 0.816,
"step": 117
},
{
"epoch": 0.04356386906226926,
"grad_norm": 0.8467821478843689,
"learning_rate": 4.999984670893438e-06,
"loss": 0.8879,
"step": 118
},
{
"epoch": 0.043933054393305436,
"grad_norm": 0.8451763391494751,
"learning_rate": 4.999982920349311e-06,
"loss": 0.8752,
"step": 119
},
{
"epoch": 0.04430223972434162,
"grad_norm": 0.8014891743659973,
"learning_rate": 4.99998107518162e-06,
"loss": 0.8279,
"step": 120
},
{
"epoch": 0.0446714250553778,
"grad_norm": 0.8478080630302429,
"learning_rate": 4.999979135390434e-06,
"loss": 0.9071,
"step": 121
},
{
"epoch": 0.04504061038641398,
"grad_norm": 0.8662890791893005,
"learning_rate": 4.999977100975827e-06,
"loss": 0.8752,
"step": 122
},
{
"epoch": 0.04540979571745016,
"grad_norm": 0.8697710633277893,
"learning_rate": 4.999974971937875e-06,
"loss": 0.8386,
"step": 123
},
{
"epoch": 0.04577898104848634,
"grad_norm": 0.8541619181632996,
"learning_rate": 4.99997274827666e-06,
"loss": 0.9063,
"step": 124
},
{
"epoch": 0.04614816637952252,
"grad_norm": 0.8500178456306458,
"learning_rate": 4.999970429992266e-06,
"loss": 0.8862,
"step": 125
},
{
"epoch": 0.0465173517105587,
"grad_norm": 0.8678516149520874,
"learning_rate": 4.9999680170847794e-06,
"loss": 0.8178,
"step": 126
},
{
"epoch": 0.04688653704159488,
"grad_norm": 0.867600679397583,
"learning_rate": 4.999965509554293e-06,
"loss": 0.9158,
"step": 127
},
{
"epoch": 0.04725572237263106,
"grad_norm": 0.8475435376167297,
"learning_rate": 4.9999629074009005e-06,
"loss": 0.8593,
"step": 128
},
{
"epoch": 0.047624907703667244,
"grad_norm": 0.8186939358711243,
"learning_rate": 4.999960210624701e-06,
"loss": 0.8621,
"step": 129
},
{
"epoch": 0.04799409303470342,
"grad_norm": 0.9259890913963318,
"learning_rate": 4.999957419225797e-06,
"loss": 0.8642,
"step": 130
},
{
"epoch": 0.0483632783657396,
"grad_norm": 0.8416559100151062,
"learning_rate": 4.999954533204293e-06,
"loss": 0.8799,
"step": 131
},
{
"epoch": 0.04873246369677578,
"grad_norm": 0.8305281400680542,
"learning_rate": 4.9999515525603e-06,
"loss": 0.8314,
"step": 132
},
{
"epoch": 0.04910164902781196,
"grad_norm": 0.8432246446609497,
"learning_rate": 4.999948477293929e-06,
"loss": 0.8687,
"step": 133
},
{
"epoch": 0.049470834358848144,
"grad_norm": 0.8665896058082581,
"learning_rate": 4.999945307405297e-06,
"loss": 0.861,
"step": 134
},
{
"epoch": 0.04984001968988432,
"grad_norm": 0.8684259057044983,
"learning_rate": 4.9999420428945236e-06,
"loss": 0.9069,
"step": 135
},
{
"epoch": 0.0502092050209205,
"grad_norm": 0.8950573801994324,
"learning_rate": 4.999938683761733e-06,
"loss": 0.8553,
"step": 136
},
{
"epoch": 0.05057839035195668,
"grad_norm": 0.8906726837158203,
"learning_rate": 4.9999352300070535e-06,
"loss": 0.865,
"step": 137
},
{
"epoch": 0.050947575682992866,
"grad_norm": 0.9318857789039612,
"learning_rate": 4.999931681630614e-06,
"loss": 0.8697,
"step": 138
},
{
"epoch": 0.051316761014029044,
"grad_norm": 0.8434486389160156,
"learning_rate": 4.999928038632549e-06,
"loss": 0.8665,
"step": 139
},
{
"epoch": 0.05168594634506522,
"grad_norm": 0.8642570376396179,
"learning_rate": 4.999924301012997e-06,
"loss": 0.8999,
"step": 140
},
{
"epoch": 0.0520551316761014,
"grad_norm": 0.8331663012504578,
"learning_rate": 4.999920468772099e-06,
"loss": 0.8458,
"step": 141
},
{
"epoch": 0.05242431700713758,
"grad_norm": 0.949670135974884,
"learning_rate": 4.9999165419100005e-06,
"loss": 0.8552,
"step": 142
},
{
"epoch": 0.052793502338173766,
"grad_norm": 0.8622894287109375,
"learning_rate": 4.999912520426849e-06,
"loss": 0.867,
"step": 143
},
{
"epoch": 0.053162687669209945,
"grad_norm": 0.8427858948707581,
"learning_rate": 4.999908404322799e-06,
"loss": 0.8842,
"step": 144
},
{
"epoch": 0.05353187300024612,
"grad_norm": 0.8718158006668091,
"learning_rate": 4.999904193598003e-06,
"loss": 0.8591,
"step": 145
},
{
"epoch": 0.0539010583312823,
"grad_norm": 0.8681777715682983,
"learning_rate": 4.999899888252624e-06,
"loss": 0.8984,
"step": 146
},
{
"epoch": 0.05427024366231848,
"grad_norm": 0.8601359128952026,
"learning_rate": 4.999895488286822e-06,
"loss": 0.8386,
"step": 147
},
{
"epoch": 0.054639428993354666,
"grad_norm": 0.8220537304878235,
"learning_rate": 4.999890993700766e-06,
"loss": 0.8362,
"step": 148
},
{
"epoch": 0.055008614324390845,
"grad_norm": 0.8281165361404419,
"learning_rate": 4.999886404494624e-06,
"loss": 0.8587,
"step": 149
},
{
"epoch": 0.055377799655427024,
"grad_norm": 0.8356446623802185,
"learning_rate": 4.999881720668571e-06,
"loss": 0.8988,
"step": 150
},
{
"epoch": 0.0557469849864632,
"grad_norm": 0.8825479745864868,
"learning_rate": 4.999876942222783e-06,
"loss": 0.8419,
"step": 151
},
{
"epoch": 0.05611617031749939,
"grad_norm": 0.8695886135101318,
"learning_rate": 4.999872069157443e-06,
"loss": 0.852,
"step": 152
},
{
"epoch": 0.056485355648535567,
"grad_norm": 0.8561027646064758,
"learning_rate": 4.999867101472733e-06,
"loss": 0.806,
"step": 153
},
{
"epoch": 0.056854540979571745,
"grad_norm": 0.8631170988082886,
"learning_rate": 4.999862039168843e-06,
"loss": 0.8227,
"step": 154
},
{
"epoch": 0.057223726310607924,
"grad_norm": 0.8831436634063721,
"learning_rate": 4.999856882245963e-06,
"loss": 0.8584,
"step": 155
},
{
"epoch": 0.0575929116416441,
"grad_norm": 0.8398553133010864,
"learning_rate": 4.9998516307042895e-06,
"loss": 0.8675,
"step": 156
},
{
"epoch": 0.05796209697268029,
"grad_norm": 0.9215529561042786,
"learning_rate": 4.999846284544021e-06,
"loss": 0.844,
"step": 157
},
{
"epoch": 0.05833128230371647,
"grad_norm": 0.8526574373245239,
"learning_rate": 4.999840843765359e-06,
"loss": 0.825,
"step": 158
},
{
"epoch": 0.058700467634752646,
"grad_norm": 0.8621156811714172,
"learning_rate": 4.99983530836851e-06,
"loss": 0.8762,
"step": 159
},
{
"epoch": 0.059069652965788824,
"grad_norm": 0.86864173412323,
"learning_rate": 4.999829678353684e-06,
"loss": 0.8886,
"step": 160
},
{
"epoch": 0.059438838296825,
"grad_norm": 0.8976534008979797,
"learning_rate": 4.9998239537210935e-06,
"loss": 0.8452,
"step": 161
},
{
"epoch": 0.05980802362786119,
"grad_norm": 0.8699798583984375,
"learning_rate": 4.999818134470955e-06,
"loss": 0.8482,
"step": 162
},
{
"epoch": 0.06017720895889737,
"grad_norm": 0.8608055710792542,
"learning_rate": 4.99981222060349e-06,
"loss": 0.8432,
"step": 163
},
{
"epoch": 0.060546394289933546,
"grad_norm": 0.8606122732162476,
"learning_rate": 4.999806212118921e-06,
"loss": 0.8591,
"step": 164
},
{
"epoch": 0.060915579620969725,
"grad_norm": 0.8934593200683594,
"learning_rate": 4.9998001090174745e-06,
"loss": 0.8672,
"step": 165
},
{
"epoch": 0.06128476495200591,
"grad_norm": 0.8507917523384094,
"learning_rate": 4.999793911299384e-06,
"loss": 0.8604,
"step": 166
},
{
"epoch": 0.06165395028304209,
"grad_norm": 0.8685324192047119,
"learning_rate": 4.999787618964883e-06,
"loss": 0.8796,
"step": 167
},
{
"epoch": 0.06202313561407827,
"grad_norm": 0.9141797423362732,
"learning_rate": 4.9997812320142095e-06,
"loss": 0.8473,
"step": 168
},
{
"epoch": 0.062392320945114446,
"grad_norm": 1.0198613405227661,
"learning_rate": 4.9997747504476045e-06,
"loss": 0.8412,
"step": 169
},
{
"epoch": 0.06276150627615062,
"grad_norm": 0.8494629263877869,
"learning_rate": 4.999768174265315e-06,
"loss": 0.8515,
"step": 170
},
{
"epoch": 0.0631306916071868,
"grad_norm": 0.8553645610809326,
"learning_rate": 4.999761503467589e-06,
"loss": 0.8504,
"step": 171
},
{
"epoch": 0.06349987693822298,
"grad_norm": 0.846227765083313,
"learning_rate": 4.999754738054678e-06,
"loss": 0.837,
"step": 172
},
{
"epoch": 0.06386906226925917,
"grad_norm": 0.8288367986679077,
"learning_rate": 4.999747878026841e-06,
"loss": 0.8366,
"step": 173
},
{
"epoch": 0.06423824760029535,
"grad_norm": 0.8543452620506287,
"learning_rate": 4.9997409233843345e-06,
"loss": 0.8545,
"step": 174
},
{
"epoch": 0.06460743293133153,
"grad_norm": 0.8958789110183716,
"learning_rate": 4.999733874127423e-06,
"loss": 0.8324,
"step": 175
},
{
"epoch": 0.06497661826236771,
"grad_norm": 0.8512812852859497,
"learning_rate": 4.999726730256373e-06,
"loss": 0.8172,
"step": 176
},
{
"epoch": 0.06534580359340389,
"grad_norm": 0.8509172797203064,
"learning_rate": 4.999719491771457e-06,
"loss": 0.8648,
"step": 177
},
{
"epoch": 0.06571498892444007,
"grad_norm": 0.8630576133728027,
"learning_rate": 4.999712158672945e-06,
"loss": 0.8518,
"step": 178
},
{
"epoch": 0.06608417425547625,
"grad_norm": 0.8487321138381958,
"learning_rate": 4.999704730961118e-06,
"loss": 0.8454,
"step": 179
},
{
"epoch": 0.06645335958651243,
"grad_norm": 0.8370772004127502,
"learning_rate": 4.999697208636255e-06,
"loss": 0.7777,
"step": 180
},
{
"epoch": 0.0668225449175486,
"grad_norm": 0.867073655128479,
"learning_rate": 4.999689591698642e-06,
"loss": 0.8204,
"step": 181
},
{
"epoch": 0.06719173024858478,
"grad_norm": 0.8590624332427979,
"learning_rate": 4.999681880148567e-06,
"loss": 0.885,
"step": 182
},
{
"epoch": 0.06756091557962098,
"grad_norm": 0.8786302804946899,
"learning_rate": 4.999674073986322e-06,
"loss": 0.8107,
"step": 183
},
{
"epoch": 0.06793010091065715,
"grad_norm": 0.8392751216888428,
"learning_rate": 4.999666173212201e-06,
"loss": 0.8198,
"step": 184
},
{
"epoch": 0.06829928624169333,
"grad_norm": 0.8693823218345642,
"learning_rate": 4.999658177826505e-06,
"loss": 0.8278,
"step": 185
},
{
"epoch": 0.06866847157272951,
"grad_norm": 0.9009088277816772,
"learning_rate": 4.999650087829536e-06,
"loss": 0.8554,
"step": 186
},
{
"epoch": 0.06903765690376569,
"grad_norm": 0.8450184464454651,
"learning_rate": 4.9996419032216e-06,
"loss": 0.7952,
"step": 187
},
{
"epoch": 0.06940684223480187,
"grad_norm": 0.851325511932373,
"learning_rate": 4.9996336240030065e-06,
"loss": 0.866,
"step": 188
},
{
"epoch": 0.06977602756583805,
"grad_norm": 0.846808135509491,
"learning_rate": 4.99962525017407e-06,
"loss": 0.8526,
"step": 189
},
{
"epoch": 0.07014521289687423,
"grad_norm": 0.8484999537467957,
"learning_rate": 4.999616781735106e-06,
"loss": 0.8134,
"step": 190
},
{
"epoch": 0.0705143982279104,
"grad_norm": 0.8739628195762634,
"learning_rate": 4.999608218686436e-06,
"loss": 0.7972,
"step": 191
},
{
"epoch": 0.0708835835589466,
"grad_norm": 0.8760291934013367,
"learning_rate": 4.999599561028384e-06,
"loss": 0.846,
"step": 192
},
{
"epoch": 0.07125276888998278,
"grad_norm": 0.8653873801231384,
"learning_rate": 4.999590808761277e-06,
"loss": 0.8571,
"step": 193
},
{
"epoch": 0.07162195422101895,
"grad_norm": 0.89369797706604,
"learning_rate": 4.999581961885447e-06,
"loss": 0.8503,
"step": 194
},
{
"epoch": 0.07199113955205513,
"grad_norm": 0.8840173482894897,
"learning_rate": 4.999573020401229e-06,
"loss": 0.8419,
"step": 195
},
{
"epoch": 0.07236032488309131,
"grad_norm": 0.8561863899230957,
"learning_rate": 4.9995639843089605e-06,
"loss": 0.7862,
"step": 196
},
{
"epoch": 0.07272951021412749,
"grad_norm": 0.8365263342857361,
"learning_rate": 4.9995548536089845e-06,
"loss": 0.8587,
"step": 197
},
{
"epoch": 0.07309869554516367,
"grad_norm": 0.9029537439346313,
"learning_rate": 4.9995456283016455e-06,
"loss": 0.8483,
"step": 198
},
{
"epoch": 0.07346788087619985,
"grad_norm": 0.8581278324127197,
"learning_rate": 4.999536308387294e-06,
"loss": 0.847,
"step": 199
},
{
"epoch": 0.07383706620723603,
"grad_norm": 0.8499622344970703,
"learning_rate": 4.999526893866282e-06,
"loss": 0.8161,
"step": 200
},
{
"epoch": 0.07420625153827222,
"grad_norm": 0.8650686740875244,
"learning_rate": 4.999517384738966e-06,
"loss": 0.8218,
"step": 201
},
{
"epoch": 0.0745754368693084,
"grad_norm": 0.9009180665016174,
"learning_rate": 4.999507781005705e-06,
"loss": 0.8505,
"step": 202
},
{
"epoch": 0.07494462220034458,
"grad_norm": 0.897419273853302,
"learning_rate": 4.9994980826668646e-06,
"loss": 0.7851,
"step": 203
},
{
"epoch": 0.07531380753138076,
"grad_norm": 0.8872283697128296,
"learning_rate": 4.99948828972281e-06,
"loss": 0.789,
"step": 204
},
{
"epoch": 0.07568299286241693,
"grad_norm": 0.8454247713088989,
"learning_rate": 4.9994784021739115e-06,
"loss": 0.8373,
"step": 205
},
{
"epoch": 0.07605217819345311,
"grad_norm": 0.869134247303009,
"learning_rate": 4.999468420020546e-06,
"loss": 0.837,
"step": 206
},
{
"epoch": 0.07642136352448929,
"grad_norm": 0.8689702153205872,
"learning_rate": 4.999458343263089e-06,
"loss": 0.8106,
"step": 207
},
{
"epoch": 0.07679054885552547,
"grad_norm": 0.8920548558235168,
"learning_rate": 4.999448171901923e-06,
"loss": 0.8608,
"step": 208
},
{
"epoch": 0.07715973418656165,
"grad_norm": 0.8637383580207825,
"learning_rate": 4.999437905937431e-06,
"loss": 0.8396,
"step": 209
},
{
"epoch": 0.07752891951759783,
"grad_norm": 0.9378350973129272,
"learning_rate": 4.9994275453700045e-06,
"loss": 0.8381,
"step": 210
},
{
"epoch": 0.07789810484863402,
"grad_norm": 0.8814995288848877,
"learning_rate": 4.9994170902000335e-06,
"loss": 0.8495,
"step": 211
},
{
"epoch": 0.0782672901796702,
"grad_norm": 0.8853291869163513,
"learning_rate": 4.9994065404279155e-06,
"loss": 0.8429,
"step": 212
},
{
"epoch": 0.07863647551070638,
"grad_norm": 0.8535809516906738,
"learning_rate": 4.999395896054048e-06,
"loss": 0.8198,
"step": 213
},
{
"epoch": 0.07900566084174256,
"grad_norm": 0.889284074306488,
"learning_rate": 4.999385157078835e-06,
"loss": 0.8836,
"step": 214
},
{
"epoch": 0.07937484617277873,
"grad_norm": 0.8783283829689026,
"learning_rate": 4.999374323502683e-06,
"loss": 0.8611,
"step": 215
},
{
"epoch": 0.07974403150381491,
"grad_norm": 0.851722240447998,
"learning_rate": 4.999363395326e-06,
"loss": 0.828,
"step": 216
},
{
"epoch": 0.08011321683485109,
"grad_norm": 0.8967769145965576,
"learning_rate": 4.999352372549203e-06,
"loss": 0.7991,
"step": 217
},
{
"epoch": 0.08048240216588727,
"grad_norm": 0.9296314716339111,
"learning_rate": 4.999341255172707e-06,
"loss": 0.893,
"step": 218
},
{
"epoch": 0.08085158749692345,
"grad_norm": 0.9072420001029968,
"learning_rate": 4.999330043196933e-06,
"loss": 0.8014,
"step": 219
},
{
"epoch": 0.08122077282795964,
"grad_norm": 0.9063705205917358,
"learning_rate": 4.999318736622306e-06,
"loss": 0.8206,
"step": 220
},
{
"epoch": 0.08158995815899582,
"grad_norm": 0.8354766964912415,
"learning_rate": 4.9993073354492525e-06,
"loss": 0.8148,
"step": 221
},
{
"epoch": 0.081959143490032,
"grad_norm": 0.878901481628418,
"learning_rate": 4.999295839678206e-06,
"loss": 0.8758,
"step": 222
},
{
"epoch": 0.08232832882106818,
"grad_norm": 0.8754085302352905,
"learning_rate": 4.999284249309602e-06,
"loss": 0.8447,
"step": 223
},
{
"epoch": 0.08269751415210436,
"grad_norm": 0.8729782104492188,
"learning_rate": 4.9992725643438765e-06,
"loss": 0.7938,
"step": 224
},
{
"epoch": 0.08306669948314054,
"grad_norm": 0.8772115111351013,
"learning_rate": 4.999260784781473e-06,
"loss": 0.8683,
"step": 225
},
{
"epoch": 0.08343588481417671,
"grad_norm": 0.8647124767303467,
"learning_rate": 4.999248910622838e-06,
"loss": 0.8111,
"step": 226
},
{
"epoch": 0.08380507014521289,
"grad_norm": 0.85676509141922,
"learning_rate": 4.999236941868421e-06,
"loss": 0.8005,
"step": 227
},
{
"epoch": 0.08417425547624907,
"grad_norm": 0.8511557579040527,
"learning_rate": 4.999224878518674e-06,
"loss": 0.8074,
"step": 228
},
{
"epoch": 0.08454344080728526,
"grad_norm": 0.8975043892860413,
"learning_rate": 4.9992127205740545e-06,
"loss": 0.8327,
"step": 229
},
{
"epoch": 0.08491262613832144,
"grad_norm": 0.863868772983551,
"learning_rate": 4.999200468035021e-06,
"loss": 0.8226,
"step": 230
},
{
"epoch": 0.08528181146935762,
"grad_norm": 0.8852335214614868,
"learning_rate": 4.9991881209020406e-06,
"loss": 0.8353,
"step": 231
},
{
"epoch": 0.0856509968003938,
"grad_norm": 0.8811020851135254,
"learning_rate": 4.999175679175577e-06,
"loss": 0.8028,
"step": 232
},
{
"epoch": 0.08602018213142998,
"grad_norm": 0.8707005381584167,
"learning_rate": 4.999163142856104e-06,
"loss": 0.7983,
"step": 233
},
{
"epoch": 0.08638936746246616,
"grad_norm": 0.857227087020874,
"learning_rate": 4.999150511944094e-06,
"loss": 0.7777,
"step": 234
},
{
"epoch": 0.08675855279350234,
"grad_norm": 0.8872169256210327,
"learning_rate": 4.999137786440026e-06,
"loss": 0.8692,
"step": 235
},
{
"epoch": 0.08712773812453851,
"grad_norm": 0.8813910484313965,
"learning_rate": 4.999124966344381e-06,
"loss": 0.8101,
"step": 236
},
{
"epoch": 0.0874969234555747,
"grad_norm": 0.8994487524032593,
"learning_rate": 4.999112051657646e-06,
"loss": 0.8646,
"step": 237
},
{
"epoch": 0.08786610878661087,
"grad_norm": 0.9409844875335693,
"learning_rate": 4.999099042380307e-06,
"loss": 0.8218,
"step": 238
},
{
"epoch": 0.08823529411764706,
"grad_norm": 0.8659898042678833,
"learning_rate": 4.999085938512859e-06,
"loss": 0.8347,
"step": 239
},
{
"epoch": 0.08860447944868324,
"grad_norm": 0.8241569995880127,
"learning_rate": 4.9990727400557965e-06,
"loss": 0.8112,
"step": 240
},
{
"epoch": 0.08897366477971942,
"grad_norm": 0.8582605123519897,
"learning_rate": 4.99905944700962e-06,
"loss": 0.8115,
"step": 241
},
{
"epoch": 0.0893428501107556,
"grad_norm": 0.8941754698753357,
"learning_rate": 4.999046059374831e-06,
"loss": 0.8304,
"step": 242
},
{
"epoch": 0.08971203544179178,
"grad_norm": 0.8905880451202393,
"learning_rate": 4.999032577151939e-06,
"loss": 0.8168,
"step": 243
},
{
"epoch": 0.09008122077282796,
"grad_norm": 0.8816720247268677,
"learning_rate": 4.999019000341452e-06,
"loss": 0.8422,
"step": 244
},
{
"epoch": 0.09045040610386414,
"grad_norm": 1.047232747077942,
"learning_rate": 4.999005328943884e-06,
"loss": 0.8136,
"step": 245
},
{
"epoch": 0.09081959143490032,
"grad_norm": 0.9141537547111511,
"learning_rate": 4.998991562959753e-06,
"loss": 0.8415,
"step": 246
},
{
"epoch": 0.0911887767659365,
"grad_norm": 0.8919954895973206,
"learning_rate": 4.998977702389581e-06,
"loss": 0.8224,
"step": 247
},
{
"epoch": 0.09155796209697269,
"grad_norm": 0.8710380792617798,
"learning_rate": 4.998963747233891e-06,
"loss": 0.7944,
"step": 248
},
{
"epoch": 0.09192714742800887,
"grad_norm": 0.8452226519584656,
"learning_rate": 4.998949697493212e-06,
"loss": 0.8128,
"step": 249
},
{
"epoch": 0.09229633275904504,
"grad_norm": 0.8582141399383545,
"learning_rate": 4.998935553168075e-06,
"loss": 0.8467,
"step": 250
},
{
"epoch": 0.09266551809008122,
"grad_norm": 0.8774867057800293,
"learning_rate": 4.998921314259017e-06,
"loss": 0.8261,
"step": 251
},
{
"epoch": 0.0930347034211174,
"grad_norm": 0.879334568977356,
"learning_rate": 4.998906980766576e-06,
"loss": 0.8346,
"step": 252
},
{
"epoch": 0.09340388875215358,
"grad_norm": 0.886013925075531,
"learning_rate": 4.998892552691294e-06,
"loss": 0.8562,
"step": 253
},
{
"epoch": 0.09377307408318976,
"grad_norm": 0.8755276203155518,
"learning_rate": 4.998878030033717e-06,
"loss": 0.8005,
"step": 254
},
{
"epoch": 0.09414225941422594,
"grad_norm": 0.9009412527084351,
"learning_rate": 4.998863412794396e-06,
"loss": 0.8057,
"step": 255
},
{
"epoch": 0.09451144474526212,
"grad_norm": 0.9637260437011719,
"learning_rate": 4.998848700973883e-06,
"loss": 0.8285,
"step": 256
},
{
"epoch": 0.09488063007629831,
"grad_norm": 0.8925495743751526,
"learning_rate": 4.9988338945727355e-06,
"loss": 0.8466,
"step": 257
},
{
"epoch": 0.09524981540733449,
"grad_norm": 0.88019198179245,
"learning_rate": 4.998818993591513e-06,
"loss": 0.828,
"step": 258
},
{
"epoch": 0.09561900073837067,
"grad_norm": 0.8707719445228577,
"learning_rate": 4.998803998030781e-06,
"loss": 0.7912,
"step": 259
},
{
"epoch": 0.09598818606940684,
"grad_norm": 0.9469668865203857,
"learning_rate": 4.998788907891107e-06,
"loss": 0.8255,
"step": 260
},
{
"epoch": 0.09635737140044302,
"grad_norm": 0.8590503931045532,
"learning_rate": 4.998773723173061e-06,
"loss": 0.834,
"step": 261
},
{
"epoch": 0.0967265567314792,
"grad_norm": 0.8726522922515869,
"learning_rate": 4.998758443877217e-06,
"loss": 0.8434,
"step": 262
},
{
"epoch": 0.09709574206251538,
"grad_norm": 0.8915356397628784,
"learning_rate": 4.998743070004156e-06,
"loss": 0.8455,
"step": 263
},
{
"epoch": 0.09746492739355156,
"grad_norm": 0.8678056597709656,
"learning_rate": 4.998727601554458e-06,
"loss": 0.7851,
"step": 264
},
{
"epoch": 0.09783411272458774,
"grad_norm": 0.8804232478141785,
"learning_rate": 4.998712038528709e-06,
"loss": 0.8163,
"step": 265
},
{
"epoch": 0.09820329805562392,
"grad_norm": 0.8934099674224854,
"learning_rate": 4.998696380927497e-06,
"loss": 0.8479,
"step": 266
},
{
"epoch": 0.09857248338666011,
"grad_norm": 0.9582729339599609,
"learning_rate": 4.998680628751417e-06,
"loss": 0.8351,
"step": 267
},
{
"epoch": 0.09894166871769629,
"grad_norm": 0.8772808909416199,
"learning_rate": 4.998664782001063e-06,
"loss": 0.8135,
"step": 268
},
{
"epoch": 0.09931085404873247,
"grad_norm": 0.8562557101249695,
"learning_rate": 4.998648840677035e-06,
"loss": 0.7817,
"step": 269
},
{
"epoch": 0.09968003937976865,
"grad_norm": 0.9073139429092407,
"learning_rate": 4.9986328047799385e-06,
"loss": 0.7951,
"step": 270
},
{
"epoch": 0.10004922471080482,
"grad_norm": 0.9205917119979858,
"learning_rate": 4.9986166743103774e-06,
"loss": 0.8265,
"step": 271
},
{
"epoch": 0.100418410041841,
"grad_norm": 0.8653632998466492,
"learning_rate": 4.9986004492689644e-06,
"loss": 0.8301,
"step": 272
},
{
"epoch": 0.10078759537287718,
"grad_norm": 0.9200085997581482,
"learning_rate": 4.9985841296563135e-06,
"loss": 0.8534,
"step": 273
},
{
"epoch": 0.10115678070391336,
"grad_norm": 0.8734180331230164,
"learning_rate": 4.998567715473041e-06,
"loss": 0.8183,
"step": 274
},
{
"epoch": 0.10152596603494954,
"grad_norm": 0.866165816783905,
"learning_rate": 4.99855120671977e-06,
"loss": 0.827,
"step": 275
},
{
"epoch": 0.10189515136598573,
"grad_norm": 0.9260159134864807,
"learning_rate": 4.998534603397123e-06,
"loss": 0.7965,
"step": 276
},
{
"epoch": 0.10226433669702191,
"grad_norm": 0.8854061365127563,
"learning_rate": 4.998517905505731e-06,
"loss": 0.808,
"step": 277
},
{
"epoch": 0.10263352202805809,
"grad_norm": 0.8753185272216797,
"learning_rate": 4.998501113046224e-06,
"loss": 0.8205,
"step": 278
},
{
"epoch": 0.10300270735909427,
"grad_norm": 0.8909716606140137,
"learning_rate": 4.998484226019239e-06,
"loss": 0.8521,
"step": 279
},
{
"epoch": 0.10337189269013045,
"grad_norm": 0.8754370212554932,
"learning_rate": 4.9984672444254145e-06,
"loss": 0.7891,
"step": 280
},
{
"epoch": 0.10374107802116662,
"grad_norm": 0.8687075972557068,
"learning_rate": 4.998450168265393e-06,
"loss": 0.7974,
"step": 281
},
{
"epoch": 0.1041102633522028,
"grad_norm": 0.8216086030006409,
"learning_rate": 4.998432997539821e-06,
"loss": 0.7793,
"step": 282
},
{
"epoch": 0.10447944868323898,
"grad_norm": 0.899731457233429,
"learning_rate": 4.998415732249349e-06,
"loss": 0.8075,
"step": 283
},
{
"epoch": 0.10484863401427516,
"grad_norm": 0.8399525880813599,
"learning_rate": 4.998398372394631e-06,
"loss": 0.8167,
"step": 284
},
{
"epoch": 0.10521781934531135,
"grad_norm": 0.8872588276863098,
"learning_rate": 4.998380917976321e-06,
"loss": 0.7981,
"step": 285
},
{
"epoch": 0.10558700467634753,
"grad_norm": 0.8770443797111511,
"learning_rate": 4.998363368995083e-06,
"loss": 0.8156,
"step": 286
},
{
"epoch": 0.10595619000738371,
"grad_norm": 0.9171691536903381,
"learning_rate": 4.99834572545158e-06,
"loss": 0.7958,
"step": 287
},
{
"epoch": 0.10632537533841989,
"grad_norm": 0.8948536515235901,
"learning_rate": 4.99832798734648e-06,
"loss": 0.8092,
"step": 288
},
{
"epoch": 0.10669456066945607,
"grad_norm": 0.8909181356430054,
"learning_rate": 4.998310154680453e-06,
"loss": 0.8001,
"step": 289
},
{
"epoch": 0.10706374600049225,
"grad_norm": 0.9211814403533936,
"learning_rate": 4.9982922274541765e-06,
"loss": 0.8416,
"step": 290
},
{
"epoch": 0.10743293133152843,
"grad_norm": 0.9189214706420898,
"learning_rate": 4.998274205668326e-06,
"loss": 0.7836,
"step": 291
},
{
"epoch": 0.1078021166625646,
"grad_norm": 0.9062879085540771,
"learning_rate": 4.998256089323587e-06,
"loss": 0.81,
"step": 292
},
{
"epoch": 0.10817130199360078,
"grad_norm": 0.8764585256576538,
"learning_rate": 4.998237878420643e-06,
"loss": 0.8161,
"step": 293
},
{
"epoch": 0.10854048732463696,
"grad_norm": 0.8838504552841187,
"learning_rate": 4.998219572960183e-06,
"loss": 0.8339,
"step": 294
},
{
"epoch": 0.10890967265567315,
"grad_norm": 0.847440779209137,
"learning_rate": 4.998201172942901e-06,
"loss": 0.8275,
"step": 295
},
{
"epoch": 0.10927885798670933,
"grad_norm": 0.9169361591339111,
"learning_rate": 4.998182678369494e-06,
"loss": 0.8022,
"step": 296
},
{
"epoch": 0.10964804331774551,
"grad_norm": 0.8472815752029419,
"learning_rate": 4.99816408924066e-06,
"loss": 0.8192,
"step": 297
},
{
"epoch": 0.11001722864878169,
"grad_norm": 0.9293531775474548,
"learning_rate": 4.9981454055571045e-06,
"loss": 0.8554,
"step": 298
},
{
"epoch": 0.11038641397981787,
"grad_norm": 0.8915771245956421,
"learning_rate": 4.998126627319533e-06,
"loss": 0.8023,
"step": 299
},
{
"epoch": 0.11075559931085405,
"grad_norm": 0.9370061755180359,
"learning_rate": 4.998107754528657e-06,
"loss": 0.8097,
"step": 300
},
{
"epoch": 0.11112478464189023,
"grad_norm": 0.937268853187561,
"learning_rate": 4.998088787185192e-06,
"loss": 0.8048,
"step": 301
},
{
"epoch": 0.1114939699729264,
"grad_norm": 0.8639885187149048,
"learning_rate": 4.998069725289854e-06,
"loss": 0.7987,
"step": 302
},
{
"epoch": 0.11186315530396258,
"grad_norm": 0.9094707369804382,
"learning_rate": 4.998050568843364e-06,
"loss": 0.8551,
"step": 303
},
{
"epoch": 0.11223234063499878,
"grad_norm": 0.8595545291900635,
"learning_rate": 4.9980313178464504e-06,
"loss": 0.7923,
"step": 304
},
{
"epoch": 0.11260152596603495,
"grad_norm": 0.9705724120140076,
"learning_rate": 4.9980119722998396e-06,
"loss": 0.8356,
"step": 305
},
{
"epoch": 0.11297071129707113,
"grad_norm": 0.8938621282577515,
"learning_rate": 4.9979925322042635e-06,
"loss": 0.7797,
"step": 306
},
{
"epoch": 0.11333989662810731,
"grad_norm": 0.8729509711265564,
"learning_rate": 4.9979729975604584e-06,
"loss": 0.799,
"step": 307
},
{
"epoch": 0.11370908195914349,
"grad_norm": 0.8773247599601746,
"learning_rate": 4.997953368369164e-06,
"loss": 0.8094,
"step": 308
},
{
"epoch": 0.11407826729017967,
"grad_norm": 0.8874313831329346,
"learning_rate": 4.997933644631122e-06,
"loss": 0.7785,
"step": 309
},
{
"epoch": 0.11444745262121585,
"grad_norm": 0.9289500713348389,
"learning_rate": 4.997913826347082e-06,
"loss": 0.8127,
"step": 310
},
{
"epoch": 0.11481663795225203,
"grad_norm": 0.901099681854248,
"learning_rate": 4.99789391351779e-06,
"loss": 0.8038,
"step": 311
},
{
"epoch": 0.1151858232832882,
"grad_norm": 0.8916109204292297,
"learning_rate": 4.997873906144002e-06,
"loss": 0.8115,
"step": 312
},
{
"epoch": 0.1155550086143244,
"grad_norm": 0.9170032143592834,
"learning_rate": 4.997853804226476e-06,
"loss": 0.8181,
"step": 313
},
{
"epoch": 0.11592419394536058,
"grad_norm": 0.9100698828697205,
"learning_rate": 4.997833607765971e-06,
"loss": 0.7888,
"step": 314
},
{
"epoch": 0.11629337927639675,
"grad_norm": 0.897360622882843,
"learning_rate": 4.997813316763252e-06,
"loss": 0.8152,
"step": 315
},
{
"epoch": 0.11666256460743293,
"grad_norm": 0.893099308013916,
"learning_rate": 4.997792931219089e-06,
"loss": 0.7683,
"step": 316
},
{
"epoch": 0.11703174993846911,
"grad_norm": 0.9319385290145874,
"learning_rate": 4.9977724511342504e-06,
"loss": 0.7882,
"step": 317
},
{
"epoch": 0.11740093526950529,
"grad_norm": 0.9166727662086487,
"learning_rate": 4.997751876509513e-06,
"loss": 0.7975,
"step": 318
},
{
"epoch": 0.11777012060054147,
"grad_norm": 0.9373429417610168,
"learning_rate": 4.997731207345655e-06,
"loss": 0.8274,
"step": 319
},
{
"epoch": 0.11813930593157765,
"grad_norm": 0.8399270176887512,
"learning_rate": 4.997710443643461e-06,
"loss": 0.7732,
"step": 320
},
{
"epoch": 0.11850849126261383,
"grad_norm": 0.8919075727462769,
"learning_rate": 4.997689585403713e-06,
"loss": 0.7933,
"step": 321
},
{
"epoch": 0.11887767659365,
"grad_norm": 0.9245322942733765,
"learning_rate": 4.997668632627203e-06,
"loss": 0.8306,
"step": 322
},
{
"epoch": 0.1192468619246862,
"grad_norm": 0.89954674243927,
"learning_rate": 4.997647585314723e-06,
"loss": 0.8254,
"step": 323
},
{
"epoch": 0.11961604725572238,
"grad_norm": 0.9208563566207886,
"learning_rate": 4.9976264434670714e-06,
"loss": 0.8275,
"step": 324
},
{
"epoch": 0.11998523258675856,
"grad_norm": 1.0518693923950195,
"learning_rate": 4.9976052070850465e-06,
"loss": 0.8041,
"step": 325
},
{
"epoch": 0.12035441791779473,
"grad_norm": 0.8718807697296143,
"learning_rate": 4.997583876169453e-06,
"loss": 0.8171,
"step": 326
},
{
"epoch": 0.12072360324883091,
"grad_norm": 0.8952045440673828,
"learning_rate": 4.997562450721098e-06,
"loss": 0.8005,
"step": 327
},
{
"epoch": 0.12109278857986709,
"grad_norm": 0.8763337135314941,
"learning_rate": 4.997540930740792e-06,
"loss": 0.7838,
"step": 328
},
{
"epoch": 0.12146197391090327,
"grad_norm": 0.8835309147834778,
"learning_rate": 4.9975193162293505e-06,
"loss": 0.793,
"step": 329
},
{
"epoch": 0.12183115924193945,
"grad_norm": 0.9191171526908875,
"learning_rate": 4.997497607187591e-06,
"loss": 0.8317,
"step": 330
},
{
"epoch": 0.12220034457297563,
"grad_norm": 0.8889843225479126,
"learning_rate": 4.9974758036163355e-06,
"loss": 0.7937,
"step": 331
},
{
"epoch": 0.12256952990401182,
"grad_norm": 0.9089657664299011,
"learning_rate": 4.997453905516408e-06,
"loss": 0.8223,
"step": 332
},
{
"epoch": 0.122938715235048,
"grad_norm": 0.92867112159729,
"learning_rate": 4.9974319128886396e-06,
"loss": 0.8092,
"step": 333
},
{
"epoch": 0.12330790056608418,
"grad_norm": 0.8917028903961182,
"learning_rate": 4.997409825733861e-06,
"loss": 0.7728,
"step": 334
},
{
"epoch": 0.12367708589712036,
"grad_norm": 0.8886356949806213,
"learning_rate": 4.997387644052909e-06,
"loss": 0.8593,
"step": 335
},
{
"epoch": 0.12404627122815653,
"grad_norm": 0.8747639060020447,
"learning_rate": 4.997365367846623e-06,
"loss": 0.7963,
"step": 336
},
{
"epoch": 0.12441545655919271,
"grad_norm": 0.8846672177314758,
"learning_rate": 4.997342997115846e-06,
"loss": 0.774,
"step": 337
},
{
"epoch": 0.12478464189022889,
"grad_norm": 0.8727664947509766,
"learning_rate": 4.997320531861424e-06,
"loss": 0.7894,
"step": 338
},
{
"epoch": 0.12515382722126508,
"grad_norm": 0.9146867394447327,
"learning_rate": 4.997297972084209e-06,
"loss": 0.788,
"step": 339
},
{
"epoch": 0.12552301255230125,
"grad_norm": 0.8793301582336426,
"learning_rate": 4.997275317785053e-06,
"loss": 0.7873,
"step": 340
},
{
"epoch": 0.12589219788333744,
"grad_norm": 0.899469792842865,
"learning_rate": 4.997252568964814e-06,
"loss": 0.8054,
"step": 341
},
{
"epoch": 0.1262613832143736,
"grad_norm": 0.8407626152038574,
"learning_rate": 4.997229725624354e-06,
"loss": 0.7782,
"step": 342
},
{
"epoch": 0.1266305685454098,
"grad_norm": 0.9121686220169067,
"learning_rate": 4.997206787764537e-06,
"loss": 0.8135,
"step": 343
},
{
"epoch": 0.12699975387644596,
"grad_norm": 0.8709003925323486,
"learning_rate": 4.9971837553862324e-06,
"loss": 0.805,
"step": 344
},
{
"epoch": 0.12736893920748216,
"grad_norm": 0.8822476267814636,
"learning_rate": 4.997160628490309e-06,
"loss": 0.7888,
"step": 345
},
{
"epoch": 0.12773812453851835,
"grad_norm": 0.8969622254371643,
"learning_rate": 4.997137407077645e-06,
"loss": 0.8076,
"step": 346
},
{
"epoch": 0.12810730986955451,
"grad_norm": 0.9052038192749023,
"learning_rate": 4.997114091149118e-06,
"loss": 0.8207,
"step": 347
},
{
"epoch": 0.1284764952005907,
"grad_norm": 0.860755205154419,
"learning_rate": 4.997090680705611e-06,
"loss": 0.795,
"step": 348
},
{
"epoch": 0.12884568053162687,
"grad_norm": 0.8602296710014343,
"learning_rate": 4.99706717574801e-06,
"loss": 0.8034,
"step": 349
},
{
"epoch": 0.12921486586266306,
"grad_norm": 0.8612799644470215,
"learning_rate": 4.997043576277203e-06,
"loss": 0.7905,
"step": 350
},
{
"epoch": 0.12958405119369923,
"grad_norm": 0.9908099174499512,
"learning_rate": 4.997019882294086e-06,
"loss": 0.8259,
"step": 351
},
{
"epoch": 0.12995323652473542,
"grad_norm": 0.8944092988967896,
"learning_rate": 4.996996093799554e-06,
"loss": 0.7836,
"step": 352
},
{
"epoch": 0.1303224218557716,
"grad_norm": 0.9142276644706726,
"learning_rate": 4.996972210794509e-06,
"loss": 0.8118,
"step": 353
},
{
"epoch": 0.13069160718680778,
"grad_norm": 0.9402908682823181,
"learning_rate": 4.996948233279852e-06,
"loss": 0.8101,
"step": 354
},
{
"epoch": 0.13106079251784397,
"grad_norm": 0.9145587682723999,
"learning_rate": 4.996924161256494e-06,
"loss": 0.8238,
"step": 355
},
{
"epoch": 0.13142997784888014,
"grad_norm": 0.9261123538017273,
"learning_rate": 4.996899994725344e-06,
"loss": 0.8304,
"step": 356
},
{
"epoch": 0.13179916317991633,
"grad_norm": 0.8930022120475769,
"learning_rate": 4.996875733687317e-06,
"loss": 0.7747,
"step": 357
},
{
"epoch": 0.1321683485109525,
"grad_norm": 0.8823668360710144,
"learning_rate": 4.9968513781433315e-06,
"loss": 0.8134,
"step": 358
},
{
"epoch": 0.1325375338419887,
"grad_norm": 0.8473939299583435,
"learning_rate": 4.996826928094309e-06,
"loss": 0.7833,
"step": 359
},
{
"epoch": 0.13290671917302485,
"grad_norm": 0.8742851614952087,
"learning_rate": 4.996802383541176e-06,
"loss": 0.7532,
"step": 360
},
{
"epoch": 0.13327590450406104,
"grad_norm": 0.8971150517463684,
"learning_rate": 4.996777744484861e-06,
"loss": 0.7914,
"step": 361
},
{
"epoch": 0.1336450898350972,
"grad_norm": 0.8893861174583435,
"learning_rate": 4.996753010926296e-06,
"loss": 0.8268,
"step": 362
},
{
"epoch": 0.1340142751661334,
"grad_norm": 0.8671346306800842,
"learning_rate": 4.996728182866418e-06,
"loss": 0.7804,
"step": 363
},
{
"epoch": 0.13438346049716957,
"grad_norm": 0.8901523351669312,
"learning_rate": 4.9967032603061655e-06,
"loss": 0.7814,
"step": 364
},
{
"epoch": 0.13475264582820576,
"grad_norm": 0.9003875255584717,
"learning_rate": 4.996678243246483e-06,
"loss": 0.8193,
"step": 365
},
{
"epoch": 0.13512183115924195,
"grad_norm": 0.8973804116249084,
"learning_rate": 4.996653131688316e-06,
"loss": 0.8228,
"step": 366
},
{
"epoch": 0.13549101649027812,
"grad_norm": 0.8611428737640381,
"learning_rate": 4.996627925632617e-06,
"loss": 0.7733,
"step": 367
},
{
"epoch": 0.1358602018213143,
"grad_norm": 0.8708634376525879,
"learning_rate": 4.996602625080339e-06,
"loss": 0.7709,
"step": 368
},
{
"epoch": 0.13622938715235047,
"grad_norm": 0.930029571056366,
"learning_rate": 4.996577230032439e-06,
"loss": 0.7984,
"step": 369
},
{
"epoch": 0.13659857248338667,
"grad_norm": 0.8698320984840393,
"learning_rate": 4.996551740489879e-06,
"loss": 0.7694,
"step": 370
},
{
"epoch": 0.13696775781442283,
"grad_norm": 0.8765986561775208,
"learning_rate": 4.996526156453624e-06,
"loss": 0.7992,
"step": 371
},
{
"epoch": 0.13733694314545902,
"grad_norm": 0.9100019335746765,
"learning_rate": 4.996500477924642e-06,
"loss": 0.8417,
"step": 372
},
{
"epoch": 0.1377061284764952,
"grad_norm": 0.8693497180938721,
"learning_rate": 4.996474704903904e-06,
"loss": 0.7603,
"step": 373
},
{
"epoch": 0.13807531380753138,
"grad_norm": 0.874281644821167,
"learning_rate": 4.9964488373923865e-06,
"loss": 0.792,
"step": 374
},
{
"epoch": 0.13844449913856757,
"grad_norm": 0.9282156825065613,
"learning_rate": 4.9964228753910685e-06,
"loss": 0.8703,
"step": 375
},
{
"epoch": 0.13881368446960374,
"grad_norm": 0.8873798251152039,
"learning_rate": 4.9963968189009324e-06,
"loss": 0.786,
"step": 376
},
{
"epoch": 0.13918286980063993,
"grad_norm": 0.868864119052887,
"learning_rate": 4.996370667922965e-06,
"loss": 0.7815,
"step": 377
},
{
"epoch": 0.1395520551316761,
"grad_norm": 0.9150336980819702,
"learning_rate": 4.996344422458155e-06,
"loss": 0.7949,
"step": 378
},
{
"epoch": 0.1399212404627123,
"grad_norm": 0.8794249892234802,
"learning_rate": 4.996318082507497e-06,
"loss": 0.7503,
"step": 379
},
{
"epoch": 0.14029042579374845,
"grad_norm": 0.9185070395469666,
"learning_rate": 4.996291648071988e-06,
"loss": 0.8043,
"step": 380
},
{
"epoch": 0.14065961112478464,
"grad_norm": 0.8789845108985901,
"learning_rate": 4.996265119152627e-06,
"loss": 0.7647,
"step": 381
},
{
"epoch": 0.1410287964558208,
"grad_norm": 0.8894780278205872,
"learning_rate": 4.99623849575042e-06,
"loss": 0.788,
"step": 382
},
{
"epoch": 0.141397981786857,
"grad_norm": 0.9412915110588074,
"learning_rate": 4.996211777866372e-06,
"loss": 0.7974,
"step": 383
},
{
"epoch": 0.1417671671178932,
"grad_norm": 0.8994157910346985,
"learning_rate": 4.996184965501497e-06,
"loss": 0.78,
"step": 384
},
{
"epoch": 0.14213635244892936,
"grad_norm": 0.9516313076019287,
"learning_rate": 4.9961580586568095e-06,
"loss": 0.8062,
"step": 385
},
{
"epoch": 0.14250553777996555,
"grad_norm": 0.8979402184486389,
"learning_rate": 4.996131057333327e-06,
"loss": 0.8339,
"step": 386
},
{
"epoch": 0.14287472311100172,
"grad_norm": 0.8623480200767517,
"learning_rate": 4.996103961532072e-06,
"loss": 0.8066,
"step": 387
},
{
"epoch": 0.1432439084420379,
"grad_norm": 0.8892715573310852,
"learning_rate": 4.996076771254068e-06,
"loss": 0.7618,
"step": 388
},
{
"epoch": 0.14361309377307407,
"grad_norm": 0.9217121005058289,
"learning_rate": 4.9960494865003486e-06,
"loss": 0.8128,
"step": 389
},
{
"epoch": 0.14398227910411027,
"grad_norm": 0.9867552518844604,
"learning_rate": 4.996022107271942e-06,
"loss": 0.7973,
"step": 390
},
{
"epoch": 0.14435146443514643,
"grad_norm": 0.903056263923645,
"learning_rate": 4.995994633569888e-06,
"loss": 0.804,
"step": 391
},
{
"epoch": 0.14472064976618262,
"grad_norm": 0.8795948028564453,
"learning_rate": 4.995967065395223e-06,
"loss": 0.7495,
"step": 392
},
{
"epoch": 0.14508983509721882,
"grad_norm": 0.8725371956825256,
"learning_rate": 4.9959394027489934e-06,
"loss": 0.7933,
"step": 393
},
{
"epoch": 0.14545902042825498,
"grad_norm": 0.8918120265007019,
"learning_rate": 4.995911645632245e-06,
"loss": 0.7678,
"step": 394
},
{
"epoch": 0.14582820575929117,
"grad_norm": 0.934451162815094,
"learning_rate": 4.995883794046029e-06,
"loss": 0.8161,
"step": 395
},
{
"epoch": 0.14619739109032734,
"grad_norm": 0.8813429474830627,
"learning_rate": 4.995855847991398e-06,
"loss": 0.7993,
"step": 396
},
{
"epoch": 0.14656657642136353,
"grad_norm": 0.8930681943893433,
"learning_rate": 4.995827807469412e-06,
"loss": 0.7737,
"step": 397
},
{
"epoch": 0.1469357617523997,
"grad_norm": 0.8840509057044983,
"learning_rate": 4.995799672481131e-06,
"loss": 0.7996,
"step": 398
},
{
"epoch": 0.1473049470834359,
"grad_norm": 0.887534499168396,
"learning_rate": 4.9957714430276196e-06,
"loss": 0.8072,
"step": 399
},
{
"epoch": 0.14767413241447205,
"grad_norm": 0.9916796684265137,
"learning_rate": 4.995743119109947e-06,
"loss": 0.847,
"step": 400
},
{
"epoch": 0.14804331774550825,
"grad_norm": 0.9485662579536438,
"learning_rate": 4.995714700729184e-06,
"loss": 0.8371,
"step": 401
},
{
"epoch": 0.14841250307654444,
"grad_norm": 0.9004511833190918,
"learning_rate": 4.995686187886408e-06,
"loss": 0.7994,
"step": 402
},
{
"epoch": 0.1487816884075806,
"grad_norm": 0.9183670282363892,
"learning_rate": 4.995657580582699e-06,
"loss": 0.7913,
"step": 403
},
{
"epoch": 0.1491508737386168,
"grad_norm": 0.8997277617454529,
"learning_rate": 4.995628878819137e-06,
"loss": 0.7709,
"step": 404
},
{
"epoch": 0.14952005906965296,
"grad_norm": 0.9115433096885681,
"learning_rate": 4.9956000825968086e-06,
"loss": 0.8312,
"step": 405
},
{
"epoch": 0.14988924440068915,
"grad_norm": 0.9109799861907959,
"learning_rate": 4.995571191916805e-06,
"loss": 0.8222,
"step": 406
},
{
"epoch": 0.15025842973172532,
"grad_norm": 0.8981993794441223,
"learning_rate": 4.9955422067802205e-06,
"loss": 0.8333,
"step": 407
},
{
"epoch": 0.1506276150627615,
"grad_norm": 1.7644963264465332,
"learning_rate": 4.995513127188151e-06,
"loss": 0.7885,
"step": 408
},
{
"epoch": 0.15099680039379768,
"grad_norm": 0.9482977986335754,
"learning_rate": 4.995483953141696e-06,
"loss": 0.8211,
"step": 409
},
{
"epoch": 0.15136598572483387,
"grad_norm": 0.9331035017967224,
"learning_rate": 4.995454684641961e-06,
"loss": 0.7764,
"step": 410
},
{
"epoch": 0.15173517105587006,
"grad_norm": 0.890426516532898,
"learning_rate": 4.995425321690055e-06,
"loss": 0.7796,
"step": 411
},
{
"epoch": 0.15210435638690623,
"grad_norm": 0.9329193234443665,
"learning_rate": 4.995395864287088e-06,
"loss": 0.7815,
"step": 412
},
{
"epoch": 0.15247354171794242,
"grad_norm": 0.8611469864845276,
"learning_rate": 4.995366312434174e-06,
"loss": 0.7848,
"step": 413
},
{
"epoch": 0.15284272704897858,
"grad_norm": 0.8654336333274841,
"learning_rate": 4.995336666132434e-06,
"loss": 0.7821,
"step": 414
},
{
"epoch": 0.15321191238001478,
"grad_norm": 0.904015064239502,
"learning_rate": 4.9953069253829875e-06,
"loss": 0.8034,
"step": 415
},
{
"epoch": 0.15358109771105094,
"grad_norm": 0.9867867827415466,
"learning_rate": 4.995277090186962e-06,
"loss": 0.7989,
"step": 416
},
{
"epoch": 0.15395028304208713,
"grad_norm": 0.899686336517334,
"learning_rate": 4.995247160545487e-06,
"loss": 0.7969,
"step": 417
},
{
"epoch": 0.1543194683731233,
"grad_norm": 0.883914589881897,
"learning_rate": 4.995217136459693e-06,
"loss": 0.8133,
"step": 418
},
{
"epoch": 0.1546886537041595,
"grad_norm": 0.8943600654602051,
"learning_rate": 4.995187017930718e-06,
"loss": 0.7676,
"step": 419
},
{
"epoch": 0.15505783903519565,
"grad_norm": 0.8856242299079895,
"learning_rate": 4.995156804959702e-06,
"loss": 0.7129,
"step": 420
},
{
"epoch": 0.15542702436623185,
"grad_norm": 0.9339002966880798,
"learning_rate": 4.9951264975477895e-06,
"loss": 0.7945,
"step": 421
},
{
"epoch": 0.15579620969726804,
"grad_norm": 1.004530429840088,
"learning_rate": 4.995096095696126e-06,
"loss": 0.8219,
"step": 422
},
{
"epoch": 0.1561653950283042,
"grad_norm": 0.9293914437294006,
"learning_rate": 4.995065599405862e-06,
"loss": 0.8059,
"step": 423
},
{
"epoch": 0.1565345803593404,
"grad_norm": 0.897552490234375,
"learning_rate": 4.995035008678153e-06,
"loss": 0.7482,
"step": 424
},
{
"epoch": 0.15690376569037656,
"grad_norm": 0.9051419496536255,
"learning_rate": 4.995004323514157e-06,
"loss": 0.8244,
"step": 425
},
{
"epoch": 0.15727295102141275,
"grad_norm": 0.8879945278167725,
"learning_rate": 4.9949735439150335e-06,
"loss": 0.7653,
"step": 426
},
{
"epoch": 0.15764213635244892,
"grad_norm": 0.901598334312439,
"learning_rate": 4.99494266988195e-06,
"loss": 0.7852,
"step": 427
},
{
"epoch": 0.1580113216834851,
"grad_norm": 0.9374473690986633,
"learning_rate": 4.994911701416073e-06,
"loss": 0.7544,
"step": 428
},
{
"epoch": 0.15838050701452128,
"grad_norm": 0.8810298442840576,
"learning_rate": 4.994880638518575e-06,
"loss": 0.7623,
"step": 429
},
{
"epoch": 0.15874969234555747,
"grad_norm": 0.9226915836334229,
"learning_rate": 4.994849481190634e-06,
"loss": 0.7503,
"step": 430
},
{
"epoch": 0.15911887767659366,
"grad_norm": 0.923017144203186,
"learning_rate": 4.994818229433427e-06,
"loss": 0.8182,
"step": 431
},
{
"epoch": 0.15948806300762983,
"grad_norm": 0.9048720002174377,
"learning_rate": 4.994786883248137e-06,
"loss": 0.7976,
"step": 432
},
{
"epoch": 0.15985724833866602,
"grad_norm": 0.8946258425712585,
"learning_rate": 4.99475544263595e-06,
"loss": 0.7349,
"step": 433
},
{
"epoch": 0.16022643366970218,
"grad_norm": 0.903343915939331,
"learning_rate": 4.994723907598058e-06,
"loss": 0.7816,
"step": 434
},
{
"epoch": 0.16059561900073838,
"grad_norm": 0.907153844833374,
"learning_rate": 4.994692278135653e-06,
"loss": 0.7753,
"step": 435
},
{
"epoch": 0.16096480433177454,
"grad_norm": 0.950080931186676,
"learning_rate": 4.994660554249933e-06,
"loss": 0.7821,
"step": 436
},
{
"epoch": 0.16133398966281073,
"grad_norm": 0.8720895648002625,
"learning_rate": 4.994628735942098e-06,
"loss": 0.8067,
"step": 437
},
{
"epoch": 0.1617031749938469,
"grad_norm": 0.9267756938934326,
"learning_rate": 4.994596823213353e-06,
"loss": 0.8121,
"step": 438
},
{
"epoch": 0.1620723603248831,
"grad_norm": 0.9312788844108582,
"learning_rate": 4.9945648160649054e-06,
"loss": 0.8059,
"step": 439
},
{
"epoch": 0.16244154565591928,
"grad_norm": 0.8700461983680725,
"learning_rate": 4.994532714497966e-06,
"loss": 0.7626,
"step": 440
},
{
"epoch": 0.16281073098695545,
"grad_norm": 0.8887227773666382,
"learning_rate": 4.9945005185137515e-06,
"loss": 0.7747,
"step": 441
},
{
"epoch": 0.16317991631799164,
"grad_norm": 0.9699864983558655,
"learning_rate": 4.99446822811348e-06,
"loss": 0.7935,
"step": 442
},
{
"epoch": 0.1635491016490278,
"grad_norm": 0.9109600782394409,
"learning_rate": 4.994435843298372e-06,
"loss": 0.7375,
"step": 443
},
{
"epoch": 0.163918286980064,
"grad_norm": 0.8919878005981445,
"learning_rate": 4.994403364069656e-06,
"loss": 0.7691,
"step": 444
},
{
"epoch": 0.16428747231110016,
"grad_norm": 0.9388747215270996,
"learning_rate": 4.994370790428559e-06,
"loss": 0.7982,
"step": 445
},
{
"epoch": 0.16465665764213636,
"grad_norm": 0.9099019169807434,
"learning_rate": 4.994338122376315e-06,
"loss": 0.7594,
"step": 446
},
{
"epoch": 0.16502584297317252,
"grad_norm": 0.9286575317382812,
"learning_rate": 4.994305359914161e-06,
"loss": 0.8153,
"step": 447
},
{
"epoch": 0.1653950283042087,
"grad_norm": 0.896110475063324,
"learning_rate": 4.9942725030433356e-06,
"loss": 0.8013,
"step": 448
},
{
"epoch": 0.1657642136352449,
"grad_norm": 0.8885270953178406,
"learning_rate": 4.994239551765083e-06,
"loss": 0.7642,
"step": 449
},
{
"epoch": 0.16613339896628107,
"grad_norm": 0.8727377653121948,
"learning_rate": 4.994206506080651e-06,
"loss": 0.7358,
"step": 450
},
{
"epoch": 0.16650258429731726,
"grad_norm": 0.9173669219017029,
"learning_rate": 4.9941733659912905e-06,
"loss": 0.8196,
"step": 451
},
{
"epoch": 0.16687176962835343,
"grad_norm": 0.9255698323249817,
"learning_rate": 4.994140131498254e-06,
"loss": 0.8184,
"step": 452
},
{
"epoch": 0.16724095495938962,
"grad_norm": 0.9053508639335632,
"learning_rate": 4.994106802602802e-06,
"loss": 0.7938,
"step": 453
},
{
"epoch": 0.16761014029042579,
"grad_norm": 0.9308109283447266,
"learning_rate": 4.994073379306193e-06,
"loss": 0.8067,
"step": 454
},
{
"epoch": 0.16797932562146198,
"grad_norm": 0.8316235542297363,
"learning_rate": 4.994039861609696e-06,
"loss": 0.7799,
"step": 455
},
{
"epoch": 0.16834851095249814,
"grad_norm": 0.8959935903549194,
"learning_rate": 4.994006249514575e-06,
"loss": 0.797,
"step": 456
},
{
"epoch": 0.16871769628353434,
"grad_norm": 0.8984429240226746,
"learning_rate": 4.993972543022106e-06,
"loss": 0.7763,
"step": 457
},
{
"epoch": 0.16908688161457053,
"grad_norm": 0.8788191676139832,
"learning_rate": 4.9939387421335626e-06,
"loss": 0.811,
"step": 458
},
{
"epoch": 0.1694560669456067,
"grad_norm": 0.921341061592102,
"learning_rate": 4.993904846850226e-06,
"loss": 0.8022,
"step": 459
},
{
"epoch": 0.16982525227664289,
"grad_norm": 0.9294300675392151,
"learning_rate": 4.993870857173378e-06,
"loss": 0.7544,
"step": 460
},
{
"epoch": 0.17019443760767905,
"grad_norm": 0.9679121375083923,
"learning_rate": 4.9938367731043035e-06,
"loss": 0.793,
"step": 461
},
{
"epoch": 0.17056362293871524,
"grad_norm": 0.9177256226539612,
"learning_rate": 4.993802594644295e-06,
"loss": 0.7825,
"step": 462
},
{
"epoch": 0.1709328082697514,
"grad_norm": 0.8937193155288696,
"learning_rate": 4.993768321794645e-06,
"loss": 0.7613,
"step": 463
},
{
"epoch": 0.1713019936007876,
"grad_norm": 0.9800901412963867,
"learning_rate": 4.993733954556652e-06,
"loss": 0.8105,
"step": 464
},
{
"epoch": 0.17167117893182376,
"grad_norm": 0.8884421586990356,
"learning_rate": 4.9936994929316155e-06,
"loss": 0.759,
"step": 465
},
{
"epoch": 0.17204036426285996,
"grad_norm": 0.980379045009613,
"learning_rate": 4.99366493692084e-06,
"loss": 0.8023,
"step": 466
},
{
"epoch": 0.17240954959389612,
"grad_norm": 0.8764387369155884,
"learning_rate": 4.993630286525634e-06,
"loss": 0.7574,
"step": 467
},
{
"epoch": 0.17277873492493231,
"grad_norm": 0.913542628288269,
"learning_rate": 4.993595541747309e-06,
"loss": 0.8313,
"step": 468
},
{
"epoch": 0.1731479202559685,
"grad_norm": 0.9282394647598267,
"learning_rate": 4.993560702587179e-06,
"loss": 0.7718,
"step": 469
},
{
"epoch": 0.17351710558700467,
"grad_norm": 0.9678360223770142,
"learning_rate": 4.9935257690465634e-06,
"loss": 0.7783,
"step": 470
},
{
"epoch": 0.17388629091804086,
"grad_norm": 0.949858546257019,
"learning_rate": 4.993490741126785e-06,
"loss": 0.814,
"step": 471
},
{
"epoch": 0.17425547624907703,
"grad_norm": 0.8861430883407593,
"learning_rate": 4.9934556188291685e-06,
"loss": 0.8069,
"step": 472
},
{
"epoch": 0.17462466158011322,
"grad_norm": 0.8986914157867432,
"learning_rate": 4.993420402155044e-06,
"loss": 0.7622,
"step": 473
},
{
"epoch": 0.1749938469111494,
"grad_norm": 0.8991053104400635,
"learning_rate": 4.993385091105743e-06,
"loss": 0.7721,
"step": 474
},
{
"epoch": 0.17536303224218558,
"grad_norm": 0.9329451322555542,
"learning_rate": 4.993349685682605e-06,
"loss": 0.7656,
"step": 475
},
{
"epoch": 0.17573221757322174,
"grad_norm": 0.876977801322937,
"learning_rate": 4.993314185886967e-06,
"loss": 0.8482,
"step": 476
},
{
"epoch": 0.17610140290425794,
"grad_norm": 0.9570649266242981,
"learning_rate": 4.9932785917201754e-06,
"loss": 0.8036,
"step": 477
},
{
"epoch": 0.17647058823529413,
"grad_norm": 0.8990687727928162,
"learning_rate": 4.993242903183575e-06,
"loss": 0.7766,
"step": 478
},
{
"epoch": 0.1768397735663303,
"grad_norm": 0.9275680780410767,
"learning_rate": 4.993207120278518e-06,
"loss": 0.8204,
"step": 479
},
{
"epoch": 0.1772089588973665,
"grad_norm": 0.9137722253799438,
"learning_rate": 4.9931712430063585e-06,
"loss": 0.7201,
"step": 480
},
{
"epoch": 0.17757814422840265,
"grad_norm": 0.938310980796814,
"learning_rate": 4.993135271368454e-06,
"loss": 0.7859,
"step": 481
},
{
"epoch": 0.17794732955943884,
"grad_norm": 0.941462516784668,
"learning_rate": 4.993099205366166e-06,
"loss": 0.8033,
"step": 482
},
{
"epoch": 0.178316514890475,
"grad_norm": 0.9050039649009705,
"learning_rate": 4.99306304500086e-06,
"loss": 0.8441,
"step": 483
},
{
"epoch": 0.1786857002215112,
"grad_norm": 0.9100262522697449,
"learning_rate": 4.993026790273905e-06,
"loss": 0.7973,
"step": 484
},
{
"epoch": 0.17905488555254737,
"grad_norm": 0.8839780688285828,
"learning_rate": 4.992990441186672e-06,
"loss": 0.8029,
"step": 485
},
{
"epoch": 0.17942407088358356,
"grad_norm": 0.8999461531639099,
"learning_rate": 4.992953997740538e-06,
"loss": 0.7783,
"step": 486
},
{
"epoch": 0.17979325621461975,
"grad_norm": 0.9286245107650757,
"learning_rate": 4.992917459936882e-06,
"loss": 0.7623,
"step": 487
},
{
"epoch": 0.18016244154565592,
"grad_norm": 0.9243547320365906,
"learning_rate": 4.992880827777088e-06,
"loss": 0.7888,
"step": 488
},
{
"epoch": 0.1805316268766921,
"grad_norm": 0.8967995643615723,
"learning_rate": 4.992844101262541e-06,
"loss": 0.7806,
"step": 489
},
{
"epoch": 0.18090081220772827,
"grad_norm": 0.9205966591835022,
"learning_rate": 4.99280728039463e-06,
"loss": 0.7851,
"step": 490
},
{
"epoch": 0.18126999753876447,
"grad_norm": 0.9394510388374329,
"learning_rate": 4.992770365174752e-06,
"loss": 0.8065,
"step": 491
},
{
"epoch": 0.18163918286980063,
"grad_norm": 0.911313533782959,
"learning_rate": 4.992733355604301e-06,
"loss": 0.8055,
"step": 492
},
{
"epoch": 0.18200836820083682,
"grad_norm": 0.9121047854423523,
"learning_rate": 4.99269625168468e-06,
"loss": 0.757,
"step": 493
},
{
"epoch": 0.182377553531873,
"grad_norm": 0.8973436951637268,
"learning_rate": 4.9926590534172926e-06,
"loss": 0.7526,
"step": 494
},
{
"epoch": 0.18274673886290918,
"grad_norm": 0.9058072566986084,
"learning_rate": 4.992621760803547e-06,
"loss": 0.8152,
"step": 495
},
{
"epoch": 0.18311592419394537,
"grad_norm": 0.8761561512947083,
"learning_rate": 4.992584373844853e-06,
"loss": 0.7594,
"step": 496
},
{
"epoch": 0.18348510952498154,
"grad_norm": 0.9290655851364136,
"learning_rate": 4.992546892542628e-06,
"loss": 0.801,
"step": 497
},
{
"epoch": 0.18385429485601773,
"grad_norm": 0.9174765944480896,
"learning_rate": 4.99250931689829e-06,
"loss": 0.7823,
"step": 498
},
{
"epoch": 0.1842234801870539,
"grad_norm": 0.9156612157821655,
"learning_rate": 4.992471646913261e-06,
"loss": 0.8129,
"step": 499
},
{
"epoch": 0.1845926655180901,
"grad_norm": 0.9134384989738464,
"learning_rate": 4.992433882588967e-06,
"loss": 0.7436,
"step": 500
},
{
"epoch": 0.18496185084912625,
"grad_norm": 0.9095898866653442,
"learning_rate": 4.9923960239268365e-06,
"loss": 0.7668,
"step": 501
},
{
"epoch": 0.18533103618016245,
"grad_norm": 0.9294151663780212,
"learning_rate": 4.992358070928304e-06,
"loss": 0.8083,
"step": 502
},
{
"epoch": 0.1857002215111986,
"grad_norm": 0.9146490693092346,
"learning_rate": 4.992320023594803e-06,
"loss": 0.7678,
"step": 503
},
{
"epoch": 0.1860694068422348,
"grad_norm": 0.9225884079933167,
"learning_rate": 4.992281881927778e-06,
"loss": 0.7853,
"step": 504
},
{
"epoch": 0.186438592173271,
"grad_norm": 0.9287518262863159,
"learning_rate": 4.992243645928669e-06,
"loss": 0.8154,
"step": 505
},
{
"epoch": 0.18680777750430716,
"grad_norm": 0.9329172372817993,
"learning_rate": 4.992205315598926e-06,
"loss": 0.7989,
"step": 506
},
{
"epoch": 0.18717696283534335,
"grad_norm": 0.9103102087974548,
"learning_rate": 4.9921668909399976e-06,
"loss": 0.7687,
"step": 507
},
{
"epoch": 0.18754614816637952,
"grad_norm": 0.909018337726593,
"learning_rate": 4.992128371953339e-06,
"loss": 0.7794,
"step": 508
},
{
"epoch": 0.1879153334974157,
"grad_norm": 0.8800110220909119,
"learning_rate": 4.992089758640407e-06,
"loss": 0.7365,
"step": 509
},
{
"epoch": 0.18828451882845187,
"grad_norm": 0.9656134247779846,
"learning_rate": 4.992051051002665e-06,
"loss": 0.8176,
"step": 510
},
{
"epoch": 0.18865370415948807,
"grad_norm": 0.9035509824752808,
"learning_rate": 4.992012249041578e-06,
"loss": 0.7532,
"step": 511
},
{
"epoch": 0.18902288949052423,
"grad_norm": 0.9327494502067566,
"learning_rate": 4.9919733527586126e-06,
"loss": 0.7733,
"step": 512
},
{
"epoch": 0.18939207482156042,
"grad_norm": 0.918406069278717,
"learning_rate": 4.991934362155243e-06,
"loss": 0.8047,
"step": 513
},
{
"epoch": 0.18976126015259662,
"grad_norm": 0.9656594395637512,
"learning_rate": 4.991895277232944e-06,
"loss": 0.7923,
"step": 514
},
{
"epoch": 0.19013044548363278,
"grad_norm": 0.9281619787216187,
"learning_rate": 4.991856097993195e-06,
"loss": 0.7801,
"step": 515
},
{
"epoch": 0.19049963081466897,
"grad_norm": 0.8924593329429626,
"learning_rate": 4.99181682443748e-06,
"loss": 0.7822,
"step": 516
},
{
"epoch": 0.19086881614570514,
"grad_norm": 0.9071043729782104,
"learning_rate": 4.991777456567284e-06,
"loss": 0.7975,
"step": 517
},
{
"epoch": 0.19123800147674133,
"grad_norm": 0.9340181350708008,
"learning_rate": 4.991737994384097e-06,
"loss": 0.8108,
"step": 518
},
{
"epoch": 0.1916071868077775,
"grad_norm": 0.8986587524414062,
"learning_rate": 4.991698437889414e-06,
"loss": 0.8141,
"step": 519
},
{
"epoch": 0.1919763721388137,
"grad_norm": 0.8858151435852051,
"learning_rate": 4.991658787084732e-06,
"loss": 0.7736,
"step": 520
},
{
"epoch": 0.19234555746984985,
"grad_norm": 0.9259651899337769,
"learning_rate": 4.991619041971551e-06,
"loss": 0.8044,
"step": 521
},
{
"epoch": 0.19271474280088605,
"grad_norm": 0.8833301067352295,
"learning_rate": 4.991579202551376e-06,
"loss": 0.7753,
"step": 522
},
{
"epoch": 0.1930839281319222,
"grad_norm": 0.8822750449180603,
"learning_rate": 4.991539268825713e-06,
"loss": 0.7369,
"step": 523
},
{
"epoch": 0.1934531134629584,
"grad_norm": 0.900492787361145,
"learning_rate": 4.9914992407960765e-06,
"loss": 0.792,
"step": 524
},
{
"epoch": 0.1938222987939946,
"grad_norm": 0.8839775323867798,
"learning_rate": 4.991459118463979e-06,
"loss": 0.7633,
"step": 525
},
{
"epoch": 0.19419148412503076,
"grad_norm": 0.9306631088256836,
"learning_rate": 4.991418901830941e-06,
"loss": 0.7585,
"step": 526
},
{
"epoch": 0.19456066945606695,
"grad_norm": 0.8982890248298645,
"learning_rate": 4.991378590898483e-06,
"loss": 0.7856,
"step": 527
},
{
"epoch": 0.19492985478710312,
"grad_norm": 0.8740949034690857,
"learning_rate": 4.991338185668133e-06,
"loss": 0.8008,
"step": 528
},
{
"epoch": 0.1952990401181393,
"grad_norm": 0.9367266297340393,
"learning_rate": 4.991297686141418e-06,
"loss": 0.8715,
"step": 529
},
{
"epoch": 0.19566822544917548,
"grad_norm": 0.8908909559249878,
"learning_rate": 4.9912570923198724e-06,
"loss": 0.777,
"step": 530
},
{
"epoch": 0.19603741078021167,
"grad_norm": 0.9466776251792908,
"learning_rate": 4.9912164042050315e-06,
"loss": 0.7857,
"step": 531
},
{
"epoch": 0.19640659611124783,
"grad_norm": 0.9122392535209656,
"learning_rate": 4.991175621798436e-06,
"loss": 0.7454,
"step": 532
},
{
"epoch": 0.19677578144228403,
"grad_norm": 0.9155495762825012,
"learning_rate": 4.99113474510163e-06,
"loss": 0.788,
"step": 533
},
{
"epoch": 0.19714496677332022,
"grad_norm": 0.9131052494049072,
"learning_rate": 4.99109377411616e-06,
"loss": 0.7724,
"step": 534
},
{
"epoch": 0.19751415210435638,
"grad_norm": 0.9260715246200562,
"learning_rate": 4.9910527088435766e-06,
"loss": 0.7539,
"step": 535
},
{
"epoch": 0.19788333743539258,
"grad_norm": 0.9215927124023438,
"learning_rate": 4.991011549285434e-06,
"loss": 0.8061,
"step": 536
},
{
"epoch": 0.19825252276642874,
"grad_norm": 0.9671293497085571,
"learning_rate": 4.990970295443291e-06,
"loss": 0.8186,
"step": 537
},
{
"epoch": 0.19862170809746493,
"grad_norm": 0.8750205636024475,
"learning_rate": 4.990928947318708e-06,
"loss": 0.7434,
"step": 538
},
{
"epoch": 0.1989908934285011,
"grad_norm": 0.9240961670875549,
"learning_rate": 4.990887504913251e-06,
"loss": 0.7365,
"step": 539
},
{
"epoch": 0.1993600787595373,
"grad_norm": 0.9260193109512329,
"learning_rate": 4.990845968228488e-06,
"loss": 0.8067,
"step": 540
},
{
"epoch": 0.19972926409057346,
"grad_norm": 0.9060749411582947,
"learning_rate": 4.990804337265991e-06,
"loss": 0.8146,
"step": 541
},
{
"epoch": 0.20009844942160965,
"grad_norm": 0.9333279728889465,
"learning_rate": 4.9907626120273355e-06,
"loss": 0.7811,
"step": 542
},
{
"epoch": 0.20046763475264584,
"grad_norm": 1.0027539730072021,
"learning_rate": 4.990720792514102e-06,
"loss": 0.7587,
"step": 543
},
{
"epoch": 0.200836820083682,
"grad_norm": 0.9125142097473145,
"learning_rate": 4.9906788787278725e-06,
"loss": 0.8037,
"step": 544
},
{
"epoch": 0.2012060054147182,
"grad_norm": 0.8967962861061096,
"learning_rate": 4.990636870670234e-06,
"loss": 0.7533,
"step": 545
},
{
"epoch": 0.20157519074575436,
"grad_norm": 0.9158695936203003,
"learning_rate": 4.9905947683427745e-06,
"loss": 0.7654,
"step": 546
},
{
"epoch": 0.20194437607679055,
"grad_norm": 0.9484332203865051,
"learning_rate": 4.99055257174709e-06,
"loss": 0.7419,
"step": 547
},
{
"epoch": 0.20231356140782672,
"grad_norm": 0.9259840846061707,
"learning_rate": 4.990510280884777e-06,
"loss": 0.7781,
"step": 548
},
{
"epoch": 0.2026827467388629,
"grad_norm": 0.8913872241973877,
"learning_rate": 4.990467895757435e-06,
"loss": 0.7545,
"step": 549
},
{
"epoch": 0.20305193206989908,
"grad_norm": 0.9442530870437622,
"learning_rate": 4.99042541636667e-06,
"loss": 0.7402,
"step": 550
},
{
"epoch": 0.20342111740093527,
"grad_norm": 0.9780921339988708,
"learning_rate": 4.9903828427140885e-06,
"loss": 0.7911,
"step": 551
},
{
"epoch": 0.20379030273197146,
"grad_norm": 0.9168081283569336,
"learning_rate": 4.990340174801302e-06,
"loss": 0.7886,
"step": 552
},
{
"epoch": 0.20415948806300763,
"grad_norm": 0.8821970224380493,
"learning_rate": 4.990297412629926e-06,
"loss": 0.8018,
"step": 553
},
{
"epoch": 0.20452867339404382,
"grad_norm": 0.8984123468399048,
"learning_rate": 4.99025455620158e-06,
"loss": 0.7459,
"step": 554
},
{
"epoch": 0.20489785872507998,
"grad_norm": 0.9142769575119019,
"learning_rate": 4.990211605517884e-06,
"loss": 0.7697,
"step": 555
},
{
"epoch": 0.20526704405611618,
"grad_norm": 0.8822063207626343,
"learning_rate": 4.990168560580465e-06,
"loss": 0.801,
"step": 556
},
{
"epoch": 0.20563622938715234,
"grad_norm": 0.9008304476737976,
"learning_rate": 4.990125421390952e-06,
"loss": 0.7478,
"step": 557
},
{
"epoch": 0.20600541471818853,
"grad_norm": 0.9141461849212646,
"learning_rate": 4.990082187950977e-06,
"loss": 0.7356,
"step": 558
},
{
"epoch": 0.2063746000492247,
"grad_norm": 0.9026126265525818,
"learning_rate": 4.9900388602621775e-06,
"loss": 0.7594,
"step": 559
},
{
"epoch": 0.2067437853802609,
"grad_norm": 0.9227587580680847,
"learning_rate": 4.989995438326193e-06,
"loss": 0.7465,
"step": 560
},
{
"epoch": 0.20711297071129708,
"grad_norm": 0.9264957904815674,
"learning_rate": 4.989951922144667e-06,
"loss": 0.799,
"step": 561
},
{
"epoch": 0.20748215604233325,
"grad_norm": 0.9035301208496094,
"learning_rate": 4.989908311719247e-06,
"loss": 0.8013,
"step": 562
},
{
"epoch": 0.20785134137336944,
"grad_norm": 0.9293225407600403,
"learning_rate": 4.989864607051583e-06,
"loss": 0.78,
"step": 563
},
{
"epoch": 0.2082205267044056,
"grad_norm": 0.9383095502853394,
"learning_rate": 4.989820808143328e-06,
"loss": 0.7629,
"step": 564
},
{
"epoch": 0.2085897120354418,
"grad_norm": 0.9269458055496216,
"learning_rate": 4.989776914996144e-06,
"loss": 0.7698,
"step": 565
},
{
"epoch": 0.20895889736647796,
"grad_norm": 0.8632141351699829,
"learning_rate": 4.989732927611688e-06,
"loss": 0.7084,
"step": 566
},
{
"epoch": 0.20932808269751416,
"grad_norm": 0.9031556844711304,
"learning_rate": 4.989688845991626e-06,
"loss": 0.7425,
"step": 567
},
{
"epoch": 0.20969726802855032,
"grad_norm": 0.9389427304267883,
"learning_rate": 4.989644670137627e-06,
"loss": 0.847,
"step": 568
},
{
"epoch": 0.2100664533595865,
"grad_norm": 0.8885316252708435,
"learning_rate": 4.9896004000513635e-06,
"loss": 0.768,
"step": 569
},
{
"epoch": 0.2104356386906227,
"grad_norm": 0.9279949069023132,
"learning_rate": 4.989556035734511e-06,
"loss": 0.8118,
"step": 570
},
{
"epoch": 0.21080482402165887,
"grad_norm": 0.9301499724388123,
"learning_rate": 4.989511577188748e-06,
"loss": 0.7743,
"step": 571
},
{
"epoch": 0.21117400935269506,
"grad_norm": 0.9175605773925781,
"learning_rate": 4.989467024415757e-06,
"loss": 0.7543,
"step": 572
},
{
"epoch": 0.21154319468373123,
"grad_norm": 0.8823210597038269,
"learning_rate": 4.989422377417225e-06,
"loss": 0.7554,
"step": 573
},
{
"epoch": 0.21191238001476742,
"grad_norm": 0.9052088260650635,
"learning_rate": 4.989377636194842e-06,
"loss": 0.7853,
"step": 574
},
{
"epoch": 0.21228156534580359,
"grad_norm": 0.9024859666824341,
"learning_rate": 4.9893328007503e-06,
"loss": 0.7822,
"step": 575
},
{
"epoch": 0.21265075067683978,
"grad_norm": 0.8866460919380188,
"learning_rate": 4.989287871085299e-06,
"loss": 0.781,
"step": 576
},
{
"epoch": 0.21301993600787594,
"grad_norm": 0.9139004349708557,
"learning_rate": 4.989242847201537e-06,
"loss": 0.7486,
"step": 577
},
{
"epoch": 0.21338912133891214,
"grad_norm": 0.912174642086029,
"learning_rate": 4.9891977291007174e-06,
"loss": 0.7911,
"step": 578
},
{
"epoch": 0.2137583066699483,
"grad_norm": 0.9372491240501404,
"learning_rate": 4.989152516784551e-06,
"loss": 0.7751,
"step": 579
},
{
"epoch": 0.2141274920009845,
"grad_norm": 0.9477733373641968,
"learning_rate": 4.989107210254748e-06,
"loss": 0.7798,
"step": 580
},
{
"epoch": 0.21449667733202069,
"grad_norm": 0.9927518367767334,
"learning_rate": 4.989061809513021e-06,
"loss": 0.7844,
"step": 581
},
{
"epoch": 0.21486586266305685,
"grad_norm": 0.9412055015563965,
"learning_rate": 4.98901631456109e-06,
"loss": 0.8244,
"step": 582
},
{
"epoch": 0.21523504799409304,
"grad_norm": 0.8675752282142639,
"learning_rate": 4.988970725400678e-06,
"loss": 0.7988,
"step": 583
},
{
"epoch": 0.2156042333251292,
"grad_norm": 0.9113022089004517,
"learning_rate": 4.98892504203351e-06,
"loss": 0.7368,
"step": 584
},
{
"epoch": 0.2159734186561654,
"grad_norm": 0.949691116809845,
"learning_rate": 4.988879264461314e-06,
"loss": 0.7842,
"step": 585
},
{
"epoch": 0.21634260398720156,
"grad_norm": 0.9027278423309326,
"learning_rate": 4.9888333926858235e-06,
"loss": 0.7686,
"step": 586
},
{
"epoch": 0.21671178931823776,
"grad_norm": 0.926006019115448,
"learning_rate": 4.988787426708775e-06,
"loss": 0.8051,
"step": 587
},
{
"epoch": 0.21708097464927392,
"grad_norm": 0.9157929420471191,
"learning_rate": 4.988741366531906e-06,
"loss": 0.7682,
"step": 588
},
{
"epoch": 0.21745015998031011,
"grad_norm": 0.9037911295890808,
"learning_rate": 4.988695212156963e-06,
"loss": 0.7853,
"step": 589
},
{
"epoch": 0.2178193453113463,
"grad_norm": 0.927545964717865,
"learning_rate": 4.988648963585692e-06,
"loss": 0.8019,
"step": 590
},
{
"epoch": 0.21818853064238247,
"grad_norm": 0.901202917098999,
"learning_rate": 4.988602620819843e-06,
"loss": 0.7602,
"step": 591
},
{
"epoch": 0.21855771597341866,
"grad_norm": 0.92628413438797,
"learning_rate": 4.98855618386117e-06,
"loss": 0.7674,
"step": 592
},
{
"epoch": 0.21892690130445483,
"grad_norm": 0.9130452871322632,
"learning_rate": 4.988509652711431e-06,
"loss": 0.7891,
"step": 593
},
{
"epoch": 0.21929608663549102,
"grad_norm": 0.8740320801734924,
"learning_rate": 4.988463027372387e-06,
"loss": 0.7462,
"step": 594
},
{
"epoch": 0.2196652719665272,
"grad_norm": 0.9068711400032043,
"learning_rate": 4.9884163078458026e-06,
"loss": 0.7934,
"step": 595
},
{
"epoch": 0.22003445729756338,
"grad_norm": 0.8960202932357788,
"learning_rate": 4.988369494133447e-06,
"loss": 0.7855,
"step": 596
},
{
"epoch": 0.22040364262859954,
"grad_norm": 0.8958917260169983,
"learning_rate": 4.98832258623709e-06,
"loss": 0.7641,
"step": 597
},
{
"epoch": 0.22077282795963574,
"grad_norm": 0.9499006271362305,
"learning_rate": 4.988275584158509e-06,
"loss": 0.773,
"step": 598
},
{
"epoch": 0.22114201329067193,
"grad_norm": 0.9037056565284729,
"learning_rate": 4.988228487899483e-06,
"loss": 0.7528,
"step": 599
},
{
"epoch": 0.2215111986217081,
"grad_norm": 0.9397220611572266,
"learning_rate": 4.988181297461794e-06,
"loss": 0.7659,
"step": 600
},
{
"epoch": 0.2218803839527443,
"grad_norm": 0.91253262758255,
"learning_rate": 4.988134012847228e-06,
"loss": 0.7959,
"step": 601
},
{
"epoch": 0.22224956928378045,
"grad_norm": 0.9446122646331787,
"learning_rate": 4.988086634057575e-06,
"loss": 0.7876,
"step": 602
},
{
"epoch": 0.22261875461481664,
"grad_norm": 0.9358010292053223,
"learning_rate": 4.9880391610946276e-06,
"loss": 0.7768,
"step": 603
},
{
"epoch": 0.2229879399458528,
"grad_norm": 0.8857962489128113,
"learning_rate": 4.987991593960184e-06,
"loss": 0.7973,
"step": 604
},
{
"epoch": 0.223357125276889,
"grad_norm": 0.8937807679176331,
"learning_rate": 4.987943932656043e-06,
"loss": 0.7557,
"step": 605
},
{
"epoch": 0.22372631060792517,
"grad_norm": 0.9220109581947327,
"learning_rate": 4.9878961771840096e-06,
"loss": 0.8054,
"step": 606
},
{
"epoch": 0.22409549593896136,
"grad_norm": 0.8997550010681152,
"learning_rate": 4.987848327545891e-06,
"loss": 0.7715,
"step": 607
},
{
"epoch": 0.22446468126999755,
"grad_norm": 0.9468237161636353,
"learning_rate": 4.9878003837434986e-06,
"loss": 0.7738,
"step": 608
},
{
"epoch": 0.22483386660103372,
"grad_norm": 0.9114269018173218,
"learning_rate": 4.987752345778647e-06,
"loss": 0.7656,
"step": 609
},
{
"epoch": 0.2252030519320699,
"grad_norm": 0.9273460507392883,
"learning_rate": 4.987704213653154e-06,
"loss": 0.7221,
"step": 610
},
{
"epoch": 0.22557223726310607,
"grad_norm": 1.0038539171218872,
"learning_rate": 4.987655987368842e-06,
"loss": 0.7798,
"step": 611
},
{
"epoch": 0.22594142259414227,
"grad_norm": 0.9012103080749512,
"learning_rate": 4.987607666927535e-06,
"loss": 0.7526,
"step": 612
},
{
"epoch": 0.22631060792517843,
"grad_norm": 0.9375380277633667,
"learning_rate": 4.987559252331064e-06,
"loss": 0.7864,
"step": 613
},
{
"epoch": 0.22667979325621462,
"grad_norm": 0.9294722080230713,
"learning_rate": 4.98751074358126e-06,
"loss": 0.7441,
"step": 614
},
{
"epoch": 0.2270489785872508,
"grad_norm": 0.9203857183456421,
"learning_rate": 4.9874621406799595e-06,
"loss": 0.7508,
"step": 615
},
{
"epoch": 0.22741816391828698,
"grad_norm": 0.9149799346923828,
"learning_rate": 4.987413443629002e-06,
"loss": 0.764,
"step": 616
},
{
"epoch": 0.22778734924932317,
"grad_norm": 0.9542253613471985,
"learning_rate": 4.987364652430231e-06,
"loss": 0.7515,
"step": 617
},
{
"epoch": 0.22815653458035934,
"grad_norm": 0.9015132784843445,
"learning_rate": 4.9873157670854925e-06,
"loss": 0.7474,
"step": 618
},
{
"epoch": 0.22852571991139553,
"grad_norm": 0.8520395159721375,
"learning_rate": 4.987266787596637e-06,
"loss": 0.7202,
"step": 619
},
{
"epoch": 0.2288949052424317,
"grad_norm": 0.939619243144989,
"learning_rate": 4.987217713965519e-06,
"loss": 0.7998,
"step": 620
},
{
"epoch": 0.2292640905734679,
"grad_norm": 0.9087517857551575,
"learning_rate": 4.9871685461939954e-06,
"loss": 0.7436,
"step": 621
},
{
"epoch": 0.22963327590450405,
"grad_norm": 0.9153372049331665,
"learning_rate": 4.9871192842839264e-06,
"loss": 0.7439,
"step": 622
},
{
"epoch": 0.23000246123554025,
"grad_norm": 0.9004572629928589,
"learning_rate": 4.98706992823718e-06,
"loss": 0.7366,
"step": 623
},
{
"epoch": 0.2303716465665764,
"grad_norm": 0.8933371305465698,
"learning_rate": 4.9870204780556185e-06,
"loss": 0.7707,
"step": 624
},
{
"epoch": 0.2307408318976126,
"grad_norm": 0.9173306822776794,
"learning_rate": 4.9869709337411184e-06,
"loss": 0.769,
"step": 625
},
{
"epoch": 0.2311100172286488,
"grad_norm": 0.9018936157226562,
"learning_rate": 4.986921295295554e-06,
"loss": 0.7574,
"step": 626
},
{
"epoch": 0.23147920255968496,
"grad_norm": 0.9065893888473511,
"learning_rate": 4.986871562720803e-06,
"loss": 0.7649,
"step": 627
},
{
"epoch": 0.23184838789072115,
"grad_norm": 0.875457763671875,
"learning_rate": 4.986821736018748e-06,
"loss": 0.7606,
"step": 628
},
{
"epoch": 0.23221757322175732,
"grad_norm": 0.9208522439002991,
"learning_rate": 4.986771815191275e-06,
"loss": 0.7883,
"step": 629
},
{
"epoch": 0.2325867585527935,
"grad_norm": 0.9178383350372314,
"learning_rate": 4.986721800240273e-06,
"loss": 0.7694,
"step": 630
},
{
"epoch": 0.23295594388382967,
"grad_norm": 0.9127592444419861,
"learning_rate": 4.986671691167637e-06,
"loss": 0.7405,
"step": 631
},
{
"epoch": 0.23332512921486587,
"grad_norm": 0.9244971871376038,
"learning_rate": 4.986621487975261e-06,
"loss": 0.796,
"step": 632
},
{
"epoch": 0.23369431454590203,
"grad_norm": 0.9289013147354126,
"learning_rate": 4.9865711906650485e-06,
"loss": 0.7398,
"step": 633
},
{
"epoch": 0.23406349987693822,
"grad_norm": 0.8860224485397339,
"learning_rate": 4.9865207992389e-06,
"loss": 0.7518,
"step": 634
},
{
"epoch": 0.2344326852079744,
"grad_norm": 0.983054518699646,
"learning_rate": 4.986470313698723e-06,
"loss": 0.772,
"step": 635
},
{
"epoch": 0.23480187053901058,
"grad_norm": 0.896415650844574,
"learning_rate": 4.98641973404643e-06,
"loss": 0.783,
"step": 636
},
{
"epoch": 0.23517105587004677,
"grad_norm": 0.9165555238723755,
"learning_rate": 4.986369060283935e-06,
"loss": 0.7902,
"step": 637
},
{
"epoch": 0.23554024120108294,
"grad_norm": 0.928102970123291,
"learning_rate": 4.986318292413157e-06,
"loss": 0.7458,
"step": 638
},
{
"epoch": 0.23590942653211913,
"grad_norm": 0.8924850821495056,
"learning_rate": 4.986267430436015e-06,
"loss": 0.7464,
"step": 639
},
{
"epoch": 0.2362786118631553,
"grad_norm": 0.9051222801208496,
"learning_rate": 4.986216474354436e-06,
"loss": 0.7789,
"step": 640
},
{
"epoch": 0.2366477971941915,
"grad_norm": 0.9151371717453003,
"learning_rate": 4.986165424170347e-06,
"loss": 0.7722,
"step": 641
},
{
"epoch": 0.23701698252522765,
"grad_norm": 0.9557187557220459,
"learning_rate": 4.9861142798856824e-06,
"loss": 0.7606,
"step": 642
},
{
"epoch": 0.23738616785626385,
"grad_norm": 0.9134449362754822,
"learning_rate": 4.986063041502377e-06,
"loss": 0.7482,
"step": 643
},
{
"epoch": 0.2377553531873,
"grad_norm": 0.920628547668457,
"learning_rate": 4.9860117090223695e-06,
"loss": 0.7545,
"step": 644
},
{
"epoch": 0.2381245385183362,
"grad_norm": 0.9102922081947327,
"learning_rate": 4.9859602824476035e-06,
"loss": 0.7575,
"step": 645
},
{
"epoch": 0.2384937238493724,
"grad_norm": 0.9474995136260986,
"learning_rate": 4.985908761780025e-06,
"loss": 0.7511,
"step": 646
},
{
"epoch": 0.23886290918040856,
"grad_norm": 0.9534257650375366,
"learning_rate": 4.9858571470215854e-06,
"loss": 0.7846,
"step": 647
},
{
"epoch": 0.23923209451144475,
"grad_norm": 0.9420318007469177,
"learning_rate": 4.9858054381742374e-06,
"loss": 0.7846,
"step": 648
},
{
"epoch": 0.23960127984248092,
"grad_norm": 0.9332766532897949,
"learning_rate": 4.9857536352399376e-06,
"loss": 0.7763,
"step": 649
},
{
"epoch": 0.2399704651735171,
"grad_norm": 0.8975653648376465,
"learning_rate": 4.985701738220647e-06,
"loss": 0.7375,
"step": 650
},
{
"epoch": 0.24033965050455328,
"grad_norm": 1.0437159538269043,
"learning_rate": 4.98564974711833e-06,
"loss": 0.7838,
"step": 651
},
{
"epoch": 0.24070883583558947,
"grad_norm": 0.9787809252738953,
"learning_rate": 4.985597661934955e-06,
"loss": 0.8084,
"step": 652
},
{
"epoch": 0.24107802116662563,
"grad_norm": 0.9051234722137451,
"learning_rate": 4.985545482672493e-06,
"loss": 0.7251,
"step": 653
},
{
"epoch": 0.24144720649766183,
"grad_norm": 0.9520812630653381,
"learning_rate": 4.985493209332918e-06,
"loss": 0.7966,
"step": 654
},
{
"epoch": 0.24181639182869802,
"grad_norm": 0.9688146114349365,
"learning_rate": 4.985440841918211e-06,
"loss": 0.7541,
"step": 655
},
{
"epoch": 0.24218557715973418,
"grad_norm": 0.9103109240531921,
"learning_rate": 4.9853883804303515e-06,
"loss": 0.7515,
"step": 656
},
{
"epoch": 0.24255476249077038,
"grad_norm": 0.8874120712280273,
"learning_rate": 4.9853358248713266e-06,
"loss": 0.7416,
"step": 657
},
{
"epoch": 0.24292394782180654,
"grad_norm": 0.8995088338851929,
"learning_rate": 4.9852831752431256e-06,
"loss": 0.7567,
"step": 658
},
{
"epoch": 0.24329313315284273,
"grad_norm": 0.9225655198097229,
"learning_rate": 4.98523043154774e-06,
"loss": 0.754,
"step": 659
},
{
"epoch": 0.2436623184838789,
"grad_norm": 0.9014883637428284,
"learning_rate": 4.985177593787167e-06,
"loss": 0.7289,
"step": 660
},
{
"epoch": 0.2440315038149151,
"grad_norm": 1.0416333675384521,
"learning_rate": 4.9851246619634054e-06,
"loss": 0.7705,
"step": 661
},
{
"epoch": 0.24440068914595126,
"grad_norm": 0.9075980186462402,
"learning_rate": 4.9850716360784615e-06,
"loss": 0.7608,
"step": 662
},
{
"epoch": 0.24476987447698745,
"grad_norm": 0.8677279949188232,
"learning_rate": 4.98501851613434e-06,
"loss": 0.7988,
"step": 663
},
{
"epoch": 0.24513905980802364,
"grad_norm": 0.9182518124580383,
"learning_rate": 4.984965302133051e-06,
"loss": 0.7687,
"step": 664
},
{
"epoch": 0.2455082451390598,
"grad_norm": 0.9112989902496338,
"learning_rate": 4.98491199407661e-06,
"loss": 0.7842,
"step": 665
},
{
"epoch": 0.245877430470096,
"grad_norm": 0.8956560492515564,
"learning_rate": 4.984858591967035e-06,
"loss": 0.7418,
"step": 666
},
{
"epoch": 0.24624661580113216,
"grad_norm": 0.9098480343818665,
"learning_rate": 4.984805095806346e-06,
"loss": 0.7978,
"step": 667
},
{
"epoch": 0.24661580113216836,
"grad_norm": 0.8892375230789185,
"learning_rate": 4.9847515055965685e-06,
"loss": 0.7563,
"step": 668
},
{
"epoch": 0.24698498646320452,
"grad_norm": 0.9025793671607971,
"learning_rate": 4.984697821339731e-06,
"loss": 0.7302,
"step": 669
},
{
"epoch": 0.2473541717942407,
"grad_norm": 0.9058409333229065,
"learning_rate": 4.984644043037864e-06,
"loss": 0.7938,
"step": 670
},
{
"epoch": 0.24772335712527688,
"grad_norm": 0.9041579365730286,
"learning_rate": 4.984590170693005e-06,
"loss": 0.7788,
"step": 671
},
{
"epoch": 0.24809254245631307,
"grad_norm": 0.9054029583930969,
"learning_rate": 4.9845362043071925e-06,
"loss": 0.7546,
"step": 672
},
{
"epoch": 0.24846172778734926,
"grad_norm": 0.9551224708557129,
"learning_rate": 4.984482143882469e-06,
"loss": 0.7458,
"step": 673
},
{
"epoch": 0.24883091311838543,
"grad_norm": 0.9546729922294617,
"learning_rate": 4.9844279894208795e-06,
"loss": 0.7808,
"step": 674
},
{
"epoch": 0.24920009844942162,
"grad_norm": 0.9500798583030701,
"learning_rate": 4.984373740924475e-06,
"loss": 0.7773,
"step": 675
},
{
"epoch": 0.24956928378045778,
"grad_norm": 0.9165859222412109,
"learning_rate": 4.984319398395308e-06,
"loss": 0.7596,
"step": 676
},
{
"epoch": 0.24993846911149398,
"grad_norm": 0.9380325675010681,
"learning_rate": 4.984264961835436e-06,
"loss": 0.799,
"step": 677
},
{
"epoch": 0.25030765444253017,
"grad_norm": 0.9391986131668091,
"learning_rate": 4.98421043124692e-06,
"loss": 0.7435,
"step": 678
},
{
"epoch": 0.2506768397735663,
"grad_norm": 1.0003539323806763,
"learning_rate": 4.984155806631823e-06,
"loss": 0.7798,
"step": 679
},
{
"epoch": 0.2510460251046025,
"grad_norm": 0.9141901731491089,
"learning_rate": 4.984101087992212e-06,
"loss": 0.7093,
"step": 680
},
{
"epoch": 0.2514152104356387,
"grad_norm": 1.0558290481567383,
"learning_rate": 4.98404627533016e-06,
"loss": 0.7651,
"step": 681
},
{
"epoch": 0.2517843957666749,
"grad_norm": 0.8751945495605469,
"learning_rate": 4.98399136864774e-06,
"loss": 0.7598,
"step": 682
},
{
"epoch": 0.2521535810977111,
"grad_norm": 0.8984244465827942,
"learning_rate": 4.9839363679470296e-06,
"loss": 0.7921,
"step": 683
},
{
"epoch": 0.2525227664287472,
"grad_norm": 0.9515509605407715,
"learning_rate": 4.9838812732301134e-06,
"loss": 0.7781,
"step": 684
},
{
"epoch": 0.2528919517597834,
"grad_norm": 0.9040102958679199,
"learning_rate": 4.983826084499074e-06,
"loss": 0.7435,
"step": 685
},
{
"epoch": 0.2532611370908196,
"grad_norm": 0.8813716769218445,
"learning_rate": 4.983770801756001e-06,
"loss": 0.7675,
"step": 686
},
{
"epoch": 0.2536303224218558,
"grad_norm": 0.9750944375991821,
"learning_rate": 4.983715425002987e-06,
"loss": 0.7389,
"step": 687
},
{
"epoch": 0.25399950775289193,
"grad_norm": 0.8693578839302063,
"learning_rate": 4.983659954242128e-06,
"loss": 0.7325,
"step": 688
},
{
"epoch": 0.2543686930839281,
"grad_norm": 0.934140145778656,
"learning_rate": 4.983604389475525e-06,
"loss": 0.8121,
"step": 689
},
{
"epoch": 0.2547378784149643,
"grad_norm": 0.9266787767410278,
"learning_rate": 4.983548730705278e-06,
"loss": 0.793,
"step": 690
},
{
"epoch": 0.2551070637460005,
"grad_norm": 0.9268523454666138,
"learning_rate": 4.9834929779334964e-06,
"loss": 0.7605,
"step": 691
},
{
"epoch": 0.2554762490770367,
"grad_norm": 0.9014139771461487,
"learning_rate": 4.983437131162289e-06,
"loss": 0.7518,
"step": 692
},
{
"epoch": 0.25584543440807284,
"grad_norm": 0.8949803113937378,
"learning_rate": 4.98338119039377e-06,
"loss": 0.7179,
"step": 693
},
{
"epoch": 0.25621461973910903,
"grad_norm": 0.9687701463699341,
"learning_rate": 4.983325155630056e-06,
"loss": 0.7602,
"step": 694
},
{
"epoch": 0.2565838050701452,
"grad_norm": 0.9221265316009521,
"learning_rate": 4.983269026873269e-06,
"loss": 0.7374,
"step": 695
},
{
"epoch": 0.2569529904011814,
"grad_norm": 0.9037137627601624,
"learning_rate": 4.983212804125533e-06,
"loss": 0.7424,
"step": 696
},
{
"epoch": 0.25732217573221755,
"grad_norm": 0.9276369214057922,
"learning_rate": 4.983156487388977e-06,
"loss": 0.8033,
"step": 697
},
{
"epoch": 0.25769136106325374,
"grad_norm": 0.964596152305603,
"learning_rate": 4.983100076665731e-06,
"loss": 0.8284,
"step": 698
},
{
"epoch": 0.25806054639428994,
"grad_norm": 0.9751665592193604,
"learning_rate": 4.983043571957931e-06,
"loss": 0.7919,
"step": 699
},
{
"epoch": 0.25842973172532613,
"grad_norm": 0.9727154970169067,
"learning_rate": 4.9829869732677146e-06,
"loss": 0.8094,
"step": 700
},
{
"epoch": 0.2587989170563623,
"grad_norm": 0.9635800719261169,
"learning_rate": 4.982930280597226e-06,
"loss": 0.7682,
"step": 701
},
{
"epoch": 0.25916810238739846,
"grad_norm": 0.8952401280403137,
"learning_rate": 4.98287349394861e-06,
"loss": 0.812,
"step": 702
},
{
"epoch": 0.25953728771843465,
"grad_norm": 0.905311107635498,
"learning_rate": 4.982816613324015e-06,
"loss": 0.7621,
"step": 703
},
{
"epoch": 0.25990647304947084,
"grad_norm": 0.9060837030410767,
"learning_rate": 4.982759638725595e-06,
"loss": 0.7343,
"step": 704
},
{
"epoch": 0.26027565838050704,
"grad_norm": 0.9590125679969788,
"learning_rate": 4.982702570155506e-06,
"loss": 0.7996,
"step": 705
},
{
"epoch": 0.2606448437115432,
"grad_norm": 0.9153867959976196,
"learning_rate": 4.9826454076159094e-06,
"loss": 0.7566,
"step": 706
},
{
"epoch": 0.26101402904257937,
"grad_norm": 0.8935747146606445,
"learning_rate": 4.982588151108966e-06,
"loss": 0.7675,
"step": 707
},
{
"epoch": 0.26138321437361556,
"grad_norm": 0.9266985058784485,
"learning_rate": 4.982530800636845e-06,
"loss": 0.7712,
"step": 708
},
{
"epoch": 0.26175239970465175,
"grad_norm": 0.9422087073326111,
"learning_rate": 4.982473356201718e-06,
"loss": 0.7768,
"step": 709
},
{
"epoch": 0.26212158503568794,
"grad_norm": 0.9119973182678223,
"learning_rate": 4.982415817805757e-06,
"loss": 0.7693,
"step": 710
},
{
"epoch": 0.2624907703667241,
"grad_norm": 0.8853102326393127,
"learning_rate": 4.982358185451141e-06,
"loss": 0.75,
"step": 711
},
{
"epoch": 0.2628599556977603,
"grad_norm": 0.9192734360694885,
"learning_rate": 4.982300459140051e-06,
"loss": 0.7715,
"step": 712
},
{
"epoch": 0.26322914102879647,
"grad_norm": 0.8763948082923889,
"learning_rate": 4.982242638874672e-06,
"loss": 0.7635,
"step": 713
},
{
"epoch": 0.26359832635983266,
"grad_norm": 0.8886423707008362,
"learning_rate": 4.982184724657192e-06,
"loss": 0.753,
"step": 714
},
{
"epoch": 0.2639675116908688,
"grad_norm": 0.9051578044891357,
"learning_rate": 4.9821267164898045e-06,
"loss": 0.7759,
"step": 715
},
{
"epoch": 0.264336697021905,
"grad_norm": 0.9290419220924377,
"learning_rate": 4.9820686143747045e-06,
"loss": 0.8023,
"step": 716
},
{
"epoch": 0.2647058823529412,
"grad_norm": 0.9183179140090942,
"learning_rate": 4.982010418314089e-06,
"loss": 0.7909,
"step": 717
},
{
"epoch": 0.2650750676839774,
"grad_norm": 0.9106544256210327,
"learning_rate": 4.981952128310165e-06,
"loss": 0.7534,
"step": 718
},
{
"epoch": 0.2654442530150135,
"grad_norm": 0.8992197513580322,
"learning_rate": 4.981893744365134e-06,
"loss": 0.7368,
"step": 719
},
{
"epoch": 0.2658134383460497,
"grad_norm": 0.8842350840568542,
"learning_rate": 4.981835266481209e-06,
"loss": 0.762,
"step": 720
},
{
"epoch": 0.2661826236770859,
"grad_norm": 0.9051290154457092,
"learning_rate": 4.9817766946606025e-06,
"loss": 0.7509,
"step": 721
},
{
"epoch": 0.2665518090081221,
"grad_norm": 0.9140417575836182,
"learning_rate": 4.9817180289055314e-06,
"loss": 0.7554,
"step": 722
},
{
"epoch": 0.2669209943391583,
"grad_norm": 0.8826530575752258,
"learning_rate": 4.981659269218216e-06,
"loss": 0.7498,
"step": 723
},
{
"epoch": 0.2672901796701944,
"grad_norm": 0.8926510214805603,
"learning_rate": 4.98160041560088e-06,
"loss": 0.7614,
"step": 724
},
{
"epoch": 0.2676593650012306,
"grad_norm": 0.9062178134918213,
"learning_rate": 4.9815414680557514e-06,
"loss": 0.7644,
"step": 725
},
{
"epoch": 0.2680285503322668,
"grad_norm": 0.9631175994873047,
"learning_rate": 4.981482426585063e-06,
"loss": 0.7816,
"step": 726
},
{
"epoch": 0.268397735663303,
"grad_norm": 0.9016267657279968,
"learning_rate": 4.981423291191047e-06,
"loss": 0.7367,
"step": 727
},
{
"epoch": 0.26876692099433913,
"grad_norm": 0.8919848799705505,
"learning_rate": 4.981364061875942e-06,
"loss": 0.7523,
"step": 728
},
{
"epoch": 0.2691361063253753,
"grad_norm": 0.8961866497993469,
"learning_rate": 4.981304738641991e-06,
"loss": 0.7743,
"step": 729
},
{
"epoch": 0.2695052916564115,
"grad_norm": 1.0036101341247559,
"learning_rate": 4.981245321491438e-06,
"loss": 0.7526,
"step": 730
},
{
"epoch": 0.2698744769874477,
"grad_norm": 0.9046428203582764,
"learning_rate": 4.9811858104265334e-06,
"loss": 0.7522,
"step": 731
},
{
"epoch": 0.2702436623184839,
"grad_norm": 0.9048503041267395,
"learning_rate": 4.981126205449529e-06,
"loss": 0.7233,
"step": 732
},
{
"epoch": 0.27061284764952004,
"grad_norm": 1.0362783670425415,
"learning_rate": 4.9810665065626805e-06,
"loss": 0.751,
"step": 733
},
{
"epoch": 0.27098203298055623,
"grad_norm": 0.9431849122047424,
"learning_rate": 4.981006713768248e-06,
"loss": 0.7831,
"step": 734
},
{
"epoch": 0.2713512183115924,
"grad_norm": 0.9257729649543762,
"learning_rate": 4.980946827068494e-06,
"loss": 0.7801,
"step": 735
},
{
"epoch": 0.2717204036426286,
"grad_norm": 0.8980259299278259,
"learning_rate": 4.980886846465686e-06,
"loss": 0.7452,
"step": 736
},
{
"epoch": 0.27208958897366475,
"grad_norm": 0.895393967628479,
"learning_rate": 4.980826771962094e-06,
"loss": 0.72,
"step": 737
},
{
"epoch": 0.27245877430470095,
"grad_norm": 0.8936492800712585,
"learning_rate": 4.980766603559991e-06,
"loss": 0.7668,
"step": 738
},
{
"epoch": 0.27282795963573714,
"grad_norm": 0.8899438977241516,
"learning_rate": 4.980706341261655e-06,
"loss": 0.7224,
"step": 739
},
{
"epoch": 0.27319714496677333,
"grad_norm": 0.9478532075881958,
"learning_rate": 4.980645985069367e-06,
"loss": 0.7859,
"step": 740
},
{
"epoch": 0.2735663302978095,
"grad_norm": 0.9106540083885193,
"learning_rate": 4.980585534985412e-06,
"loss": 0.7769,
"step": 741
},
{
"epoch": 0.27393551562884566,
"grad_norm": 0.9290282130241394,
"learning_rate": 4.9805249910120776e-06,
"loss": 0.7358,
"step": 742
},
{
"epoch": 0.27430470095988185,
"grad_norm": 0.9177589416503906,
"learning_rate": 4.980464353151654e-06,
"loss": 0.7647,
"step": 743
},
{
"epoch": 0.27467388629091805,
"grad_norm": 0.900225818157196,
"learning_rate": 4.980403621406439e-06,
"loss": 0.7416,
"step": 744
},
{
"epoch": 0.27504307162195424,
"grad_norm": 0.905703604221344,
"learning_rate": 4.980342795778728e-06,
"loss": 0.8068,
"step": 745
},
{
"epoch": 0.2754122569529904,
"grad_norm": 0.8948282599449158,
"learning_rate": 4.980281876270826e-06,
"loss": 0.7597,
"step": 746
},
{
"epoch": 0.27578144228402657,
"grad_norm": 0.9245081543922424,
"learning_rate": 4.980220862885038e-06,
"loss": 0.7406,
"step": 747
},
{
"epoch": 0.27615062761506276,
"grad_norm": 0.8864959478378296,
"learning_rate": 4.980159755623673e-06,
"loss": 0.7464,
"step": 748
},
{
"epoch": 0.27651981294609895,
"grad_norm": 0.9063557386398315,
"learning_rate": 4.9800985544890425e-06,
"loss": 0.77,
"step": 749
},
{
"epoch": 0.27688899827713515,
"grad_norm": 0.8840070962905884,
"learning_rate": 4.9800372594834656e-06,
"loss": 0.7356,
"step": 750
},
{
"epoch": 0.2772581836081713,
"grad_norm": 0.8699694275856018,
"learning_rate": 4.979975870609261e-06,
"loss": 0.747,
"step": 751
},
{
"epoch": 0.2776273689392075,
"grad_norm": 0.9077945351600647,
"learning_rate": 4.979914387868753e-06,
"loss": 0.7428,
"step": 752
},
{
"epoch": 0.27799655427024367,
"grad_norm": 0.9290851354598999,
"learning_rate": 4.979852811264267e-06,
"loss": 0.7509,
"step": 753
},
{
"epoch": 0.27836573960127986,
"grad_norm": 0.928439199924469,
"learning_rate": 4.979791140798136e-06,
"loss": 0.7949,
"step": 754
},
{
"epoch": 0.278734924932316,
"grad_norm": 0.8840503692626953,
"learning_rate": 4.9797293764726924e-06,
"loss": 0.7167,
"step": 755
},
{
"epoch": 0.2791041102633522,
"grad_norm": 0.9051108360290527,
"learning_rate": 4.979667518290274e-06,
"loss": 0.7636,
"step": 756
},
{
"epoch": 0.2794732955943884,
"grad_norm": 0.9400845766067505,
"learning_rate": 4.979605566253224e-06,
"loss": 0.7595,
"step": 757
},
{
"epoch": 0.2798424809254246,
"grad_norm": 0.9284423589706421,
"learning_rate": 4.979543520363884e-06,
"loss": 0.728,
"step": 758
},
{
"epoch": 0.28021166625646077,
"grad_norm": 0.8976213932037354,
"learning_rate": 4.979481380624606e-06,
"loss": 0.7399,
"step": 759
},
{
"epoch": 0.2805808515874969,
"grad_norm": 0.8708903193473816,
"learning_rate": 4.97941914703774e-06,
"loss": 0.7534,
"step": 760
},
{
"epoch": 0.2809500369185331,
"grad_norm": 0.9106154441833496,
"learning_rate": 4.979356819605641e-06,
"loss": 0.7552,
"step": 761
},
{
"epoch": 0.2813192222495693,
"grad_norm": 0.9194098711013794,
"learning_rate": 4.979294398330668e-06,
"loss": 0.7365,
"step": 762
},
{
"epoch": 0.2816884075806055,
"grad_norm": 0.9231695532798767,
"learning_rate": 4.9792318832151864e-06,
"loss": 0.7607,
"step": 763
},
{
"epoch": 0.2820575929116416,
"grad_norm": 0.9608139991760254,
"learning_rate": 4.97916927426156e-06,
"loss": 0.7909,
"step": 764
},
{
"epoch": 0.2824267782426778,
"grad_norm": 0.9300723075866699,
"learning_rate": 4.979106571472159e-06,
"loss": 0.7453,
"step": 765
},
{
"epoch": 0.282795963573714,
"grad_norm": 0.9103767275810242,
"learning_rate": 4.979043774849356e-06,
"loss": 0.7816,
"step": 766
},
{
"epoch": 0.2831651489047502,
"grad_norm": 0.9026121497154236,
"learning_rate": 4.9789808843955294e-06,
"loss": 0.7656,
"step": 767
},
{
"epoch": 0.2835343342357864,
"grad_norm": 0.8946061730384827,
"learning_rate": 4.978917900113059e-06,
"loss": 0.7399,
"step": 768
},
{
"epoch": 0.2839035195668225,
"grad_norm": 0.8899773955345154,
"learning_rate": 4.978854822004327e-06,
"loss": 0.7666,
"step": 769
},
{
"epoch": 0.2842727048978587,
"grad_norm": 0.9117376804351807,
"learning_rate": 4.978791650071723e-06,
"loss": 0.7552,
"step": 770
},
{
"epoch": 0.2846418902288949,
"grad_norm": 0.8872507810592651,
"learning_rate": 4.978728384317637e-06,
"loss": 0.7367,
"step": 771
},
{
"epoch": 0.2850110755599311,
"grad_norm": 0.9358695149421692,
"learning_rate": 4.978665024744465e-06,
"loss": 0.7751,
"step": 772
},
{
"epoch": 0.28538026089096724,
"grad_norm": 0.879411518573761,
"learning_rate": 4.9786015713546035e-06,
"loss": 0.7681,
"step": 773
},
{
"epoch": 0.28574944622200343,
"grad_norm": 0.9180899262428284,
"learning_rate": 4.978538024150455e-06,
"loss": 0.7481,
"step": 774
},
{
"epoch": 0.2861186315530396,
"grad_norm": 0.8577932119369507,
"learning_rate": 4.978474383134424e-06,
"loss": 0.7133,
"step": 775
},
{
"epoch": 0.2864878168840758,
"grad_norm": 0.8630688190460205,
"learning_rate": 4.97841064830892e-06,
"loss": 0.7232,
"step": 776
},
{
"epoch": 0.286857002215112,
"grad_norm": 0.902454137802124,
"learning_rate": 4.978346819676355e-06,
"loss": 0.7438,
"step": 777
},
{
"epoch": 0.28722618754614815,
"grad_norm": 0.9192384481430054,
"learning_rate": 4.9782828972391466e-06,
"loss": 0.8082,
"step": 778
},
{
"epoch": 0.28759537287718434,
"grad_norm": 0.9017093777656555,
"learning_rate": 4.9782188809997106e-06,
"loss": 0.7713,
"step": 779
},
{
"epoch": 0.28796455820822053,
"grad_norm": 0.8741960525512695,
"learning_rate": 4.978154770960473e-06,
"loss": 0.7601,
"step": 780
},
{
"epoch": 0.2883337435392567,
"grad_norm": 0.8619513511657715,
"learning_rate": 4.978090567123859e-06,
"loss": 0.7267,
"step": 781
},
{
"epoch": 0.28870292887029286,
"grad_norm": 0.9079142808914185,
"learning_rate": 4.9780262694923e-06,
"loss": 0.7804,
"step": 782
},
{
"epoch": 0.28907211420132906,
"grad_norm": 0.8758432865142822,
"learning_rate": 4.977961878068228e-06,
"loss": 0.7271,
"step": 783
},
{
"epoch": 0.28944129953236525,
"grad_norm": 0.8986886143684387,
"learning_rate": 4.977897392854081e-06,
"loss": 0.7885,
"step": 784
},
{
"epoch": 0.28981048486340144,
"grad_norm": 0.9059154391288757,
"learning_rate": 4.9778328138523e-06,
"loss": 0.7367,
"step": 785
},
{
"epoch": 0.29017967019443763,
"grad_norm": 0.9500458240509033,
"learning_rate": 4.9777681410653295e-06,
"loss": 0.7784,
"step": 786
},
{
"epoch": 0.29054885552547377,
"grad_norm": 0.9947640299797058,
"learning_rate": 4.977703374495616e-06,
"loss": 0.7704,
"step": 787
},
{
"epoch": 0.29091804085650996,
"grad_norm": 0.9205284118652344,
"learning_rate": 4.977638514145612e-06,
"loss": 0.7906,
"step": 788
},
{
"epoch": 0.29128722618754616,
"grad_norm": 0.925631582736969,
"learning_rate": 4.977573560017772e-06,
"loss": 0.7847,
"step": 789
},
{
"epoch": 0.29165641151858235,
"grad_norm": 0.96160888671875,
"learning_rate": 4.977508512114556e-06,
"loss": 0.7993,
"step": 790
},
{
"epoch": 0.2920255968496185,
"grad_norm": 0.9323878884315491,
"learning_rate": 4.977443370438423e-06,
"loss": 0.7811,
"step": 791
},
{
"epoch": 0.2923947821806547,
"grad_norm": 0.9564676880836487,
"learning_rate": 4.977378134991841e-06,
"loss": 0.7759,
"step": 792
},
{
"epoch": 0.29276396751169087,
"grad_norm": 0.8873472213745117,
"learning_rate": 4.977312805777279e-06,
"loss": 0.7757,
"step": 793
},
{
"epoch": 0.29313315284272706,
"grad_norm": 0.9213703274726868,
"learning_rate": 4.977247382797208e-06,
"loss": 0.7134,
"step": 794
},
{
"epoch": 0.29350233817376326,
"grad_norm": 0.9845170974731445,
"learning_rate": 4.977181866054106e-06,
"loss": 0.7865,
"step": 795
},
{
"epoch": 0.2938715235047994,
"grad_norm": 0.9223014116287231,
"learning_rate": 4.977116255550452e-06,
"loss": 0.7784,
"step": 796
},
{
"epoch": 0.2942407088358356,
"grad_norm": 0.8821209073066711,
"learning_rate": 4.97705055128873e-06,
"loss": 0.7164,
"step": 797
},
{
"epoch": 0.2946098941668718,
"grad_norm": 0.8854329586029053,
"learning_rate": 4.976984753271427e-06,
"loss": 0.7593,
"step": 798
},
{
"epoch": 0.29497907949790797,
"grad_norm": 0.9611573219299316,
"learning_rate": 4.976918861501031e-06,
"loss": 0.7343,
"step": 799
},
{
"epoch": 0.2953482648289441,
"grad_norm": 0.9474232792854309,
"learning_rate": 4.976852875980039e-06,
"loss": 0.7501,
"step": 800
},
{
"epoch": 0.2957174501599803,
"grad_norm": 0.9252268671989441,
"learning_rate": 4.976786796710947e-06,
"loss": 0.7608,
"step": 801
},
{
"epoch": 0.2960866354910165,
"grad_norm": 0.897132396697998,
"learning_rate": 4.976720623696257e-06,
"loss": 0.7549,
"step": 802
},
{
"epoch": 0.2964558208220527,
"grad_norm": 0.8893219828605652,
"learning_rate": 4.976654356938472e-06,
"loss": 0.7268,
"step": 803
},
{
"epoch": 0.2968250061530889,
"grad_norm": 0.9587628841400146,
"learning_rate": 4.976587996440102e-06,
"loss": 0.7845,
"step": 804
},
{
"epoch": 0.297194191484125,
"grad_norm": 0.8750391006469727,
"learning_rate": 4.976521542203658e-06,
"loss": 0.7305,
"step": 805
},
{
"epoch": 0.2975633768151612,
"grad_norm": 0.9181191921234131,
"learning_rate": 4.976454994231656e-06,
"loss": 0.7969,
"step": 806
},
{
"epoch": 0.2979325621461974,
"grad_norm": 0.934037983417511,
"learning_rate": 4.976388352526612e-06,
"loss": 0.7438,
"step": 807
},
{
"epoch": 0.2983017474772336,
"grad_norm": 0.9424565434455872,
"learning_rate": 4.976321617091052e-06,
"loss": 0.7631,
"step": 808
},
{
"epoch": 0.29867093280826973,
"grad_norm": 0.9459184408187866,
"learning_rate": 4.976254787927499e-06,
"loss": 0.7616,
"step": 809
},
{
"epoch": 0.2990401181393059,
"grad_norm": 0.912196934223175,
"learning_rate": 4.976187865038485e-06,
"loss": 0.7814,
"step": 810
},
{
"epoch": 0.2994093034703421,
"grad_norm": 0.8629269003868103,
"learning_rate": 4.976120848426542e-06,
"loss": 0.7365,
"step": 811
},
{
"epoch": 0.2997784888013783,
"grad_norm": 0.9021220207214355,
"learning_rate": 4.9760537380942055e-06,
"loss": 0.7271,
"step": 812
},
{
"epoch": 0.3001476741324145,
"grad_norm": 0.8790842294692993,
"learning_rate": 4.975986534044017e-06,
"loss": 0.7211,
"step": 813
},
{
"epoch": 0.30051685946345064,
"grad_norm": 0.9232593178749084,
"learning_rate": 4.975919236278519e-06,
"loss": 0.7557,
"step": 814
},
{
"epoch": 0.30088604479448683,
"grad_norm": 0.9208589792251587,
"learning_rate": 4.975851844800259e-06,
"loss": 0.7585,
"step": 815
},
{
"epoch": 0.301255230125523,
"grad_norm": 0.8960398435592651,
"learning_rate": 4.9757843596117894e-06,
"loss": 0.81,
"step": 816
},
{
"epoch": 0.3016244154565592,
"grad_norm": 0.9213392734527588,
"learning_rate": 4.975716780715662e-06,
"loss": 0.7421,
"step": 817
},
{
"epoch": 0.30199360078759535,
"grad_norm": 0.882556140422821,
"learning_rate": 4.975649108114437e-06,
"loss": 0.7653,
"step": 818
},
{
"epoch": 0.30236278611863154,
"grad_norm": 0.8911692500114441,
"learning_rate": 4.9755813418106735e-06,
"loss": 0.7357,
"step": 819
},
{
"epoch": 0.30273197144966774,
"grad_norm": 0.9037520885467529,
"learning_rate": 4.975513481806939e-06,
"loss": 0.7503,
"step": 820
},
{
"epoch": 0.30310115678070393,
"grad_norm": 0.9327360391616821,
"learning_rate": 4.975445528105799e-06,
"loss": 0.7269,
"step": 821
},
{
"epoch": 0.3034703421117401,
"grad_norm": 0.8998024463653564,
"learning_rate": 4.9753774807098275e-06,
"loss": 0.7384,
"step": 822
},
{
"epoch": 0.30383952744277626,
"grad_norm": 0.9088361859321594,
"learning_rate": 4.975309339621599e-06,
"loss": 0.7571,
"step": 823
},
{
"epoch": 0.30420871277381245,
"grad_norm": 0.8943783640861511,
"learning_rate": 4.975241104843694e-06,
"loss": 0.7007,
"step": 824
},
{
"epoch": 0.30457789810484864,
"grad_norm": 0.9370293021202087,
"learning_rate": 4.975172776378694e-06,
"loss": 0.746,
"step": 825
},
{
"epoch": 0.30494708343588484,
"grad_norm": 0.946474552154541,
"learning_rate": 4.9751043542291854e-06,
"loss": 0.7987,
"step": 826
},
{
"epoch": 0.305316268766921,
"grad_norm": 0.9123954772949219,
"learning_rate": 4.975035838397759e-06,
"loss": 0.7611,
"step": 827
},
{
"epoch": 0.30568545409795717,
"grad_norm": 0.9167425632476807,
"learning_rate": 4.974967228887007e-06,
"loss": 0.7794,
"step": 828
},
{
"epoch": 0.30605463942899336,
"grad_norm": 0.9110398292541504,
"learning_rate": 4.974898525699526e-06,
"loss": 0.763,
"step": 829
},
{
"epoch": 0.30642382476002955,
"grad_norm": 0.9574660062789917,
"learning_rate": 4.974829728837917e-06,
"loss": 0.7552,
"step": 830
},
{
"epoch": 0.3067930100910657,
"grad_norm": 0.9426335096359253,
"learning_rate": 4.974760838304784e-06,
"loss": 0.7404,
"step": 831
},
{
"epoch": 0.3071621954221019,
"grad_norm": 0.9049092531204224,
"learning_rate": 4.974691854102734e-06,
"loss": 0.7394,
"step": 832
},
{
"epoch": 0.3075313807531381,
"grad_norm": 0.8891183733940125,
"learning_rate": 4.974622776234379e-06,
"loss": 0.734,
"step": 833
},
{
"epoch": 0.30790056608417427,
"grad_norm": 0.8763934373855591,
"learning_rate": 4.974553604702332e-06,
"loss": 0.7341,
"step": 834
},
{
"epoch": 0.30826975141521046,
"grad_norm": 0.9248993396759033,
"learning_rate": 4.974484339509213e-06,
"loss": 0.7694,
"step": 835
},
{
"epoch": 0.3086389367462466,
"grad_norm": 0.8956395983695984,
"learning_rate": 4.974414980657642e-06,
"loss": 0.7665,
"step": 836
},
{
"epoch": 0.3090081220772828,
"grad_norm": 0.8790796399116516,
"learning_rate": 4.974345528150245e-06,
"loss": 0.7492,
"step": 837
},
{
"epoch": 0.309377307408319,
"grad_norm": 0.9157189726829529,
"learning_rate": 4.974275981989651e-06,
"loss": 0.7643,
"step": 838
},
{
"epoch": 0.3097464927393552,
"grad_norm": 0.9237195253372192,
"learning_rate": 4.974206342178492e-06,
"loss": 0.755,
"step": 839
},
{
"epoch": 0.3101156780703913,
"grad_norm": 0.8979294896125793,
"learning_rate": 4.974136608719404e-06,
"loss": 0.8043,
"step": 840
},
{
"epoch": 0.3104848634014275,
"grad_norm": 0.9376009702682495,
"learning_rate": 4.974066781615026e-06,
"loss": 0.7899,
"step": 841
},
{
"epoch": 0.3108540487324637,
"grad_norm": 0.926584780216217,
"learning_rate": 4.973996860868001e-06,
"loss": 0.7436,
"step": 842
},
{
"epoch": 0.3112232340634999,
"grad_norm": 0.8795569539070129,
"learning_rate": 4.973926846480975e-06,
"loss": 0.7399,
"step": 843
},
{
"epoch": 0.3115924193945361,
"grad_norm": 0.9180058836936951,
"learning_rate": 4.973856738456599e-06,
"loss": 0.7332,
"step": 844
},
{
"epoch": 0.3119616047255722,
"grad_norm": 0.9014691710472107,
"learning_rate": 4.973786536797527e-06,
"loss": 0.719,
"step": 845
},
{
"epoch": 0.3123307900566084,
"grad_norm": 0.9873002767562866,
"learning_rate": 4.973716241506415e-06,
"loss": 0.7921,
"step": 846
},
{
"epoch": 0.3126999753876446,
"grad_norm": 0.8903076648712158,
"learning_rate": 4.973645852585923e-06,
"loss": 0.7645,
"step": 847
},
{
"epoch": 0.3130691607186808,
"grad_norm": 0.8644299507141113,
"learning_rate": 4.973575370038718e-06,
"loss": 0.7022,
"step": 848
},
{
"epoch": 0.31343834604971693,
"grad_norm": 0.9160073399543762,
"learning_rate": 4.973504793867465e-06,
"loss": 0.7261,
"step": 849
},
{
"epoch": 0.3138075313807531,
"grad_norm": 0.937968373298645,
"learning_rate": 4.973434124074836e-06,
"loss": 0.7851,
"step": 850
},
{
"epoch": 0.3141767167117893,
"grad_norm": 0.95966637134552,
"learning_rate": 4.973363360663506e-06,
"loss": 0.7395,
"step": 851
},
{
"epoch": 0.3145459020428255,
"grad_norm": 0.8805307745933533,
"learning_rate": 4.973292503636154e-06,
"loss": 0.7323,
"step": 852
},
{
"epoch": 0.3149150873738617,
"grad_norm": 0.9085668921470642,
"learning_rate": 4.97322155299546e-06,
"loss": 0.6853,
"step": 853
},
{
"epoch": 0.31528427270489784,
"grad_norm": 0.9099245071411133,
"learning_rate": 4.973150508744111e-06,
"loss": 0.7732,
"step": 854
},
{
"epoch": 0.31565345803593403,
"grad_norm": 0.8982505202293396,
"learning_rate": 4.973079370884797e-06,
"loss": 0.7127,
"step": 855
},
{
"epoch": 0.3160226433669702,
"grad_norm": 0.9104130864143372,
"learning_rate": 4.973008139420209e-06,
"loss": 0.7405,
"step": 856
},
{
"epoch": 0.3163918286980064,
"grad_norm": 0.8904514908790588,
"learning_rate": 4.9729368143530435e-06,
"loss": 0.7901,
"step": 857
},
{
"epoch": 0.31676101402904255,
"grad_norm": 0.9432110786437988,
"learning_rate": 4.9728653956859995e-06,
"loss": 0.7735,
"step": 858
},
{
"epoch": 0.31713019936007875,
"grad_norm": 0.9377402067184448,
"learning_rate": 4.97279388342178e-06,
"loss": 0.7634,
"step": 859
},
{
"epoch": 0.31749938469111494,
"grad_norm": 0.8846672177314758,
"learning_rate": 4.972722277563094e-06,
"loss": 0.7373,
"step": 860
},
{
"epoch": 0.31786857002215113,
"grad_norm": 0.907082200050354,
"learning_rate": 4.97265057811265e-06,
"loss": 0.746,
"step": 861
},
{
"epoch": 0.3182377553531873,
"grad_norm": 0.92213374376297,
"learning_rate": 4.972578785073161e-06,
"loss": 0.7361,
"step": 862
},
{
"epoch": 0.31860694068422346,
"grad_norm": 0.950560986995697,
"learning_rate": 4.972506898447346e-06,
"loss": 0.8017,
"step": 863
},
{
"epoch": 0.31897612601525965,
"grad_norm": 0.880368709564209,
"learning_rate": 4.972434918237925e-06,
"loss": 0.7593,
"step": 864
},
{
"epoch": 0.31934531134629585,
"grad_norm": 1.2160859107971191,
"learning_rate": 4.972362844447623e-06,
"loss": 0.7608,
"step": 865
},
{
"epoch": 0.31971449667733204,
"grad_norm": 0.9254541993141174,
"learning_rate": 4.972290677079168e-06,
"loss": 0.6959,
"step": 866
},
{
"epoch": 0.3200836820083682,
"grad_norm": 0.8902239203453064,
"learning_rate": 4.97221841613529e-06,
"loss": 0.7166,
"step": 867
},
{
"epoch": 0.32045286733940437,
"grad_norm": 0.9283355474472046,
"learning_rate": 4.972146061618726e-06,
"loss": 0.8209,
"step": 868
},
{
"epoch": 0.32082205267044056,
"grad_norm": 0.906304657459259,
"learning_rate": 4.972073613532214e-06,
"loss": 0.7639,
"step": 869
},
{
"epoch": 0.32119123800147675,
"grad_norm": 0.9373779892921448,
"learning_rate": 4.972001071878495e-06,
"loss": 0.7908,
"step": 870
},
{
"epoch": 0.32156042333251295,
"grad_norm": 0.9056022763252258,
"learning_rate": 4.971928436660316e-06,
"loss": 0.768,
"step": 871
},
{
"epoch": 0.3219296086635491,
"grad_norm": 0.8719453811645508,
"learning_rate": 4.971855707880426e-06,
"loss": 0.7401,
"step": 872
},
{
"epoch": 0.3222987939945853,
"grad_norm": 0.9360424876213074,
"learning_rate": 4.971782885541578e-06,
"loss": 0.777,
"step": 873
},
{
"epoch": 0.32266797932562147,
"grad_norm": 0.8848892450332642,
"learning_rate": 4.971709969646527e-06,
"loss": 0.6993,
"step": 874
},
{
"epoch": 0.32303716465665766,
"grad_norm": 0.8988775610923767,
"learning_rate": 4.971636960198033e-06,
"loss": 0.7392,
"step": 875
},
{
"epoch": 0.3234063499876938,
"grad_norm": 0.901785135269165,
"learning_rate": 4.971563857198862e-06,
"loss": 0.7618,
"step": 876
},
{
"epoch": 0.32377553531873,
"grad_norm": 0.8972448110580444,
"learning_rate": 4.971490660651778e-06,
"loss": 0.7159,
"step": 877
},
{
"epoch": 0.3241447206497662,
"grad_norm": 0.9601827263832092,
"learning_rate": 4.971417370559552e-06,
"loss": 0.748,
"step": 878
},
{
"epoch": 0.3245139059808024,
"grad_norm": 0.8916758894920349,
"learning_rate": 4.97134398692496e-06,
"loss": 0.7598,
"step": 879
},
{
"epoch": 0.32488309131183857,
"grad_norm": 0.8835681080818176,
"learning_rate": 4.971270509750778e-06,
"loss": 0.6888,
"step": 880
},
{
"epoch": 0.3252522766428747,
"grad_norm": 0.9150434732437134,
"learning_rate": 4.971196939039786e-06,
"loss": 0.7612,
"step": 881
},
{
"epoch": 0.3256214619739109,
"grad_norm": 0.8889224529266357,
"learning_rate": 4.97112327479477e-06,
"loss": 0.7409,
"step": 882
},
{
"epoch": 0.3259906473049471,
"grad_norm": 0.8889238834381104,
"learning_rate": 4.971049517018518e-06,
"loss": 0.7537,
"step": 883
},
{
"epoch": 0.3263598326359833,
"grad_norm": 0.9054772853851318,
"learning_rate": 4.970975665713822e-06,
"loss": 0.7535,
"step": 884
},
{
"epoch": 0.3267290179670194,
"grad_norm": 0.9268242716789246,
"learning_rate": 4.970901720883477e-06,
"loss": 0.7688,
"step": 885
},
{
"epoch": 0.3270982032980556,
"grad_norm": 0.9592145681381226,
"learning_rate": 4.970827682530282e-06,
"loss": 0.751,
"step": 886
},
{
"epoch": 0.3274673886290918,
"grad_norm": 0.9229305386543274,
"learning_rate": 4.970753550657038e-06,
"loss": 0.755,
"step": 887
},
{
"epoch": 0.327836573960128,
"grad_norm": 0.9249312281608582,
"learning_rate": 4.970679325266552e-06,
"loss": 0.7411,
"step": 888
},
{
"epoch": 0.3282057592911642,
"grad_norm": 0.8924015164375305,
"learning_rate": 4.970605006361634e-06,
"loss": 0.7259,
"step": 889
},
{
"epoch": 0.3285749446222003,
"grad_norm": 0.8923146724700928,
"learning_rate": 4.970530593945096e-06,
"loss": 0.75,
"step": 890
},
{
"epoch": 0.3289441299532365,
"grad_norm": 0.9257709980010986,
"learning_rate": 4.9704560880197546e-06,
"loss": 0.7579,
"step": 891
},
{
"epoch": 0.3293133152842727,
"grad_norm": 0.8968879580497742,
"learning_rate": 4.97038148858843e-06,
"loss": 0.7173,
"step": 892
},
{
"epoch": 0.3296825006153089,
"grad_norm": 0.9248254299163818,
"learning_rate": 4.970306795653946e-06,
"loss": 0.7422,
"step": 893
},
{
"epoch": 0.33005168594634504,
"grad_norm": 0.8645913600921631,
"learning_rate": 4.970232009219129e-06,
"loss": 0.7232,
"step": 894
},
{
"epoch": 0.33042087127738123,
"grad_norm": 0.8754826188087463,
"learning_rate": 4.97015712928681e-06,
"loss": 0.7279,
"step": 895
},
{
"epoch": 0.3307900566084174,
"grad_norm": 0.8828722238540649,
"learning_rate": 4.970082155859823e-06,
"loss": 0.7185,
"step": 896
},
{
"epoch": 0.3311592419394536,
"grad_norm": 0.8933262825012207,
"learning_rate": 4.970007088941007e-06,
"loss": 0.7293,
"step": 897
},
{
"epoch": 0.3315284272704898,
"grad_norm": 0.908781886100769,
"learning_rate": 4.9699319285332016e-06,
"loss": 0.7549,
"step": 898
},
{
"epoch": 0.33189761260152595,
"grad_norm": 0.8825910091400146,
"learning_rate": 4.969856674639252e-06,
"loss": 0.767,
"step": 899
},
{
"epoch": 0.33226679793256214,
"grad_norm": 0.9013200402259827,
"learning_rate": 4.969781327262008e-06,
"loss": 0.7292,
"step": 900
},
{
"epoch": 0.33263598326359833,
"grad_norm": 0.944476306438446,
"learning_rate": 4.969705886404319e-06,
"loss": 0.7586,
"step": 901
},
{
"epoch": 0.3330051685946345,
"grad_norm": 0.9089605808258057,
"learning_rate": 4.9696303520690415e-06,
"loss": 0.7238,
"step": 902
},
{
"epoch": 0.33337435392567066,
"grad_norm": 0.88468998670578,
"learning_rate": 4.969554724259036e-06,
"loss": 0.7392,
"step": 903
},
{
"epoch": 0.33374353925670686,
"grad_norm": 0.9044007062911987,
"learning_rate": 4.969479002977162e-06,
"loss": 0.7202,
"step": 904
},
{
"epoch": 0.33411272458774305,
"grad_norm": 0.907281756401062,
"learning_rate": 4.969403188226288e-06,
"loss": 0.7362,
"step": 905
},
{
"epoch": 0.33448190991877924,
"grad_norm": 0.9365758895874023,
"learning_rate": 4.969327280009282e-06,
"loss": 0.7881,
"step": 906
},
{
"epoch": 0.33485109524981543,
"grad_norm": 0.9953451752662659,
"learning_rate": 4.969251278329018e-06,
"loss": 0.7516,
"step": 907
},
{
"epoch": 0.33522028058085157,
"grad_norm": 0.9089345932006836,
"learning_rate": 4.969175183188373e-06,
"loss": 0.7508,
"step": 908
},
{
"epoch": 0.33558946591188776,
"grad_norm": 0.9271081686019897,
"learning_rate": 4.969098994590226e-06,
"loss": 0.7696,
"step": 909
},
{
"epoch": 0.33595865124292396,
"grad_norm": 0.9360471367835999,
"learning_rate": 4.9690227125374615e-06,
"loss": 0.7608,
"step": 910
},
{
"epoch": 0.33632783657396015,
"grad_norm": 0.9032567739486694,
"learning_rate": 4.968946337032967e-06,
"loss": 0.7514,
"step": 911
},
{
"epoch": 0.3366970219049963,
"grad_norm": 0.9877912998199463,
"learning_rate": 4.9688698680796325e-06,
"loss": 0.765,
"step": 912
},
{
"epoch": 0.3370662072360325,
"grad_norm": 0.9205021858215332,
"learning_rate": 4.9687933056803525e-06,
"loss": 0.7753,
"step": 913
},
{
"epoch": 0.33743539256706867,
"grad_norm": 0.8936699032783508,
"learning_rate": 4.968716649838025e-06,
"loss": 0.745,
"step": 914
},
{
"epoch": 0.33780457789810486,
"grad_norm": 0.9072567820549011,
"learning_rate": 4.968639900555552e-06,
"loss": 0.7801,
"step": 915
},
{
"epoch": 0.33817376322914106,
"grad_norm": 0.8919604420661926,
"learning_rate": 4.968563057835837e-06,
"loss": 0.7493,
"step": 916
},
{
"epoch": 0.3385429485601772,
"grad_norm": 0.9033501148223877,
"learning_rate": 4.96848612168179e-06,
"loss": 0.7606,
"step": 917
},
{
"epoch": 0.3389121338912134,
"grad_norm": 0.916556715965271,
"learning_rate": 4.968409092096322e-06,
"loss": 0.719,
"step": 918
},
{
"epoch": 0.3392813192222496,
"grad_norm": 0.9027720093727112,
"learning_rate": 4.968331969082349e-06,
"loss": 0.7027,
"step": 919
},
{
"epoch": 0.33965050455328577,
"grad_norm": 1.002319574356079,
"learning_rate": 4.96825475264279e-06,
"loss": 0.7999,
"step": 920
},
{
"epoch": 0.3400196898843219,
"grad_norm": 0.9688315391540527,
"learning_rate": 4.968177442780568e-06,
"loss": 0.8079,
"step": 921
},
{
"epoch": 0.3403888752153581,
"grad_norm": 0.9085078239440918,
"learning_rate": 4.968100039498609e-06,
"loss": 0.7369,
"step": 922
},
{
"epoch": 0.3407580605463943,
"grad_norm": 0.9328852295875549,
"learning_rate": 4.968022542799842e-06,
"loss": 0.7864,
"step": 923
},
{
"epoch": 0.3411272458774305,
"grad_norm": 0.9257019758224487,
"learning_rate": 4.9679449526872e-06,
"loss": 0.7838,
"step": 924
},
{
"epoch": 0.3414964312084667,
"grad_norm": 0.9182167053222656,
"learning_rate": 4.9678672691636214e-06,
"loss": 0.7348,
"step": 925
},
{
"epoch": 0.3418656165395028,
"grad_norm": 0.928878664970398,
"learning_rate": 4.967789492232046e-06,
"loss": 0.7564,
"step": 926
},
{
"epoch": 0.342234801870539,
"grad_norm": 0.8896941542625427,
"learning_rate": 4.967711621895416e-06,
"loss": 0.7435,
"step": 927
},
{
"epoch": 0.3426039872015752,
"grad_norm": 0.9109853506088257,
"learning_rate": 4.9676336581566795e-06,
"loss": 0.7818,
"step": 928
},
{
"epoch": 0.3429731725326114,
"grad_norm": 0.899246096611023,
"learning_rate": 4.96755560101879e-06,
"loss": 0.7404,
"step": 929
},
{
"epoch": 0.34334235786364753,
"grad_norm": 0.9120781421661377,
"learning_rate": 4.967477450484698e-06,
"loss": 0.7928,
"step": 930
},
{
"epoch": 0.3437115431946837,
"grad_norm": 0.9362387657165527,
"learning_rate": 4.967399206557363e-06,
"loss": 0.8073,
"step": 931
},
{
"epoch": 0.3440807285257199,
"grad_norm": 0.8933055996894836,
"learning_rate": 4.967320869239748e-06,
"loss": 0.756,
"step": 932
},
{
"epoch": 0.3444499138567561,
"grad_norm": 0.9109773635864258,
"learning_rate": 4.967242438534816e-06,
"loss": 0.7481,
"step": 933
},
{
"epoch": 0.34481909918779224,
"grad_norm": 0.9119990468025208,
"learning_rate": 4.967163914445537e-06,
"loss": 0.7529,
"step": 934
},
{
"epoch": 0.34518828451882844,
"grad_norm": 0.8959357142448425,
"learning_rate": 4.967085296974882e-06,
"loss": 0.7286,
"step": 935
},
{
"epoch": 0.34555746984986463,
"grad_norm": 0.9190980195999146,
"learning_rate": 4.967006586125827e-06,
"loss": 0.7311,
"step": 936
},
{
"epoch": 0.3459266551809008,
"grad_norm": 0.9075052738189697,
"learning_rate": 4.966927781901351e-06,
"loss": 0.7027,
"step": 937
},
{
"epoch": 0.346295840511937,
"grad_norm": 0.8960855603218079,
"learning_rate": 4.9668488843044375e-06,
"loss": 0.7864,
"step": 938
},
{
"epoch": 0.34666502584297315,
"grad_norm": 0.9005295038223267,
"learning_rate": 4.9667698933380724e-06,
"loss": 0.7036,
"step": 939
},
{
"epoch": 0.34703421117400934,
"grad_norm": 0.9100990891456604,
"learning_rate": 4.966690809005246e-06,
"loss": 0.7641,
"step": 940
},
{
"epoch": 0.34740339650504554,
"grad_norm": 0.8951278328895569,
"learning_rate": 4.96661163130895e-06,
"loss": 0.7563,
"step": 941
},
{
"epoch": 0.34777258183608173,
"grad_norm": 0.942742109298706,
"learning_rate": 4.966532360252182e-06,
"loss": 0.7392,
"step": 942
},
{
"epoch": 0.34814176716711787,
"grad_norm": 0.9036753177642822,
"learning_rate": 4.966452995837943e-06,
"loss": 0.7435,
"step": 943
},
{
"epoch": 0.34851095249815406,
"grad_norm": 0.9136203527450562,
"learning_rate": 4.966373538069236e-06,
"loss": 0.751,
"step": 944
},
{
"epoch": 0.34888013782919025,
"grad_norm": 0.9514570832252502,
"learning_rate": 4.96629398694907e-06,
"loss": 0.7368,
"step": 945
},
{
"epoch": 0.34924932316022644,
"grad_norm": 0.870691180229187,
"learning_rate": 4.966214342480455e-06,
"loss": 0.6951,
"step": 946
},
{
"epoch": 0.34961850849126264,
"grad_norm": 0.8581681847572327,
"learning_rate": 4.966134604666405e-06,
"loss": 0.6636,
"step": 947
},
{
"epoch": 0.3499876938222988,
"grad_norm": 0.912087619304657,
"learning_rate": 4.966054773509938e-06,
"loss": 0.7349,
"step": 948
},
{
"epoch": 0.35035687915333497,
"grad_norm": 0.9115909934043884,
"learning_rate": 4.965974849014078e-06,
"loss": 0.7449,
"step": 949
},
{
"epoch": 0.35072606448437116,
"grad_norm": 0.8997576832771301,
"learning_rate": 4.965894831181847e-06,
"loss": 0.7326,
"step": 950
},
{
"epoch": 0.35109524981540735,
"grad_norm": 0.8838664293289185,
"learning_rate": 4.965814720016274e-06,
"loss": 0.7337,
"step": 951
},
{
"epoch": 0.3514644351464435,
"grad_norm": 0.9265721440315247,
"learning_rate": 4.965734515520393e-06,
"loss": 0.733,
"step": 952
},
{
"epoch": 0.3518336204774797,
"grad_norm": 0.9667180776596069,
"learning_rate": 4.9656542176972386e-06,
"loss": 0.7494,
"step": 953
},
{
"epoch": 0.3522028058085159,
"grad_norm": 0.9301754236221313,
"learning_rate": 4.965573826549851e-06,
"loss": 0.7519,
"step": 954
},
{
"epoch": 0.35257199113955207,
"grad_norm": 0.9007225036621094,
"learning_rate": 4.965493342081271e-06,
"loss": 0.7306,
"step": 955
},
{
"epoch": 0.35294117647058826,
"grad_norm": 0.89749675989151,
"learning_rate": 4.965412764294547e-06,
"loss": 0.7551,
"step": 956
},
{
"epoch": 0.3533103618016244,
"grad_norm": 0.9183578491210938,
"learning_rate": 4.965332093192727e-06,
"loss": 0.7756,
"step": 957
},
{
"epoch": 0.3536795471326606,
"grad_norm": 0.9291596412658691,
"learning_rate": 4.9652513287788665e-06,
"loss": 0.721,
"step": 958
},
{
"epoch": 0.3540487324636968,
"grad_norm": 0.9414975047111511,
"learning_rate": 4.965170471056021e-06,
"loss": 0.7852,
"step": 959
},
{
"epoch": 0.354417917794733,
"grad_norm": 0.9100828766822815,
"learning_rate": 4.965089520027251e-06,
"loss": 0.7166,
"step": 960
},
{
"epoch": 0.3547871031257691,
"grad_norm": 0.8750141263008118,
"learning_rate": 4.96500847569562e-06,
"loss": 0.7535,
"step": 961
},
{
"epoch": 0.3551562884568053,
"grad_norm": 0.9412403702735901,
"learning_rate": 4.964927338064197e-06,
"loss": 0.7289,
"step": 962
},
{
"epoch": 0.3555254737878415,
"grad_norm": 0.9291063547134399,
"learning_rate": 4.964846107136052e-06,
"loss": 0.7615,
"step": 963
},
{
"epoch": 0.3558946591188777,
"grad_norm": 0.9080612063407898,
"learning_rate": 4.96476478291426e-06,
"loss": 0.7425,
"step": 964
},
{
"epoch": 0.3562638444499139,
"grad_norm": 0.9023758769035339,
"learning_rate": 4.9646833654018974e-06,
"loss": 0.761,
"step": 965
},
{
"epoch": 0.35663302978095,
"grad_norm": 0.9533596038818359,
"learning_rate": 4.964601854602049e-06,
"loss": 0.726,
"step": 966
},
{
"epoch": 0.3570022151119862,
"grad_norm": 0.9523007273674011,
"learning_rate": 4.964520250517798e-06,
"loss": 0.7443,
"step": 967
},
{
"epoch": 0.3573714004430224,
"grad_norm": 0.9014210104942322,
"learning_rate": 4.964438553152233e-06,
"loss": 0.7348,
"step": 968
},
{
"epoch": 0.3577405857740586,
"grad_norm": 0.9457989931106567,
"learning_rate": 4.964356762508447e-06,
"loss": 0.7742,
"step": 969
},
{
"epoch": 0.35810977110509473,
"grad_norm": 0.8954206109046936,
"learning_rate": 4.964274878589535e-06,
"loss": 0.7083,
"step": 970
},
{
"epoch": 0.3584789564361309,
"grad_norm": 0.9402279853820801,
"learning_rate": 4.964192901398595e-06,
"loss": 0.7566,
"step": 971
},
{
"epoch": 0.3588481417671671,
"grad_norm": 0.8966061472892761,
"learning_rate": 4.964110830938734e-06,
"loss": 0.7132,
"step": 972
},
{
"epoch": 0.3592173270982033,
"grad_norm": 0.8585612773895264,
"learning_rate": 4.964028667213054e-06,
"loss": 0.7365,
"step": 973
},
{
"epoch": 0.3595865124292395,
"grad_norm": 0.8799294233322144,
"learning_rate": 4.9639464102246675e-06,
"loss": 0.7253,
"step": 974
},
{
"epoch": 0.35995569776027564,
"grad_norm": 0.9214800000190735,
"learning_rate": 4.963864059976686e-06,
"loss": 0.7317,
"step": 975
},
{
"epoch": 0.36032488309131183,
"grad_norm": 0.9081399440765381,
"learning_rate": 4.9637816164722285e-06,
"loss": 0.746,
"step": 976
},
{
"epoch": 0.360694068422348,
"grad_norm": 0.9382967948913574,
"learning_rate": 4.963699079714415e-06,
"loss": 0.7606,
"step": 977
},
{
"epoch": 0.3610632537533842,
"grad_norm": 0.9069362282752991,
"learning_rate": 4.963616449706367e-06,
"loss": 0.7644,
"step": 978
},
{
"epoch": 0.36143243908442035,
"grad_norm": 0.8491265773773193,
"learning_rate": 4.963533726451215e-06,
"loss": 0.738,
"step": 979
},
{
"epoch": 0.36180162441545655,
"grad_norm": 0.9470365643501282,
"learning_rate": 4.963450909952089e-06,
"loss": 0.7655,
"step": 980
},
{
"epoch": 0.36217080974649274,
"grad_norm": 0.8890638947486877,
"learning_rate": 4.963368000212123e-06,
"loss": 0.7089,
"step": 981
},
{
"epoch": 0.36253999507752893,
"grad_norm": 0.9325974583625793,
"learning_rate": 4.963284997234456e-06,
"loss": 0.775,
"step": 982
},
{
"epoch": 0.3629091804085651,
"grad_norm": 0.874653160572052,
"learning_rate": 4.96320190102223e-06,
"loss": 0.7289,
"step": 983
},
{
"epoch": 0.36327836573960126,
"grad_norm": 0.9684560894966125,
"learning_rate": 4.9631187115785885e-06,
"loss": 0.7216,
"step": 984
},
{
"epoch": 0.36364755107063745,
"grad_norm": 0.9118450284004211,
"learning_rate": 4.963035428906681e-06,
"loss": 0.7647,
"step": 985
},
{
"epoch": 0.36401673640167365,
"grad_norm": 0.9047130346298218,
"learning_rate": 4.96295205300966e-06,
"loss": 0.748,
"step": 986
},
{
"epoch": 0.36438592173270984,
"grad_norm": 0.9111741185188293,
"learning_rate": 4.962868583890682e-06,
"loss": 0.7079,
"step": 987
},
{
"epoch": 0.364755107063746,
"grad_norm": 0.9440281391143799,
"learning_rate": 4.962785021552904e-06,
"loss": 0.7544,
"step": 988
},
{
"epoch": 0.36512429239478217,
"grad_norm": 0.9372045397758484,
"learning_rate": 4.962701365999491e-06,
"loss": 0.7232,
"step": 989
},
{
"epoch": 0.36549347772581836,
"grad_norm": 0.9209040999412537,
"learning_rate": 4.962617617233608e-06,
"loss": 0.7434,
"step": 990
},
{
"epoch": 0.36586266305685455,
"grad_norm": 0.9633092284202576,
"learning_rate": 4.962533775258426e-06,
"loss": 0.7531,
"step": 991
},
{
"epoch": 0.36623184838789075,
"grad_norm": 0.9297153353691101,
"learning_rate": 4.962449840077118e-06,
"loss": 0.7644,
"step": 992
},
{
"epoch": 0.3666010337189269,
"grad_norm": 0.9207053184509277,
"learning_rate": 4.96236581169286e-06,
"loss": 0.7789,
"step": 993
},
{
"epoch": 0.3669702190499631,
"grad_norm": 0.9333449602127075,
"learning_rate": 4.962281690108834e-06,
"loss": 0.8054,
"step": 994
},
{
"epoch": 0.36733940438099927,
"grad_norm": 0.9394077658653259,
"learning_rate": 4.962197475328222e-06,
"loss": 0.7472,
"step": 995
},
{
"epoch": 0.36770858971203546,
"grad_norm": 0.980050802230835,
"learning_rate": 4.962113167354213e-06,
"loss": 0.7505,
"step": 996
},
{
"epoch": 0.3680777750430716,
"grad_norm": 0.9034879803657532,
"learning_rate": 4.962028766189999e-06,
"loss": 0.7232,
"step": 997
},
{
"epoch": 0.3684469603741078,
"grad_norm": 0.9549526572227478,
"learning_rate": 4.961944271838772e-06,
"loss": 0.7566,
"step": 998
},
{
"epoch": 0.368816145705144,
"grad_norm": 0.9430050253868103,
"learning_rate": 4.961859684303731e-06,
"loss": 0.7485,
"step": 999
},
{
"epoch": 0.3691853310361802,
"grad_norm": 1.0592334270477295,
"learning_rate": 4.961775003588079e-06,
"loss": 0.7219,
"step": 1000
},
{
"epoch": 0.36955451636721637,
"grad_norm": 0.9190243482589722,
"learning_rate": 4.96169022969502e-06,
"loss": 0.7679,
"step": 1001
},
{
"epoch": 0.3699237016982525,
"grad_norm": 0.9574081301689148,
"learning_rate": 4.961605362627761e-06,
"loss": 0.7678,
"step": 1002
},
{
"epoch": 0.3702928870292887,
"grad_norm": 0.9283050298690796,
"learning_rate": 4.961520402389517e-06,
"loss": 0.7584,
"step": 1003
},
{
"epoch": 0.3706620723603249,
"grad_norm": 0.9419953227043152,
"learning_rate": 4.961435348983503e-06,
"loss": 0.7955,
"step": 1004
},
{
"epoch": 0.3710312576913611,
"grad_norm": 0.9116794466972351,
"learning_rate": 4.961350202412938e-06,
"loss": 0.7739,
"step": 1005
},
{
"epoch": 0.3714004430223972,
"grad_norm": 0.8843052387237549,
"learning_rate": 4.961264962681044e-06,
"loss": 0.6978,
"step": 1006
},
{
"epoch": 0.3717696283534334,
"grad_norm": 0.9120619297027588,
"learning_rate": 4.961179629791049e-06,
"loss": 0.7662,
"step": 1007
},
{
"epoch": 0.3721388136844696,
"grad_norm": 0.9376091361045837,
"learning_rate": 4.961094203746181e-06,
"loss": 0.7636,
"step": 1008
},
{
"epoch": 0.3725079990155058,
"grad_norm": 0.9145896434783936,
"learning_rate": 4.961008684549674e-06,
"loss": 0.7355,
"step": 1009
},
{
"epoch": 0.372877184346542,
"grad_norm": 0.8806540966033936,
"learning_rate": 4.960923072204765e-06,
"loss": 0.7709,
"step": 1010
},
{
"epoch": 0.3732463696775781,
"grad_norm": 0.8880794048309326,
"learning_rate": 4.9608373667146945e-06,
"loss": 0.7272,
"step": 1011
},
{
"epoch": 0.3736155550086143,
"grad_norm": 0.9063923358917236,
"learning_rate": 4.9607515680827065e-06,
"loss": 0.7184,
"step": 1012
},
{
"epoch": 0.3739847403396505,
"grad_norm": 0.8815094232559204,
"learning_rate": 4.960665676312047e-06,
"loss": 0.753,
"step": 1013
},
{
"epoch": 0.3743539256706867,
"grad_norm": 0.899018406867981,
"learning_rate": 4.96057969140597e-06,
"loss": 0.7388,
"step": 1014
},
{
"epoch": 0.37472311100172284,
"grad_norm": 0.9038127064704895,
"learning_rate": 4.960493613367728e-06,
"loss": 0.7131,
"step": 1015
},
{
"epoch": 0.37509229633275903,
"grad_norm": 0.890527069568634,
"learning_rate": 4.960407442200579e-06,
"loss": 0.7646,
"step": 1016
},
{
"epoch": 0.3754614816637952,
"grad_norm": 0.9003925919532776,
"learning_rate": 4.9603211779077845e-06,
"loss": 0.7285,
"step": 1017
},
{
"epoch": 0.3758306669948314,
"grad_norm": 0.9128808975219727,
"learning_rate": 4.96023482049261e-06,
"loss": 0.7608,
"step": 1018
},
{
"epoch": 0.3761998523258676,
"grad_norm": 0.9050713181495667,
"learning_rate": 4.960148369958324e-06,
"loss": 0.7375,
"step": 1019
},
{
"epoch": 0.37656903765690375,
"grad_norm": 0.9218745827674866,
"learning_rate": 4.960061826308199e-06,
"loss": 0.7222,
"step": 1020
},
{
"epoch": 0.37693822298793994,
"grad_norm": 0.9460575580596924,
"learning_rate": 4.95997518954551e-06,
"loss": 0.7733,
"step": 1021
},
{
"epoch": 0.37730740831897613,
"grad_norm": 0.9175562262535095,
"learning_rate": 4.959888459673536e-06,
"loss": 0.7418,
"step": 1022
},
{
"epoch": 0.3776765936500123,
"grad_norm": 0.9456244707107544,
"learning_rate": 4.959801636695561e-06,
"loss": 0.7552,
"step": 1023
},
{
"epoch": 0.37804577898104846,
"grad_norm": 0.8985305428504944,
"learning_rate": 4.959714720614871e-06,
"loss": 0.7366,
"step": 1024
},
{
"epoch": 0.37841496431208466,
"grad_norm": 0.9268773794174194,
"learning_rate": 4.959627711434753e-06,
"loss": 0.7408,
"step": 1025
},
{
"epoch": 0.37878414964312085,
"grad_norm": 0.9267814755439758,
"learning_rate": 4.959540609158504e-06,
"loss": 0.7589,
"step": 1026
},
{
"epoch": 0.37915333497415704,
"grad_norm": 0.905430018901825,
"learning_rate": 4.959453413789419e-06,
"loss": 0.7404,
"step": 1027
},
{
"epoch": 0.37952252030519323,
"grad_norm": 0.9021572470664978,
"learning_rate": 4.959366125330798e-06,
"loss": 0.7399,
"step": 1028
},
{
"epoch": 0.37989170563622937,
"grad_norm": 0.9395810961723328,
"learning_rate": 4.9592787437859455e-06,
"loss": 0.7259,
"step": 1029
},
{
"epoch": 0.38026089096726556,
"grad_norm": 0.8881012201309204,
"learning_rate": 4.959191269158169e-06,
"loss": 0.7365,
"step": 1030
},
{
"epoch": 0.38063007629830176,
"grad_norm": 0.9610297679901123,
"learning_rate": 4.959103701450779e-06,
"loss": 0.7141,
"step": 1031
},
{
"epoch": 0.38099926162933795,
"grad_norm": 0.8691598176956177,
"learning_rate": 4.959016040667089e-06,
"loss": 0.725,
"step": 1032
},
{
"epoch": 0.3813684469603741,
"grad_norm": 0.912972092628479,
"learning_rate": 4.9589282868104195e-06,
"loss": 0.6629,
"step": 1033
},
{
"epoch": 0.3817376322914103,
"grad_norm": 0.9238312840461731,
"learning_rate": 4.95884043988409e-06,
"loss": 0.7799,
"step": 1034
},
{
"epoch": 0.38210681762244647,
"grad_norm": 0.8956131935119629,
"learning_rate": 4.9587524998914255e-06,
"loss": 0.759,
"step": 1035
},
{
"epoch": 0.38247600295348266,
"grad_norm": 0.8826711177825928,
"learning_rate": 4.958664466835756e-06,
"loss": 0.7486,
"step": 1036
},
{
"epoch": 0.38284518828451886,
"grad_norm": 0.9134225845336914,
"learning_rate": 4.95857634072041e-06,
"loss": 0.7608,
"step": 1037
},
{
"epoch": 0.383214373615555,
"grad_norm": 0.9029486179351807,
"learning_rate": 4.958488121548727e-06,
"loss": 0.7254,
"step": 1038
},
{
"epoch": 0.3835835589465912,
"grad_norm": 0.8955227732658386,
"learning_rate": 4.958399809324045e-06,
"loss": 0.7928,
"step": 1039
},
{
"epoch": 0.3839527442776274,
"grad_norm": 0.8920780420303345,
"learning_rate": 4.958311404049705e-06,
"loss": 0.7654,
"step": 1040
},
{
"epoch": 0.38432192960866357,
"grad_norm": 0.9059119820594788,
"learning_rate": 4.958222905729055e-06,
"loss": 0.7263,
"step": 1041
},
{
"epoch": 0.3846911149396997,
"grad_norm": 0.8846084475517273,
"learning_rate": 4.958134314365443e-06,
"loss": 0.7119,
"step": 1042
},
{
"epoch": 0.3850603002707359,
"grad_norm": 0.8834053874015808,
"learning_rate": 4.9580456299622235e-06,
"loss": 0.7113,
"step": 1043
},
{
"epoch": 0.3854294856017721,
"grad_norm": 0.9049264192581177,
"learning_rate": 4.957956852522753e-06,
"loss": 0.7233,
"step": 1044
},
{
"epoch": 0.3857986709328083,
"grad_norm": 0.8891218900680542,
"learning_rate": 4.9578679820503905e-06,
"loss": 0.6903,
"step": 1045
},
{
"epoch": 0.3861678562638444,
"grad_norm": 0.8697208166122437,
"learning_rate": 4.957779018548501e-06,
"loss": 0.748,
"step": 1046
},
{
"epoch": 0.3865370415948806,
"grad_norm": 0.9173257946968079,
"learning_rate": 4.957689962020452e-06,
"loss": 0.762,
"step": 1047
},
{
"epoch": 0.3869062269259168,
"grad_norm": 0.9417243003845215,
"learning_rate": 4.957600812469613e-06,
"loss": 0.7513,
"step": 1048
},
{
"epoch": 0.387275412256953,
"grad_norm": 0.9135996103286743,
"learning_rate": 4.95751156989936e-06,
"loss": 0.7303,
"step": 1049
},
{
"epoch": 0.3876445975879892,
"grad_norm": 0.8908846378326416,
"learning_rate": 4.957422234313068e-06,
"loss": 0.7606,
"step": 1050
},
{
"epoch": 0.38801378291902533,
"grad_norm": 0.8868354558944702,
"learning_rate": 4.95733280571412e-06,
"loss": 0.7219,
"step": 1051
},
{
"epoch": 0.3883829682500615,
"grad_norm": 0.9057561755180359,
"learning_rate": 4.957243284105902e-06,
"loss": 0.7392,
"step": 1052
},
{
"epoch": 0.3887521535810977,
"grad_norm": 0.9165422916412354,
"learning_rate": 4.9571536694918e-06,
"loss": 0.7682,
"step": 1053
},
{
"epoch": 0.3891213389121339,
"grad_norm": 0.9119623899459839,
"learning_rate": 4.957063961875208e-06,
"loss": 0.714,
"step": 1054
},
{
"epoch": 0.38949052424317004,
"grad_norm": 0.9237239956855774,
"learning_rate": 4.95697416125952e-06,
"loss": 0.7394,
"step": 1055
},
{
"epoch": 0.38985970957420624,
"grad_norm": 0.8836085796356201,
"learning_rate": 4.956884267648136e-06,
"loss": 0.7101,
"step": 1056
},
{
"epoch": 0.39022889490524243,
"grad_norm": 0.8749224543571472,
"learning_rate": 4.956794281044458e-06,
"loss": 0.7181,
"step": 1057
},
{
"epoch": 0.3905980802362786,
"grad_norm": 0.9028134942054749,
"learning_rate": 4.956704201451891e-06,
"loss": 0.7268,
"step": 1058
},
{
"epoch": 0.3909672655673148,
"grad_norm": 0.8953654170036316,
"learning_rate": 4.956614028873846e-06,
"loss": 0.7691,
"step": 1059
},
{
"epoch": 0.39133645089835095,
"grad_norm": 0.881486713886261,
"learning_rate": 4.956523763313736e-06,
"loss": 0.7499,
"step": 1060
},
{
"epoch": 0.39170563622938714,
"grad_norm": 0.8811509609222412,
"learning_rate": 4.956433404774975e-06,
"loss": 0.718,
"step": 1061
},
{
"epoch": 0.39207482156042334,
"grad_norm": 0.9057355523109436,
"learning_rate": 4.956342953260986e-06,
"loss": 0.7533,
"step": 1062
},
{
"epoch": 0.39244400689145953,
"grad_norm": 0.9343195557594299,
"learning_rate": 4.956252408775191e-06,
"loss": 0.7808,
"step": 1063
},
{
"epoch": 0.39281319222249567,
"grad_norm": 0.8957801461219788,
"learning_rate": 4.9561617713210174e-06,
"loss": 0.7304,
"step": 1064
},
{
"epoch": 0.39318237755353186,
"grad_norm": 0.8987041115760803,
"learning_rate": 4.956071040901897e-06,
"loss": 0.745,
"step": 1065
},
{
"epoch": 0.39355156288456805,
"grad_norm": 0.9891381859779358,
"learning_rate": 4.955980217521263e-06,
"loss": 0.761,
"step": 1066
},
{
"epoch": 0.39392074821560424,
"grad_norm": 0.9239757061004639,
"learning_rate": 4.955889301182551e-06,
"loss": 0.7811,
"step": 1067
},
{
"epoch": 0.39428993354664044,
"grad_norm": 0.9178396463394165,
"learning_rate": 4.955798291889205e-06,
"loss": 0.7447,
"step": 1068
},
{
"epoch": 0.3946591188776766,
"grad_norm": 0.927836537361145,
"learning_rate": 4.955707189644669e-06,
"loss": 0.7527,
"step": 1069
},
{
"epoch": 0.39502830420871277,
"grad_norm": 0.9121710658073425,
"learning_rate": 4.955615994452391e-06,
"loss": 0.7283,
"step": 1070
},
{
"epoch": 0.39539748953974896,
"grad_norm": 0.8991080522537231,
"learning_rate": 4.955524706315822e-06,
"loss": 0.7521,
"step": 1071
},
{
"epoch": 0.39576667487078515,
"grad_norm": 0.9071163535118103,
"learning_rate": 4.955433325238418e-06,
"loss": 0.7227,
"step": 1072
},
{
"epoch": 0.3961358602018213,
"grad_norm": 0.9359886646270752,
"learning_rate": 4.955341851223639e-06,
"loss": 0.7888,
"step": 1073
},
{
"epoch": 0.3965050455328575,
"grad_norm": 0.9484068155288696,
"learning_rate": 4.955250284274944e-06,
"loss": 0.7609,
"step": 1074
},
{
"epoch": 0.3968742308638937,
"grad_norm": 0.9310367107391357,
"learning_rate": 4.9551586243958e-06,
"loss": 0.7698,
"step": 1075
},
{
"epoch": 0.39724341619492987,
"grad_norm": 0.9183504581451416,
"learning_rate": 4.955066871589679e-06,
"loss": 0.7442,
"step": 1076
},
{
"epoch": 0.39761260152596606,
"grad_norm": 0.9239982962608337,
"learning_rate": 4.954975025860051e-06,
"loss": 0.7485,
"step": 1077
},
{
"epoch": 0.3979817868570022,
"grad_norm": 0.9956908226013184,
"learning_rate": 4.954883087210393e-06,
"loss": 0.7817,
"step": 1078
},
{
"epoch": 0.3983509721880384,
"grad_norm": 0.897830069065094,
"learning_rate": 4.9547910556441845e-06,
"loss": 0.7241,
"step": 1079
},
{
"epoch": 0.3987201575190746,
"grad_norm": 0.8917036652565002,
"learning_rate": 4.95469893116491e-06,
"loss": 0.7047,
"step": 1080
},
{
"epoch": 0.3990893428501108,
"grad_norm": 0.8943704962730408,
"learning_rate": 4.954606713776056e-06,
"loss": 0.7071,
"step": 1081
},
{
"epoch": 0.3994585281811469,
"grad_norm": 0.8890754580497742,
"learning_rate": 4.954514403481112e-06,
"loss": 0.7295,
"step": 1082
},
{
"epoch": 0.3998277135121831,
"grad_norm": 0.9230964779853821,
"learning_rate": 4.954422000283572e-06,
"loss": 0.7491,
"step": 1083
},
{
"epoch": 0.4001968988432193,
"grad_norm": 0.9171218276023865,
"learning_rate": 4.954329504186935e-06,
"loss": 0.7463,
"step": 1084
},
{
"epoch": 0.4005660841742555,
"grad_norm": 0.8871411085128784,
"learning_rate": 4.954236915194699e-06,
"loss": 0.7312,
"step": 1085
},
{
"epoch": 0.4009352695052917,
"grad_norm": 0.9313485026359558,
"learning_rate": 4.954144233310372e-06,
"loss": 0.7129,
"step": 1086
},
{
"epoch": 0.4013044548363278,
"grad_norm": 0.9138079881668091,
"learning_rate": 4.95405145853746e-06,
"loss": 0.746,
"step": 1087
},
{
"epoch": 0.401673640167364,
"grad_norm": 0.9450967907905579,
"learning_rate": 4.9539585908794746e-06,
"loss": 0.7811,
"step": 1088
},
{
"epoch": 0.4020428254984002,
"grad_norm": 0.9284427762031555,
"learning_rate": 4.9538656303399314e-06,
"loss": 0.74,
"step": 1089
},
{
"epoch": 0.4024120108294364,
"grad_norm": 0.9271199107170105,
"learning_rate": 4.953772576922348e-06,
"loss": 0.7622,
"step": 1090
},
{
"epoch": 0.40278119616047253,
"grad_norm": 0.8856538534164429,
"learning_rate": 4.953679430630247e-06,
"loss": 0.6994,
"step": 1091
},
{
"epoch": 0.4031503814915087,
"grad_norm": 0.9314479231834412,
"learning_rate": 4.953586191467155e-06,
"loss": 0.7583,
"step": 1092
},
{
"epoch": 0.4035195668225449,
"grad_norm": 0.9417778253555298,
"learning_rate": 4.953492859436599e-06,
"loss": 0.7601,
"step": 1093
},
{
"epoch": 0.4038887521535811,
"grad_norm": 0.8887828588485718,
"learning_rate": 4.953399434542112e-06,
"loss": 0.744,
"step": 1094
},
{
"epoch": 0.4042579374846173,
"grad_norm": 0.926156759262085,
"learning_rate": 4.953305916787232e-06,
"loss": 0.7113,
"step": 1095
},
{
"epoch": 0.40462712281565344,
"grad_norm": 0.9664729833602905,
"learning_rate": 4.9532123061754966e-06,
"loss": 0.733,
"step": 1096
},
{
"epoch": 0.40499630814668963,
"grad_norm": 0.9112409353256226,
"learning_rate": 4.953118602710449e-06,
"loss": 0.764,
"step": 1097
},
{
"epoch": 0.4053654934777258,
"grad_norm": 0.9250743389129639,
"learning_rate": 4.9530248063956375e-06,
"loss": 0.7536,
"step": 1098
},
{
"epoch": 0.405734678808762,
"grad_norm": 0.8889137506484985,
"learning_rate": 4.952930917234612e-06,
"loss": 0.7208,
"step": 1099
},
{
"epoch": 0.40610386413979815,
"grad_norm": 0.9497808814048767,
"learning_rate": 4.952836935230924e-06,
"loss": 0.7263,
"step": 1100
},
{
"epoch": 0.40647304947083435,
"grad_norm": 0.9288194179534912,
"learning_rate": 4.952742860388133e-06,
"loss": 0.7321,
"step": 1101
},
{
"epoch": 0.40684223480187054,
"grad_norm": 0.9409608244895935,
"learning_rate": 4.952648692709798e-06,
"loss": 0.7375,
"step": 1102
},
{
"epoch": 0.40721142013290673,
"grad_norm": 0.8995351195335388,
"learning_rate": 4.952554432199485e-06,
"loss": 0.7626,
"step": 1103
},
{
"epoch": 0.4075806054639429,
"grad_norm": 0.9207445979118347,
"learning_rate": 4.95246007886076e-06,
"loss": 0.7324,
"step": 1104
},
{
"epoch": 0.40794979079497906,
"grad_norm": 0.9042755365371704,
"learning_rate": 4.9523656326971954e-06,
"loss": 0.7281,
"step": 1105
},
{
"epoch": 0.40831897612601525,
"grad_norm": 0.9243267178535461,
"learning_rate": 4.952271093712366e-06,
"loss": 0.7536,
"step": 1106
},
{
"epoch": 0.40868816145705145,
"grad_norm": 0.9243746399879456,
"learning_rate": 4.952176461909849e-06,
"loss": 0.7514,
"step": 1107
},
{
"epoch": 0.40905734678808764,
"grad_norm": 0.9338531494140625,
"learning_rate": 4.952081737293227e-06,
"loss": 0.7443,
"step": 1108
},
{
"epoch": 0.4094265321191238,
"grad_norm": 0.9238013625144958,
"learning_rate": 4.951986919866085e-06,
"loss": 0.6872,
"step": 1109
},
{
"epoch": 0.40979571745015997,
"grad_norm": 0.9439871907234192,
"learning_rate": 4.951892009632012e-06,
"loss": 0.7577,
"step": 1110
},
{
"epoch": 0.41016490278119616,
"grad_norm": 0.9026377201080322,
"learning_rate": 4.951797006594601e-06,
"loss": 0.7025,
"step": 1111
},
{
"epoch": 0.41053408811223235,
"grad_norm": 0.9088364839553833,
"learning_rate": 4.951701910757446e-06,
"loss": 0.7286,
"step": 1112
},
{
"epoch": 0.41090327344326855,
"grad_norm": 0.9279249906539917,
"learning_rate": 4.9516067221241485e-06,
"loss": 0.7533,
"step": 1113
},
{
"epoch": 0.4112724587743047,
"grad_norm": 0.8800785541534424,
"learning_rate": 4.951511440698309e-06,
"loss": 0.7352,
"step": 1114
},
{
"epoch": 0.4116416441053409,
"grad_norm": 0.8675339818000793,
"learning_rate": 4.9514160664835366e-06,
"loss": 0.7565,
"step": 1115
},
{
"epoch": 0.41201082943637707,
"grad_norm": 0.9367777109146118,
"learning_rate": 4.951320599483439e-06,
"loss": 0.7501,
"step": 1116
},
{
"epoch": 0.41238001476741326,
"grad_norm": 0.9239450693130493,
"learning_rate": 4.9512250397016304e-06,
"loss": 0.7774,
"step": 1117
},
{
"epoch": 0.4127492000984494,
"grad_norm": 0.8786678314208984,
"learning_rate": 4.951129387141728e-06,
"loss": 0.7385,
"step": 1118
},
{
"epoch": 0.4131183854294856,
"grad_norm": 0.9262030720710754,
"learning_rate": 4.951033641807351e-06,
"loss": 0.7653,
"step": 1119
},
{
"epoch": 0.4134875707605218,
"grad_norm": 0.901329755783081,
"learning_rate": 4.950937803702125e-06,
"loss": 0.7375,
"step": 1120
},
{
"epoch": 0.413856756091558,
"grad_norm": 0.9039179086685181,
"learning_rate": 4.950841872829676e-06,
"loss": 0.7644,
"step": 1121
},
{
"epoch": 0.41422594142259417,
"grad_norm": 0.9563358426094055,
"learning_rate": 4.9507458491936365e-06,
"loss": 0.7476,
"step": 1122
},
{
"epoch": 0.4145951267536303,
"grad_norm": 0.9279500842094421,
"learning_rate": 4.950649732797639e-06,
"loss": 0.723,
"step": 1123
},
{
"epoch": 0.4149643120846665,
"grad_norm": 0.9395243525505066,
"learning_rate": 4.950553523645324e-06,
"loss": 0.7341,
"step": 1124
},
{
"epoch": 0.4153334974157027,
"grad_norm": 0.9069976210594177,
"learning_rate": 4.9504572217403305e-06,
"loss": 0.7458,
"step": 1125
},
{
"epoch": 0.4157026827467389,
"grad_norm": 0.9429416656494141,
"learning_rate": 4.9503608270863046e-06,
"loss": 0.7534,
"step": 1126
},
{
"epoch": 0.416071868077775,
"grad_norm": 0.9167525172233582,
"learning_rate": 4.950264339686895e-06,
"loss": 0.7223,
"step": 1127
},
{
"epoch": 0.4164410534088112,
"grad_norm": 0.8981575965881348,
"learning_rate": 4.950167759545753e-06,
"loss": 0.7562,
"step": 1128
},
{
"epoch": 0.4168102387398474,
"grad_norm": 0.8772927522659302,
"learning_rate": 4.950071086666535e-06,
"loss": 0.7528,
"step": 1129
},
{
"epoch": 0.4171794240708836,
"grad_norm": 0.8946474194526672,
"learning_rate": 4.949974321052899e-06,
"loss": 0.7232,
"step": 1130
},
{
"epoch": 0.4175486094019198,
"grad_norm": 0.9215155839920044,
"learning_rate": 4.94987746270851e-06,
"loss": 0.7705,
"step": 1131
},
{
"epoch": 0.4179177947329559,
"grad_norm": 0.8860598802566528,
"learning_rate": 4.94978051163703e-06,
"loss": 0.7468,
"step": 1132
},
{
"epoch": 0.4182869800639921,
"grad_norm": 0.9385093450546265,
"learning_rate": 4.9496834678421325e-06,
"loss": 0.7881,
"step": 1133
},
{
"epoch": 0.4186561653950283,
"grad_norm": 0.9394497275352478,
"learning_rate": 4.949586331327488e-06,
"loss": 0.7543,
"step": 1134
},
{
"epoch": 0.4190253507260645,
"grad_norm": 0.9088026881217957,
"learning_rate": 4.949489102096774e-06,
"loss": 0.7598,
"step": 1135
},
{
"epoch": 0.41939453605710064,
"grad_norm": 0.9452118873596191,
"learning_rate": 4.94939178015367e-06,
"loss": 0.7462,
"step": 1136
},
{
"epoch": 0.41976372138813683,
"grad_norm": 0.9107792377471924,
"learning_rate": 4.949294365501862e-06,
"loss": 0.7423,
"step": 1137
},
{
"epoch": 0.420132906719173,
"grad_norm": 0.8913663625717163,
"learning_rate": 4.9491968581450334e-06,
"loss": 0.7334,
"step": 1138
},
{
"epoch": 0.4205020920502092,
"grad_norm": 0.902692973613739,
"learning_rate": 4.949099258086878e-06,
"loss": 0.701,
"step": 1139
},
{
"epoch": 0.4208712773812454,
"grad_norm": 0.8897360563278198,
"learning_rate": 4.949001565331087e-06,
"loss": 0.7425,
"step": 1140
},
{
"epoch": 0.42124046271228155,
"grad_norm": 0.9004592299461365,
"learning_rate": 4.948903779881361e-06,
"loss": 0.7052,
"step": 1141
},
{
"epoch": 0.42160964804331774,
"grad_norm": 0.8605636954307556,
"learning_rate": 4.9488059017413995e-06,
"loss": 0.7199,
"step": 1142
},
{
"epoch": 0.42197883337435393,
"grad_norm": 0.9023451805114746,
"learning_rate": 4.948707930914908e-06,
"loss": 0.7718,
"step": 1143
},
{
"epoch": 0.4223480187053901,
"grad_norm": 0.9158706068992615,
"learning_rate": 4.948609867405594e-06,
"loss": 0.759,
"step": 1144
},
{
"epoch": 0.42271720403642626,
"grad_norm": 0.914189338684082,
"learning_rate": 4.948511711217168e-06,
"loss": 0.7292,
"step": 1145
},
{
"epoch": 0.42308638936746246,
"grad_norm": 0.9431989789009094,
"learning_rate": 4.948413462353347e-06,
"loss": 0.7385,
"step": 1146
},
{
"epoch": 0.42345557469849865,
"grad_norm": 0.9301594495773315,
"learning_rate": 4.9483151208178505e-06,
"loss": 0.7613,
"step": 1147
},
{
"epoch": 0.42382476002953484,
"grad_norm": 0.9736144542694092,
"learning_rate": 4.948216686614398e-06,
"loss": 0.7694,
"step": 1148
},
{
"epoch": 0.42419394536057103,
"grad_norm": 0.9035144448280334,
"learning_rate": 4.948118159746718e-06,
"loss": 0.7107,
"step": 1149
},
{
"epoch": 0.42456313069160717,
"grad_norm": 0.9065275192260742,
"learning_rate": 4.948019540218536e-06,
"loss": 0.7772,
"step": 1150
},
{
"epoch": 0.42493231602264336,
"grad_norm": 0.920475959777832,
"learning_rate": 4.9479208280335885e-06,
"loss": 0.7342,
"step": 1151
},
{
"epoch": 0.42530150135367956,
"grad_norm": 0.9290496706962585,
"learning_rate": 4.947822023195611e-06,
"loss": 0.7331,
"step": 1152
},
{
"epoch": 0.42567068668471575,
"grad_norm": 0.8922300934791565,
"learning_rate": 4.9477231257083415e-06,
"loss": 0.7655,
"step": 1153
},
{
"epoch": 0.4260398720157519,
"grad_norm": 0.9539517164230347,
"learning_rate": 4.947624135575524e-06,
"loss": 0.7567,
"step": 1154
},
{
"epoch": 0.4264090573467881,
"grad_norm": 0.8572643995285034,
"learning_rate": 4.9475250528009055e-06,
"loss": 0.6853,
"step": 1155
},
{
"epoch": 0.42677824267782427,
"grad_norm": 1.0646028518676758,
"learning_rate": 4.947425877388237e-06,
"loss": 0.7308,
"step": 1156
},
{
"epoch": 0.42714742800886046,
"grad_norm": 0.9418565630912781,
"learning_rate": 4.947326609341271e-06,
"loss": 0.7319,
"step": 1157
},
{
"epoch": 0.4275166133398966,
"grad_norm": 0.9165734052658081,
"learning_rate": 4.947227248663764e-06,
"loss": 0.7168,
"step": 1158
},
{
"epoch": 0.4278857986709328,
"grad_norm": 0.9139310717582703,
"learning_rate": 4.94712779535948e-06,
"loss": 0.7387,
"step": 1159
},
{
"epoch": 0.428254984001969,
"grad_norm": 0.8762199282646179,
"learning_rate": 4.94702824943218e-06,
"loss": 0.6796,
"step": 1160
},
{
"epoch": 0.4286241693330052,
"grad_norm": 0.9274040460586548,
"learning_rate": 4.946928610885633e-06,
"loss": 0.7424,
"step": 1161
},
{
"epoch": 0.42899335466404137,
"grad_norm": 0.9182401299476624,
"learning_rate": 4.946828879723611e-06,
"loss": 0.7242,
"step": 1162
},
{
"epoch": 0.4293625399950775,
"grad_norm": 0.9233170747756958,
"learning_rate": 4.946729055949888e-06,
"loss": 0.7557,
"step": 1163
},
{
"epoch": 0.4297317253261137,
"grad_norm": 0.9127527475357056,
"learning_rate": 4.946629139568242e-06,
"loss": 0.754,
"step": 1164
},
{
"epoch": 0.4301009106571499,
"grad_norm": 0.9096380472183228,
"learning_rate": 4.946529130582456e-06,
"loss": 0.7747,
"step": 1165
},
{
"epoch": 0.4304700959881861,
"grad_norm": 0.927699089050293,
"learning_rate": 4.946429028996314e-06,
"loss": 0.7741,
"step": 1166
},
{
"epoch": 0.4308392813192222,
"grad_norm": 0.8996379971504211,
"learning_rate": 4.946328834813605e-06,
"loss": 0.7582,
"step": 1167
},
{
"epoch": 0.4312084666502584,
"grad_norm": 0.9378359317779541,
"learning_rate": 4.946228548038122e-06,
"loss": 0.7873,
"step": 1168
},
{
"epoch": 0.4315776519812946,
"grad_norm": 0.85906583070755,
"learning_rate": 4.946128168673662e-06,
"loss": 0.6732,
"step": 1169
},
{
"epoch": 0.4319468373123308,
"grad_norm": 0.8577884435653687,
"learning_rate": 4.94602769672402e-06,
"loss": 0.705,
"step": 1170
},
{
"epoch": 0.432316022643367,
"grad_norm": 0.8988800048828125,
"learning_rate": 4.945927132193003e-06,
"loss": 0.7255,
"step": 1171
},
{
"epoch": 0.43268520797440313,
"grad_norm": 0.8887507319450378,
"learning_rate": 4.945826475084417e-06,
"loss": 0.7122,
"step": 1172
},
{
"epoch": 0.4330543933054393,
"grad_norm": 0.9307096004486084,
"learning_rate": 4.9457257254020696e-06,
"loss": 0.7394,
"step": 1173
},
{
"epoch": 0.4334235786364755,
"grad_norm": 0.9346253871917725,
"learning_rate": 4.945624883149776e-06,
"loss": 0.7617,
"step": 1174
},
{
"epoch": 0.4337927639675117,
"grad_norm": 0.935279905796051,
"learning_rate": 4.945523948331352e-06,
"loss": 0.72,
"step": 1175
},
{
"epoch": 0.43416194929854784,
"grad_norm": 0.9695756435394287,
"learning_rate": 4.9454229209506186e-06,
"loss": 0.7499,
"step": 1176
},
{
"epoch": 0.43453113462958404,
"grad_norm": 0.8869019150733948,
"learning_rate": 4.9453218010114e-06,
"loss": 0.6932,
"step": 1177
},
{
"epoch": 0.43490031996062023,
"grad_norm": 0.9020052552223206,
"learning_rate": 4.945220588517522e-06,
"loss": 0.7313,
"step": 1178
},
{
"epoch": 0.4352695052916564,
"grad_norm": 0.9483025670051575,
"learning_rate": 4.945119283472816e-06,
"loss": 0.7311,
"step": 1179
},
{
"epoch": 0.4356386906226926,
"grad_norm": 0.8632071614265442,
"learning_rate": 4.945017885881118e-06,
"loss": 0.6701,
"step": 1180
},
{
"epoch": 0.43600787595372875,
"grad_norm": 0.8918522000312805,
"learning_rate": 4.944916395746264e-06,
"loss": 0.6994,
"step": 1181
},
{
"epoch": 0.43637706128476494,
"grad_norm": 0.9055469036102295,
"learning_rate": 4.944814813072097e-06,
"loss": 0.7532,
"step": 1182
},
{
"epoch": 0.43674624661580114,
"grad_norm": 0.9149122834205627,
"learning_rate": 4.94471313786246e-06,
"loss": 0.7713,
"step": 1183
},
{
"epoch": 0.43711543194683733,
"grad_norm": 0.9110752940177917,
"learning_rate": 4.944611370121203e-06,
"loss": 0.7157,
"step": 1184
},
{
"epoch": 0.43748461727787347,
"grad_norm": 0.9197525382041931,
"learning_rate": 4.9445095098521765e-06,
"loss": 0.7595,
"step": 1185
},
{
"epoch": 0.43785380260890966,
"grad_norm": 0.9142046570777893,
"learning_rate": 4.944407557059236e-06,
"loss": 0.6909,
"step": 1186
},
{
"epoch": 0.43822298793994585,
"grad_norm": 0.8837047219276428,
"learning_rate": 4.944305511746242e-06,
"loss": 0.7213,
"step": 1187
},
{
"epoch": 0.43859217327098204,
"grad_norm": 0.9123366475105286,
"learning_rate": 4.944203373917056e-06,
"loss": 0.7507,
"step": 1188
},
{
"epoch": 0.43896135860201824,
"grad_norm": 0.9120188355445862,
"learning_rate": 4.944101143575542e-06,
"loss": 0.7194,
"step": 1189
},
{
"epoch": 0.4393305439330544,
"grad_norm": 0.9454036951065063,
"learning_rate": 4.943998820725573e-06,
"loss": 0.7523,
"step": 1190
},
{
"epoch": 0.43969972926409057,
"grad_norm": 0.9568728804588318,
"learning_rate": 4.943896405371019e-06,
"loss": 0.7193,
"step": 1191
},
{
"epoch": 0.44006891459512676,
"grad_norm": 0.9208801984786987,
"learning_rate": 4.9437938975157586e-06,
"loss": 0.7172,
"step": 1192
},
{
"epoch": 0.44043809992616295,
"grad_norm": 0.9229491353034973,
"learning_rate": 4.9436912971636695e-06,
"loss": 0.738,
"step": 1193
},
{
"epoch": 0.4408072852571991,
"grad_norm": 0.9245941042900085,
"learning_rate": 4.943588604318635e-06,
"loss": 0.7437,
"step": 1194
},
{
"epoch": 0.4411764705882353,
"grad_norm": 0.8792277574539185,
"learning_rate": 4.943485818984545e-06,
"loss": 0.7363,
"step": 1195
},
{
"epoch": 0.4415456559192715,
"grad_norm": 0.9498505592346191,
"learning_rate": 4.9433829411652864e-06,
"loss": 0.7757,
"step": 1196
},
{
"epoch": 0.44191484125030767,
"grad_norm": 0.9258801341056824,
"learning_rate": 4.943279970864755e-06,
"loss": 0.7355,
"step": 1197
},
{
"epoch": 0.44228402658134386,
"grad_norm": 0.9117864370346069,
"learning_rate": 4.943176908086849e-06,
"loss": 0.7208,
"step": 1198
},
{
"epoch": 0.44265321191238,
"grad_norm": 0.8816313743591309,
"learning_rate": 4.9430737528354665e-06,
"loss": 0.6972,
"step": 1199
},
{
"epoch": 0.4430223972434162,
"grad_norm": 0.9166438579559326,
"learning_rate": 4.942970505114514e-06,
"loss": 0.76,
"step": 1200
},
{
"epoch": 0.4433915825744524,
"grad_norm": 0.8871222138404846,
"learning_rate": 4.942867164927899e-06,
"loss": 0.7348,
"step": 1201
},
{
"epoch": 0.4437607679054886,
"grad_norm": 0.9092727303504944,
"learning_rate": 4.942763732279533e-06,
"loss": 0.7114,
"step": 1202
},
{
"epoch": 0.4441299532365247,
"grad_norm": 0.9247320294380188,
"learning_rate": 4.94266020717333e-06,
"loss": 0.756,
"step": 1203
},
{
"epoch": 0.4444991385675609,
"grad_norm": 0.9279753565788269,
"learning_rate": 4.94255658961321e-06,
"loss": 0.7269,
"step": 1204
},
{
"epoch": 0.4448683238985971,
"grad_norm": 0.917389452457428,
"learning_rate": 4.942452879603094e-06,
"loss": 0.7285,
"step": 1205
},
{
"epoch": 0.4452375092296333,
"grad_norm": 0.8783107399940491,
"learning_rate": 4.942349077146906e-06,
"loss": 0.7487,
"step": 1206
},
{
"epoch": 0.4456066945606695,
"grad_norm": 0.9037907719612122,
"learning_rate": 4.9422451822485776e-06,
"loss": 0.7436,
"step": 1207
},
{
"epoch": 0.4459758798917056,
"grad_norm": 0.8458569049835205,
"learning_rate": 4.942141194912039e-06,
"loss": 0.6799,
"step": 1208
},
{
"epoch": 0.4463450652227418,
"grad_norm": 0.882938802242279,
"learning_rate": 4.942037115141228e-06,
"loss": 0.7216,
"step": 1209
},
{
"epoch": 0.446714250553778,
"grad_norm": 0.920384407043457,
"learning_rate": 4.9419329429400816e-06,
"loss": 0.7292,
"step": 1210
},
{
"epoch": 0.4470834358848142,
"grad_norm": 0.9709598422050476,
"learning_rate": 4.941828678312545e-06,
"loss": 0.7588,
"step": 1211
},
{
"epoch": 0.44745262121585033,
"grad_norm": 0.883941650390625,
"learning_rate": 4.941724321262563e-06,
"loss": 0.7007,
"step": 1212
},
{
"epoch": 0.4478218065468865,
"grad_norm": 0.9086169004440308,
"learning_rate": 4.941619871794087e-06,
"loss": 0.73,
"step": 1213
},
{
"epoch": 0.4481909918779227,
"grad_norm": 0.8642125129699707,
"learning_rate": 4.941515329911068e-06,
"loss": 0.6792,
"step": 1214
},
{
"epoch": 0.4485601772089589,
"grad_norm": 0.9487695693969727,
"learning_rate": 4.941410695617464e-06,
"loss": 0.7419,
"step": 1215
},
{
"epoch": 0.4489293625399951,
"grad_norm": 0.994884729385376,
"learning_rate": 4.941305968917238e-06,
"loss": 0.6888,
"step": 1216
},
{
"epoch": 0.44929854787103124,
"grad_norm": 0.9372398853302002,
"learning_rate": 4.941201149814349e-06,
"loss": 0.7325,
"step": 1217
},
{
"epoch": 0.44966773320206743,
"grad_norm": 0.9120617508888245,
"learning_rate": 4.94109623831277e-06,
"loss": 0.7697,
"step": 1218
},
{
"epoch": 0.4500369185331036,
"grad_norm": 0.9304324388504028,
"learning_rate": 4.940991234416466e-06,
"loss": 0.74,
"step": 1219
},
{
"epoch": 0.4504061038641398,
"grad_norm": 0.8959391713142395,
"learning_rate": 4.940886138129415e-06,
"loss": 0.7074,
"step": 1220
},
{
"epoch": 0.45077528919517595,
"grad_norm": 0.9347814917564392,
"learning_rate": 4.940780949455595e-06,
"loss": 0.7111,
"step": 1221
},
{
"epoch": 0.45114447452621215,
"grad_norm": 0.8901122212409973,
"learning_rate": 4.940675668398986e-06,
"loss": 0.74,
"step": 1222
},
{
"epoch": 0.45151365985724834,
"grad_norm": 0.9231247305870056,
"learning_rate": 4.940570294963572e-06,
"loss": 0.7486,
"step": 1223
},
{
"epoch": 0.45188284518828453,
"grad_norm": 0.9006732106208801,
"learning_rate": 4.940464829153343e-06,
"loss": 0.7592,
"step": 1224
},
{
"epoch": 0.4522520305193207,
"grad_norm": 0.8815886974334717,
"learning_rate": 4.940359270972291e-06,
"loss": 0.6919,
"step": 1225
},
{
"epoch": 0.45262121585035686,
"grad_norm": 0.8991600275039673,
"learning_rate": 4.940253620424411e-06,
"loss": 0.7178,
"step": 1226
},
{
"epoch": 0.45299040118139305,
"grad_norm": 0.8781217932701111,
"learning_rate": 4.940147877513701e-06,
"loss": 0.722,
"step": 1227
},
{
"epoch": 0.45335958651242925,
"grad_norm": 0.9302307367324829,
"learning_rate": 4.940042042244164e-06,
"loss": 0.7535,
"step": 1228
},
{
"epoch": 0.45372877184346544,
"grad_norm": 0.9426242113113403,
"learning_rate": 4.9399361146198065e-06,
"loss": 0.7451,
"step": 1229
},
{
"epoch": 0.4540979571745016,
"grad_norm": 0.9411885738372803,
"learning_rate": 4.939830094644637e-06,
"loss": 0.7625,
"step": 1230
},
{
"epoch": 0.45446714250553777,
"grad_norm": 0.9621394872665405,
"learning_rate": 4.939723982322667e-06,
"loss": 0.6874,
"step": 1231
},
{
"epoch": 0.45483632783657396,
"grad_norm": 0.928747296333313,
"learning_rate": 4.939617777657916e-06,
"loss": 0.761,
"step": 1232
},
{
"epoch": 0.45520551316761015,
"grad_norm": 0.8658984899520874,
"learning_rate": 4.939511480654401e-06,
"loss": 0.7426,
"step": 1233
},
{
"epoch": 0.45557469849864635,
"grad_norm": 0.901623010635376,
"learning_rate": 4.939405091316147e-06,
"loss": 0.7723,
"step": 1234
},
{
"epoch": 0.4559438838296825,
"grad_norm": 0.8813204765319824,
"learning_rate": 4.9392986096471796e-06,
"loss": 0.7268,
"step": 1235
},
{
"epoch": 0.4563130691607187,
"grad_norm": 0.9547492265701294,
"learning_rate": 4.93919203565153e-06,
"loss": 0.7687,
"step": 1236
},
{
"epoch": 0.45668225449175487,
"grad_norm": 0.9250112175941467,
"learning_rate": 4.939085369333232e-06,
"loss": 0.7749,
"step": 1237
},
{
"epoch": 0.45705143982279106,
"grad_norm": 0.9013698697090149,
"learning_rate": 4.938978610696322e-06,
"loss": 0.7553,
"step": 1238
},
{
"epoch": 0.4574206251538272,
"grad_norm": 0.8597696423530579,
"learning_rate": 4.938871759744842e-06,
"loss": 0.6982,
"step": 1239
},
{
"epoch": 0.4577898104848634,
"grad_norm": 0.9342607259750366,
"learning_rate": 4.938764816482835e-06,
"loss": 0.7266,
"step": 1240
},
{
"epoch": 0.4581589958158996,
"grad_norm": 0.9153682589530945,
"learning_rate": 4.93865778091435e-06,
"loss": 0.7474,
"step": 1241
},
{
"epoch": 0.4585281811469358,
"grad_norm": 0.9273695945739746,
"learning_rate": 4.938550653043437e-06,
"loss": 0.7094,
"step": 1242
},
{
"epoch": 0.45889736647797197,
"grad_norm": 0.9250311255455017,
"learning_rate": 4.938443432874151e-06,
"loss": 0.7576,
"step": 1243
},
{
"epoch": 0.4592665518090081,
"grad_norm": 0.9787151217460632,
"learning_rate": 4.938336120410551e-06,
"loss": 0.7324,
"step": 1244
},
{
"epoch": 0.4596357371400443,
"grad_norm": 0.8964402079582214,
"learning_rate": 4.938228715656699e-06,
"loss": 0.766,
"step": 1245
},
{
"epoch": 0.4600049224710805,
"grad_norm": 0.9307600259780884,
"learning_rate": 4.938121218616659e-06,
"loss": 0.7328,
"step": 1246
},
{
"epoch": 0.4603741078021167,
"grad_norm": 0.8917447328567505,
"learning_rate": 4.938013629294502e-06,
"loss": 0.7606,
"step": 1247
},
{
"epoch": 0.4607432931331528,
"grad_norm": 0.902154803276062,
"learning_rate": 4.937905947694296e-06,
"loss": 0.6913,
"step": 1248
},
{
"epoch": 0.461112478464189,
"grad_norm": 0.9088221192359924,
"learning_rate": 4.937798173820121e-06,
"loss": 0.7124,
"step": 1249
},
{
"epoch": 0.4614816637952252,
"grad_norm": 0.8867905735969543,
"learning_rate": 4.937690307676054e-06,
"loss": 0.741,
"step": 1250
},
{
"epoch": 0.4618508491262614,
"grad_norm": 0.9009400606155396,
"learning_rate": 4.937582349266178e-06,
"loss": 0.7393,
"step": 1251
},
{
"epoch": 0.4622200344572976,
"grad_norm": 0.9617549777030945,
"learning_rate": 4.937474298594579e-06,
"loss": 0.7683,
"step": 1252
},
{
"epoch": 0.4625892197883337,
"grad_norm": 0.9465776085853577,
"learning_rate": 4.937366155665348e-06,
"loss": 0.7684,
"step": 1253
},
{
"epoch": 0.4629584051193699,
"grad_norm": 0.9941141605377197,
"learning_rate": 4.9372579204825775e-06,
"loss": 0.7538,
"step": 1254
},
{
"epoch": 0.4633275904504061,
"grad_norm": 0.8829614520072937,
"learning_rate": 4.937149593050363e-06,
"loss": 0.7195,
"step": 1255
},
{
"epoch": 0.4636967757814423,
"grad_norm": 0.8967337608337402,
"learning_rate": 4.937041173372806e-06,
"loss": 0.7436,
"step": 1256
},
{
"epoch": 0.46406596111247844,
"grad_norm": 0.8753035664558411,
"learning_rate": 4.9369326614540096e-06,
"loss": 0.7431,
"step": 1257
},
{
"epoch": 0.46443514644351463,
"grad_norm": 0.9020246863365173,
"learning_rate": 4.936824057298081e-06,
"loss": 0.7362,
"step": 1258
},
{
"epoch": 0.4648043317745508,
"grad_norm": 0.8981832265853882,
"learning_rate": 4.936715360909131e-06,
"loss": 0.7306,
"step": 1259
},
{
"epoch": 0.465173517105587,
"grad_norm": 0.9331729412078857,
"learning_rate": 4.9366065722912735e-06,
"loss": 0.7471,
"step": 1260
},
{
"epoch": 0.4655427024366232,
"grad_norm": 0.9088561534881592,
"learning_rate": 4.936497691448627e-06,
"loss": 0.7693,
"step": 1261
},
{
"epoch": 0.46591188776765935,
"grad_norm": 0.9289308190345764,
"learning_rate": 4.936388718385311e-06,
"loss": 0.7338,
"step": 1262
},
{
"epoch": 0.46628107309869554,
"grad_norm": 0.9137311577796936,
"learning_rate": 4.936279653105452e-06,
"loss": 0.7147,
"step": 1263
},
{
"epoch": 0.46665025842973173,
"grad_norm": 0.8960225582122803,
"learning_rate": 4.936170495613175e-06,
"loss": 0.7154,
"step": 1264
},
{
"epoch": 0.4670194437607679,
"grad_norm": 0.8980303406715393,
"learning_rate": 4.936061245912615e-06,
"loss": 0.7259,
"step": 1265
},
{
"epoch": 0.46738862909180406,
"grad_norm": 0.9025071263313293,
"learning_rate": 4.935951904007906e-06,
"loss": 0.7525,
"step": 1266
},
{
"epoch": 0.46775781442284026,
"grad_norm": 0.8963534235954285,
"learning_rate": 4.935842469903186e-06,
"loss": 0.721,
"step": 1267
},
{
"epoch": 0.46812699975387645,
"grad_norm": 0.9002708196640015,
"learning_rate": 4.935732943602597e-06,
"loss": 0.7474,
"step": 1268
},
{
"epoch": 0.46849618508491264,
"grad_norm": 0.9217738509178162,
"learning_rate": 4.935623325110285e-06,
"loss": 0.7647,
"step": 1269
},
{
"epoch": 0.4688653704159488,
"grad_norm": 0.9251262545585632,
"learning_rate": 4.935513614430399e-06,
"loss": 0.732,
"step": 1270
},
{
"epoch": 0.46923455574698497,
"grad_norm": 0.8719298243522644,
"learning_rate": 4.935403811567091e-06,
"loss": 0.7526,
"step": 1271
},
{
"epoch": 0.46960374107802116,
"grad_norm": 0.893237292766571,
"learning_rate": 4.935293916524517e-06,
"loss": 0.7306,
"step": 1272
},
{
"epoch": 0.46997292640905736,
"grad_norm": 0.8918770551681519,
"learning_rate": 4.935183929306837e-06,
"loss": 0.7181,
"step": 1273
},
{
"epoch": 0.47034211174009355,
"grad_norm": 0.914319634437561,
"learning_rate": 4.935073849918214e-06,
"loss": 0.7679,
"step": 1274
},
{
"epoch": 0.4707112970711297,
"grad_norm": 0.895769476890564,
"learning_rate": 4.934963678362815e-06,
"loss": 0.7601,
"step": 1275
},
{
"epoch": 0.4710804824021659,
"grad_norm": 0.8876564502716064,
"learning_rate": 4.934853414644808e-06,
"loss": 0.733,
"step": 1276
},
{
"epoch": 0.47144966773320207,
"grad_norm": 0.9218546152114868,
"learning_rate": 4.934743058768369e-06,
"loss": 0.7578,
"step": 1277
},
{
"epoch": 0.47181885306423826,
"grad_norm": 0.9365533590316772,
"learning_rate": 4.934632610737673e-06,
"loss": 0.7511,
"step": 1278
},
{
"epoch": 0.4721880383952744,
"grad_norm": 0.8840087652206421,
"learning_rate": 4.934522070556901e-06,
"loss": 0.7085,
"step": 1279
},
{
"epoch": 0.4725572237263106,
"grad_norm": 0.8947144150733948,
"learning_rate": 4.934411438230237e-06,
"loss": 0.7547,
"step": 1280
},
{
"epoch": 0.4729264090573468,
"grad_norm": 0.9141665697097778,
"learning_rate": 4.934300713761868e-06,
"loss": 0.7278,
"step": 1281
},
{
"epoch": 0.473295594388383,
"grad_norm": 0.8770225048065186,
"learning_rate": 4.9341898971559856e-06,
"loss": 0.7245,
"step": 1282
},
{
"epoch": 0.47366477971941917,
"grad_norm": 0.8997796177864075,
"learning_rate": 4.934078988416784e-06,
"loss": 0.775,
"step": 1283
},
{
"epoch": 0.4740339650504553,
"grad_norm": 0.9298402667045593,
"learning_rate": 4.933967987548461e-06,
"loss": 0.7564,
"step": 1284
},
{
"epoch": 0.4744031503814915,
"grad_norm": 0.9105640053749084,
"learning_rate": 4.933856894555218e-06,
"loss": 0.7507,
"step": 1285
},
{
"epoch": 0.4747723357125277,
"grad_norm": 0.8863377571105957,
"learning_rate": 4.933745709441259e-06,
"loss": 0.7113,
"step": 1286
},
{
"epoch": 0.4751415210435639,
"grad_norm": 0.897258996963501,
"learning_rate": 4.9336344322107935e-06,
"loss": 0.7776,
"step": 1287
},
{
"epoch": 0.4755107063746,
"grad_norm": 0.8783311247825623,
"learning_rate": 4.933523062868033e-06,
"loss": 0.7136,
"step": 1288
},
{
"epoch": 0.4758798917056362,
"grad_norm": 0.8907281160354614,
"learning_rate": 4.933411601417192e-06,
"loss": 0.6871,
"step": 1289
},
{
"epoch": 0.4762490770366724,
"grad_norm": 0.9048116207122803,
"learning_rate": 4.93330004786249e-06,
"loss": 0.7545,
"step": 1290
},
{
"epoch": 0.4766182623677086,
"grad_norm": 0.8691855669021606,
"learning_rate": 4.933188402208149e-06,
"loss": 0.703,
"step": 1291
},
{
"epoch": 0.4769874476987448,
"grad_norm": 0.8851851224899292,
"learning_rate": 4.933076664458395e-06,
"loss": 0.7349,
"step": 1292
},
{
"epoch": 0.47735663302978093,
"grad_norm": 0.9410663843154907,
"learning_rate": 4.9329648346174575e-06,
"loss": 0.7593,
"step": 1293
},
{
"epoch": 0.4777258183608171,
"grad_norm": 0.9117968082427979,
"learning_rate": 4.932852912689569e-06,
"loss": 0.7231,
"step": 1294
},
{
"epoch": 0.4780950036918533,
"grad_norm": 0.9473034143447876,
"learning_rate": 4.932740898678965e-06,
"loss": 0.7467,
"step": 1295
},
{
"epoch": 0.4784641890228895,
"grad_norm": 0.9036644697189331,
"learning_rate": 4.932628792589887e-06,
"loss": 0.7426,
"step": 1296
},
{
"epoch": 0.47883337435392564,
"grad_norm": 0.9032191038131714,
"learning_rate": 4.932516594426575e-06,
"loss": 0.7258,
"step": 1297
},
{
"epoch": 0.47920255968496184,
"grad_norm": 0.864815354347229,
"learning_rate": 4.932404304193279e-06,
"loss": 0.7088,
"step": 1298
},
{
"epoch": 0.47957174501599803,
"grad_norm": 0.8944458961486816,
"learning_rate": 4.9322919218942466e-06,
"loss": 0.7706,
"step": 1299
},
{
"epoch": 0.4799409303470342,
"grad_norm": 0.9362423419952393,
"learning_rate": 4.932179447533734e-06,
"loss": 0.7286,
"step": 1300
},
{
"epoch": 0.4803101156780704,
"grad_norm": 0.9102439284324646,
"learning_rate": 4.9320668811159954e-06,
"loss": 0.7096,
"step": 1301
},
{
"epoch": 0.48067930100910655,
"grad_norm": 0.8899404406547546,
"learning_rate": 4.931954222645294e-06,
"loss": 0.7413,
"step": 1302
},
{
"epoch": 0.48104848634014274,
"grad_norm": 0.9097535610198975,
"learning_rate": 4.9318414721258924e-06,
"loss": 0.7417,
"step": 1303
},
{
"epoch": 0.48141767167117894,
"grad_norm": 0.9498276114463806,
"learning_rate": 4.931728629562059e-06,
"loss": 0.724,
"step": 1304
},
{
"epoch": 0.48178685700221513,
"grad_norm": 0.9295927882194519,
"learning_rate": 4.9316156949580645e-06,
"loss": 0.7388,
"step": 1305
},
{
"epoch": 0.48215604233325127,
"grad_norm": 0.907588541507721,
"learning_rate": 4.931502668318183e-06,
"loss": 0.7486,
"step": 1306
},
{
"epoch": 0.48252522766428746,
"grad_norm": 0.9081675410270691,
"learning_rate": 4.9313895496466936e-06,
"loss": 0.7562,
"step": 1307
},
{
"epoch": 0.48289441299532365,
"grad_norm": 0.9324904084205627,
"learning_rate": 4.931276338947876e-06,
"loss": 0.7588,
"step": 1308
},
{
"epoch": 0.48326359832635984,
"grad_norm": 0.9412097930908203,
"learning_rate": 4.931163036226017e-06,
"loss": 0.7773,
"step": 1309
},
{
"epoch": 0.48363278365739604,
"grad_norm": 0.9143854975700378,
"learning_rate": 4.931049641485404e-06,
"loss": 0.7591,
"step": 1310
},
{
"epoch": 0.4840019689884322,
"grad_norm": 0.8855016827583313,
"learning_rate": 4.930936154730329e-06,
"loss": 0.7749,
"step": 1311
},
{
"epoch": 0.48437115431946837,
"grad_norm": 0.9048222303390503,
"learning_rate": 4.930822575965089e-06,
"loss": 0.7268,
"step": 1312
},
{
"epoch": 0.48474033965050456,
"grad_norm": 0.9106447100639343,
"learning_rate": 4.93070890519398e-06,
"loss": 0.7537,
"step": 1313
},
{
"epoch": 0.48510952498154075,
"grad_norm": 0.9264537692070007,
"learning_rate": 4.930595142421307e-06,
"loss": 0.7544,
"step": 1314
},
{
"epoch": 0.4854787103125769,
"grad_norm": 0.9199881553649902,
"learning_rate": 4.930481287651375e-06,
"loss": 0.7032,
"step": 1315
},
{
"epoch": 0.4858478956436131,
"grad_norm": 0.9257407188415527,
"learning_rate": 4.930367340888494e-06,
"loss": 0.7343,
"step": 1316
},
{
"epoch": 0.4862170809746493,
"grad_norm": 0.9798755645751953,
"learning_rate": 4.930253302136976e-06,
"loss": 0.7448,
"step": 1317
},
{
"epoch": 0.48658626630568547,
"grad_norm": 0.919750988483429,
"learning_rate": 4.930139171401136e-06,
"loss": 0.7061,
"step": 1318
},
{
"epoch": 0.48695545163672166,
"grad_norm": 0.9479880332946777,
"learning_rate": 4.930024948685297e-06,
"loss": 0.7962,
"step": 1319
},
{
"epoch": 0.4873246369677578,
"grad_norm": 0.9518943428993225,
"learning_rate": 4.92991063399378e-06,
"loss": 0.7174,
"step": 1320
},
{
"epoch": 0.487693822298794,
"grad_norm": 0.908926784992218,
"learning_rate": 4.929796227330912e-06,
"loss": 0.7462,
"step": 1321
},
{
"epoch": 0.4880630076298302,
"grad_norm": 0.8598317503929138,
"learning_rate": 4.929681728701023e-06,
"loss": 0.7367,
"step": 1322
},
{
"epoch": 0.4884321929608664,
"grad_norm": 0.871263325214386,
"learning_rate": 4.929567138108449e-06,
"loss": 0.7283,
"step": 1323
},
{
"epoch": 0.4888013782919025,
"grad_norm": 0.8650959134101868,
"learning_rate": 4.9294524555575255e-06,
"loss": 0.703,
"step": 1324
},
{
"epoch": 0.4891705636229387,
"grad_norm": 0.9248819351196289,
"learning_rate": 4.9293376810525925e-06,
"loss": 0.7485,
"step": 1325
},
{
"epoch": 0.4895397489539749,
"grad_norm": 0.9359372854232788,
"learning_rate": 4.929222814597995e-06,
"loss": 0.7505,
"step": 1326
},
{
"epoch": 0.4899089342850111,
"grad_norm": 0.9581688046455383,
"learning_rate": 4.929107856198081e-06,
"loss": 0.747,
"step": 1327
},
{
"epoch": 0.4902781196160473,
"grad_norm": 0.9165839552879333,
"learning_rate": 4.928992805857201e-06,
"loss": 0.7406,
"step": 1328
},
{
"epoch": 0.4906473049470834,
"grad_norm": 0.9622183442115784,
"learning_rate": 4.9288776635797105e-06,
"loss": 0.7578,
"step": 1329
},
{
"epoch": 0.4910164902781196,
"grad_norm": 0.9130443930625916,
"learning_rate": 4.928762429369966e-06,
"loss": 0.7165,
"step": 1330
},
{
"epoch": 0.4913856756091558,
"grad_norm": 0.9140217304229736,
"learning_rate": 4.928647103232331e-06,
"loss": 0.7387,
"step": 1331
},
{
"epoch": 0.491754860940192,
"grad_norm": 0.9288978576660156,
"learning_rate": 4.928531685171169e-06,
"loss": 0.732,
"step": 1332
},
{
"epoch": 0.49212404627122813,
"grad_norm": 0.9056506156921387,
"learning_rate": 4.92841617519085e-06,
"loss": 0.6913,
"step": 1333
},
{
"epoch": 0.4924932316022643,
"grad_norm": 0.8599223494529724,
"learning_rate": 4.928300573295744e-06,
"loss": 0.7318,
"step": 1334
},
{
"epoch": 0.4928624169333005,
"grad_norm": 0.9114431142807007,
"learning_rate": 4.928184879490228e-06,
"loss": 0.6987,
"step": 1335
},
{
"epoch": 0.4932316022643367,
"grad_norm": 0.910118579864502,
"learning_rate": 4.9280690937786815e-06,
"loss": 0.7212,
"step": 1336
},
{
"epoch": 0.4936007875953729,
"grad_norm": 0.9045255780220032,
"learning_rate": 4.927953216165486e-06,
"loss": 0.692,
"step": 1337
},
{
"epoch": 0.49396997292640904,
"grad_norm": 0.9406313896179199,
"learning_rate": 4.927837246655027e-06,
"loss": 0.7439,
"step": 1338
},
{
"epoch": 0.49433915825744523,
"grad_norm": 0.9328321218490601,
"learning_rate": 4.9277211852516945e-06,
"loss": 0.7221,
"step": 1339
},
{
"epoch": 0.4947083435884814,
"grad_norm": 0.9344122409820557,
"learning_rate": 4.927605031959882e-06,
"loss": 0.7485,
"step": 1340
},
{
"epoch": 0.4950775289195176,
"grad_norm": 0.8839752674102783,
"learning_rate": 4.9274887867839845e-06,
"loss": 0.7085,
"step": 1341
},
{
"epoch": 0.49544671425055375,
"grad_norm": 0.9064518809318542,
"learning_rate": 4.9273724497284025e-06,
"loss": 0.7123,
"step": 1342
},
{
"epoch": 0.49581589958158995,
"grad_norm": 0.8806946873664856,
"learning_rate": 4.9272560207975395e-06,
"loss": 0.7362,
"step": 1343
},
{
"epoch": 0.49618508491262614,
"grad_norm": 0.9120421409606934,
"learning_rate": 4.9271394999958025e-06,
"loss": 0.7271,
"step": 1344
},
{
"epoch": 0.49655427024366233,
"grad_norm": 0.8838410377502441,
"learning_rate": 4.927022887327601e-06,
"loss": 0.7167,
"step": 1345
},
{
"epoch": 0.4969234555746985,
"grad_norm": 0.8578704595565796,
"learning_rate": 4.926906182797349e-06,
"loss": 0.6924,
"step": 1346
},
{
"epoch": 0.49729264090573466,
"grad_norm": 0.8794922828674316,
"learning_rate": 4.9267893864094644e-06,
"loss": 0.6941,
"step": 1347
},
{
"epoch": 0.49766182623677085,
"grad_norm": 0.902854323387146,
"learning_rate": 4.926672498168368e-06,
"loss": 0.7546,
"step": 1348
},
{
"epoch": 0.49803101156780705,
"grad_norm": 0.9118044376373291,
"learning_rate": 4.926555518078482e-06,
"loss": 0.7321,
"step": 1349
},
{
"epoch": 0.49840019689884324,
"grad_norm": 0.9261218905448914,
"learning_rate": 4.926438446144237e-06,
"loss": 0.7313,
"step": 1350
},
{
"epoch": 0.4987693822298794,
"grad_norm": 0.887624979019165,
"learning_rate": 4.9263212823700616e-06,
"loss": 0.7024,
"step": 1351
},
{
"epoch": 0.49913856756091557,
"grad_norm": 0.899479329586029,
"learning_rate": 4.926204026760392e-06,
"loss": 0.7183,
"step": 1352
},
{
"epoch": 0.49950775289195176,
"grad_norm": 0.8791046142578125,
"learning_rate": 4.926086679319665e-06,
"loss": 0.7311,
"step": 1353
},
{
"epoch": 0.49987693822298795,
"grad_norm": 0.9053353667259216,
"learning_rate": 4.925969240052323e-06,
"loss": 0.7332,
"step": 1354
},
{
"epoch": 0.5002461235540241,
"grad_norm": 0.8757712244987488,
"learning_rate": 4.925851708962811e-06,
"loss": 0.748,
"step": 1355
},
{
"epoch": 0.5006153088850603,
"grad_norm": 0.9180270433425903,
"learning_rate": 4.925734086055578e-06,
"loss": 0.7304,
"step": 1356
},
{
"epoch": 0.5009844942160965,
"grad_norm": 0.8982318043708801,
"learning_rate": 4.9256163713350745e-06,
"loss": 0.7179,
"step": 1357
},
{
"epoch": 0.5013536795471326,
"grad_norm": 0.8590686321258545,
"learning_rate": 4.925498564805757e-06,
"loss": 0.6921,
"step": 1358
},
{
"epoch": 0.5017228648781689,
"grad_norm": 0.9188492894172668,
"learning_rate": 4.925380666472085e-06,
"loss": 0.719,
"step": 1359
},
{
"epoch": 0.502092050209205,
"grad_norm": 0.8929955363273621,
"learning_rate": 4.92526267633852e-06,
"loss": 0.7179,
"step": 1360
},
{
"epoch": 0.5024612355402412,
"grad_norm": 0.9286133646965027,
"learning_rate": 4.925144594409528e-06,
"loss": 0.7686,
"step": 1361
},
{
"epoch": 0.5028304208712774,
"grad_norm": 0.869183361530304,
"learning_rate": 4.925026420689579e-06,
"loss": 0.6961,
"step": 1362
},
{
"epoch": 0.5031996062023135,
"grad_norm": 0.8674918413162231,
"learning_rate": 4.924908155183145e-06,
"loss": 0.7365,
"step": 1363
},
{
"epoch": 0.5035687915333498,
"grad_norm": 0.9061448574066162,
"learning_rate": 4.924789797894701e-06,
"loss": 0.7142,
"step": 1364
},
{
"epoch": 0.5039379768643859,
"grad_norm": 0.9563679695129395,
"learning_rate": 4.924671348828731e-06,
"loss": 0.7383,
"step": 1365
},
{
"epoch": 0.5043071621954222,
"grad_norm": 0.8675025701522827,
"learning_rate": 4.924552807989715e-06,
"loss": 0.7392,
"step": 1366
},
{
"epoch": 0.5046763475264583,
"grad_norm": 0.8793214559555054,
"learning_rate": 4.9244341753821396e-06,
"loss": 0.7712,
"step": 1367
},
{
"epoch": 0.5050455328574944,
"grad_norm": 0.9035436511039734,
"learning_rate": 4.924315451010496e-06,
"loss": 0.7131,
"step": 1368
},
{
"epoch": 0.5054147181885307,
"grad_norm": 0.8869012594223022,
"learning_rate": 4.924196634879278e-06,
"loss": 0.73,
"step": 1369
},
{
"epoch": 0.5057839035195668,
"grad_norm": 0.9123469591140747,
"learning_rate": 4.9240777269929825e-06,
"loss": 0.7164,
"step": 1370
},
{
"epoch": 0.506153088850603,
"grad_norm": 0.8680907487869263,
"learning_rate": 4.923958727356109e-06,
"loss": 0.7002,
"step": 1371
},
{
"epoch": 0.5065222741816392,
"grad_norm": 0.9459185004234314,
"learning_rate": 4.923839635973165e-06,
"loss": 0.7234,
"step": 1372
},
{
"epoch": 0.5068914595126753,
"grad_norm": 0.8962308168411255,
"learning_rate": 4.923720452848653e-06,
"loss": 0.7349,
"step": 1373
},
{
"epoch": 0.5072606448437116,
"grad_norm": 0.8820279836654663,
"learning_rate": 4.92360117798709e-06,
"loss": 0.7304,
"step": 1374
},
{
"epoch": 0.5076298301747477,
"grad_norm": 0.8836473226547241,
"learning_rate": 4.923481811392985e-06,
"loss": 0.749,
"step": 1375
},
{
"epoch": 0.5079990155057839,
"grad_norm": 0.8512134552001953,
"learning_rate": 4.923362353070859e-06,
"loss": 0.7129,
"step": 1376
},
{
"epoch": 0.5083682008368201,
"grad_norm": 0.8974927663803101,
"learning_rate": 4.923242803025232e-06,
"loss": 0.6806,
"step": 1377
},
{
"epoch": 0.5087373861678562,
"grad_norm": 0.8919023275375366,
"learning_rate": 4.92312316126063e-06,
"loss": 0.7334,
"step": 1378
},
{
"epoch": 0.5091065714988925,
"grad_norm": 0.9181932210922241,
"learning_rate": 4.923003427781582e-06,
"loss": 0.7337,
"step": 1379
},
{
"epoch": 0.5094757568299286,
"grad_norm": 0.8906370997428894,
"learning_rate": 4.9228836025926185e-06,
"loss": 0.7356,
"step": 1380
},
{
"epoch": 0.5098449421609648,
"grad_norm": 1.0170215368270874,
"learning_rate": 4.922763685698275e-06,
"loss": 0.7247,
"step": 1381
},
{
"epoch": 0.510214127492001,
"grad_norm": 0.898322343826294,
"learning_rate": 4.922643677103091e-06,
"loss": 0.7254,
"step": 1382
},
{
"epoch": 0.5105833128230371,
"grad_norm": 0.9089271426200867,
"learning_rate": 4.922523576811607e-06,
"loss": 0.7618,
"step": 1383
},
{
"epoch": 0.5109524981540734,
"grad_norm": 0.9560837745666504,
"learning_rate": 4.922403384828373e-06,
"loss": 0.7461,
"step": 1384
},
{
"epoch": 0.5113216834851095,
"grad_norm": 0.9104019403457642,
"learning_rate": 4.922283101157933e-06,
"loss": 0.7622,
"step": 1385
},
{
"epoch": 0.5116908688161457,
"grad_norm": 0.8937442898750305,
"learning_rate": 4.922162725804843e-06,
"loss": 0.6968,
"step": 1386
},
{
"epoch": 0.5120600541471819,
"grad_norm": 0.9203770160675049,
"learning_rate": 4.922042258773658e-06,
"loss": 0.7144,
"step": 1387
},
{
"epoch": 0.5124292394782181,
"grad_norm": 0.9218546748161316,
"learning_rate": 4.921921700068938e-06,
"loss": 0.7849,
"step": 1388
},
{
"epoch": 0.5127984248092542,
"grad_norm": 0.9261901378631592,
"learning_rate": 4.921801049695246e-06,
"loss": 0.744,
"step": 1389
},
{
"epoch": 0.5131676101402904,
"grad_norm": 0.8870830535888672,
"learning_rate": 4.92168030765715e-06,
"loss": 0.719,
"step": 1390
},
{
"epoch": 0.5135367954713266,
"grad_norm": 0.8306862711906433,
"learning_rate": 4.921559473959217e-06,
"loss": 0.6566,
"step": 1391
},
{
"epoch": 0.5139059808023628,
"grad_norm": 0.9043039083480835,
"learning_rate": 4.921438548606022e-06,
"loss": 0.7366,
"step": 1392
},
{
"epoch": 0.514275166133399,
"grad_norm": 0.914897084236145,
"learning_rate": 4.921317531602143e-06,
"loss": 0.7372,
"step": 1393
},
{
"epoch": 0.5146443514644351,
"grad_norm": 0.8624985814094543,
"learning_rate": 4.921196422952159e-06,
"loss": 0.771,
"step": 1394
},
{
"epoch": 0.5150135367954713,
"grad_norm": 0.8990124464035034,
"learning_rate": 4.921075222660655e-06,
"loss": 0.7263,
"step": 1395
},
{
"epoch": 0.5153827221265075,
"grad_norm": 0.906343400478363,
"learning_rate": 4.920953930732217e-06,
"loss": 0.732,
"step": 1396
},
{
"epoch": 0.5157519074575437,
"grad_norm": 0.8977361917495728,
"learning_rate": 4.920832547171438e-06,
"loss": 0.7035,
"step": 1397
},
{
"epoch": 0.5161210927885799,
"grad_norm": 0.8892781138420105,
"learning_rate": 4.920711071982911e-06,
"loss": 0.7441,
"step": 1398
},
{
"epoch": 0.516490278119616,
"grad_norm": 0.8844077587127686,
"learning_rate": 4.920589505171234e-06,
"loss": 0.7602,
"step": 1399
},
{
"epoch": 0.5168594634506523,
"grad_norm": 0.9034737348556519,
"learning_rate": 4.9204678467410075e-06,
"loss": 0.7123,
"step": 1400
},
{
"epoch": 0.5172286487816884,
"grad_norm": 0.9292963147163391,
"learning_rate": 4.920346096696837e-06,
"loss": 0.7266,
"step": 1401
},
{
"epoch": 0.5175978341127246,
"grad_norm": 0.9953920245170593,
"learning_rate": 4.920224255043331e-06,
"loss": 0.757,
"step": 1402
},
{
"epoch": 0.5179670194437608,
"grad_norm": 0.8745222091674805,
"learning_rate": 4.9201023217851e-06,
"loss": 0.7326,
"step": 1403
},
{
"epoch": 0.5183362047747969,
"grad_norm": 0.9055063724517822,
"learning_rate": 4.919980296926761e-06,
"loss": 0.7127,
"step": 1404
},
{
"epoch": 0.5187053901058332,
"grad_norm": 0.9204325079917908,
"learning_rate": 4.91985818047293e-06,
"loss": 0.7741,
"step": 1405
},
{
"epoch": 0.5190745754368693,
"grad_norm": 0.9215565919876099,
"learning_rate": 4.919735972428232e-06,
"loss": 0.7451,
"step": 1406
},
{
"epoch": 0.5194437607679054,
"grad_norm": 0.8616822361946106,
"learning_rate": 4.919613672797291e-06,
"loss": 0.6946,
"step": 1407
},
{
"epoch": 0.5198129460989417,
"grad_norm": 0.8958667516708374,
"learning_rate": 4.919491281584736e-06,
"loss": 0.7137,
"step": 1408
},
{
"epoch": 0.5201821314299778,
"grad_norm": 0.9338605999946594,
"learning_rate": 4.919368798795199e-06,
"loss": 0.7135,
"step": 1409
},
{
"epoch": 0.5205513167610141,
"grad_norm": 0.9158616662025452,
"learning_rate": 4.919246224433317e-06,
"loss": 0.7253,
"step": 1410
},
{
"epoch": 0.5209205020920502,
"grad_norm": 0.9109562635421753,
"learning_rate": 4.919123558503729e-06,
"loss": 0.7315,
"step": 1411
},
{
"epoch": 0.5212896874230863,
"grad_norm": 0.8872061967849731,
"learning_rate": 4.919000801011078e-06,
"loss": 0.7384,
"step": 1412
},
{
"epoch": 0.5216588727541226,
"grad_norm": 0.852870523929596,
"learning_rate": 4.918877951960009e-06,
"loss": 0.6982,
"step": 1413
},
{
"epoch": 0.5220280580851587,
"grad_norm": 0.8745459914207458,
"learning_rate": 4.918755011355174e-06,
"loss": 0.7137,
"step": 1414
},
{
"epoch": 0.522397243416195,
"grad_norm": 0.8926501274108887,
"learning_rate": 4.918631979201225e-06,
"loss": 0.7493,
"step": 1415
},
{
"epoch": 0.5227664287472311,
"grad_norm": 0.9324433207511902,
"learning_rate": 4.918508855502819e-06,
"loss": 0.6959,
"step": 1416
},
{
"epoch": 0.5231356140782673,
"grad_norm": 0.9326198697090149,
"learning_rate": 4.918385640264615e-06,
"loss": 0.7399,
"step": 1417
},
{
"epoch": 0.5235047994093035,
"grad_norm": 0.893012285232544,
"learning_rate": 4.9182623334912796e-06,
"loss": 0.7212,
"step": 1418
},
{
"epoch": 0.5238739847403396,
"grad_norm": 0.8747656941413879,
"learning_rate": 4.918138935187478e-06,
"loss": 0.7471,
"step": 1419
},
{
"epoch": 0.5242431700713759,
"grad_norm": 0.8935147523880005,
"learning_rate": 4.91801544535788e-06,
"loss": 0.7155,
"step": 1420
},
{
"epoch": 0.524612355402412,
"grad_norm": 0.9232352375984192,
"learning_rate": 4.91789186400716e-06,
"loss": 0.7442,
"step": 1421
},
{
"epoch": 0.5249815407334482,
"grad_norm": 0.8894848227500916,
"learning_rate": 4.917768191139997e-06,
"loss": 0.7587,
"step": 1422
},
{
"epoch": 0.5253507260644844,
"grad_norm": 0.8878769278526306,
"learning_rate": 4.91764442676107e-06,
"loss": 0.7335,
"step": 1423
},
{
"epoch": 0.5257199113955205,
"grad_norm": 1.0391231775283813,
"learning_rate": 4.917520570875065e-06,
"loss": 0.7053,
"step": 1424
},
{
"epoch": 0.5260890967265567,
"grad_norm": 0.9046094417572021,
"learning_rate": 4.91739662348667e-06,
"loss": 0.74,
"step": 1425
},
{
"epoch": 0.5264582820575929,
"grad_norm": 0.8990216851234436,
"learning_rate": 4.917272584600575e-06,
"loss": 0.7539,
"step": 1426
},
{
"epoch": 0.5268274673886291,
"grad_norm": 0.876078724861145,
"learning_rate": 4.917148454221477e-06,
"loss": 0.7024,
"step": 1427
},
{
"epoch": 0.5271966527196653,
"grad_norm": 0.9383296966552734,
"learning_rate": 4.917024232354071e-06,
"loss": 0.706,
"step": 1428
},
{
"epoch": 0.5275658380507015,
"grad_norm": 0.9111572504043579,
"learning_rate": 4.916899919003062e-06,
"loss": 0.7463,
"step": 1429
},
{
"epoch": 0.5279350233817376,
"grad_norm": 0.9223730564117432,
"learning_rate": 4.916775514173153e-06,
"loss": 0.759,
"step": 1430
},
{
"epoch": 0.5283042087127738,
"grad_norm": 1.1159535646438599,
"learning_rate": 4.916651017869054e-06,
"loss": 0.7213,
"step": 1431
},
{
"epoch": 0.52867339404381,
"grad_norm": 0.8813338279724121,
"learning_rate": 4.9165264300954765e-06,
"loss": 0.7215,
"step": 1432
},
{
"epoch": 0.5290425793748462,
"grad_norm": 0.90069180727005,
"learning_rate": 4.916401750857136e-06,
"loss": 0.738,
"step": 1433
},
{
"epoch": 0.5294117647058824,
"grad_norm": 0.8849632740020752,
"learning_rate": 4.9162769801587515e-06,
"loss": 0.6921,
"step": 1434
},
{
"epoch": 0.5297809500369185,
"grad_norm": 0.9068708419799805,
"learning_rate": 4.916152118005046e-06,
"loss": 0.7209,
"step": 1435
},
{
"epoch": 0.5301501353679547,
"grad_norm": 0.8788166046142578,
"learning_rate": 4.916027164400746e-06,
"loss": 0.6902,
"step": 1436
},
{
"epoch": 0.5305193206989909,
"grad_norm": 0.8957372903823853,
"learning_rate": 4.9159021193505806e-06,
"loss": 0.7313,
"step": 1437
},
{
"epoch": 0.530888506030027,
"grad_norm": 0.9063811898231506,
"learning_rate": 4.915776982859282e-06,
"loss": 0.7097,
"step": 1438
},
{
"epoch": 0.5312576913610633,
"grad_norm": 0.8811819553375244,
"learning_rate": 4.9156517549315875e-06,
"loss": 0.7085,
"step": 1439
},
{
"epoch": 0.5316268766920994,
"grad_norm": 0.9121610522270203,
"learning_rate": 4.915526435572235e-06,
"loss": 0.753,
"step": 1440
},
{
"epoch": 0.5319960620231357,
"grad_norm": 0.8891005516052246,
"learning_rate": 4.915401024785971e-06,
"loss": 0.7245,
"step": 1441
},
{
"epoch": 0.5323652473541718,
"grad_norm": 0.9231831431388855,
"learning_rate": 4.915275522577539e-06,
"loss": 0.7305,
"step": 1442
},
{
"epoch": 0.5327344326852079,
"grad_norm": 0.9318893551826477,
"learning_rate": 4.915149928951693e-06,
"loss": 0.7543,
"step": 1443
},
{
"epoch": 0.5331036180162442,
"grad_norm": 0.8818265199661255,
"learning_rate": 4.915024243913182e-06,
"loss": 0.6775,
"step": 1444
},
{
"epoch": 0.5334728033472803,
"grad_norm": 0.8918500542640686,
"learning_rate": 4.9148984674667675e-06,
"loss": 0.7264,
"step": 1445
},
{
"epoch": 0.5338419886783166,
"grad_norm": 0.90740966796875,
"learning_rate": 4.914772599617207e-06,
"loss": 0.6985,
"step": 1446
},
{
"epoch": 0.5342111740093527,
"grad_norm": 0.8859344720840454,
"learning_rate": 4.914646640369266e-06,
"loss": 0.7361,
"step": 1447
},
{
"epoch": 0.5345803593403888,
"grad_norm": 0.9221896529197693,
"learning_rate": 4.914520589727712e-06,
"loss": 0.7234,
"step": 1448
},
{
"epoch": 0.5349495446714251,
"grad_norm": 0.925670862197876,
"learning_rate": 4.9143944476973146e-06,
"loss": 0.7197,
"step": 1449
},
{
"epoch": 0.5353187300024612,
"grad_norm": 0.9094201326370239,
"learning_rate": 4.91426821428285e-06,
"loss": 0.7479,
"step": 1450
},
{
"epoch": 0.5356879153334975,
"grad_norm": 0.8817557096481323,
"learning_rate": 4.914141889489095e-06,
"loss": 0.757,
"step": 1451
},
{
"epoch": 0.5360571006645336,
"grad_norm": 0.8657225966453552,
"learning_rate": 4.914015473320833e-06,
"loss": 0.7251,
"step": 1452
},
{
"epoch": 0.5364262859955697,
"grad_norm": 0.8747798204421997,
"learning_rate": 4.913888965782846e-06,
"loss": 0.7471,
"step": 1453
},
{
"epoch": 0.536795471326606,
"grad_norm": 0.898120105266571,
"learning_rate": 4.913762366879924e-06,
"loss": 0.7839,
"step": 1454
},
{
"epoch": 0.5371646566576421,
"grad_norm": 0.9788367748260498,
"learning_rate": 4.913635676616858e-06,
"loss": 0.7449,
"step": 1455
},
{
"epoch": 0.5375338419886783,
"grad_norm": 0.8859315514564514,
"learning_rate": 4.9135088949984425e-06,
"loss": 0.7056,
"step": 1456
},
{
"epoch": 0.5379030273197145,
"grad_norm": 0.8957391977310181,
"learning_rate": 4.913382022029478e-06,
"loss": 0.7059,
"step": 1457
},
{
"epoch": 0.5382722126507506,
"grad_norm": 0.8686297535896301,
"learning_rate": 4.913255057714765e-06,
"loss": 0.7012,
"step": 1458
},
{
"epoch": 0.5386413979817869,
"grad_norm": 0.8956363797187805,
"learning_rate": 4.913128002059111e-06,
"loss": 0.728,
"step": 1459
},
{
"epoch": 0.539010583312823,
"grad_norm": 0.8763745427131653,
"learning_rate": 4.913000855067323e-06,
"loss": 0.7409,
"step": 1460
},
{
"epoch": 0.5393797686438592,
"grad_norm": 0.9013528823852539,
"learning_rate": 4.912873616744213e-06,
"loss": 0.7157,
"step": 1461
},
{
"epoch": 0.5397489539748954,
"grad_norm": 0.9581606984138489,
"learning_rate": 4.9127462870945995e-06,
"loss": 0.719,
"step": 1462
},
{
"epoch": 0.5401181393059316,
"grad_norm": 0.8720983266830444,
"learning_rate": 4.912618866123301e-06,
"loss": 0.6927,
"step": 1463
},
{
"epoch": 0.5404873246369678,
"grad_norm": 0.8839109539985657,
"learning_rate": 4.912491353835138e-06,
"loss": 0.7265,
"step": 1464
},
{
"epoch": 0.5408565099680039,
"grad_norm": 0.9057873487472534,
"learning_rate": 4.91236375023494e-06,
"loss": 0.7321,
"step": 1465
},
{
"epoch": 0.5412256952990401,
"grad_norm": 0.8765888214111328,
"learning_rate": 4.912236055327535e-06,
"loss": 0.7096,
"step": 1466
},
{
"epoch": 0.5415948806300763,
"grad_norm": 0.9088855981826782,
"learning_rate": 4.912108269117757e-06,
"loss": 0.7158,
"step": 1467
},
{
"epoch": 0.5419640659611125,
"grad_norm": 0.9135664105415344,
"learning_rate": 4.911980391610442e-06,
"loss": 0.7148,
"step": 1468
},
{
"epoch": 0.5423332512921487,
"grad_norm": 0.8987833857536316,
"learning_rate": 4.91185242281043e-06,
"loss": 0.6955,
"step": 1469
},
{
"epoch": 0.5427024366231848,
"grad_norm": 0.890201985836029,
"learning_rate": 4.911724362722566e-06,
"loss": 0.7131,
"step": 1470
},
{
"epoch": 0.543071621954221,
"grad_norm": 0.9337875247001648,
"learning_rate": 4.911596211351695e-06,
"loss": 0.7518,
"step": 1471
},
{
"epoch": 0.5434408072852572,
"grad_norm": 0.9150261282920837,
"learning_rate": 4.911467968702669e-06,
"loss": 0.7283,
"step": 1472
},
{
"epoch": 0.5438099926162934,
"grad_norm": 0.9131171107292175,
"learning_rate": 4.911339634780341e-06,
"loss": 0.7292,
"step": 1473
},
{
"epoch": 0.5441791779473295,
"grad_norm": 0.9012570977210999,
"learning_rate": 4.91121120958957e-06,
"loss": 0.7185,
"step": 1474
},
{
"epoch": 0.5445483632783658,
"grad_norm": 0.9188559651374817,
"learning_rate": 4.9110826931352145e-06,
"loss": 0.7277,
"step": 1475
},
{
"epoch": 0.5449175486094019,
"grad_norm": 0.9295513033866882,
"learning_rate": 4.91095408542214e-06,
"loss": 0.7768,
"step": 1476
},
{
"epoch": 0.5452867339404381,
"grad_norm": 0.895024836063385,
"learning_rate": 4.910825386455215e-06,
"loss": 0.7565,
"step": 1477
},
{
"epoch": 0.5456559192714743,
"grad_norm": 0.8578177094459534,
"learning_rate": 4.91069659623931e-06,
"loss": 0.6794,
"step": 1478
},
{
"epoch": 0.5460251046025104,
"grad_norm": 0.8829843401908875,
"learning_rate": 4.9105677147792996e-06,
"loss": 0.6603,
"step": 1479
},
{
"epoch": 0.5463942899335467,
"grad_norm": 0.8859395980834961,
"learning_rate": 4.910438742080061e-06,
"loss": 0.7196,
"step": 1480
},
{
"epoch": 0.5467634752645828,
"grad_norm": 0.8851184844970703,
"learning_rate": 4.910309678146478e-06,
"loss": 0.7091,
"step": 1481
},
{
"epoch": 0.547132660595619,
"grad_norm": 0.8768782615661621,
"learning_rate": 4.910180522983434e-06,
"loss": 0.7177,
"step": 1482
},
{
"epoch": 0.5475018459266552,
"grad_norm": 0.8919864892959595,
"learning_rate": 4.910051276595818e-06,
"loss": 0.7121,
"step": 1483
},
{
"epoch": 0.5478710312576913,
"grad_norm": 0.9079790115356445,
"learning_rate": 4.909921938988521e-06,
"loss": 0.7072,
"step": 1484
},
{
"epoch": 0.5482402165887276,
"grad_norm": 0.9404414892196655,
"learning_rate": 4.90979251016644e-06,
"loss": 0.7396,
"step": 1485
},
{
"epoch": 0.5486094019197637,
"grad_norm": 0.8725743293762207,
"learning_rate": 4.909662990134473e-06,
"loss": 0.6942,
"step": 1486
},
{
"epoch": 0.5489785872508,
"grad_norm": 0.90342777967453,
"learning_rate": 4.909533378897522e-06,
"loss": 0.7341,
"step": 1487
},
{
"epoch": 0.5493477725818361,
"grad_norm": 0.9160227179527283,
"learning_rate": 4.909403676460494e-06,
"loss": 0.7173,
"step": 1488
},
{
"epoch": 0.5497169579128722,
"grad_norm": 0.8947250843048096,
"learning_rate": 4.909273882828296e-06,
"loss": 0.7126,
"step": 1489
},
{
"epoch": 0.5500861432439085,
"grad_norm": 0.889103889465332,
"learning_rate": 4.909143998005842e-06,
"loss": 0.6949,
"step": 1490
},
{
"epoch": 0.5504553285749446,
"grad_norm": 0.8343292474746704,
"learning_rate": 4.909014021998049e-06,
"loss": 0.698,
"step": 1491
},
{
"epoch": 0.5508245139059808,
"grad_norm": 0.8784751892089844,
"learning_rate": 4.908883954809834e-06,
"loss": 0.6957,
"step": 1492
},
{
"epoch": 0.551193699237017,
"grad_norm": 0.9363612532615662,
"learning_rate": 4.908753796446123e-06,
"loss": 0.7385,
"step": 1493
},
{
"epoch": 0.5515628845680531,
"grad_norm": 0.899426281452179,
"learning_rate": 4.908623546911841e-06,
"loss": 0.7354,
"step": 1494
},
{
"epoch": 0.5519320698990894,
"grad_norm": 1.1256046295166016,
"learning_rate": 4.908493206211917e-06,
"loss": 0.7554,
"step": 1495
},
{
"epoch": 0.5523012552301255,
"grad_norm": 0.8998042941093445,
"learning_rate": 4.908362774351286e-06,
"loss": 0.718,
"step": 1496
},
{
"epoch": 0.5526704405611617,
"grad_norm": 0.9059179425239563,
"learning_rate": 4.908232251334884e-06,
"loss": 0.7193,
"step": 1497
},
{
"epoch": 0.5530396258921979,
"grad_norm": 0.8895880579948425,
"learning_rate": 4.90810163716765e-06,
"loss": 0.7431,
"step": 1498
},
{
"epoch": 0.553408811223234,
"grad_norm": 0.8827221393585205,
"learning_rate": 4.907970931854531e-06,
"loss": 0.7553,
"step": 1499
},
{
"epoch": 0.5537779965542703,
"grad_norm": 0.8968391418457031,
"learning_rate": 4.9078401354004715e-06,
"loss": 0.7487,
"step": 1500
},
{
"epoch": 0.5541471818853064,
"grad_norm": 0.8841264247894287,
"learning_rate": 4.907709247810422e-06,
"loss": 0.7482,
"step": 1501
},
{
"epoch": 0.5545163672163426,
"grad_norm": 0.8847429156303406,
"learning_rate": 4.907578269089338e-06,
"loss": 0.7099,
"step": 1502
},
{
"epoch": 0.5548855525473788,
"grad_norm": 0.892648458480835,
"learning_rate": 4.9074471992421765e-06,
"loss": 0.7092,
"step": 1503
},
{
"epoch": 0.555254737878415,
"grad_norm": 0.9089244604110718,
"learning_rate": 4.907316038273899e-06,
"loss": 0.7395,
"step": 1504
},
{
"epoch": 0.5556239232094512,
"grad_norm": 0.9008041620254517,
"learning_rate": 4.9071847861894684e-06,
"loss": 0.7522,
"step": 1505
},
{
"epoch": 0.5559931085404873,
"grad_norm": 1.0230878591537476,
"learning_rate": 4.907053442993853e-06,
"loss": 0.7571,
"step": 1506
},
{
"epoch": 0.5563622938715235,
"grad_norm": 0.9059250950813293,
"learning_rate": 4.906922008692025e-06,
"loss": 0.7478,
"step": 1507
},
{
"epoch": 0.5567314792025597,
"grad_norm": 0.8838487863540649,
"learning_rate": 4.906790483288958e-06,
"loss": 0.7608,
"step": 1508
},
{
"epoch": 0.5571006645335959,
"grad_norm": 0.879643440246582,
"learning_rate": 4.906658866789632e-06,
"loss": 0.7373,
"step": 1509
},
{
"epoch": 0.557469849864632,
"grad_norm": 0.9180140495300293,
"learning_rate": 4.906527159199027e-06,
"loss": 0.7187,
"step": 1510
},
{
"epoch": 0.5578390351956682,
"grad_norm": 0.8671844005584717,
"learning_rate": 4.906395360522128e-06,
"loss": 0.6626,
"step": 1511
},
{
"epoch": 0.5582082205267044,
"grad_norm": 0.9056374430656433,
"learning_rate": 4.9062634707639235e-06,
"loss": 0.7523,
"step": 1512
},
{
"epoch": 0.5585774058577406,
"grad_norm": 0.8730549812316895,
"learning_rate": 4.9061314899294074e-06,
"loss": 0.7356,
"step": 1513
},
{
"epoch": 0.5589465911887768,
"grad_norm": 0.8778785467147827,
"learning_rate": 4.905999418023574e-06,
"loss": 0.7071,
"step": 1514
},
{
"epoch": 0.5593157765198129,
"grad_norm": 0.8926696181297302,
"learning_rate": 4.905867255051421e-06,
"loss": 0.6818,
"step": 1515
},
{
"epoch": 0.5596849618508491,
"grad_norm": 0.8964526057243347,
"learning_rate": 4.905735001017952e-06,
"loss": 0.7376,
"step": 1516
},
{
"epoch": 0.5600541471818853,
"grad_norm": 0.8878997564315796,
"learning_rate": 4.905602655928172e-06,
"loss": 0.702,
"step": 1517
},
{
"epoch": 0.5604233325129215,
"grad_norm": 0.9055455327033997,
"learning_rate": 4.9054702197870905e-06,
"loss": 0.7591,
"step": 1518
},
{
"epoch": 0.5607925178439577,
"grad_norm": 0.8939942717552185,
"learning_rate": 4.9053376925997216e-06,
"loss": 0.7195,
"step": 1519
},
{
"epoch": 0.5611617031749938,
"grad_norm": 0.8912205696105957,
"learning_rate": 4.90520507437108e-06,
"loss": 0.7483,
"step": 1520
},
{
"epoch": 0.5615308885060301,
"grad_norm": 0.9732519388198853,
"learning_rate": 4.905072365106184e-06,
"loss": 0.7273,
"step": 1521
},
{
"epoch": 0.5619000738370662,
"grad_norm": 0.8627989888191223,
"learning_rate": 4.904939564810059e-06,
"loss": 0.7527,
"step": 1522
},
{
"epoch": 0.5622692591681024,
"grad_norm": 0.8785387277603149,
"learning_rate": 4.904806673487731e-06,
"loss": 0.7135,
"step": 1523
},
{
"epoch": 0.5626384444991386,
"grad_norm": 0.8822858929634094,
"learning_rate": 4.904673691144229e-06,
"loss": 0.725,
"step": 1524
},
{
"epoch": 0.5630076298301747,
"grad_norm": 0.8640886545181274,
"learning_rate": 4.904540617784587e-06,
"loss": 0.6923,
"step": 1525
},
{
"epoch": 0.563376815161211,
"grad_norm": 0.9395278096199036,
"learning_rate": 4.904407453413841e-06,
"loss": 0.7389,
"step": 1526
},
{
"epoch": 0.5637460004922471,
"grad_norm": 0.9424323439598083,
"learning_rate": 4.904274198037031e-06,
"loss": 0.7392,
"step": 1527
},
{
"epoch": 0.5641151858232832,
"grad_norm": 0.8736268281936646,
"learning_rate": 4.904140851659203e-06,
"loss": 0.689,
"step": 1528
},
{
"epoch": 0.5644843711543195,
"grad_norm": 0.9197478294372559,
"learning_rate": 4.904007414285401e-06,
"loss": 0.7496,
"step": 1529
},
{
"epoch": 0.5648535564853556,
"grad_norm": 0.8863821029663086,
"learning_rate": 4.903873885920678e-06,
"loss": 0.7162,
"step": 1530
},
{
"epoch": 0.5652227418163919,
"grad_norm": 0.9655389189720154,
"learning_rate": 4.903740266570087e-06,
"loss": 0.735,
"step": 1531
},
{
"epoch": 0.565591927147428,
"grad_norm": 0.8864624500274658,
"learning_rate": 4.903606556238686e-06,
"loss": 0.7066,
"step": 1532
},
{
"epoch": 0.5659611124784641,
"grad_norm": 0.8712696433067322,
"learning_rate": 4.9034727549315344e-06,
"loss": 0.7257,
"step": 1533
},
{
"epoch": 0.5663302978095004,
"grad_norm": 0.902729868888855,
"learning_rate": 4.903338862653698e-06,
"loss": 0.744,
"step": 1534
},
{
"epoch": 0.5666994831405365,
"grad_norm": 0.9051229953765869,
"learning_rate": 4.903204879410245e-06,
"loss": 0.6945,
"step": 1535
},
{
"epoch": 0.5670686684715728,
"grad_norm": 0.9622679352760315,
"learning_rate": 4.9030708052062445e-06,
"loss": 0.7472,
"step": 1536
},
{
"epoch": 0.5674378538026089,
"grad_norm": 0.9077664017677307,
"learning_rate": 4.902936640046772e-06,
"loss": 0.719,
"step": 1537
},
{
"epoch": 0.567807039133645,
"grad_norm": 0.8758202195167542,
"learning_rate": 4.902802383936908e-06,
"loss": 0.7191,
"step": 1538
},
{
"epoch": 0.5681762244646813,
"grad_norm": 0.8584937453269958,
"learning_rate": 4.902668036881731e-06,
"loss": 0.723,
"step": 1539
},
{
"epoch": 0.5685454097957174,
"grad_norm": 0.8474022746086121,
"learning_rate": 4.902533598886327e-06,
"loss": 0.6828,
"step": 1540
},
{
"epoch": 0.5689145951267536,
"grad_norm": 0.8970612287521362,
"learning_rate": 4.902399069955784e-06,
"loss": 0.7298,
"step": 1541
},
{
"epoch": 0.5692837804577898,
"grad_norm": 0.8726117014884949,
"learning_rate": 4.9022644500951956e-06,
"loss": 0.7043,
"step": 1542
},
{
"epoch": 0.569652965788826,
"grad_norm": 0.8593499660491943,
"learning_rate": 4.902129739309655e-06,
"loss": 0.7272,
"step": 1543
},
{
"epoch": 0.5700221511198622,
"grad_norm": 0.9041122794151306,
"learning_rate": 4.901994937604263e-06,
"loss": 0.7057,
"step": 1544
},
{
"epoch": 0.5703913364508983,
"grad_norm": 0.9077982306480408,
"learning_rate": 4.90186004498412e-06,
"loss": 0.7139,
"step": 1545
},
{
"epoch": 0.5707605217819345,
"grad_norm": 0.9217522740364075,
"learning_rate": 4.9017250614543326e-06,
"loss": 0.7491,
"step": 1546
},
{
"epoch": 0.5711297071129707,
"grad_norm": 0.95450758934021,
"learning_rate": 4.901589987020009e-06,
"loss": 0.7225,
"step": 1547
},
{
"epoch": 0.5714988924440069,
"grad_norm": 0.8575482368469238,
"learning_rate": 4.9014548216862635e-06,
"loss": 0.7052,
"step": 1548
},
{
"epoch": 0.5718680777750431,
"grad_norm": 0.8921974897384644,
"learning_rate": 4.90131956545821e-06,
"loss": 0.7423,
"step": 1549
},
{
"epoch": 0.5722372631060793,
"grad_norm": 0.9220647811889648,
"learning_rate": 4.901184218340969e-06,
"loss": 0.779,
"step": 1550
},
{
"epoch": 0.5726064484371154,
"grad_norm": 0.9192151427268982,
"learning_rate": 4.901048780339662e-06,
"loss": 0.7373,
"step": 1551
},
{
"epoch": 0.5729756337681516,
"grad_norm": 0.8914799690246582,
"learning_rate": 4.900913251459418e-06,
"loss": 0.7436,
"step": 1552
},
{
"epoch": 0.5733448190991878,
"grad_norm": 0.9210816621780396,
"learning_rate": 4.9007776317053654e-06,
"loss": 0.7246,
"step": 1553
},
{
"epoch": 0.573714004430224,
"grad_norm": 0.8826539516448975,
"learning_rate": 4.900641921082636e-06,
"loss": 0.6702,
"step": 1554
},
{
"epoch": 0.5740831897612602,
"grad_norm": 0.9123632907867432,
"learning_rate": 4.9005061195963686e-06,
"loss": 0.7505,
"step": 1555
},
{
"epoch": 0.5744523750922963,
"grad_norm": 0.8475764989852905,
"learning_rate": 4.900370227251702e-06,
"loss": 0.7349,
"step": 1556
},
{
"epoch": 0.5748215604233325,
"grad_norm": 0.9372847080230713,
"learning_rate": 4.900234244053778e-06,
"loss": 0.6943,
"step": 1557
},
{
"epoch": 0.5751907457543687,
"grad_norm": 0.8899771571159363,
"learning_rate": 4.900098170007748e-06,
"loss": 0.6975,
"step": 1558
},
{
"epoch": 0.5755599310854048,
"grad_norm": 0.9169413447380066,
"learning_rate": 4.899962005118759e-06,
"loss": 0.7258,
"step": 1559
},
{
"epoch": 0.5759291164164411,
"grad_norm": 0.9141312837600708,
"learning_rate": 4.899825749391965e-06,
"loss": 0.7496,
"step": 1560
},
{
"epoch": 0.5762983017474772,
"grad_norm": 0.8956618905067444,
"learning_rate": 4.8996894028325234e-06,
"loss": 0.7375,
"step": 1561
},
{
"epoch": 0.5766674870785135,
"grad_norm": 0.8596277236938477,
"learning_rate": 4.899552965445596e-06,
"loss": 0.7099,
"step": 1562
},
{
"epoch": 0.5770366724095496,
"grad_norm": 0.887212336063385,
"learning_rate": 4.899416437236346e-06,
"loss": 0.7221,
"step": 1563
},
{
"epoch": 0.5774058577405857,
"grad_norm": 0.9066047668457031,
"learning_rate": 4.8992798182099415e-06,
"loss": 0.7132,
"step": 1564
},
{
"epoch": 0.577775043071622,
"grad_norm": 0.8738695979118347,
"learning_rate": 4.899143108371552e-06,
"loss": 0.7055,
"step": 1565
},
{
"epoch": 0.5781442284026581,
"grad_norm": 0.87455153465271,
"learning_rate": 4.899006307726354e-06,
"loss": 0.7169,
"step": 1566
},
{
"epoch": 0.5785134137336944,
"grad_norm": 0.8736885786056519,
"learning_rate": 4.898869416279524e-06,
"loss": 0.7305,
"step": 1567
},
{
"epoch": 0.5788825990647305,
"grad_norm": 0.8731737732887268,
"learning_rate": 4.8987324340362445e-06,
"loss": 0.771,
"step": 1568
},
{
"epoch": 0.5792517843957666,
"grad_norm": 0.8779594898223877,
"learning_rate": 4.898595361001698e-06,
"loss": 0.7234,
"step": 1569
},
{
"epoch": 0.5796209697268029,
"grad_norm": 0.8702481985092163,
"learning_rate": 4.898458197181075e-06,
"loss": 0.728,
"step": 1570
},
{
"epoch": 0.579990155057839,
"grad_norm": 0.9149512052536011,
"learning_rate": 4.898320942579566e-06,
"loss": 0.7651,
"step": 1571
},
{
"epoch": 0.5803593403888753,
"grad_norm": 0.8694207072257996,
"learning_rate": 4.898183597202366e-06,
"loss": 0.7109,
"step": 1572
},
{
"epoch": 0.5807285257199114,
"grad_norm": 0.9014127850532532,
"learning_rate": 4.898046161054674e-06,
"loss": 0.7631,
"step": 1573
},
{
"epoch": 0.5810977110509475,
"grad_norm": 0.889441967010498,
"learning_rate": 4.897908634141692e-06,
"loss": 0.6963,
"step": 1574
},
{
"epoch": 0.5814668963819838,
"grad_norm": 0.8994700908660889,
"learning_rate": 4.897771016468624e-06,
"loss": 0.718,
"step": 1575
},
{
"epoch": 0.5818360817130199,
"grad_norm": 0.877490758895874,
"learning_rate": 4.897633308040681e-06,
"loss": 0.7188,
"step": 1576
},
{
"epoch": 0.5822052670440561,
"grad_norm": 0.8843386769294739,
"learning_rate": 4.8974955088630736e-06,
"loss": 0.7314,
"step": 1577
},
{
"epoch": 0.5825744523750923,
"grad_norm": 0.8549126386642456,
"learning_rate": 4.897357618941017e-06,
"loss": 0.7254,
"step": 1578
},
{
"epoch": 0.5829436377061284,
"grad_norm": 0.8546504378318787,
"learning_rate": 4.897219638279732e-06,
"loss": 0.737,
"step": 1579
},
{
"epoch": 0.5833128230371647,
"grad_norm": 0.8920966386795044,
"learning_rate": 4.89708156688444e-06,
"loss": 0.7096,
"step": 1580
},
{
"epoch": 0.5836820083682008,
"grad_norm": 0.8906827569007874,
"learning_rate": 4.896943404760368e-06,
"loss": 0.7314,
"step": 1581
},
{
"epoch": 0.584051193699237,
"grad_norm": 0.8761510252952576,
"learning_rate": 4.896805151912743e-06,
"loss": 0.6968,
"step": 1582
},
{
"epoch": 0.5844203790302732,
"grad_norm": 0.9379798173904419,
"learning_rate": 4.896666808346801e-06,
"loss": 0.7284,
"step": 1583
},
{
"epoch": 0.5847895643613094,
"grad_norm": 0.8771979212760925,
"learning_rate": 4.8965283740677765e-06,
"loss": 0.7416,
"step": 1584
},
{
"epoch": 0.5851587496923456,
"grad_norm": 0.8932775259017944,
"learning_rate": 4.896389849080908e-06,
"loss": 0.7203,
"step": 1585
},
{
"epoch": 0.5855279350233817,
"grad_norm": 0.9140869975090027,
"learning_rate": 4.8962512333914415e-06,
"loss": 0.7585,
"step": 1586
},
{
"epoch": 0.5858971203544179,
"grad_norm": 0.9230924844741821,
"learning_rate": 4.896112527004621e-06,
"loss": 0.7134,
"step": 1587
},
{
"epoch": 0.5862663056854541,
"grad_norm": 0.9974596500396729,
"learning_rate": 4.895973729925698e-06,
"loss": 0.6977,
"step": 1588
},
{
"epoch": 0.5866354910164903,
"grad_norm": 0.8764031529426575,
"learning_rate": 4.8958348421599255e-06,
"loss": 0.7449,
"step": 1589
},
{
"epoch": 0.5870046763475265,
"grad_norm": 0.9063859581947327,
"learning_rate": 4.895695863712561e-06,
"loss": 0.7296,
"step": 1590
},
{
"epoch": 0.5873738616785626,
"grad_norm": 0.9148220419883728,
"learning_rate": 4.895556794588864e-06,
"loss": 0.7396,
"step": 1591
},
{
"epoch": 0.5877430470095988,
"grad_norm": 0.8891001343727112,
"learning_rate": 4.895417634794098e-06,
"loss": 0.6993,
"step": 1592
},
{
"epoch": 0.588112232340635,
"grad_norm": 0.8979329466819763,
"learning_rate": 4.89527838433353e-06,
"loss": 0.7306,
"step": 1593
},
{
"epoch": 0.5884814176716712,
"grad_norm": 0.9050261974334717,
"learning_rate": 4.895139043212432e-06,
"loss": 0.7339,
"step": 1594
},
{
"epoch": 0.5888506030027073,
"grad_norm": 0.8708974719047546,
"learning_rate": 4.894999611436076e-06,
"loss": 0.7163,
"step": 1595
},
{
"epoch": 0.5892197883337436,
"grad_norm": 0.9276278018951416,
"learning_rate": 4.894860089009742e-06,
"loss": 0.728,
"step": 1596
},
{
"epoch": 0.5895889736647797,
"grad_norm": 1.0377492904663086,
"learning_rate": 4.894720475938709e-06,
"loss": 0.719,
"step": 1597
},
{
"epoch": 0.5899581589958159,
"grad_norm": 0.8987076282501221,
"learning_rate": 4.894580772228261e-06,
"loss": 0.7321,
"step": 1598
},
{
"epoch": 0.5903273443268521,
"grad_norm": 0.9341242909431458,
"learning_rate": 4.8944409778836874e-06,
"loss": 0.7379,
"step": 1599
},
{
"epoch": 0.5906965296578882,
"grad_norm": 0.9609735012054443,
"learning_rate": 4.894301092910278e-06,
"loss": 0.7361,
"step": 1600
},
{
"epoch": 0.5910657149889245,
"grad_norm": 0.8665148615837097,
"learning_rate": 4.8941611173133285e-06,
"loss": 0.7229,
"step": 1601
},
{
"epoch": 0.5914349003199606,
"grad_norm": 0.9139685034751892,
"learning_rate": 4.894021051098136e-06,
"loss": 0.7416,
"step": 1602
},
{
"epoch": 0.5918040856509968,
"grad_norm": 0.9231698513031006,
"learning_rate": 4.893880894270002e-06,
"loss": 0.7051,
"step": 1603
},
{
"epoch": 0.592173270982033,
"grad_norm": 0.8691010475158691,
"learning_rate": 4.893740646834232e-06,
"loss": 0.699,
"step": 1604
},
{
"epoch": 0.5925424563130691,
"grad_norm": 0.9221158623695374,
"learning_rate": 4.893600308796134e-06,
"loss": 0.745,
"step": 1605
},
{
"epoch": 0.5929116416441054,
"grad_norm": 0.86871337890625,
"learning_rate": 4.893459880161019e-06,
"loss": 0.7578,
"step": 1606
},
{
"epoch": 0.5932808269751415,
"grad_norm": 0.8679521679878235,
"learning_rate": 4.893319360934203e-06,
"loss": 0.728,
"step": 1607
},
{
"epoch": 0.5936500123061778,
"grad_norm": 0.9038932919502258,
"learning_rate": 4.893178751121006e-06,
"loss": 0.7258,
"step": 1608
},
{
"epoch": 0.5940191976372139,
"grad_norm": 0.890328049659729,
"learning_rate": 4.893038050726747e-06,
"loss": 0.7208,
"step": 1609
},
{
"epoch": 0.59438838296825,
"grad_norm": 0.9175987243652344,
"learning_rate": 4.892897259756753e-06,
"loss": 0.7468,
"step": 1610
},
{
"epoch": 0.5947575682992863,
"grad_norm": 0.9225278496742249,
"learning_rate": 4.892756378216354e-06,
"loss": 0.7379,
"step": 1611
},
{
"epoch": 0.5951267536303224,
"grad_norm": 0.8708120584487915,
"learning_rate": 4.8926154061108814e-06,
"loss": 0.7159,
"step": 1612
},
{
"epoch": 0.5954959389613586,
"grad_norm": 0.8939933776855469,
"learning_rate": 4.89247434344567e-06,
"loss": 0.7274,
"step": 1613
},
{
"epoch": 0.5958651242923948,
"grad_norm": 0.9112115502357483,
"learning_rate": 4.8923331902260604e-06,
"loss": 0.74,
"step": 1614
},
{
"epoch": 0.5962343096234309,
"grad_norm": 0.9555295705795288,
"learning_rate": 4.892191946457394e-06,
"loss": 0.7531,
"step": 1615
},
{
"epoch": 0.5966034949544672,
"grad_norm": 0.9047622680664062,
"learning_rate": 4.892050612145017e-06,
"loss": 0.763,
"step": 1616
},
{
"epoch": 0.5969726802855033,
"grad_norm": 0.9045296907424927,
"learning_rate": 4.8919091872942805e-06,
"loss": 0.6807,
"step": 1617
},
{
"epoch": 0.5973418656165395,
"grad_norm": 0.9008351564407349,
"learning_rate": 4.8917676719105355e-06,
"loss": 0.7378,
"step": 1618
},
{
"epoch": 0.5977110509475757,
"grad_norm": 0.8514592051506042,
"learning_rate": 4.891626065999139e-06,
"loss": 0.7158,
"step": 1619
},
{
"epoch": 0.5980802362786118,
"grad_norm": 0.9227151274681091,
"learning_rate": 4.8914843695654504e-06,
"loss": 0.7079,
"step": 1620
},
{
"epoch": 0.5984494216096481,
"grad_norm": 0.9016704559326172,
"learning_rate": 4.891342582614834e-06,
"loss": 0.7307,
"step": 1621
},
{
"epoch": 0.5988186069406842,
"grad_norm": 0.8876495957374573,
"learning_rate": 4.891200705152654e-06,
"loss": 0.7375,
"step": 1622
},
{
"epoch": 0.5991877922717204,
"grad_norm": 0.8613106608390808,
"learning_rate": 4.891058737184284e-06,
"loss": 0.7055,
"step": 1623
},
{
"epoch": 0.5995569776027566,
"grad_norm": 0.9127830862998962,
"learning_rate": 4.890916678715094e-06,
"loss": 0.7439,
"step": 1624
},
{
"epoch": 0.5999261629337928,
"grad_norm": 0.8781334161758423,
"learning_rate": 4.890774529750463e-06,
"loss": 0.7034,
"step": 1625
},
{
"epoch": 0.600295348264829,
"grad_norm": 0.8634480834007263,
"learning_rate": 4.890632290295771e-06,
"loss": 0.7129,
"step": 1626
},
{
"epoch": 0.6006645335958651,
"grad_norm": 0.8753401637077332,
"learning_rate": 4.8904899603564e-06,
"loss": 0.71,
"step": 1627
},
{
"epoch": 0.6010337189269013,
"grad_norm": 0.9049730896949768,
"learning_rate": 4.890347539937739e-06,
"loss": 0.7161,
"step": 1628
},
{
"epoch": 0.6014029042579375,
"grad_norm": 0.8901035785675049,
"learning_rate": 4.890205029045179e-06,
"loss": 0.7126,
"step": 1629
},
{
"epoch": 0.6017720895889737,
"grad_norm": 0.9000959396362305,
"learning_rate": 4.890062427684111e-06,
"loss": 0.6685,
"step": 1630
},
{
"epoch": 0.6021412749200098,
"grad_norm": 0.8935882449150085,
"learning_rate": 4.889919735859936e-06,
"loss": 0.693,
"step": 1631
},
{
"epoch": 0.602510460251046,
"grad_norm": 0.8670996427536011,
"learning_rate": 4.8897769535780525e-06,
"loss": 0.725,
"step": 1632
},
{
"epoch": 0.6028796455820822,
"grad_norm": 0.8828466534614563,
"learning_rate": 4.889634080843866e-06,
"loss": 0.7506,
"step": 1633
},
{
"epoch": 0.6032488309131184,
"grad_norm": 0.8849684596061707,
"learning_rate": 4.889491117662783e-06,
"loss": 0.7249,
"step": 1634
},
{
"epoch": 0.6036180162441546,
"grad_norm": 0.8891832828521729,
"learning_rate": 4.889348064040217e-06,
"loss": 0.7206,
"step": 1635
},
{
"epoch": 0.6039872015751907,
"grad_norm": 0.8497231602668762,
"learning_rate": 4.889204919981579e-06,
"loss": 0.6812,
"step": 1636
},
{
"epoch": 0.604356386906227,
"grad_norm": 0.872078537940979,
"learning_rate": 4.88906168549229e-06,
"loss": 0.7234,
"step": 1637
},
{
"epoch": 0.6047255722372631,
"grad_norm": 0.8662791848182678,
"learning_rate": 4.88891836057777e-06,
"loss": 0.7222,
"step": 1638
},
{
"epoch": 0.6050947575682993,
"grad_norm": 0.8954866528511047,
"learning_rate": 4.888774945243444e-06,
"loss": 0.7175,
"step": 1639
},
{
"epoch": 0.6054639428993355,
"grad_norm": 0.9128788709640503,
"learning_rate": 4.8886314394947396e-06,
"loss": 0.7185,
"step": 1640
},
{
"epoch": 0.6058331282303716,
"grad_norm": 0.9027896523475647,
"learning_rate": 4.888487843337089e-06,
"loss": 0.7559,
"step": 1641
},
{
"epoch": 0.6062023135614079,
"grad_norm": 0.868640124797821,
"learning_rate": 4.888344156775928e-06,
"loss": 0.7144,
"step": 1642
},
{
"epoch": 0.606571498892444,
"grad_norm": 0.9141109585762024,
"learning_rate": 4.888200379816695e-06,
"loss": 0.7381,
"step": 1643
},
{
"epoch": 0.6069406842234802,
"grad_norm": 0.875461757183075,
"learning_rate": 4.88805651246483e-06,
"loss": 0.7539,
"step": 1644
},
{
"epoch": 0.6073098695545164,
"grad_norm": 0.9165515303611755,
"learning_rate": 4.887912554725781e-06,
"loss": 0.7278,
"step": 1645
},
{
"epoch": 0.6076790548855525,
"grad_norm": 0.8719481825828552,
"learning_rate": 4.887768506604995e-06,
"loss": 0.7323,
"step": 1646
},
{
"epoch": 0.6080482402165888,
"grad_norm": 0.8756598830223083,
"learning_rate": 4.887624368107924e-06,
"loss": 0.7104,
"step": 1647
},
{
"epoch": 0.6084174255476249,
"grad_norm": 0.9028515219688416,
"learning_rate": 4.887480139240025e-06,
"loss": 0.731,
"step": 1648
},
{
"epoch": 0.608786610878661,
"grad_norm": 0.8773224353790283,
"learning_rate": 4.887335820006756e-06,
"loss": 0.7058,
"step": 1649
},
{
"epoch": 0.6091557962096973,
"grad_norm": 0.9026007652282715,
"learning_rate": 4.887191410413579e-06,
"loss": 0.6913,
"step": 1650
},
{
"epoch": 0.6095249815407334,
"grad_norm": 0.8898575305938721,
"learning_rate": 4.887046910465961e-06,
"loss": 0.7041,
"step": 1651
},
{
"epoch": 0.6098941668717697,
"grad_norm": 0.9111222624778748,
"learning_rate": 4.886902320169371e-06,
"loss": 0.7135,
"step": 1652
},
{
"epoch": 0.6102633522028058,
"grad_norm": 0.9058326482772827,
"learning_rate": 4.886757639529282e-06,
"loss": 0.6976,
"step": 1653
},
{
"epoch": 0.610632537533842,
"grad_norm": 0.8757637739181519,
"learning_rate": 4.886612868551168e-06,
"loss": 0.7507,
"step": 1654
},
{
"epoch": 0.6110017228648782,
"grad_norm": 0.910811722278595,
"learning_rate": 4.886468007240511e-06,
"loss": 0.757,
"step": 1655
},
{
"epoch": 0.6113709081959143,
"grad_norm": 0.897999107837677,
"learning_rate": 4.886323055602793e-06,
"loss": 0.7752,
"step": 1656
},
{
"epoch": 0.6117400935269506,
"grad_norm": 0.8987488746643066,
"learning_rate": 4.886178013643501e-06,
"loss": 0.7045,
"step": 1657
},
{
"epoch": 0.6121092788579867,
"grad_norm": 0.8889223337173462,
"learning_rate": 4.886032881368124e-06,
"loss": 0.6935,
"step": 1658
},
{
"epoch": 0.6124784641890229,
"grad_norm": 0.8837577700614929,
"learning_rate": 4.885887658782156e-06,
"loss": 0.6639,
"step": 1659
},
{
"epoch": 0.6128476495200591,
"grad_norm": 0.8779164552688599,
"learning_rate": 4.8857423458910925e-06,
"loss": 0.7181,
"step": 1660
},
{
"epoch": 0.6132168348510952,
"grad_norm": 0.9047713279724121,
"learning_rate": 4.885596942700434e-06,
"loss": 0.7417,
"step": 1661
},
{
"epoch": 0.6135860201821314,
"grad_norm": 0.8854183554649353,
"learning_rate": 4.885451449215685e-06,
"loss": 0.7511,
"step": 1662
},
{
"epoch": 0.6139552055131676,
"grad_norm": 0.9122277498245239,
"learning_rate": 4.88530586544235e-06,
"loss": 0.7472,
"step": 1663
},
{
"epoch": 0.6143243908442038,
"grad_norm": 0.8803077340126038,
"learning_rate": 4.885160191385942e-06,
"loss": 0.7052,
"step": 1664
},
{
"epoch": 0.61469357617524,
"grad_norm": 0.878976047039032,
"learning_rate": 4.885014427051973e-06,
"loss": 0.7416,
"step": 1665
},
{
"epoch": 0.6150627615062761,
"grad_norm": 0.8740445375442505,
"learning_rate": 4.884868572445961e-06,
"loss": 0.6892,
"step": 1666
},
{
"epoch": 0.6154319468373123,
"grad_norm": 0.8993122577667236,
"learning_rate": 4.884722627573426e-06,
"loss": 0.7153,
"step": 1667
},
{
"epoch": 0.6158011321683485,
"grad_norm": 0.8919605016708374,
"learning_rate": 4.884576592439893e-06,
"loss": 0.7189,
"step": 1668
},
{
"epoch": 0.6161703174993847,
"grad_norm": 0.8694654107093811,
"learning_rate": 4.884430467050887e-06,
"loss": 0.7068,
"step": 1669
},
{
"epoch": 0.6165395028304209,
"grad_norm": 0.8842293620109558,
"learning_rate": 4.884284251411941e-06,
"loss": 0.7464,
"step": 1670
},
{
"epoch": 0.616908688161457,
"grad_norm": 0.9038980007171631,
"learning_rate": 4.884137945528589e-06,
"loss": 0.7447,
"step": 1671
},
{
"epoch": 0.6172778734924932,
"grad_norm": 0.8944399356842041,
"learning_rate": 4.883991549406368e-06,
"loss": 0.7608,
"step": 1672
},
{
"epoch": 0.6176470588235294,
"grad_norm": 0.8438637256622314,
"learning_rate": 4.883845063050819e-06,
"loss": 0.7548,
"step": 1673
},
{
"epoch": 0.6180162441545656,
"grad_norm": 0.8841381669044495,
"learning_rate": 4.883698486467487e-06,
"loss": 0.7395,
"step": 1674
},
{
"epoch": 0.6183854294856018,
"grad_norm": 0.8783007860183716,
"learning_rate": 4.883551819661919e-06,
"loss": 0.7002,
"step": 1675
},
{
"epoch": 0.618754614816638,
"grad_norm": 0.8673411011695862,
"learning_rate": 4.883405062639668e-06,
"loss": 0.6778,
"step": 1676
},
{
"epoch": 0.6191238001476741,
"grad_norm": 0.9229005575180054,
"learning_rate": 4.883258215406287e-06,
"loss": 0.7444,
"step": 1677
},
{
"epoch": 0.6194929854787103,
"grad_norm": 0.8770948052406311,
"learning_rate": 4.883111277967334e-06,
"loss": 0.7174,
"step": 1678
},
{
"epoch": 0.6198621708097465,
"grad_norm": 0.8880107998847961,
"learning_rate": 4.882964250328373e-06,
"loss": 0.7353,
"step": 1679
},
{
"epoch": 0.6202313561407826,
"grad_norm": 0.8703299760818481,
"learning_rate": 4.882817132494966e-06,
"loss": 0.7469,
"step": 1680
},
{
"epoch": 0.6206005414718189,
"grad_norm": 0.8967667818069458,
"learning_rate": 4.882669924472682e-06,
"loss": 0.702,
"step": 1681
},
{
"epoch": 0.620969726802855,
"grad_norm": 0.9066647291183472,
"learning_rate": 4.882522626267094e-06,
"loss": 0.6851,
"step": 1682
},
{
"epoch": 0.6213389121338913,
"grad_norm": 0.8500033020973206,
"learning_rate": 4.882375237883777e-06,
"loss": 0.74,
"step": 1683
},
{
"epoch": 0.6217080974649274,
"grad_norm": 0.9138725996017456,
"learning_rate": 4.882227759328308e-06,
"loss": 0.7305,
"step": 1684
},
{
"epoch": 0.6220772827959635,
"grad_norm": 0.8820671439170837,
"learning_rate": 4.882080190606271e-06,
"loss": 0.6959,
"step": 1685
},
{
"epoch": 0.6224464681269998,
"grad_norm": 0.8790507316589355,
"learning_rate": 4.881932531723251e-06,
"loss": 0.7276,
"step": 1686
},
{
"epoch": 0.6228156534580359,
"grad_norm": 0.880133330821991,
"learning_rate": 4.881784782684835e-06,
"loss": 0.7213,
"step": 1687
},
{
"epoch": 0.6231848387890722,
"grad_norm": 0.9163568615913391,
"learning_rate": 4.881636943496618e-06,
"loss": 0.7214,
"step": 1688
},
{
"epoch": 0.6235540241201083,
"grad_norm": 0.8676769733428955,
"learning_rate": 4.881489014164194e-06,
"loss": 0.7168,
"step": 1689
},
{
"epoch": 0.6239232094511444,
"grad_norm": 0.9037620425224304,
"learning_rate": 4.881340994693162e-06,
"loss": 0.7092,
"step": 1690
},
{
"epoch": 0.6242923947821807,
"grad_norm": 0.8854486346244812,
"learning_rate": 4.881192885089125e-06,
"loss": 0.6896,
"step": 1691
},
{
"epoch": 0.6246615801132168,
"grad_norm": 0.904160737991333,
"learning_rate": 4.88104468535769e-06,
"loss": 0.7349,
"step": 1692
},
{
"epoch": 0.6250307654442531,
"grad_norm": 0.8994008898735046,
"learning_rate": 4.880896395504464e-06,
"loss": 0.7331,
"step": 1693
},
{
"epoch": 0.6253999507752892,
"grad_norm": 0.8491187691688538,
"learning_rate": 4.8807480155350605e-06,
"loss": 0.7185,
"step": 1694
},
{
"epoch": 0.6257691361063253,
"grad_norm": 0.8684033155441284,
"learning_rate": 4.880599545455097e-06,
"loss": 0.7089,
"step": 1695
},
{
"epoch": 0.6261383214373616,
"grad_norm": 0.9083407521247864,
"learning_rate": 4.880450985270191e-06,
"loss": 0.7018,
"step": 1696
},
{
"epoch": 0.6265075067683977,
"grad_norm": 0.9744650721549988,
"learning_rate": 4.880302334985967e-06,
"loss": 0.7175,
"step": 1697
},
{
"epoch": 0.6268766920994339,
"grad_norm": 0.9108169078826904,
"learning_rate": 4.880153594608051e-06,
"loss": 0.7127,
"step": 1698
},
{
"epoch": 0.6272458774304701,
"grad_norm": 0.8855581879615784,
"learning_rate": 4.880004764142073e-06,
"loss": 0.7121,
"step": 1699
},
{
"epoch": 0.6276150627615062,
"grad_norm": 0.8942852020263672,
"learning_rate": 4.879855843593665e-06,
"loss": 0.6918,
"step": 1700
},
{
"epoch": 0.6279842480925425,
"grad_norm": 0.9338670969009399,
"learning_rate": 4.879706832968465e-06,
"loss": 0.7297,
"step": 1701
},
{
"epoch": 0.6283534334235786,
"grad_norm": 0.8706702589988708,
"learning_rate": 4.879557732272112e-06,
"loss": 0.6909,
"step": 1702
},
{
"epoch": 0.6287226187546148,
"grad_norm": 0.8873736262321472,
"learning_rate": 4.87940854151025e-06,
"loss": 0.7814,
"step": 1703
},
{
"epoch": 0.629091804085651,
"grad_norm": 0.8286136984825134,
"learning_rate": 4.879259260688526e-06,
"loss": 0.6852,
"step": 1704
},
{
"epoch": 0.6294609894166872,
"grad_norm": 0.8823638558387756,
"learning_rate": 4.879109889812589e-06,
"loss": 0.7283,
"step": 1705
},
{
"epoch": 0.6298301747477234,
"grad_norm": 0.8702864646911621,
"learning_rate": 4.878960428888094e-06,
"loss": 0.7124,
"step": 1706
},
{
"epoch": 0.6301993600787595,
"grad_norm": 0.877116858959198,
"learning_rate": 4.878810877920698e-06,
"loss": 0.734,
"step": 1707
},
{
"epoch": 0.6305685454097957,
"grad_norm": 0.8908865451812744,
"learning_rate": 4.878661236916061e-06,
"loss": 0.7197,
"step": 1708
},
{
"epoch": 0.6309377307408319,
"grad_norm": 0.8861148357391357,
"learning_rate": 4.878511505879846e-06,
"loss": 0.6981,
"step": 1709
},
{
"epoch": 0.6313069160718681,
"grad_norm": 0.8963793516159058,
"learning_rate": 4.8783616848177215e-06,
"loss": 0.7136,
"step": 1710
},
{
"epoch": 0.6316761014029043,
"grad_norm": 0.9019988775253296,
"learning_rate": 4.878211773735359e-06,
"loss": 0.7063,
"step": 1711
},
{
"epoch": 0.6320452867339404,
"grad_norm": 0.9044898748397827,
"learning_rate": 4.8780617726384305e-06,
"loss": 0.7491,
"step": 1712
},
{
"epoch": 0.6324144720649766,
"grad_norm": 0.8985450863838196,
"learning_rate": 4.877911681532614e-06,
"loss": 0.6868,
"step": 1713
},
{
"epoch": 0.6327836573960128,
"grad_norm": 0.931446373462677,
"learning_rate": 4.877761500423591e-06,
"loss": 0.7363,
"step": 1714
},
{
"epoch": 0.633152842727049,
"grad_norm": 0.916556715965271,
"learning_rate": 4.877611229317047e-06,
"loss": 0.773,
"step": 1715
},
{
"epoch": 0.6335220280580851,
"grad_norm": 0.8990119695663452,
"learning_rate": 4.877460868218667e-06,
"loss": 0.6959,
"step": 1716
},
{
"epoch": 0.6338912133891214,
"grad_norm": 0.8884509205818176,
"learning_rate": 4.877310417134144e-06,
"loss": 0.7187,
"step": 1717
},
{
"epoch": 0.6342603987201575,
"grad_norm": 0.8993596434593201,
"learning_rate": 4.8771598760691715e-06,
"loss": 0.7331,
"step": 1718
},
{
"epoch": 0.6346295840511937,
"grad_norm": 0.8999956846237183,
"learning_rate": 4.877009245029448e-06,
"loss": 0.7349,
"step": 1719
},
{
"epoch": 0.6349987693822299,
"grad_norm": 0.8785694241523743,
"learning_rate": 4.876858524020675e-06,
"loss": 0.689,
"step": 1720
},
{
"epoch": 0.635367954713266,
"grad_norm": 0.8470606803894043,
"learning_rate": 4.876707713048558e-06,
"loss": 0.676,
"step": 1721
},
{
"epoch": 0.6357371400443023,
"grad_norm": 0.8418689966201782,
"learning_rate": 4.876556812118802e-06,
"loss": 0.709,
"step": 1722
},
{
"epoch": 0.6361063253753384,
"grad_norm": 0.8840335011482239,
"learning_rate": 4.876405821237122e-06,
"loss": 0.7133,
"step": 1723
},
{
"epoch": 0.6364755107063746,
"grad_norm": 0.8696883916854858,
"learning_rate": 4.876254740409232e-06,
"loss": 0.6845,
"step": 1724
},
{
"epoch": 0.6368446960374108,
"grad_norm": 0.8984381556510925,
"learning_rate": 4.876103569640849e-06,
"loss": 0.7111,
"step": 1725
},
{
"epoch": 0.6372138813684469,
"grad_norm": 0.8952850699424744,
"learning_rate": 4.875952308937697e-06,
"loss": 0.7383,
"step": 1726
},
{
"epoch": 0.6375830666994832,
"grad_norm": 0.8560416102409363,
"learning_rate": 4.875800958305499e-06,
"loss": 0.6988,
"step": 1727
},
{
"epoch": 0.6379522520305193,
"grad_norm": 0.8600884079933167,
"learning_rate": 4.875649517749985e-06,
"loss": 0.6895,
"step": 1728
},
{
"epoch": 0.6383214373615556,
"grad_norm": 0.9282815456390381,
"learning_rate": 4.875497987276886e-06,
"loss": 0.7253,
"step": 1729
},
{
"epoch": 0.6386906226925917,
"grad_norm": 0.8954489827156067,
"learning_rate": 4.875346366891939e-06,
"loss": 0.7638,
"step": 1730
},
{
"epoch": 0.6390598080236278,
"grad_norm": 0.8884443640708923,
"learning_rate": 4.875194656600881e-06,
"loss": 0.7284,
"step": 1731
},
{
"epoch": 0.6394289933546641,
"grad_norm": 0.8890431523323059,
"learning_rate": 4.875042856409454e-06,
"loss": 0.7393,
"step": 1732
},
{
"epoch": 0.6397981786857002,
"grad_norm": 0.8974068760871887,
"learning_rate": 4.874890966323406e-06,
"loss": 0.7329,
"step": 1733
},
{
"epoch": 0.6401673640167364,
"grad_norm": 0.8659381866455078,
"learning_rate": 4.874738986348484e-06,
"loss": 0.7035,
"step": 1734
},
{
"epoch": 0.6405365493477726,
"grad_norm": 0.8336740732192993,
"learning_rate": 4.87458691649044e-06,
"loss": 0.6696,
"step": 1735
},
{
"epoch": 0.6409057346788087,
"grad_norm": 0.8795167207717896,
"learning_rate": 4.874434756755032e-06,
"loss": 0.7289,
"step": 1736
},
{
"epoch": 0.641274920009845,
"grad_norm": 0.8558307886123657,
"learning_rate": 4.874282507148017e-06,
"loss": 0.7214,
"step": 1737
},
{
"epoch": 0.6416441053408811,
"grad_norm": 0.8652957677841187,
"learning_rate": 4.8741301676751584e-06,
"loss": 0.7629,
"step": 1738
},
{
"epoch": 0.6420132906719173,
"grad_norm": 0.8892576098442078,
"learning_rate": 4.873977738342222e-06,
"loss": 0.7168,
"step": 1739
},
{
"epoch": 0.6423824760029535,
"grad_norm": 0.8710838556289673,
"learning_rate": 4.873825219154978e-06,
"loss": 0.6837,
"step": 1740
},
{
"epoch": 0.6427516613339896,
"grad_norm": 0.8877007365226746,
"learning_rate": 4.873672610119199e-06,
"loss": 0.6765,
"step": 1741
},
{
"epoch": 0.6431208466650259,
"grad_norm": 0.8830922842025757,
"learning_rate": 4.87351991124066e-06,
"loss": 0.6797,
"step": 1742
},
{
"epoch": 0.643490031996062,
"grad_norm": 0.8394154906272888,
"learning_rate": 4.873367122525142e-06,
"loss": 0.666,
"step": 1743
},
{
"epoch": 0.6438592173270982,
"grad_norm": 0.9082286953926086,
"learning_rate": 4.873214243978427e-06,
"loss": 0.6855,
"step": 1744
},
{
"epoch": 0.6442284026581344,
"grad_norm": 0.869299054145813,
"learning_rate": 4.873061275606302e-06,
"loss": 0.7026,
"step": 1745
},
{
"epoch": 0.6445975879891706,
"grad_norm": 0.8861920833587646,
"learning_rate": 4.872908217414557e-06,
"loss": 0.7525,
"step": 1746
},
{
"epoch": 0.6449667733202068,
"grad_norm": 0.9306628704071045,
"learning_rate": 4.8727550694089845e-06,
"loss": 0.7314,
"step": 1747
},
{
"epoch": 0.6453359586512429,
"grad_norm": 0.8352028131484985,
"learning_rate": 4.872601831595381e-06,
"loss": 0.6876,
"step": 1748
},
{
"epoch": 0.6457051439822791,
"grad_norm": 0.91121506690979,
"learning_rate": 4.872448503979548e-06,
"loss": 0.6885,
"step": 1749
},
{
"epoch": 0.6460743293133153,
"grad_norm": 0.9285972714424133,
"learning_rate": 4.872295086567288e-06,
"loss": 0.7105,
"step": 1750
},
{
"epoch": 0.6464435146443515,
"grad_norm": 0.914553701877594,
"learning_rate": 4.872141579364407e-06,
"loss": 0.7032,
"step": 1751
},
{
"epoch": 0.6468126999753876,
"grad_norm": 0.8394815325737,
"learning_rate": 4.871987982376716e-06,
"loss": 0.6784,
"step": 1752
},
{
"epoch": 0.6471818853064238,
"grad_norm": 0.8877943754196167,
"learning_rate": 4.871834295610028e-06,
"loss": 0.7183,
"step": 1753
},
{
"epoch": 0.64755107063746,
"grad_norm": 0.8248438239097595,
"learning_rate": 4.871680519070162e-06,
"loss": 0.6573,
"step": 1754
},
{
"epoch": 0.6479202559684962,
"grad_norm": 0.88385409116745,
"learning_rate": 4.871526652762936e-06,
"loss": 0.7206,
"step": 1755
},
{
"epoch": 0.6482894412995324,
"grad_norm": 0.8734132647514343,
"learning_rate": 4.8713726966941745e-06,
"loss": 0.7712,
"step": 1756
},
{
"epoch": 0.6486586266305685,
"grad_norm": 0.8956114649772644,
"learning_rate": 4.871218650869704e-06,
"loss": 0.7221,
"step": 1757
},
{
"epoch": 0.6490278119616048,
"grad_norm": 0.9125884771347046,
"learning_rate": 4.871064515295357e-06,
"loss": 0.7374,
"step": 1758
},
{
"epoch": 0.6493969972926409,
"grad_norm": 0.8915518522262573,
"learning_rate": 4.870910289976967e-06,
"loss": 0.7126,
"step": 1759
},
{
"epoch": 0.6497661826236771,
"grad_norm": 0.9576418995857239,
"learning_rate": 4.870755974920369e-06,
"loss": 0.7538,
"step": 1760
},
{
"epoch": 0.6501353679547133,
"grad_norm": 0.8745632171630859,
"learning_rate": 4.870601570131407e-06,
"loss": 0.6837,
"step": 1761
},
{
"epoch": 0.6505045532857494,
"grad_norm": 0.9216246604919434,
"learning_rate": 4.870447075615923e-06,
"loss": 0.7076,
"step": 1762
},
{
"epoch": 0.6508737386167857,
"grad_norm": 0.8735246658325195,
"learning_rate": 4.870292491379765e-06,
"loss": 0.6905,
"step": 1763
},
{
"epoch": 0.6512429239478218,
"grad_norm": 0.8984786868095398,
"learning_rate": 4.870137817428786e-06,
"loss": 0.6877,
"step": 1764
},
{
"epoch": 0.6516121092788579,
"grad_norm": 0.8973082304000854,
"learning_rate": 4.869983053768838e-06,
"loss": 0.715,
"step": 1765
},
{
"epoch": 0.6519812946098942,
"grad_norm": 0.8570342063903809,
"learning_rate": 4.869828200405778e-06,
"loss": 0.6751,
"step": 1766
},
{
"epoch": 0.6523504799409303,
"grad_norm": 0.8807101845741272,
"learning_rate": 4.86967325734547e-06,
"loss": 0.7177,
"step": 1767
},
{
"epoch": 0.6527196652719666,
"grad_norm": 0.8817850947380066,
"learning_rate": 4.869518224593777e-06,
"loss": 0.7227,
"step": 1768
},
{
"epoch": 0.6530888506030027,
"grad_norm": 0.9274417161941528,
"learning_rate": 4.869363102156566e-06,
"loss": 0.7644,
"step": 1769
},
{
"epoch": 0.6534580359340388,
"grad_norm": 0.8698463439941406,
"learning_rate": 4.86920789003971e-06,
"loss": 0.69,
"step": 1770
},
{
"epoch": 0.6538272212650751,
"grad_norm": 0.9024192690849304,
"learning_rate": 4.869052588249083e-06,
"loss": 0.6817,
"step": 1771
},
{
"epoch": 0.6541964065961112,
"grad_norm": 0.8906611800193787,
"learning_rate": 4.868897196790563e-06,
"loss": 0.7094,
"step": 1772
},
{
"epoch": 0.6545655919271475,
"grad_norm": 0.9051023125648499,
"learning_rate": 4.868741715670032e-06,
"loss": 0.7047,
"step": 1773
},
{
"epoch": 0.6549347772581836,
"grad_norm": 0.8683571219444275,
"learning_rate": 4.868586144893375e-06,
"loss": 0.6969,
"step": 1774
},
{
"epoch": 0.6553039625892197,
"grad_norm": 0.8832207322120667,
"learning_rate": 4.8684304844664796e-06,
"loss": 0.7062,
"step": 1775
},
{
"epoch": 0.655673147920256,
"grad_norm": 0.8671314716339111,
"learning_rate": 4.868274734395238e-06,
"loss": 0.7214,
"step": 1776
},
{
"epoch": 0.6560423332512921,
"grad_norm": 0.9176437258720398,
"learning_rate": 4.8681188946855454e-06,
"loss": 0.7633,
"step": 1777
},
{
"epoch": 0.6564115185823284,
"grad_norm": 0.9051275849342346,
"learning_rate": 4.867962965343299e-06,
"loss": 0.7361,
"step": 1778
},
{
"epoch": 0.6567807039133645,
"grad_norm": 0.8660034537315369,
"learning_rate": 4.867806946374403e-06,
"loss": 0.6905,
"step": 1779
},
{
"epoch": 0.6571498892444007,
"grad_norm": 0.8938033580780029,
"learning_rate": 4.86765083778476e-06,
"loss": 0.7329,
"step": 1780
},
{
"epoch": 0.6575190745754369,
"grad_norm": 0.8933357000350952,
"learning_rate": 4.867494639580281e-06,
"loss": 0.7153,
"step": 1781
},
{
"epoch": 0.657888259906473,
"grad_norm": 0.8628551363945007,
"learning_rate": 4.867338351766877e-06,
"loss": 0.7198,
"step": 1782
},
{
"epoch": 0.6582574452375092,
"grad_norm": 0.8424699306488037,
"learning_rate": 4.867181974350463e-06,
"loss": 0.6751,
"step": 1783
},
{
"epoch": 0.6586266305685454,
"grad_norm": 0.8781000375747681,
"learning_rate": 4.867025507336959e-06,
"loss": 0.74,
"step": 1784
},
{
"epoch": 0.6589958158995816,
"grad_norm": 0.8817055821418762,
"learning_rate": 4.866868950732286e-06,
"loss": 0.6806,
"step": 1785
},
{
"epoch": 0.6593650012306178,
"grad_norm": 0.89399254322052,
"learning_rate": 4.8667123045423705e-06,
"loss": 0.7123,
"step": 1786
},
{
"epoch": 0.659734186561654,
"grad_norm": 0.8942681550979614,
"learning_rate": 4.866555568773141e-06,
"loss": 0.7426,
"step": 1787
},
{
"epoch": 0.6601033718926901,
"grad_norm": 0.8757315278053284,
"learning_rate": 4.866398743430531e-06,
"loss": 0.7476,
"step": 1788
},
{
"epoch": 0.6604725572237263,
"grad_norm": 0.8564402461051941,
"learning_rate": 4.866241828520475e-06,
"loss": 0.6926,
"step": 1789
},
{
"epoch": 0.6608417425547625,
"grad_norm": 0.8761123418807983,
"learning_rate": 4.866084824048913e-06,
"loss": 0.7061,
"step": 1790
},
{
"epoch": 0.6612109278857987,
"grad_norm": 0.8826016783714294,
"learning_rate": 4.8659277300217856e-06,
"loss": 0.6939,
"step": 1791
},
{
"epoch": 0.6615801132168349,
"grad_norm": 0.8721649646759033,
"learning_rate": 4.865770546445041e-06,
"loss": 0.727,
"step": 1792
},
{
"epoch": 0.661949298547871,
"grad_norm": 0.91274493932724,
"learning_rate": 4.865613273324629e-06,
"loss": 0.7393,
"step": 1793
},
{
"epoch": 0.6623184838789072,
"grad_norm": 0.8830939531326294,
"learning_rate": 4.8654559106665e-06,
"loss": 0.687,
"step": 1794
},
{
"epoch": 0.6626876692099434,
"grad_norm": 0.8717511892318726,
"learning_rate": 4.865298458476612e-06,
"loss": 0.6921,
"step": 1795
},
{
"epoch": 0.6630568545409796,
"grad_norm": 0.8554193377494812,
"learning_rate": 4.865140916760923e-06,
"loss": 0.6921,
"step": 1796
},
{
"epoch": 0.6634260398720158,
"grad_norm": 0.8773930668830872,
"learning_rate": 4.864983285525397e-06,
"loss": 0.7317,
"step": 1797
},
{
"epoch": 0.6637952252030519,
"grad_norm": 0.8963366150856018,
"learning_rate": 4.864825564776e-06,
"loss": 0.7034,
"step": 1798
},
{
"epoch": 0.6641644105340881,
"grad_norm": 0.8847402334213257,
"learning_rate": 4.864667754518702e-06,
"loss": 0.7242,
"step": 1799
},
{
"epoch": 0.6645335958651243,
"grad_norm": 0.8819407224655151,
"learning_rate": 4.864509854759476e-06,
"loss": 0.7329,
"step": 1800
},
{
"epoch": 0.6649027811961604,
"grad_norm": 0.8914307951927185,
"learning_rate": 4.864351865504298e-06,
"loss": 0.7128,
"step": 1801
},
{
"epoch": 0.6652719665271967,
"grad_norm": 0.8843200206756592,
"learning_rate": 4.864193786759148e-06,
"loss": 0.7198,
"step": 1802
},
{
"epoch": 0.6656411518582328,
"grad_norm": 0.8661046028137207,
"learning_rate": 4.8640356185300094e-06,
"loss": 0.7281,
"step": 1803
},
{
"epoch": 0.666010337189269,
"grad_norm": 0.8948192596435547,
"learning_rate": 4.863877360822869e-06,
"loss": 0.7508,
"step": 1804
},
{
"epoch": 0.6663795225203052,
"grad_norm": 0.8964792490005493,
"learning_rate": 4.863719013643716e-06,
"loss": 0.7193,
"step": 1805
},
{
"epoch": 0.6667487078513413,
"grad_norm": 0.8771764039993286,
"learning_rate": 4.863560576998545e-06,
"loss": 0.7127,
"step": 1806
},
{
"epoch": 0.6671178931823776,
"grad_norm": 0.8624410033226013,
"learning_rate": 4.8634020508933524e-06,
"loss": 0.6808,
"step": 1807
},
{
"epoch": 0.6674870785134137,
"grad_norm": 0.901315450668335,
"learning_rate": 4.863243435334137e-06,
"loss": 0.6907,
"step": 1808
},
{
"epoch": 0.66785626384445,
"grad_norm": 0.8859581351280212,
"learning_rate": 4.8630847303269034e-06,
"loss": 0.7351,
"step": 1809
},
{
"epoch": 0.6682254491754861,
"grad_norm": 0.9065508842468262,
"learning_rate": 4.862925935877659e-06,
"loss": 0.7049,
"step": 1810
},
{
"epoch": 0.6685946345065222,
"grad_norm": 0.8637186288833618,
"learning_rate": 4.8627670519924146e-06,
"loss": 0.702,
"step": 1811
},
{
"epoch": 0.6689638198375585,
"grad_norm": 0.8939265012741089,
"learning_rate": 4.862608078677181e-06,
"loss": 0.7209,
"step": 1812
},
{
"epoch": 0.6693330051685946,
"grad_norm": 0.9074069857597351,
"learning_rate": 4.862449015937979e-06,
"loss": 0.7215,
"step": 1813
},
{
"epoch": 0.6697021904996309,
"grad_norm": 0.898177444934845,
"learning_rate": 4.8622898637808265e-06,
"loss": 0.7341,
"step": 1814
},
{
"epoch": 0.670071375830667,
"grad_norm": 0.8927603960037231,
"learning_rate": 4.862130622211749e-06,
"loss": 0.7372,
"step": 1815
},
{
"epoch": 0.6704405611617031,
"grad_norm": 0.9042506814002991,
"learning_rate": 4.861971291236772e-06,
"loss": 0.6699,
"step": 1816
},
{
"epoch": 0.6708097464927394,
"grad_norm": 0.867923378944397,
"learning_rate": 4.861811870861928e-06,
"loss": 0.7065,
"step": 1817
},
{
"epoch": 0.6711789318237755,
"grad_norm": 0.8792146444320679,
"learning_rate": 4.861652361093249e-06,
"loss": 0.7359,
"step": 1818
},
{
"epoch": 0.6715481171548117,
"grad_norm": 0.8699401021003723,
"learning_rate": 4.861492761936774e-06,
"loss": 0.73,
"step": 1819
},
{
"epoch": 0.6719173024858479,
"grad_norm": 0.9019505381584167,
"learning_rate": 4.861333073398543e-06,
"loss": 0.7142,
"step": 1820
},
{
"epoch": 0.672286487816884,
"grad_norm": 0.8983092904090881,
"learning_rate": 4.8611732954846015e-06,
"loss": 0.7232,
"step": 1821
},
{
"epoch": 0.6726556731479203,
"grad_norm": 0.8775074481964111,
"learning_rate": 4.861013428200995e-06,
"loss": 0.7051,
"step": 1822
},
{
"epoch": 0.6730248584789564,
"grad_norm": 0.8952973484992981,
"learning_rate": 4.8608534715537755e-06,
"loss": 0.7544,
"step": 1823
},
{
"epoch": 0.6733940438099926,
"grad_norm": 0.857286274433136,
"learning_rate": 4.860693425548997e-06,
"loss": 0.6949,
"step": 1824
},
{
"epoch": 0.6737632291410288,
"grad_norm": 0.850751519203186,
"learning_rate": 4.8605332901927175e-06,
"loss": 0.7082,
"step": 1825
},
{
"epoch": 0.674132414472065,
"grad_norm": 0.872599720954895,
"learning_rate": 4.860373065490998e-06,
"loss": 0.7073,
"step": 1826
},
{
"epoch": 0.6745015998031012,
"grad_norm": 0.9076393842697144,
"learning_rate": 4.860212751449903e-06,
"loss": 0.6977,
"step": 1827
},
{
"epoch": 0.6748707851341373,
"grad_norm": 0.8853808045387268,
"learning_rate": 4.8600523480755e-06,
"loss": 0.6811,
"step": 1828
},
{
"epoch": 0.6752399704651735,
"grad_norm": 0.8611722588539124,
"learning_rate": 4.859891855373861e-06,
"loss": 0.7209,
"step": 1829
},
{
"epoch": 0.6756091557962097,
"grad_norm": 0.8485976457595825,
"learning_rate": 4.85973127335106e-06,
"loss": 0.6629,
"step": 1830
},
{
"epoch": 0.6759783411272459,
"grad_norm": 0.875453770160675,
"learning_rate": 4.859570602013175e-06,
"loss": 0.7349,
"step": 1831
},
{
"epoch": 0.6763475264582821,
"grad_norm": 0.8506700992584229,
"learning_rate": 4.859409841366287e-06,
"loss": 0.6818,
"step": 1832
},
{
"epoch": 0.6767167117893182,
"grad_norm": 0.8715220093727112,
"learning_rate": 4.859248991416481e-06,
"loss": 0.7189,
"step": 1833
},
{
"epoch": 0.6770858971203544,
"grad_norm": 0.9876884818077087,
"learning_rate": 4.859088052169845e-06,
"loss": 0.7445,
"step": 1834
},
{
"epoch": 0.6774550824513906,
"grad_norm": 0.8883238434791565,
"learning_rate": 4.858927023632472e-06,
"loss": 0.7338,
"step": 1835
},
{
"epoch": 0.6778242677824268,
"grad_norm": 0.8532505035400391,
"learning_rate": 4.858765905810455e-06,
"loss": 0.6844,
"step": 1836
},
{
"epoch": 0.6781934531134629,
"grad_norm": 0.8952226042747498,
"learning_rate": 4.8586046987098935e-06,
"loss": 0.6937,
"step": 1837
},
{
"epoch": 0.6785626384444992,
"grad_norm": 0.8690524101257324,
"learning_rate": 4.858443402336888e-06,
"loss": 0.7029,
"step": 1838
},
{
"epoch": 0.6789318237755353,
"grad_norm": 0.8886831998825073,
"learning_rate": 4.858282016697544e-06,
"loss": 0.728,
"step": 1839
},
{
"epoch": 0.6793010091065715,
"grad_norm": 0.9667893052101135,
"learning_rate": 4.85812054179797e-06,
"loss": 0.7331,
"step": 1840
},
{
"epoch": 0.6796701944376077,
"grad_norm": 0.8873503804206848,
"learning_rate": 4.857958977644278e-06,
"loss": 0.7073,
"step": 1841
},
{
"epoch": 0.6800393797686438,
"grad_norm": 0.8646997213363647,
"learning_rate": 4.857797324242582e-06,
"loss": 0.6857,
"step": 1842
},
{
"epoch": 0.6804085650996801,
"grad_norm": 0.8797730207443237,
"learning_rate": 4.857635581599003e-06,
"loss": 0.7488,
"step": 1843
},
{
"epoch": 0.6807777504307162,
"grad_norm": 0.8577935695648193,
"learning_rate": 4.85747374971966e-06,
"loss": 0.7206,
"step": 1844
},
{
"epoch": 0.6811469357617524,
"grad_norm": 0.871494472026825,
"learning_rate": 4.8573118286106805e-06,
"loss": 0.707,
"step": 1845
},
{
"epoch": 0.6815161210927886,
"grad_norm": 0.8755682110786438,
"learning_rate": 4.857149818278192e-06,
"loss": 0.7693,
"step": 1846
},
{
"epoch": 0.6818853064238247,
"grad_norm": 0.8881521821022034,
"learning_rate": 4.8569877187283255e-06,
"loss": 0.7216,
"step": 1847
},
{
"epoch": 0.682254491754861,
"grad_norm": 0.8813753724098206,
"learning_rate": 4.856825529967219e-06,
"loss": 0.6836,
"step": 1848
},
{
"epoch": 0.6826236770858971,
"grad_norm": 0.8883729577064514,
"learning_rate": 4.85666325200101e-06,
"loss": 0.6993,
"step": 1849
},
{
"epoch": 0.6829928624169334,
"grad_norm": 0.9154224395751953,
"learning_rate": 4.8565008848358405e-06,
"loss": 0.7361,
"step": 1850
},
{
"epoch": 0.6833620477479695,
"grad_norm": 0.8938775062561035,
"learning_rate": 4.856338428477856e-06,
"loss": 0.7191,
"step": 1851
},
{
"epoch": 0.6837312330790056,
"grad_norm": 0.8818868398666382,
"learning_rate": 4.8561758829332064e-06,
"loss": 0.687,
"step": 1852
},
{
"epoch": 0.6841004184100419,
"grad_norm": 0.9043888449668884,
"learning_rate": 4.856013248208043e-06,
"loss": 0.7375,
"step": 1853
},
{
"epoch": 0.684469603741078,
"grad_norm": 0.8634012341499329,
"learning_rate": 4.855850524308521e-06,
"loss": 0.7032,
"step": 1854
},
{
"epoch": 0.6848387890721142,
"grad_norm": 0.8524016737937927,
"learning_rate": 4.8556877112408e-06,
"loss": 0.6875,
"step": 1855
},
{
"epoch": 0.6852079744031504,
"grad_norm": 0.8432945013046265,
"learning_rate": 4.855524809011043e-06,
"loss": 0.6705,
"step": 1856
},
{
"epoch": 0.6855771597341865,
"grad_norm": 0.8706541657447815,
"learning_rate": 4.855361817625416e-06,
"loss": 0.7322,
"step": 1857
},
{
"epoch": 0.6859463450652228,
"grad_norm": 0.8889358043670654,
"learning_rate": 4.8551987370900875e-06,
"loss": 0.7295,
"step": 1858
},
{
"epoch": 0.6863155303962589,
"grad_norm": 0.8888987302780151,
"learning_rate": 4.8550355674112295e-06,
"loss": 0.7163,
"step": 1859
},
{
"epoch": 0.6866847157272951,
"grad_norm": 0.8585571646690369,
"learning_rate": 4.854872308595019e-06,
"loss": 0.7041,
"step": 1860
},
{
"epoch": 0.6870539010583313,
"grad_norm": 0.872947096824646,
"learning_rate": 4.8547089606476335e-06,
"loss": 0.7142,
"step": 1861
},
{
"epoch": 0.6874230863893674,
"grad_norm": 0.9010311961174011,
"learning_rate": 4.854545523575259e-06,
"loss": 0.729,
"step": 1862
},
{
"epoch": 0.6877922717204037,
"grad_norm": 0.8814377784729004,
"learning_rate": 4.854381997384079e-06,
"loss": 0.6988,
"step": 1863
},
{
"epoch": 0.6881614570514398,
"grad_norm": 0.8774176239967346,
"learning_rate": 4.854218382080283e-06,
"loss": 0.7103,
"step": 1864
},
{
"epoch": 0.688530642382476,
"grad_norm": 0.8719117641448975,
"learning_rate": 4.854054677670064e-06,
"loss": 0.7084,
"step": 1865
},
{
"epoch": 0.6888998277135122,
"grad_norm": 0.874610424041748,
"learning_rate": 4.853890884159619e-06,
"loss": 0.6931,
"step": 1866
},
{
"epoch": 0.6892690130445484,
"grad_norm": 0.9094902276992798,
"learning_rate": 4.853727001555146e-06,
"loss": 0.7156,
"step": 1867
},
{
"epoch": 0.6896381983755845,
"grad_norm": 0.8830024003982544,
"learning_rate": 4.85356302986285e-06,
"loss": 0.7119,
"step": 1868
},
{
"epoch": 0.6900073837066207,
"grad_norm": 0.9095394611358643,
"learning_rate": 4.853398969088936e-06,
"loss": 0.7219,
"step": 1869
},
{
"epoch": 0.6903765690376569,
"grad_norm": 0.8986232280731201,
"learning_rate": 4.853234819239613e-06,
"loss": 0.71,
"step": 1870
},
{
"epoch": 0.6907457543686931,
"grad_norm": 0.8806168437004089,
"learning_rate": 4.8530705803210955e-06,
"loss": 0.6731,
"step": 1871
},
{
"epoch": 0.6911149396997293,
"grad_norm": 0.9023184180259705,
"learning_rate": 4.852906252339598e-06,
"loss": 0.7086,
"step": 1872
},
{
"epoch": 0.6914841250307654,
"grad_norm": 0.8981339931488037,
"learning_rate": 4.852741835301343e-06,
"loss": 0.7037,
"step": 1873
},
{
"epoch": 0.6918533103618016,
"grad_norm": 0.9292305111885071,
"learning_rate": 4.852577329212551e-06,
"loss": 0.7096,
"step": 1874
},
{
"epoch": 0.6922224956928378,
"grad_norm": 0.903359591960907,
"learning_rate": 4.85241273407945e-06,
"loss": 0.6792,
"step": 1875
},
{
"epoch": 0.692591681023874,
"grad_norm": 0.893671452999115,
"learning_rate": 4.85224804990827e-06,
"loss": 0.7367,
"step": 1876
},
{
"epoch": 0.6929608663549102,
"grad_norm": 0.9473923444747925,
"learning_rate": 4.852083276705243e-06,
"loss": 0.6995,
"step": 1877
},
{
"epoch": 0.6933300516859463,
"grad_norm": 0.9057194590568542,
"learning_rate": 4.851918414476606e-06,
"loss": 0.7498,
"step": 1878
},
{
"epoch": 0.6936992370169826,
"grad_norm": 0.8625937700271606,
"learning_rate": 4.8517534632286e-06,
"loss": 0.6826,
"step": 1879
},
{
"epoch": 0.6940684223480187,
"grad_norm": 0.8820124864578247,
"learning_rate": 4.851588422967467e-06,
"loss": 0.7204,
"step": 1880
},
{
"epoch": 0.6944376076790549,
"grad_norm": 0.9044023752212524,
"learning_rate": 4.851423293699455e-06,
"loss": 0.7131,
"step": 1881
},
{
"epoch": 0.6948067930100911,
"grad_norm": 0.8808709979057312,
"learning_rate": 4.851258075430813e-06,
"loss": 0.68,
"step": 1882
},
{
"epoch": 0.6951759783411272,
"grad_norm": 0.875217854976654,
"learning_rate": 4.851092768167795e-06,
"loss": 0.7038,
"step": 1883
},
{
"epoch": 0.6955451636721635,
"grad_norm": 0.8607890605926514,
"learning_rate": 4.850927371916658e-06,
"loss": 0.6845,
"step": 1884
},
{
"epoch": 0.6959143490031996,
"grad_norm": 0.9063311815261841,
"learning_rate": 4.850761886683662e-06,
"loss": 0.7337,
"step": 1885
},
{
"epoch": 0.6962835343342357,
"grad_norm": 0.9188293814659119,
"learning_rate": 4.85059631247507e-06,
"loss": 0.6916,
"step": 1886
},
{
"epoch": 0.696652719665272,
"grad_norm": 0.913374125957489,
"learning_rate": 4.85043064929715e-06,
"loss": 0.759,
"step": 1887
},
{
"epoch": 0.6970219049963081,
"grad_norm": 0.8925139904022217,
"learning_rate": 4.850264897156171e-06,
"loss": 0.7173,
"step": 1888
},
{
"epoch": 0.6973910903273444,
"grad_norm": 0.9073317050933838,
"learning_rate": 4.8500990560584075e-06,
"loss": 0.7424,
"step": 1889
},
{
"epoch": 0.6977602756583805,
"grad_norm": 0.8561435341835022,
"learning_rate": 4.8499331260101365e-06,
"loss": 0.7152,
"step": 1890
},
{
"epoch": 0.6981294609894166,
"grad_norm": 0.8652263283729553,
"learning_rate": 4.8497671070176385e-06,
"loss": 0.6963,
"step": 1891
},
{
"epoch": 0.6984986463204529,
"grad_norm": 0.8837151527404785,
"learning_rate": 4.849600999087197e-06,
"loss": 0.7357,
"step": 1892
},
{
"epoch": 0.698867831651489,
"grad_norm": 0.8606382012367249,
"learning_rate": 4.8494348022251e-06,
"loss": 0.7131,
"step": 1893
},
{
"epoch": 0.6992370169825253,
"grad_norm": 0.8533560633659363,
"learning_rate": 4.8492685164376365e-06,
"loss": 0.695,
"step": 1894
},
{
"epoch": 0.6996062023135614,
"grad_norm": 0.9554173946380615,
"learning_rate": 4.849102141731101e-06,
"loss": 0.6683,
"step": 1895
},
{
"epoch": 0.6999753876445975,
"grad_norm": 0.9184376001358032,
"learning_rate": 4.848935678111792e-06,
"loss": 0.7477,
"step": 1896
},
{
"epoch": 0.7003445729756338,
"grad_norm": 0.9730942845344543,
"learning_rate": 4.848769125586007e-06,
"loss": 0.7392,
"step": 1897
},
{
"epoch": 0.7007137583066699,
"grad_norm": 0.8965922594070435,
"learning_rate": 4.848602484160053e-06,
"loss": 0.7275,
"step": 1898
},
{
"epoch": 0.7010829436377062,
"grad_norm": 0.9296282529830933,
"learning_rate": 4.848435753840236e-06,
"loss": 0.7158,
"step": 1899
},
{
"epoch": 0.7014521289687423,
"grad_norm": 0.9205310940742493,
"learning_rate": 4.8482689346328674e-06,
"loss": 0.6932,
"step": 1900
},
{
"epoch": 0.7018213142997785,
"grad_norm": 0.8746544718742371,
"learning_rate": 4.84810202654426e-06,
"loss": 0.6765,
"step": 1901
},
{
"epoch": 0.7021904996308147,
"grad_norm": 0.890127956867218,
"learning_rate": 4.847935029580732e-06,
"loss": 0.6847,
"step": 1902
},
{
"epoch": 0.7025596849618508,
"grad_norm": 0.8811405897140503,
"learning_rate": 4.847767943748605e-06,
"loss": 0.6643,
"step": 1903
},
{
"epoch": 0.702928870292887,
"grad_norm": 0.8962213397026062,
"learning_rate": 4.847600769054201e-06,
"loss": 0.7078,
"step": 1904
},
{
"epoch": 0.7032980556239232,
"grad_norm": 0.8875841498374939,
"learning_rate": 4.84743350550385e-06,
"loss": 0.707,
"step": 1905
},
{
"epoch": 0.7036672409549594,
"grad_norm": 0.8978013396263123,
"learning_rate": 4.8472661531038815e-06,
"loss": 0.6716,
"step": 1906
},
{
"epoch": 0.7040364262859956,
"grad_norm": 0.9015927910804749,
"learning_rate": 4.847098711860629e-06,
"loss": 0.6643,
"step": 1907
},
{
"epoch": 0.7044056116170317,
"grad_norm": 0.893324077129364,
"learning_rate": 4.846931181780431e-06,
"loss": 0.7005,
"step": 1908
},
{
"epoch": 0.7047747969480679,
"grad_norm": 0.9275810122489929,
"learning_rate": 4.84676356286963e-06,
"loss": 0.724,
"step": 1909
},
{
"epoch": 0.7051439822791041,
"grad_norm": 0.8800505995750427,
"learning_rate": 4.8465958551345675e-06,
"loss": 0.7251,
"step": 1910
},
{
"epoch": 0.7055131676101403,
"grad_norm": 0.8828743100166321,
"learning_rate": 4.846428058581593e-06,
"loss": 0.7277,
"step": 1911
},
{
"epoch": 0.7058823529411765,
"grad_norm": 0.9125253558158875,
"learning_rate": 4.846260173217056e-06,
"loss": 0.7619,
"step": 1912
},
{
"epoch": 0.7062515382722127,
"grad_norm": 0.9037159085273743,
"learning_rate": 4.846092199047314e-06,
"loss": 0.7182,
"step": 1913
},
{
"epoch": 0.7066207236032488,
"grad_norm": 0.8811241984367371,
"learning_rate": 4.8459241360787215e-06,
"loss": 0.749,
"step": 1914
},
{
"epoch": 0.706989908934285,
"grad_norm": 0.8958812952041626,
"learning_rate": 4.845755984317641e-06,
"loss": 0.7191,
"step": 1915
},
{
"epoch": 0.7073590942653212,
"grad_norm": 0.9097388386726379,
"learning_rate": 4.845587743770436e-06,
"loss": 0.7166,
"step": 1916
},
{
"epoch": 0.7077282795963574,
"grad_norm": 0.8992327451705933,
"learning_rate": 4.8454194144434765e-06,
"loss": 0.7493,
"step": 1917
},
{
"epoch": 0.7080974649273936,
"grad_norm": 0.9153758883476257,
"learning_rate": 4.845250996343132e-06,
"loss": 0.7036,
"step": 1918
},
{
"epoch": 0.7084666502584297,
"grad_norm": 0.9150279760360718,
"learning_rate": 4.845082489475777e-06,
"loss": 0.7278,
"step": 1919
},
{
"epoch": 0.708835835589466,
"grad_norm": 0.8316643238067627,
"learning_rate": 4.84491389384779e-06,
"loss": 0.6683,
"step": 1920
},
{
"epoch": 0.7092050209205021,
"grad_norm": 0.8955804705619812,
"learning_rate": 4.844745209465552e-06,
"loss": 0.7344,
"step": 1921
},
{
"epoch": 0.7095742062515382,
"grad_norm": 0.9217088222503662,
"learning_rate": 4.844576436335448e-06,
"loss": 0.6768,
"step": 1922
},
{
"epoch": 0.7099433915825745,
"grad_norm": 0.9741485714912415,
"learning_rate": 4.844407574463866e-06,
"loss": 0.713,
"step": 1923
},
{
"epoch": 0.7103125769136106,
"grad_norm": 0.9079376459121704,
"learning_rate": 4.844238623857197e-06,
"loss": 0.7543,
"step": 1924
},
{
"epoch": 0.7106817622446469,
"grad_norm": 0.8815346956253052,
"learning_rate": 4.844069584521836e-06,
"loss": 0.7318,
"step": 1925
},
{
"epoch": 0.711050947575683,
"grad_norm": 0.9132115244865417,
"learning_rate": 4.843900456464181e-06,
"loss": 0.7084,
"step": 1926
},
{
"epoch": 0.7114201329067191,
"grad_norm": 0.9009430408477783,
"learning_rate": 4.843731239690634e-06,
"loss": 0.7461,
"step": 1927
},
{
"epoch": 0.7117893182377554,
"grad_norm": 0.847509503364563,
"learning_rate": 4.843561934207599e-06,
"loss": 0.6802,
"step": 1928
},
{
"epoch": 0.7121585035687915,
"grad_norm": 0.8662732243537903,
"learning_rate": 4.843392540021485e-06,
"loss": 0.6892,
"step": 1929
},
{
"epoch": 0.7125276888998278,
"grad_norm": 0.9007134437561035,
"learning_rate": 4.843223057138701e-06,
"loss": 0.7029,
"step": 1930
},
{
"epoch": 0.7128968742308639,
"grad_norm": 0.882623553276062,
"learning_rate": 4.8430534855656655e-06,
"loss": 0.6805,
"step": 1931
},
{
"epoch": 0.7132660595619,
"grad_norm": 0.91568922996521,
"learning_rate": 4.842883825308794e-06,
"loss": 0.7135,
"step": 1932
},
{
"epoch": 0.7136352448929363,
"grad_norm": 0.9128447771072388,
"learning_rate": 4.84271407637451e-06,
"loss": 0.7521,
"step": 1933
},
{
"epoch": 0.7140044302239724,
"grad_norm": 0.890592098236084,
"learning_rate": 4.842544238769238e-06,
"loss": 0.7424,
"step": 1934
},
{
"epoch": 0.7143736155550087,
"grad_norm": 0.8989784717559814,
"learning_rate": 4.842374312499405e-06,
"loss": 0.7288,
"step": 1935
},
{
"epoch": 0.7147428008860448,
"grad_norm": 0.8863556981086731,
"learning_rate": 4.842204297571444e-06,
"loss": 0.7189,
"step": 1936
},
{
"epoch": 0.7151119862170809,
"grad_norm": 0.9078112840652466,
"learning_rate": 4.842034193991789e-06,
"loss": 0.7186,
"step": 1937
},
{
"epoch": 0.7154811715481172,
"grad_norm": 0.8869308829307556,
"learning_rate": 4.841864001766879e-06,
"loss": 0.7004,
"step": 1938
},
{
"epoch": 0.7158503568791533,
"grad_norm": 0.8935455083847046,
"learning_rate": 4.8416937209031555e-06,
"loss": 0.7231,
"step": 1939
},
{
"epoch": 0.7162195422101895,
"grad_norm": 0.877627968788147,
"learning_rate": 4.841523351407064e-06,
"loss": 0.6868,
"step": 1940
},
{
"epoch": 0.7165887275412257,
"grad_norm": 0.8890548944473267,
"learning_rate": 4.841352893285053e-06,
"loss": 0.7483,
"step": 1941
},
{
"epoch": 0.7169579128722618,
"grad_norm": 0.8524648547172546,
"learning_rate": 4.841182346543574e-06,
"loss": 0.7152,
"step": 1942
},
{
"epoch": 0.7173270982032981,
"grad_norm": 0.8882040977478027,
"learning_rate": 4.841011711189081e-06,
"loss": 0.7057,
"step": 1943
},
{
"epoch": 0.7176962835343342,
"grad_norm": 0.8520607352256775,
"learning_rate": 4.840840987228035e-06,
"loss": 0.6976,
"step": 1944
},
{
"epoch": 0.7180654688653704,
"grad_norm": 0.883282482624054,
"learning_rate": 4.840670174666896e-06,
"loss": 0.6937,
"step": 1945
},
{
"epoch": 0.7184346541964066,
"grad_norm": 0.8982214331626892,
"learning_rate": 4.840499273512129e-06,
"loss": 0.6427,
"step": 1946
},
{
"epoch": 0.7188038395274428,
"grad_norm": 0.8696303367614746,
"learning_rate": 4.840328283770203e-06,
"loss": 0.7672,
"step": 1947
},
{
"epoch": 0.719173024858479,
"grad_norm": 0.8526875972747803,
"learning_rate": 4.840157205447591e-06,
"loss": 0.7058,
"step": 1948
},
{
"epoch": 0.7195422101895151,
"grad_norm": 0.8616173267364502,
"learning_rate": 4.839986038550767e-06,
"loss": 0.6861,
"step": 1949
},
{
"epoch": 0.7199113955205513,
"grad_norm": 0.8847987651824951,
"learning_rate": 4.83981478308621e-06,
"loss": 0.7424,
"step": 1950
},
{
"epoch": 0.7202805808515875,
"grad_norm": 0.9206272959709167,
"learning_rate": 4.839643439060401e-06,
"loss": 0.712,
"step": 1951
},
{
"epoch": 0.7206497661826237,
"grad_norm": 0.8666979670524597,
"learning_rate": 4.8394720064798275e-06,
"loss": 0.7077,
"step": 1952
},
{
"epoch": 0.7210189515136599,
"grad_norm": 0.8352959752082825,
"learning_rate": 4.839300485350976e-06,
"loss": 0.6755,
"step": 1953
},
{
"epoch": 0.721388136844696,
"grad_norm": 0.8747813105583191,
"learning_rate": 4.83912887568034e-06,
"loss": 0.7047,
"step": 1954
},
{
"epoch": 0.7217573221757322,
"grad_norm": 0.9319245219230652,
"learning_rate": 4.838957177474414e-06,
"loss": 0.752,
"step": 1955
},
{
"epoch": 0.7221265075067684,
"grad_norm": 0.8869343996047974,
"learning_rate": 4.838785390739698e-06,
"loss": 0.7211,
"step": 1956
},
{
"epoch": 0.7224956928378046,
"grad_norm": 0.9012927412986755,
"learning_rate": 4.838613515482692e-06,
"loss": 0.7184,
"step": 1957
},
{
"epoch": 0.7228648781688407,
"grad_norm": 0.8678253293037415,
"learning_rate": 4.838441551709902e-06,
"loss": 0.7164,
"step": 1958
},
{
"epoch": 0.723234063499877,
"grad_norm": 0.8944018483161926,
"learning_rate": 4.838269499427838e-06,
"loss": 0.7141,
"step": 1959
},
{
"epoch": 0.7236032488309131,
"grad_norm": 0.868712306022644,
"learning_rate": 4.838097358643012e-06,
"loss": 0.6841,
"step": 1960
},
{
"epoch": 0.7239724341619493,
"grad_norm": 0.8801015615463257,
"learning_rate": 4.837925129361938e-06,
"loss": 0.6913,
"step": 1961
},
{
"epoch": 0.7243416194929855,
"grad_norm": 0.8590583801269531,
"learning_rate": 4.837752811591136e-06,
"loss": 0.7065,
"step": 1962
},
{
"epoch": 0.7247108048240216,
"grad_norm": 0.895354151725769,
"learning_rate": 4.837580405337128e-06,
"loss": 0.7574,
"step": 1963
},
{
"epoch": 0.7250799901550579,
"grad_norm": 0.8873820304870605,
"learning_rate": 4.83740791060644e-06,
"loss": 0.7083,
"step": 1964
},
{
"epoch": 0.725449175486094,
"grad_norm": 0.8843436241149902,
"learning_rate": 4.837235327405599e-06,
"loss": 0.7178,
"step": 1965
},
{
"epoch": 0.7258183608171302,
"grad_norm": 0.9182543158531189,
"learning_rate": 4.837062655741139e-06,
"loss": 0.7311,
"step": 1966
},
{
"epoch": 0.7261875461481664,
"grad_norm": 0.8999276161193848,
"learning_rate": 4.836889895619595e-06,
"loss": 0.7329,
"step": 1967
},
{
"epoch": 0.7265567314792025,
"grad_norm": 0.9033870697021484,
"learning_rate": 4.836717047047507e-06,
"loss": 0.7254,
"step": 1968
},
{
"epoch": 0.7269259168102388,
"grad_norm": 0.8504629731178284,
"learning_rate": 4.836544110031415e-06,
"loss": 0.6982,
"step": 1969
},
{
"epoch": 0.7272951021412749,
"grad_norm": 0.8877577185630798,
"learning_rate": 4.836371084577867e-06,
"loss": 0.7121,
"step": 1970
},
{
"epoch": 0.7276642874723112,
"grad_norm": 0.845675528049469,
"learning_rate": 4.8361979706934096e-06,
"loss": 0.7187,
"step": 1971
},
{
"epoch": 0.7280334728033473,
"grad_norm": 0.8912767767906189,
"learning_rate": 4.836024768384597e-06,
"loss": 0.7221,
"step": 1972
},
{
"epoch": 0.7284026581343834,
"grad_norm": 0.9066804647445679,
"learning_rate": 4.8358514776579835e-06,
"loss": 0.737,
"step": 1973
},
{
"epoch": 0.7287718434654197,
"grad_norm": 0.874573826789856,
"learning_rate": 4.83567809852013e-06,
"loss": 0.6928,
"step": 1974
},
{
"epoch": 0.7291410287964558,
"grad_norm": 0.9108884930610657,
"learning_rate": 4.835504630977597e-06,
"loss": 0.6933,
"step": 1975
},
{
"epoch": 0.729510214127492,
"grad_norm": 0.8197352290153503,
"learning_rate": 4.8353310750369496e-06,
"loss": 0.6412,
"step": 1976
},
{
"epoch": 0.7298793994585282,
"grad_norm": 0.8398105502128601,
"learning_rate": 4.83515743070476e-06,
"loss": 0.6667,
"step": 1977
},
{
"epoch": 0.7302485847895643,
"grad_norm": 0.8997299075126648,
"learning_rate": 4.834983697987597e-06,
"loss": 0.6776,
"step": 1978
},
{
"epoch": 0.7306177701206006,
"grad_norm": 0.8984546661376953,
"learning_rate": 4.834809876892039e-06,
"loss": 0.7515,
"step": 1979
},
{
"epoch": 0.7309869554516367,
"grad_norm": 0.8575339913368225,
"learning_rate": 4.834635967424664e-06,
"loss": 0.7179,
"step": 1980
},
{
"epoch": 0.7313561407826729,
"grad_norm": 0.8993721008300781,
"learning_rate": 4.8344619695920545e-06,
"loss": 0.7405,
"step": 1981
},
{
"epoch": 0.7317253261137091,
"grad_norm": 0.8856477737426758,
"learning_rate": 4.8342878834007955e-06,
"loss": 0.766,
"step": 1982
},
{
"epoch": 0.7320945114447452,
"grad_norm": 0.8776651620864868,
"learning_rate": 4.834113708857477e-06,
"loss": 0.6993,
"step": 1983
},
{
"epoch": 0.7324636967757815,
"grad_norm": 0.8624359965324402,
"learning_rate": 4.8339394459686925e-06,
"loss": 0.7049,
"step": 1984
},
{
"epoch": 0.7328328821068176,
"grad_norm": 0.9040077924728394,
"learning_rate": 4.833765094741035e-06,
"loss": 0.6616,
"step": 1985
},
{
"epoch": 0.7332020674378538,
"grad_norm": 0.8632827997207642,
"learning_rate": 4.833590655181106e-06,
"loss": 0.6911,
"step": 1986
},
{
"epoch": 0.73357125276889,
"grad_norm": 0.869616687297821,
"learning_rate": 4.833416127295507e-06,
"loss": 0.7216,
"step": 1987
},
{
"epoch": 0.7339404380999262,
"grad_norm": 0.8821524977684021,
"learning_rate": 4.833241511090845e-06,
"loss": 0.7398,
"step": 1988
},
{
"epoch": 0.7343096234309623,
"grad_norm": 0.9026939272880554,
"learning_rate": 4.833066806573727e-06,
"loss": 0.6981,
"step": 1989
},
{
"epoch": 0.7346788087619985,
"grad_norm": 0.9178372621536255,
"learning_rate": 4.8328920137507665e-06,
"loss": 0.7219,
"step": 1990
},
{
"epoch": 0.7350479940930347,
"grad_norm": 0.8885320425033569,
"learning_rate": 4.832717132628581e-06,
"loss": 0.7093,
"step": 1991
},
{
"epoch": 0.7354171794240709,
"grad_norm": 0.8704083561897278,
"learning_rate": 4.832542163213787e-06,
"loss": 0.7475,
"step": 1992
},
{
"epoch": 0.7357863647551071,
"grad_norm": 0.8328550457954407,
"learning_rate": 4.832367105513008e-06,
"loss": 0.6655,
"step": 1993
},
{
"epoch": 0.7361555500861432,
"grad_norm": 0.900768518447876,
"learning_rate": 4.832191959532871e-06,
"loss": 0.6967,
"step": 1994
},
{
"epoch": 0.7365247354171794,
"grad_norm": 0.8467245697975159,
"learning_rate": 4.832016725280005e-06,
"loss": 0.6984,
"step": 1995
},
{
"epoch": 0.7368939207482156,
"grad_norm": 0.8155560493469238,
"learning_rate": 4.83184140276104e-06,
"loss": 0.6468,
"step": 1996
},
{
"epoch": 0.7372631060792518,
"grad_norm": 0.8829312920570374,
"learning_rate": 4.831665991982615e-06,
"loss": 0.7444,
"step": 1997
},
{
"epoch": 0.737632291410288,
"grad_norm": 0.8596073389053345,
"learning_rate": 4.831490492951368e-06,
"loss": 0.686,
"step": 1998
},
{
"epoch": 0.7380014767413241,
"grad_norm": 0.895241916179657,
"learning_rate": 4.831314905673942e-06,
"loss": 0.6905,
"step": 1999
},
{
"epoch": 0.7383706620723604,
"grad_norm": 0.8932267427444458,
"learning_rate": 4.831139230156982e-06,
"loss": 0.6809,
"step": 2000
},
{
"epoch": 0.7387398474033965,
"grad_norm": 0.8914393186569214,
"learning_rate": 4.8309634664071385e-06,
"loss": 0.6851,
"step": 2001
},
{
"epoch": 0.7391090327344327,
"grad_norm": 0.8881421685218811,
"learning_rate": 4.830787614431062e-06,
"loss": 0.7322,
"step": 2002
},
{
"epoch": 0.7394782180654689,
"grad_norm": 0.8394930958747864,
"learning_rate": 4.830611674235411e-06,
"loss": 0.6989,
"step": 2003
},
{
"epoch": 0.739847403396505,
"grad_norm": 0.8811700940132141,
"learning_rate": 4.830435645826844e-06,
"loss": 0.7009,
"step": 2004
},
{
"epoch": 0.7402165887275413,
"grad_norm": 0.8765476942062378,
"learning_rate": 4.830259529212023e-06,
"loss": 0.7325,
"step": 2005
},
{
"epoch": 0.7405857740585774,
"grad_norm": 0.8366778492927551,
"learning_rate": 4.830083324397614e-06,
"loss": 0.6912,
"step": 2006
},
{
"epoch": 0.7409549593896135,
"grad_norm": 0.8748518228530884,
"learning_rate": 4.829907031390287e-06,
"loss": 0.7269,
"step": 2007
},
{
"epoch": 0.7413241447206498,
"grad_norm": 0.93858802318573,
"learning_rate": 4.829730650196714e-06,
"loss": 0.7176,
"step": 2008
},
{
"epoch": 0.7416933300516859,
"grad_norm": 0.8684030771255493,
"learning_rate": 4.8295541808235715e-06,
"loss": 0.7018,
"step": 2009
},
{
"epoch": 0.7420625153827222,
"grad_norm": 0.8710829019546509,
"learning_rate": 4.829377623277538e-06,
"loss": 0.7137,
"step": 2010
},
{
"epoch": 0.7424317007137583,
"grad_norm": 0.8900696039199829,
"learning_rate": 4.829200977565296e-06,
"loss": 0.695,
"step": 2011
},
{
"epoch": 0.7428008860447944,
"grad_norm": 0.8644715547561646,
"learning_rate": 4.8290242436935334e-06,
"loss": 0.6685,
"step": 2012
},
{
"epoch": 0.7431700713758307,
"grad_norm": 0.8664246797561646,
"learning_rate": 4.828847421668938e-06,
"loss": 0.6827,
"step": 2013
},
{
"epoch": 0.7435392567068668,
"grad_norm": 0.9115155935287476,
"learning_rate": 4.828670511498202e-06,
"loss": 0.7093,
"step": 2014
},
{
"epoch": 0.7439084420379031,
"grad_norm": 0.9124789834022522,
"learning_rate": 4.828493513188022e-06,
"loss": 0.7339,
"step": 2015
},
{
"epoch": 0.7442776273689392,
"grad_norm": 0.8920673131942749,
"learning_rate": 4.828316426745098e-06,
"loss": 0.7109,
"step": 2016
},
{
"epoch": 0.7446468126999753,
"grad_norm": 0.9076531529426575,
"learning_rate": 4.828139252176131e-06,
"loss": 0.7117,
"step": 2017
},
{
"epoch": 0.7450159980310116,
"grad_norm": 0.8628717660903931,
"learning_rate": 4.82796198948783e-06,
"loss": 0.7317,
"step": 2018
},
{
"epoch": 0.7453851833620477,
"grad_norm": 0.8688773512840271,
"learning_rate": 4.827784638686901e-06,
"loss": 0.7004,
"step": 2019
},
{
"epoch": 0.745754368693084,
"grad_norm": 0.8732299208641052,
"learning_rate": 4.827607199780059e-06,
"loss": 0.7333,
"step": 2020
},
{
"epoch": 0.7461235540241201,
"grad_norm": 0.8918249011039734,
"learning_rate": 4.8274296727740185e-06,
"loss": 0.6741,
"step": 2021
},
{
"epoch": 0.7464927393551563,
"grad_norm": 0.8658198714256287,
"learning_rate": 4.827252057675499e-06,
"loss": 0.7014,
"step": 2022
},
{
"epoch": 0.7468619246861925,
"grad_norm": 0.9010634422302246,
"learning_rate": 4.827074354491225e-06,
"loss": 0.7067,
"step": 2023
},
{
"epoch": 0.7472311100172286,
"grad_norm": 0.8897079825401306,
"learning_rate": 4.8268965632279194e-06,
"loss": 0.7272,
"step": 2024
},
{
"epoch": 0.7476002953482648,
"grad_norm": 0.8660743236541748,
"learning_rate": 4.8267186838923145e-06,
"loss": 0.7262,
"step": 2025
},
{
"epoch": 0.747969480679301,
"grad_norm": 0.8774269819259644,
"learning_rate": 4.826540716491141e-06,
"loss": 0.7368,
"step": 2026
},
{
"epoch": 0.7483386660103372,
"grad_norm": 0.8606297969818115,
"learning_rate": 4.826362661031136e-06,
"loss": 0.7107,
"step": 2027
},
{
"epoch": 0.7487078513413734,
"grad_norm": 0.8835901618003845,
"learning_rate": 4.826184517519038e-06,
"loss": 0.7234,
"step": 2028
},
{
"epoch": 0.7490770366724095,
"grad_norm": 0.8923384547233582,
"learning_rate": 4.8260062859615915e-06,
"loss": 0.68,
"step": 2029
},
{
"epoch": 0.7494462220034457,
"grad_norm": 0.8915189504623413,
"learning_rate": 4.825827966365541e-06,
"loss": 0.6991,
"step": 2030
},
{
"epoch": 0.7498154073344819,
"grad_norm": 0.8923548460006714,
"learning_rate": 4.825649558737635e-06,
"loss": 0.75,
"step": 2031
},
{
"epoch": 0.7501845926655181,
"grad_norm": 0.8493019342422485,
"learning_rate": 4.825471063084627e-06,
"loss": 0.6659,
"step": 2032
},
{
"epoch": 0.7505537779965543,
"grad_norm": 0.8680519461631775,
"learning_rate": 4.825292479413274e-06,
"loss": 0.6889,
"step": 2033
},
{
"epoch": 0.7509229633275905,
"grad_norm": 0.9089414477348328,
"learning_rate": 4.825113807730334e-06,
"loss": 0.7363,
"step": 2034
},
{
"epoch": 0.7512921486586266,
"grad_norm": 0.8787106275558472,
"learning_rate": 4.8249350480425704e-06,
"loss": 0.6808,
"step": 2035
},
{
"epoch": 0.7516613339896628,
"grad_norm": 0.8691989779472351,
"learning_rate": 4.8247562003567486e-06,
"loss": 0.6974,
"step": 2036
},
{
"epoch": 0.752030519320699,
"grad_norm": 0.8437734246253967,
"learning_rate": 4.824577264679639e-06,
"loss": 0.6861,
"step": 2037
},
{
"epoch": 0.7523997046517352,
"grad_norm": 0.8832452893257141,
"learning_rate": 4.824398241018014e-06,
"loss": 0.7166,
"step": 2038
},
{
"epoch": 0.7527688899827714,
"grad_norm": 0.8944876194000244,
"learning_rate": 4.824219129378648e-06,
"loss": 0.7272,
"step": 2039
},
{
"epoch": 0.7531380753138075,
"grad_norm": 0.8749659657478333,
"learning_rate": 4.824039929768322e-06,
"loss": 0.7176,
"step": 2040
},
{
"epoch": 0.7535072606448437,
"grad_norm": 0.8656901717185974,
"learning_rate": 4.823860642193818e-06,
"loss": 0.7335,
"step": 2041
},
{
"epoch": 0.7538764459758799,
"grad_norm": 0.874985933303833,
"learning_rate": 4.823681266661923e-06,
"loss": 0.6942,
"step": 2042
},
{
"epoch": 0.754245631306916,
"grad_norm": 0.8945735692977905,
"learning_rate": 4.823501803179424e-06,
"loss": 0.733,
"step": 2043
},
{
"epoch": 0.7546148166379523,
"grad_norm": 0.8628880977630615,
"learning_rate": 4.823322251753115e-06,
"loss": 0.7065,
"step": 2044
},
{
"epoch": 0.7549840019689884,
"grad_norm": 0.850796639919281,
"learning_rate": 4.823142612389793e-06,
"loss": 0.6986,
"step": 2045
},
{
"epoch": 0.7553531873000247,
"grad_norm": 0.8502240180969238,
"learning_rate": 4.822962885096256e-06,
"loss": 0.7364,
"step": 2046
},
{
"epoch": 0.7557223726310608,
"grad_norm": 0.8640246987342834,
"learning_rate": 4.8227830698793076e-06,
"loss": 0.7207,
"step": 2047
},
{
"epoch": 0.7560915579620969,
"grad_norm": 0.8806377649307251,
"learning_rate": 4.8226031667457516e-06,
"loss": 0.7591,
"step": 2048
},
{
"epoch": 0.7564607432931332,
"grad_norm": 0.9259816408157349,
"learning_rate": 4.8224231757024e-06,
"loss": 0.7162,
"step": 2049
},
{
"epoch": 0.7568299286241693,
"grad_norm": 0.8613938689231873,
"learning_rate": 4.822243096756064e-06,
"loss": 0.7024,
"step": 2050
},
{
"epoch": 0.7571991139552056,
"grad_norm": 0.8919888734817505,
"learning_rate": 4.822062929913559e-06,
"loss": 0.7056,
"step": 2051
},
{
"epoch": 0.7575682992862417,
"grad_norm": 0.8805553913116455,
"learning_rate": 4.821882675181706e-06,
"loss": 0.7197,
"step": 2052
},
{
"epoch": 0.7579374846172778,
"grad_norm": 0.8903268575668335,
"learning_rate": 4.821702332567326e-06,
"loss": 0.7158,
"step": 2053
},
{
"epoch": 0.7583066699483141,
"grad_norm": 0.8912745118141174,
"learning_rate": 4.8215219020772455e-06,
"loss": 0.6996,
"step": 2054
},
{
"epoch": 0.7586758552793502,
"grad_norm": 0.8754813075065613,
"learning_rate": 4.821341383718293e-06,
"loss": 0.7052,
"step": 2055
},
{
"epoch": 0.7590450406103865,
"grad_norm": 0.8683738708496094,
"learning_rate": 4.821160777497303e-06,
"loss": 0.7288,
"step": 2056
},
{
"epoch": 0.7594142259414226,
"grad_norm": 0.8737871646881104,
"learning_rate": 4.820980083421109e-06,
"loss": 0.7289,
"step": 2057
},
{
"epoch": 0.7597834112724587,
"grad_norm": 0.8430957794189453,
"learning_rate": 4.820799301496552e-06,
"loss": 0.6643,
"step": 2058
},
{
"epoch": 0.760152596603495,
"grad_norm": 0.8619078397750854,
"learning_rate": 4.820618431730474e-06,
"loss": 0.6849,
"step": 2059
},
{
"epoch": 0.7605217819345311,
"grad_norm": 0.8729053139686584,
"learning_rate": 4.820437474129721e-06,
"loss": 0.7081,
"step": 2060
},
{
"epoch": 0.7608909672655673,
"grad_norm": 0.8700425028800964,
"learning_rate": 4.820256428701141e-06,
"loss": 0.6952,
"step": 2061
},
{
"epoch": 0.7612601525966035,
"grad_norm": 0.8315994143486023,
"learning_rate": 4.8200752954515885e-06,
"loss": 0.6861,
"step": 2062
},
{
"epoch": 0.7616293379276396,
"grad_norm": 0.8591640591621399,
"learning_rate": 4.819894074387917e-06,
"loss": 0.7155,
"step": 2063
},
{
"epoch": 0.7619985232586759,
"grad_norm": 0.9315993189811707,
"learning_rate": 4.8197127655169885e-06,
"loss": 0.7121,
"step": 2064
},
{
"epoch": 0.762367708589712,
"grad_norm": 0.8699647188186646,
"learning_rate": 4.819531368845662e-06,
"loss": 0.7095,
"step": 2065
},
{
"epoch": 0.7627368939207482,
"grad_norm": 0.8588031530380249,
"learning_rate": 4.819349884380807e-06,
"loss": 0.6605,
"step": 2066
},
{
"epoch": 0.7631060792517844,
"grad_norm": 0.898235559463501,
"learning_rate": 4.81916831212929e-06,
"loss": 0.7332,
"step": 2067
},
{
"epoch": 0.7634752645828206,
"grad_norm": 0.8969528079032898,
"learning_rate": 4.818986652097985e-06,
"loss": 0.7423,
"step": 2068
},
{
"epoch": 0.7638444499138568,
"grad_norm": 0.8572843074798584,
"learning_rate": 4.818804904293767e-06,
"loss": 0.7698,
"step": 2069
},
{
"epoch": 0.7642136352448929,
"grad_norm": 0.8814283609390259,
"learning_rate": 4.8186230687235145e-06,
"loss": 0.709,
"step": 2070
},
{
"epoch": 0.7645828205759291,
"grad_norm": 0.9045025110244751,
"learning_rate": 4.818441145394111e-06,
"loss": 0.7065,
"step": 2071
},
{
"epoch": 0.7649520059069653,
"grad_norm": 0.8909565210342407,
"learning_rate": 4.818259134312442e-06,
"loss": 0.6989,
"step": 2072
},
{
"epoch": 0.7653211912380015,
"grad_norm": 0.874191164970398,
"learning_rate": 4.818077035485396e-06,
"loss": 0.6966,
"step": 2073
},
{
"epoch": 0.7656903765690377,
"grad_norm": 0.8939563035964966,
"learning_rate": 4.817894848919866e-06,
"loss": 0.7279,
"step": 2074
},
{
"epoch": 0.7660595619000738,
"grad_norm": 0.8762744665145874,
"learning_rate": 4.817712574622748e-06,
"loss": 0.7097,
"step": 2075
},
{
"epoch": 0.76642874723111,
"grad_norm": 0.9104212522506714,
"learning_rate": 4.81753021260094e-06,
"loss": 0.7419,
"step": 2076
},
{
"epoch": 0.7667979325621462,
"grad_norm": 0.8775395154953003,
"learning_rate": 4.817347762861345e-06,
"loss": 0.6853,
"step": 2077
},
{
"epoch": 0.7671671178931824,
"grad_norm": 0.9023780822753906,
"learning_rate": 4.817165225410868e-06,
"loss": 0.7256,
"step": 2078
},
{
"epoch": 0.7675363032242185,
"grad_norm": 0.8750520944595337,
"learning_rate": 4.816982600256419e-06,
"loss": 0.6715,
"step": 2079
},
{
"epoch": 0.7679054885552548,
"grad_norm": 0.8693894743919373,
"learning_rate": 4.816799887404911e-06,
"loss": 0.694,
"step": 2080
},
{
"epoch": 0.7682746738862909,
"grad_norm": 0.8720349073410034,
"learning_rate": 4.816617086863256e-06,
"loss": 0.7284,
"step": 2081
},
{
"epoch": 0.7686438592173271,
"grad_norm": 0.8731261491775513,
"learning_rate": 4.816434198638378e-06,
"loss": 0.7152,
"step": 2082
},
{
"epoch": 0.7690130445483633,
"grad_norm": 0.8482458591461182,
"learning_rate": 4.816251222737195e-06,
"loss": 0.6944,
"step": 2083
},
{
"epoch": 0.7693822298793994,
"grad_norm": 0.9315195083618164,
"learning_rate": 4.816068159166635e-06,
"loss": 0.6859,
"step": 2084
},
{
"epoch": 0.7697514152104357,
"grad_norm": 0.8510304689407349,
"learning_rate": 4.815885007933625e-06,
"loss": 0.6863,
"step": 2085
},
{
"epoch": 0.7701206005414718,
"grad_norm": 0.8732700347900391,
"learning_rate": 4.815701769045099e-06,
"loss": 0.7242,
"step": 2086
},
{
"epoch": 0.770489785872508,
"grad_norm": 0.8937168121337891,
"learning_rate": 4.815518442507992e-06,
"loss": 0.7239,
"step": 2087
},
{
"epoch": 0.7708589712035442,
"grad_norm": 0.8584597706794739,
"learning_rate": 4.815335028329243e-06,
"loss": 0.7197,
"step": 2088
},
{
"epoch": 0.7712281565345803,
"grad_norm": 0.8576000928878784,
"learning_rate": 4.815151526515794e-06,
"loss": 0.6708,
"step": 2089
},
{
"epoch": 0.7715973418656166,
"grad_norm": 0.9142547845840454,
"learning_rate": 4.814967937074589e-06,
"loss": 0.7148,
"step": 2090
},
{
"epoch": 0.7719665271966527,
"grad_norm": 0.8664703369140625,
"learning_rate": 4.81478426001258e-06,
"loss": 0.7159,
"step": 2091
},
{
"epoch": 0.7723357125276888,
"grad_norm": 0.8845775723457336,
"learning_rate": 4.814600495336716e-06,
"loss": 0.7031,
"step": 2092
},
{
"epoch": 0.7727048978587251,
"grad_norm": 0.8626143932342529,
"learning_rate": 4.8144166430539555e-06,
"loss": 0.6828,
"step": 2093
},
{
"epoch": 0.7730740831897612,
"grad_norm": 0.8681825399398804,
"learning_rate": 4.814232703171254e-06,
"loss": 0.7169,
"step": 2094
},
{
"epoch": 0.7734432685207975,
"grad_norm": 0.9187625050544739,
"learning_rate": 4.8140486756955755e-06,
"loss": 0.7019,
"step": 2095
},
{
"epoch": 0.7738124538518336,
"grad_norm": 0.8887724876403809,
"learning_rate": 4.813864560633885e-06,
"loss": 0.7349,
"step": 2096
},
{
"epoch": 0.7741816391828698,
"grad_norm": 0.8629727959632874,
"learning_rate": 4.81368035799315e-06,
"loss": 0.7328,
"step": 2097
},
{
"epoch": 0.774550824513906,
"grad_norm": 0.8609279990196228,
"learning_rate": 4.813496067780345e-06,
"loss": 0.6909,
"step": 2098
},
{
"epoch": 0.7749200098449421,
"grad_norm": 0.8608946800231934,
"learning_rate": 4.813311690002444e-06,
"loss": 0.6885,
"step": 2099
},
{
"epoch": 0.7752891951759784,
"grad_norm": 0.8858616352081299,
"learning_rate": 4.813127224666425e-06,
"loss": 0.6919,
"step": 2100
},
{
"epoch": 0.7756583805070145,
"grad_norm": 0.8694943785667419,
"learning_rate": 4.812942671779271e-06,
"loss": 0.6736,
"step": 2101
},
{
"epoch": 0.7760275658380507,
"grad_norm": 0.8370912671089172,
"learning_rate": 4.812758031347967e-06,
"loss": 0.6608,
"step": 2102
},
{
"epoch": 0.7763967511690869,
"grad_norm": 0.8685939908027649,
"learning_rate": 4.812573303379501e-06,
"loss": 0.74,
"step": 2103
},
{
"epoch": 0.776765936500123,
"grad_norm": 0.8643637299537659,
"learning_rate": 4.812388487880865e-06,
"loss": 0.6988,
"step": 2104
},
{
"epoch": 0.7771351218311593,
"grad_norm": 0.9060371518135071,
"learning_rate": 4.8122035848590555e-06,
"loss": 0.731,
"step": 2105
},
{
"epoch": 0.7775043071621954,
"grad_norm": 0.8900934457778931,
"learning_rate": 4.81201859432107e-06,
"loss": 0.7363,
"step": 2106
},
{
"epoch": 0.7778734924932316,
"grad_norm": 0.8641214966773987,
"learning_rate": 4.8118335162739096e-06,
"loss": 0.6838,
"step": 2107
},
{
"epoch": 0.7782426778242678,
"grad_norm": 0.926879346370697,
"learning_rate": 4.81164835072458e-06,
"loss": 0.7187,
"step": 2108
},
{
"epoch": 0.778611863155304,
"grad_norm": 0.8715068697929382,
"learning_rate": 4.81146309768009e-06,
"loss": 0.7339,
"step": 2109
},
{
"epoch": 0.7789810484863401,
"grad_norm": 0.9065748453140259,
"learning_rate": 4.811277757147452e-06,
"loss": 0.7526,
"step": 2110
},
{
"epoch": 0.7793502338173763,
"grad_norm": 0.8723341822624207,
"learning_rate": 4.8110923291336804e-06,
"loss": 0.7001,
"step": 2111
},
{
"epoch": 0.7797194191484125,
"grad_norm": 0.8835813999176025,
"learning_rate": 4.810906813645793e-06,
"loss": 0.7187,
"step": 2112
},
{
"epoch": 0.7800886044794487,
"grad_norm": 0.8894395232200623,
"learning_rate": 4.810721210690812e-06,
"loss": 0.7349,
"step": 2113
},
{
"epoch": 0.7804577898104849,
"grad_norm": 0.8809918761253357,
"learning_rate": 4.8105355202757635e-06,
"loss": 0.6857,
"step": 2114
},
{
"epoch": 0.780826975141521,
"grad_norm": 0.8665323257446289,
"learning_rate": 4.810349742407673e-06,
"loss": 0.724,
"step": 2115
},
{
"epoch": 0.7811961604725572,
"grad_norm": 0.9003410935401917,
"learning_rate": 4.810163877093575e-06,
"loss": 0.6776,
"step": 2116
},
{
"epoch": 0.7815653458035934,
"grad_norm": 0.9015949368476868,
"learning_rate": 4.8099779243405035e-06,
"loss": 0.7107,
"step": 2117
},
{
"epoch": 0.7819345311346296,
"grad_norm": 0.9056869149208069,
"learning_rate": 4.8097918841554965e-06,
"loss": 0.7009,
"step": 2118
},
{
"epoch": 0.7823037164656658,
"grad_norm": 0.8645703792572021,
"learning_rate": 4.809605756545596e-06,
"loss": 0.7285,
"step": 2119
},
{
"epoch": 0.7826729017967019,
"grad_norm": 0.9308361411094666,
"learning_rate": 4.809419541517845e-06,
"loss": 0.7097,
"step": 2120
},
{
"epoch": 0.7830420871277382,
"grad_norm": 0.8967791795730591,
"learning_rate": 4.809233239079295e-06,
"loss": 0.7163,
"step": 2121
},
{
"epoch": 0.7834112724587743,
"grad_norm": 0.868552565574646,
"learning_rate": 4.809046849236995e-06,
"loss": 0.7397,
"step": 2122
},
{
"epoch": 0.7837804577898105,
"grad_norm": 0.894079864025116,
"learning_rate": 4.808860371998e-06,
"loss": 0.7136,
"step": 2123
},
{
"epoch": 0.7841496431208467,
"grad_norm": 0.874018132686615,
"learning_rate": 4.808673807369369e-06,
"loss": 0.6998,
"step": 2124
},
{
"epoch": 0.7845188284518828,
"grad_norm": 0.8726391792297363,
"learning_rate": 4.808487155358163e-06,
"loss": 0.7335,
"step": 2125
},
{
"epoch": 0.7848880137829191,
"grad_norm": 0.8656647205352783,
"learning_rate": 4.8083004159714465e-06,
"loss": 0.6756,
"step": 2126
},
{
"epoch": 0.7852571991139552,
"grad_norm": 0.8752986788749695,
"learning_rate": 4.808113589216288e-06,
"loss": 0.6846,
"step": 2127
},
{
"epoch": 0.7856263844449913,
"grad_norm": 0.8898423910140991,
"learning_rate": 4.807926675099759e-06,
"loss": 0.7056,
"step": 2128
},
{
"epoch": 0.7859955697760276,
"grad_norm": 0.9080005288124084,
"learning_rate": 4.807739673628934e-06,
"loss": 0.6836,
"step": 2129
},
{
"epoch": 0.7863647551070637,
"grad_norm": 0.9017002582550049,
"learning_rate": 4.8075525848108895e-06,
"loss": 0.7043,
"step": 2130
},
{
"epoch": 0.7867339404381,
"grad_norm": 0.9311098456382751,
"learning_rate": 4.807365408652709e-06,
"loss": 0.7493,
"step": 2131
},
{
"epoch": 0.7871031257691361,
"grad_norm": 1.0424463748931885,
"learning_rate": 4.807178145161475e-06,
"loss": 0.708,
"step": 2132
},
{
"epoch": 0.7874723111001722,
"grad_norm": 0.8803859949111938,
"learning_rate": 4.8069907943442775e-06,
"loss": 0.7011,
"step": 2133
},
{
"epoch": 0.7878414964312085,
"grad_norm": 0.8854016661643982,
"learning_rate": 4.806803356208205e-06,
"loss": 0.7101,
"step": 2134
},
{
"epoch": 0.7882106817622446,
"grad_norm": 0.8736090064048767,
"learning_rate": 4.806615830760355e-06,
"loss": 0.7102,
"step": 2135
},
{
"epoch": 0.7885798670932809,
"grad_norm": 0.8740845918655396,
"learning_rate": 4.806428218007823e-06,
"loss": 0.691,
"step": 2136
},
{
"epoch": 0.788949052424317,
"grad_norm": 0.9291470050811768,
"learning_rate": 4.806240517957711e-06,
"loss": 0.6972,
"step": 2137
},
{
"epoch": 0.7893182377553531,
"grad_norm": 0.8928626775741577,
"learning_rate": 4.8060527306171235e-06,
"loss": 0.6803,
"step": 2138
},
{
"epoch": 0.7896874230863894,
"grad_norm": 0.90341717004776,
"learning_rate": 4.805864855993167e-06,
"loss": 0.7155,
"step": 2139
},
{
"epoch": 0.7900566084174255,
"grad_norm": 0.8977493047714233,
"learning_rate": 4.805676894092956e-06,
"loss": 0.7205,
"step": 2140
},
{
"epoch": 0.7904257937484618,
"grad_norm": 0.8815012574195862,
"learning_rate": 4.805488844923599e-06,
"loss": 0.7137,
"step": 2141
},
{
"epoch": 0.7907949790794979,
"grad_norm": 0.8667290806770325,
"learning_rate": 4.8053007084922185e-06,
"loss": 0.7219,
"step": 2142
},
{
"epoch": 0.791164164410534,
"grad_norm": 0.8803372979164124,
"learning_rate": 4.805112484805934e-06,
"loss": 0.737,
"step": 2143
},
{
"epoch": 0.7915333497415703,
"grad_norm": 0.9153837561607361,
"learning_rate": 4.804924173871869e-06,
"loss": 0.7093,
"step": 2144
},
{
"epoch": 0.7919025350726064,
"grad_norm": 0.875203013420105,
"learning_rate": 4.804735775697151e-06,
"loss": 0.7147,
"step": 2145
},
{
"epoch": 0.7922717204036426,
"grad_norm": 0.8866360187530518,
"learning_rate": 4.8045472902889125e-06,
"loss": 0.7184,
"step": 2146
},
{
"epoch": 0.7926409057346788,
"grad_norm": 0.8125059604644775,
"learning_rate": 4.804358717654286e-06,
"loss": 0.6916,
"step": 2147
},
{
"epoch": 0.793010091065715,
"grad_norm": 0.8981543183326721,
"learning_rate": 4.804170057800408e-06,
"loss": 0.7109,
"step": 2148
},
{
"epoch": 0.7933792763967512,
"grad_norm": 0.841891348361969,
"learning_rate": 4.803981310734422e-06,
"loss": 0.6623,
"step": 2149
},
{
"epoch": 0.7937484617277873,
"grad_norm": 0.9186368584632874,
"learning_rate": 4.80379247646347e-06,
"loss": 0.7312,
"step": 2150
},
{
"epoch": 0.7941176470588235,
"grad_norm": 0.8802003860473633,
"learning_rate": 4.8036035549947e-06,
"loss": 0.7202,
"step": 2151
},
{
"epoch": 0.7944868323898597,
"grad_norm": 0.8542637825012207,
"learning_rate": 4.803414546335262e-06,
"loss": 0.6933,
"step": 2152
},
{
"epoch": 0.7948560177208959,
"grad_norm": 0.8632619976997375,
"learning_rate": 4.803225450492311e-06,
"loss": 0.7034,
"step": 2153
},
{
"epoch": 0.7952252030519321,
"grad_norm": 0.9003282785415649,
"learning_rate": 4.803036267473003e-06,
"loss": 0.707,
"step": 2154
},
{
"epoch": 0.7955943883829683,
"grad_norm": 0.8903799057006836,
"learning_rate": 4.802846997284499e-06,
"loss": 0.7226,
"step": 2155
},
{
"epoch": 0.7959635737140044,
"grad_norm": 0.8972636461257935,
"learning_rate": 4.802657639933964e-06,
"loss": 0.7459,
"step": 2156
},
{
"epoch": 0.7963327590450406,
"grad_norm": 0.8760960102081299,
"learning_rate": 4.802468195428563e-06,
"loss": 0.6983,
"step": 2157
},
{
"epoch": 0.7967019443760768,
"grad_norm": 0.8755945563316345,
"learning_rate": 4.802278663775468e-06,
"loss": 0.6942,
"step": 2158
},
{
"epoch": 0.797071129707113,
"grad_norm": 0.8775022625923157,
"learning_rate": 4.8020890449818524e-06,
"loss": 0.7312,
"step": 2159
},
{
"epoch": 0.7974403150381492,
"grad_norm": 0.8853294253349304,
"learning_rate": 4.801899339054893e-06,
"loss": 0.7129,
"step": 2160
},
{
"epoch": 0.7978095003691853,
"grad_norm": 0.8880541920661926,
"learning_rate": 4.801709546001769e-06,
"loss": 0.6857,
"step": 2161
},
{
"epoch": 0.7981786857002215,
"grad_norm": 0.9117274880409241,
"learning_rate": 4.801519665829666e-06,
"loss": 0.7093,
"step": 2162
},
{
"epoch": 0.7985478710312577,
"grad_norm": 0.8845729827880859,
"learning_rate": 4.8013296985457705e-06,
"loss": 0.7214,
"step": 2163
},
{
"epoch": 0.7989170563622938,
"grad_norm": 0.8805612325668335,
"learning_rate": 4.801139644157272e-06,
"loss": 0.6788,
"step": 2164
},
{
"epoch": 0.7992862416933301,
"grad_norm": 0.8353918790817261,
"learning_rate": 4.800949502671364e-06,
"loss": 0.6798,
"step": 2165
},
{
"epoch": 0.7996554270243662,
"grad_norm": 0.8719606995582581,
"learning_rate": 4.800759274095243e-06,
"loss": 0.6992,
"step": 2166
},
{
"epoch": 0.8000246123554025,
"grad_norm": 0.9136744141578674,
"learning_rate": 4.800568958436111e-06,
"loss": 0.7132,
"step": 2167
},
{
"epoch": 0.8003937976864386,
"grad_norm": 0.8668189644813538,
"learning_rate": 4.800378555701168e-06,
"loss": 0.686,
"step": 2168
},
{
"epoch": 0.8007629830174747,
"grad_norm": 0.8641545176506042,
"learning_rate": 4.800188065897624e-06,
"loss": 0.6975,
"step": 2169
},
{
"epoch": 0.801132168348511,
"grad_norm": 0.8470262289047241,
"learning_rate": 4.799997489032687e-06,
"loss": 0.7277,
"step": 2170
},
{
"epoch": 0.8015013536795471,
"grad_norm": 0.8653181791305542,
"learning_rate": 4.799806825113571e-06,
"loss": 0.7211,
"step": 2171
},
{
"epoch": 0.8018705390105834,
"grad_norm": 0.8891727328300476,
"learning_rate": 4.799616074147493e-06,
"loss": 0.7161,
"step": 2172
},
{
"epoch": 0.8022397243416195,
"grad_norm": 0.8755497336387634,
"learning_rate": 4.799425236141672e-06,
"loss": 0.694,
"step": 2173
},
{
"epoch": 0.8026089096726556,
"grad_norm": 0.8740184903144836,
"learning_rate": 4.7992343111033314e-06,
"loss": 0.6935,
"step": 2174
},
{
"epoch": 0.8029780950036919,
"grad_norm": 0.885025143623352,
"learning_rate": 4.7990432990396985e-06,
"loss": 0.6768,
"step": 2175
},
{
"epoch": 0.803347280334728,
"grad_norm": 0.9006462097167969,
"learning_rate": 4.798852199958002e-06,
"loss": 0.7212,
"step": 2176
},
{
"epoch": 0.8037164656657643,
"grad_norm": 0.8840904831886292,
"learning_rate": 4.798661013865475e-06,
"loss": 0.7266,
"step": 2177
},
{
"epoch": 0.8040856509968004,
"grad_norm": 0.8590693473815918,
"learning_rate": 4.798469740769354e-06,
"loss": 0.6822,
"step": 2178
},
{
"epoch": 0.8044548363278365,
"grad_norm": 0.8712232112884521,
"learning_rate": 4.798278380676879e-06,
"loss": 0.724,
"step": 2179
},
{
"epoch": 0.8048240216588728,
"grad_norm": 0.9065150618553162,
"learning_rate": 4.798086933595293e-06,
"loss": 0.6891,
"step": 2180
},
{
"epoch": 0.8051932069899089,
"grad_norm": 0.8752925395965576,
"learning_rate": 4.797895399531841e-06,
"loss": 0.71,
"step": 2181
},
{
"epoch": 0.8055623923209451,
"grad_norm": 0.8934815526008606,
"learning_rate": 4.797703778493774e-06,
"loss": 0.7422,
"step": 2182
},
{
"epoch": 0.8059315776519813,
"grad_norm": 0.8814444541931152,
"learning_rate": 4.7975120704883435e-06,
"loss": 0.7572,
"step": 2183
},
{
"epoch": 0.8063007629830174,
"grad_norm": 0.9211199879646301,
"learning_rate": 4.797320275522806e-06,
"loss": 0.7112,
"step": 2184
},
{
"epoch": 0.8066699483140537,
"grad_norm": 0.8932773470878601,
"learning_rate": 4.7971283936044226e-06,
"loss": 0.7328,
"step": 2185
},
{
"epoch": 0.8070391336450898,
"grad_norm": 0.8870787620544434,
"learning_rate": 4.796936424740454e-06,
"loss": 0.7048,
"step": 2186
},
{
"epoch": 0.807408318976126,
"grad_norm": 0.8720287680625916,
"learning_rate": 4.796744368938166e-06,
"loss": 0.6703,
"step": 2187
},
{
"epoch": 0.8077775043071622,
"grad_norm": 0.8679975271224976,
"learning_rate": 4.79655222620483e-06,
"loss": 0.7078,
"step": 2188
},
{
"epoch": 0.8081466896381984,
"grad_norm": 0.8599095940589905,
"learning_rate": 4.796359996547715e-06,
"loss": 0.6839,
"step": 2189
},
{
"epoch": 0.8085158749692346,
"grad_norm": 0.8417700529098511,
"learning_rate": 4.7961676799741e-06,
"loss": 0.6757,
"step": 2190
},
{
"epoch": 0.8088850603002707,
"grad_norm": 0.8529911637306213,
"learning_rate": 4.795975276491262e-06,
"loss": 0.7033,
"step": 2191
},
{
"epoch": 0.8092542456313069,
"grad_norm": 0.8772951364517212,
"learning_rate": 4.7957827861064855e-06,
"loss": 0.6562,
"step": 2192
},
{
"epoch": 0.8096234309623431,
"grad_norm": 0.9334204196929932,
"learning_rate": 4.795590208827054e-06,
"loss": 0.7505,
"step": 2193
},
{
"epoch": 0.8099926162933793,
"grad_norm": 0.8638662695884705,
"learning_rate": 4.795397544660258e-06,
"loss": 0.6816,
"step": 2194
},
{
"epoch": 0.8103618016244155,
"grad_norm": 0.9105616211891174,
"learning_rate": 4.795204793613391e-06,
"loss": 0.714,
"step": 2195
},
{
"epoch": 0.8107309869554516,
"grad_norm": 0.8942384719848633,
"learning_rate": 4.7950119556937455e-06,
"loss": 0.7327,
"step": 2196
},
{
"epoch": 0.8111001722864878,
"grad_norm": 0.8448975086212158,
"learning_rate": 4.794819030908622e-06,
"loss": 0.6941,
"step": 2197
},
{
"epoch": 0.811469357617524,
"grad_norm": 0.912215530872345,
"learning_rate": 4.7946260192653215e-06,
"loss": 0.7216,
"step": 2198
},
{
"epoch": 0.8118385429485602,
"grad_norm": 0.8788778185844421,
"learning_rate": 4.794432920771152e-06,
"loss": 0.7399,
"step": 2199
},
{
"epoch": 0.8122077282795963,
"grad_norm": 0.8956362009048462,
"learning_rate": 4.794239735433419e-06,
"loss": 0.7282,
"step": 2200
},
{
"epoch": 0.8125769136106326,
"grad_norm": 0.9069665670394897,
"learning_rate": 4.7940464632594376e-06,
"loss": 0.7298,
"step": 2201
},
{
"epoch": 0.8129460989416687,
"grad_norm": 0.8730249404907227,
"learning_rate": 4.793853104256521e-06,
"loss": 0.7101,
"step": 2202
},
{
"epoch": 0.8133152842727049,
"grad_norm": 1.0605006217956543,
"learning_rate": 4.793659658431988e-06,
"loss": 0.7115,
"step": 2203
},
{
"epoch": 0.8136844696037411,
"grad_norm": 0.9141219258308411,
"learning_rate": 4.793466125793161e-06,
"loss": 0.7554,
"step": 2204
},
{
"epoch": 0.8140536549347772,
"grad_norm": 0.8501049876213074,
"learning_rate": 4.793272506347365e-06,
"loss": 0.6839,
"step": 2205
},
{
"epoch": 0.8144228402658135,
"grad_norm": 0.8409802317619324,
"learning_rate": 4.793078800101929e-06,
"loss": 0.6518,
"step": 2206
},
{
"epoch": 0.8147920255968496,
"grad_norm": 0.8758774995803833,
"learning_rate": 4.792885007064183e-06,
"loss": 0.7349,
"step": 2207
},
{
"epoch": 0.8151612109278858,
"grad_norm": 0.8612200021743774,
"learning_rate": 4.792691127241463e-06,
"loss": 0.6952,
"step": 2208
},
{
"epoch": 0.815530396258922,
"grad_norm": 0.8541130423545837,
"learning_rate": 4.792497160641108e-06,
"loss": 0.6723,
"step": 2209
},
{
"epoch": 0.8158995815899581,
"grad_norm": 0.884846568107605,
"learning_rate": 4.7923031072704595e-06,
"loss": 0.7165,
"step": 2210
},
{
"epoch": 0.8162687669209944,
"grad_norm": 0.8823457956314087,
"learning_rate": 4.792108967136861e-06,
"loss": 0.7599,
"step": 2211
},
{
"epoch": 0.8166379522520305,
"grad_norm": 0.8571475744247437,
"learning_rate": 4.791914740247663e-06,
"loss": 0.7122,
"step": 2212
},
{
"epoch": 0.8170071375830666,
"grad_norm": 0.8436484336853027,
"learning_rate": 4.791720426610214e-06,
"loss": 0.6781,
"step": 2213
},
{
"epoch": 0.8173763229141029,
"grad_norm": 0.8832083344459534,
"learning_rate": 4.791526026231871e-06,
"loss": 0.7202,
"step": 2214
},
{
"epoch": 0.817745508245139,
"grad_norm": 0.9136930108070374,
"learning_rate": 4.79133153911999e-06,
"loss": 0.756,
"step": 2215
},
{
"epoch": 0.8181146935761753,
"grad_norm": 0.87180095911026,
"learning_rate": 4.791136965281934e-06,
"loss": 0.6874,
"step": 2216
},
{
"epoch": 0.8184838789072114,
"grad_norm": 0.8829808831214905,
"learning_rate": 4.790942304725067e-06,
"loss": 0.6974,
"step": 2217
},
{
"epoch": 0.8188530642382476,
"grad_norm": 0.8646160960197449,
"learning_rate": 4.790747557456757e-06,
"loss": 0.704,
"step": 2218
},
{
"epoch": 0.8192222495692838,
"grad_norm": 0.8445609211921692,
"learning_rate": 4.790552723484375e-06,
"loss": 0.7028,
"step": 2219
},
{
"epoch": 0.8195914349003199,
"grad_norm": 0.8569789528846741,
"learning_rate": 4.7903578028152946e-06,
"loss": 0.7304,
"step": 2220
},
{
"epoch": 0.8199606202313562,
"grad_norm": 0.8919884562492371,
"learning_rate": 4.790162795456895e-06,
"loss": 0.7303,
"step": 2221
},
{
"epoch": 0.8203298055623923,
"grad_norm": 0.8782804012298584,
"learning_rate": 4.789967701416556e-06,
"loss": 0.6676,
"step": 2222
},
{
"epoch": 0.8206989908934285,
"grad_norm": 0.8411016464233398,
"learning_rate": 4.789772520701662e-06,
"loss": 0.6856,
"step": 2223
},
{
"epoch": 0.8210681762244647,
"grad_norm": 0.8851430416107178,
"learning_rate": 4.7895772533196e-06,
"loss": 0.6995,
"step": 2224
},
{
"epoch": 0.8214373615555008,
"grad_norm": 0.8756270408630371,
"learning_rate": 4.789381899277763e-06,
"loss": 0.6872,
"step": 2225
},
{
"epoch": 0.8218065468865371,
"grad_norm": 0.8848443031311035,
"learning_rate": 4.7891864585835435e-06,
"loss": 0.7128,
"step": 2226
},
{
"epoch": 0.8221757322175732,
"grad_norm": 0.8853998780250549,
"learning_rate": 4.788990931244338e-06,
"loss": 0.7123,
"step": 2227
},
{
"epoch": 0.8225449175486094,
"grad_norm": 0.8603769540786743,
"learning_rate": 4.788795317267549e-06,
"loss": 0.7094,
"step": 2228
},
{
"epoch": 0.8229141028796456,
"grad_norm": 0.8783960342407227,
"learning_rate": 4.7885996166605795e-06,
"loss": 0.6871,
"step": 2229
},
{
"epoch": 0.8232832882106818,
"grad_norm": 0.9068373441696167,
"learning_rate": 4.788403829430837e-06,
"loss": 0.7508,
"step": 2230
},
{
"epoch": 0.8236524735417179,
"grad_norm": 0.9134407043457031,
"learning_rate": 4.788207955585732e-06,
"loss": 0.685,
"step": 2231
},
{
"epoch": 0.8240216588727541,
"grad_norm": 0.8849846720695496,
"learning_rate": 4.788011995132678e-06,
"loss": 0.6984,
"step": 2232
},
{
"epoch": 0.8243908442037903,
"grad_norm": 0.8880661725997925,
"learning_rate": 4.787815948079092e-06,
"loss": 0.6916,
"step": 2233
},
{
"epoch": 0.8247600295348265,
"grad_norm": 0.8438676595687866,
"learning_rate": 4.787619814432394e-06,
"loss": 0.6781,
"step": 2234
},
{
"epoch": 0.8251292148658627,
"grad_norm": 0.8555399775505066,
"learning_rate": 4.787423594200007e-06,
"loss": 0.7133,
"step": 2235
},
{
"epoch": 0.8254984001968988,
"grad_norm": 0.8856319189071655,
"learning_rate": 4.787227287389361e-06,
"loss": 0.7102,
"step": 2236
},
{
"epoch": 0.825867585527935,
"grad_norm": 0.8559306859970093,
"learning_rate": 4.787030894007882e-06,
"loss": 0.6828,
"step": 2237
},
{
"epoch": 0.8262367708589712,
"grad_norm": 0.8558578491210938,
"learning_rate": 4.7868344140630076e-06,
"loss": 0.692,
"step": 2238
},
{
"epoch": 0.8266059561900074,
"grad_norm": 0.8673276901245117,
"learning_rate": 4.786637847562171e-06,
"loss": 0.7351,
"step": 2239
},
{
"epoch": 0.8269751415210436,
"grad_norm": 0.88542640209198,
"learning_rate": 4.786441194512814e-06,
"loss": 0.7116,
"step": 2240
},
{
"epoch": 0.8273443268520797,
"grad_norm": 0.8700323700904846,
"learning_rate": 4.786244454922379e-06,
"loss": 0.707,
"step": 2241
},
{
"epoch": 0.827713512183116,
"grad_norm": 0.8809252977371216,
"learning_rate": 4.7860476287983124e-06,
"loss": 0.715,
"step": 2242
},
{
"epoch": 0.8280826975141521,
"grad_norm": 0.9027058482170105,
"learning_rate": 4.785850716148066e-06,
"loss": 0.7354,
"step": 2243
},
{
"epoch": 0.8284518828451883,
"grad_norm": 0.8742669224739075,
"learning_rate": 4.7856537169790905e-06,
"loss": 0.6748,
"step": 2244
},
{
"epoch": 0.8288210681762245,
"grad_norm": 0.8710356950759888,
"learning_rate": 4.7854566312988435e-06,
"loss": 0.7133,
"step": 2245
},
{
"epoch": 0.8291902535072606,
"grad_norm": 0.9114298224449158,
"learning_rate": 4.785259459114784e-06,
"loss": 0.7176,
"step": 2246
},
{
"epoch": 0.8295594388382969,
"grad_norm": 0.8762310147285461,
"learning_rate": 4.785062200434376e-06,
"loss": 0.7265,
"step": 2247
},
{
"epoch": 0.829928624169333,
"grad_norm": 0.8757422566413879,
"learning_rate": 4.784864855265083e-06,
"loss": 0.6755,
"step": 2248
},
{
"epoch": 0.8302978095003691,
"grad_norm": 0.8273042440414429,
"learning_rate": 4.784667423614379e-06,
"loss": 0.6903,
"step": 2249
},
{
"epoch": 0.8306669948314054,
"grad_norm": 0.911002516746521,
"learning_rate": 4.784469905489732e-06,
"loss": 0.7196,
"step": 2250
},
{
"epoch": 0.8310361801624415,
"grad_norm": 0.8729998469352722,
"learning_rate": 4.784272300898621e-06,
"loss": 0.727,
"step": 2251
},
{
"epoch": 0.8314053654934778,
"grad_norm": 0.897255003452301,
"learning_rate": 4.784074609848524e-06,
"loss": 0.7145,
"step": 2252
},
{
"epoch": 0.8317745508245139,
"grad_norm": 0.8995161056518555,
"learning_rate": 4.783876832346924e-06,
"loss": 0.6983,
"step": 2253
},
{
"epoch": 0.83214373615555,
"grad_norm": 0.8717133402824402,
"learning_rate": 4.783678968401306e-06,
"loss": 0.7164,
"step": 2254
},
{
"epoch": 0.8325129214865863,
"grad_norm": 0.8860628604888916,
"learning_rate": 4.783481018019161e-06,
"loss": 0.6891,
"step": 2255
},
{
"epoch": 0.8328821068176224,
"grad_norm": 0.8644296526908875,
"learning_rate": 4.783282981207979e-06,
"loss": 0.7449,
"step": 2256
},
{
"epoch": 0.8332512921486587,
"grad_norm": 0.8703963160514832,
"learning_rate": 4.783084857975258e-06,
"loss": 0.7067,
"step": 2257
},
{
"epoch": 0.8336204774796948,
"grad_norm": 0.8384194374084473,
"learning_rate": 4.782886648328495e-06,
"loss": 0.6811,
"step": 2258
},
{
"epoch": 0.833989662810731,
"grad_norm": 0.8807311654090881,
"learning_rate": 4.7826883522751934e-06,
"loss": 0.7016,
"step": 2259
},
{
"epoch": 0.8343588481417672,
"grad_norm": 0.8632846474647522,
"learning_rate": 4.782489969822857e-06,
"loss": 0.6956,
"step": 2260
},
{
"epoch": 0.8347280334728033,
"grad_norm": 0.852558434009552,
"learning_rate": 4.7822915009789965e-06,
"loss": 0.7076,
"step": 2261
},
{
"epoch": 0.8350972188038396,
"grad_norm": 0.8728243708610535,
"learning_rate": 4.782092945751122e-06,
"loss": 0.6962,
"step": 2262
},
{
"epoch": 0.8354664041348757,
"grad_norm": 0.8713400363922119,
"learning_rate": 4.781894304146751e-06,
"loss": 0.7163,
"step": 2263
},
{
"epoch": 0.8358355894659119,
"grad_norm": 0.893301784992218,
"learning_rate": 4.7816955761734e-06,
"loss": 0.7268,
"step": 2264
},
{
"epoch": 0.8362047747969481,
"grad_norm": 0.854168713092804,
"learning_rate": 4.781496761838592e-06,
"loss": 0.6719,
"step": 2265
},
{
"epoch": 0.8365739601279842,
"grad_norm": 0.8409112095832825,
"learning_rate": 4.781297861149852e-06,
"loss": 0.7148,
"step": 2266
},
{
"epoch": 0.8369431454590204,
"grad_norm": 0.8801021575927734,
"learning_rate": 4.781098874114707e-06,
"loss": 0.7365,
"step": 2267
},
{
"epoch": 0.8373123307900566,
"grad_norm": 0.849139392375946,
"learning_rate": 4.780899800740689e-06,
"loss": 0.7219,
"step": 2268
},
{
"epoch": 0.8376815161210928,
"grad_norm": 0.8867851495742798,
"learning_rate": 4.780700641035335e-06,
"loss": 0.7369,
"step": 2269
},
{
"epoch": 0.838050701452129,
"grad_norm": 0.877516508102417,
"learning_rate": 4.780501395006181e-06,
"loss": 0.7083,
"step": 2270
},
{
"epoch": 0.8384198867831651,
"grad_norm": 0.8607859015464783,
"learning_rate": 4.7803020626607686e-06,
"loss": 0.6921,
"step": 2271
},
{
"epoch": 0.8387890721142013,
"grad_norm": 0.8483718633651733,
"learning_rate": 4.7801026440066425e-06,
"loss": 0.6989,
"step": 2272
},
{
"epoch": 0.8391582574452375,
"grad_norm": 0.8639686107635498,
"learning_rate": 4.779903139051352e-06,
"loss": 0.6932,
"step": 2273
},
{
"epoch": 0.8395274427762737,
"grad_norm": 0.8545430302619934,
"learning_rate": 4.779703547802446e-06,
"loss": 0.7133,
"step": 2274
},
{
"epoch": 0.8398966281073099,
"grad_norm": 0.8846385478973389,
"learning_rate": 4.7795038702674816e-06,
"loss": 0.6923,
"step": 2275
},
{
"epoch": 0.840265813438346,
"grad_norm": 0.8536352515220642,
"learning_rate": 4.7793041064540135e-06,
"loss": 0.6885,
"step": 2276
},
{
"epoch": 0.8406349987693822,
"grad_norm": 0.8723476529121399,
"learning_rate": 4.779104256369605e-06,
"loss": 0.7001,
"step": 2277
},
{
"epoch": 0.8410041841004184,
"grad_norm": 0.8628838062286377,
"learning_rate": 4.77890432002182e-06,
"loss": 0.6879,
"step": 2278
},
{
"epoch": 0.8413733694314546,
"grad_norm": 0.8553763031959534,
"learning_rate": 4.778704297418226e-06,
"loss": 0.6245,
"step": 2279
},
{
"epoch": 0.8417425547624908,
"grad_norm": 0.9173882603645325,
"learning_rate": 4.778504188566393e-06,
"loss": 0.7236,
"step": 2280
},
{
"epoch": 0.842111740093527,
"grad_norm": 0.8579378128051758,
"learning_rate": 4.7783039934738955e-06,
"loss": 0.7327,
"step": 2281
},
{
"epoch": 0.8424809254245631,
"grad_norm": 0.8806033134460449,
"learning_rate": 4.778103712148311e-06,
"loss": 0.7236,
"step": 2282
},
{
"epoch": 0.8428501107555993,
"grad_norm": 0.8968467116355896,
"learning_rate": 4.77790334459722e-06,
"loss": 0.744,
"step": 2283
},
{
"epoch": 0.8432192960866355,
"grad_norm": 0.8785833120346069,
"learning_rate": 4.777702890828206e-06,
"loss": 0.6963,
"step": 2284
},
{
"epoch": 0.8435884814176716,
"grad_norm": 0.8928925395011902,
"learning_rate": 4.777502350848857e-06,
"loss": 0.7089,
"step": 2285
},
{
"epoch": 0.8439576667487079,
"grad_norm": 0.8602617383003235,
"learning_rate": 4.777301724666763e-06,
"loss": 0.7025,
"step": 2286
},
{
"epoch": 0.844326852079744,
"grad_norm": 0.9113879799842834,
"learning_rate": 4.777101012289517e-06,
"loss": 0.6959,
"step": 2287
},
{
"epoch": 0.8446960374107803,
"grad_norm": 0.8733735680580139,
"learning_rate": 4.776900213724717e-06,
"loss": 0.7307,
"step": 2288
},
{
"epoch": 0.8450652227418164,
"grad_norm": 0.8886083364486694,
"learning_rate": 4.776699328979961e-06,
"loss": 0.7134,
"step": 2289
},
{
"epoch": 0.8454344080728525,
"grad_norm": 0.9018360376358032,
"learning_rate": 4.776498358062855e-06,
"loss": 0.7246,
"step": 2290
},
{
"epoch": 0.8458035934038888,
"grad_norm": 0.9006841778755188,
"learning_rate": 4.776297300981005e-06,
"loss": 0.7188,
"step": 2291
},
{
"epoch": 0.8461727787349249,
"grad_norm": 0.8728944063186646,
"learning_rate": 4.77609615774202e-06,
"loss": 0.685,
"step": 2292
},
{
"epoch": 0.8465419640659612,
"grad_norm": 0.8995937705039978,
"learning_rate": 4.775894928353514e-06,
"loss": 0.6868,
"step": 2293
},
{
"epoch": 0.8469111493969973,
"grad_norm": 0.8754889369010925,
"learning_rate": 4.7756936128231026e-06,
"loss": 0.7013,
"step": 2294
},
{
"epoch": 0.8472803347280334,
"grad_norm": 0.8546683192253113,
"learning_rate": 4.775492211158407e-06,
"loss": 0.6874,
"step": 2295
},
{
"epoch": 0.8476495200590697,
"grad_norm": 0.8704326152801514,
"learning_rate": 4.775290723367048e-06,
"loss": 0.7295,
"step": 2296
},
{
"epoch": 0.8480187053901058,
"grad_norm": 0.9398274421691895,
"learning_rate": 4.7750891494566555e-06,
"loss": 0.7252,
"step": 2297
},
{
"epoch": 0.8483878907211421,
"grad_norm": 0.8432691097259521,
"learning_rate": 4.774887489434855e-06,
"loss": 0.6964,
"step": 2298
},
{
"epoch": 0.8487570760521782,
"grad_norm": 0.8544699549674988,
"learning_rate": 4.774685743309282e-06,
"loss": 0.6654,
"step": 2299
},
{
"epoch": 0.8491262613832143,
"grad_norm": 0.8489146828651428,
"learning_rate": 4.774483911087571e-06,
"loss": 0.7227,
"step": 2300
},
{
"epoch": 0.8494954467142506,
"grad_norm": 0.882328987121582,
"learning_rate": 4.774281992777361e-06,
"loss": 0.7194,
"step": 2301
},
{
"epoch": 0.8498646320452867,
"grad_norm": 0.8312026858329773,
"learning_rate": 4.7740799883862966e-06,
"loss": 0.6877,
"step": 2302
},
{
"epoch": 0.8502338173763229,
"grad_norm": 0.8677752017974854,
"learning_rate": 4.7738778979220215e-06,
"loss": 0.7522,
"step": 2303
},
{
"epoch": 0.8506030027073591,
"grad_norm": 0.8952152729034424,
"learning_rate": 4.773675721392186e-06,
"loss": 0.7023,
"step": 2304
},
{
"epoch": 0.8509721880383952,
"grad_norm": 0.8487045764923096,
"learning_rate": 4.773473458804442e-06,
"loss": 0.6788,
"step": 2305
},
{
"epoch": 0.8513413733694315,
"grad_norm": 0.8884228467941284,
"learning_rate": 4.7732711101664455e-06,
"loss": 0.6975,
"step": 2306
},
{
"epoch": 0.8517105587004676,
"grad_norm": 0.8592703342437744,
"learning_rate": 4.773068675485854e-06,
"loss": 0.6663,
"step": 2307
},
{
"epoch": 0.8520797440315038,
"grad_norm": 0.8684633374214172,
"learning_rate": 4.772866154770331e-06,
"loss": 0.7363,
"step": 2308
},
{
"epoch": 0.85244892936254,
"grad_norm": 0.8979021906852722,
"learning_rate": 4.772663548027542e-06,
"loss": 0.7207,
"step": 2309
},
{
"epoch": 0.8528181146935762,
"grad_norm": 0.8448242545127869,
"learning_rate": 4.772460855265154e-06,
"loss": 0.6676,
"step": 2310
},
{
"epoch": 0.8531873000246124,
"grad_norm": 0.8736171722412109,
"learning_rate": 4.77225807649084e-06,
"loss": 0.6814,
"step": 2311
},
{
"epoch": 0.8535564853556485,
"grad_norm": 0.908427357673645,
"learning_rate": 4.772055211712276e-06,
"loss": 0.7004,
"step": 2312
},
{
"epoch": 0.8539256706866847,
"grad_norm": 0.9255645275115967,
"learning_rate": 4.771852260937138e-06,
"loss": 0.7046,
"step": 2313
},
{
"epoch": 0.8542948560177209,
"grad_norm": 0.8731818795204163,
"learning_rate": 4.771649224173109e-06,
"loss": 0.6838,
"step": 2314
},
{
"epoch": 0.8546640413487571,
"grad_norm": 0.9106831550598145,
"learning_rate": 4.7714461014278745e-06,
"loss": 0.6892,
"step": 2315
},
{
"epoch": 0.8550332266797932,
"grad_norm": 0.9361001253128052,
"learning_rate": 4.771242892709121e-06,
"loss": 0.7194,
"step": 2316
},
{
"epoch": 0.8554024120108294,
"grad_norm": 0.9008825421333313,
"learning_rate": 4.771039598024542e-06,
"loss": 0.7086,
"step": 2317
},
{
"epoch": 0.8557715973418656,
"grad_norm": 0.8849498629570007,
"learning_rate": 4.77083621738183e-06,
"loss": 0.7,
"step": 2318
},
{
"epoch": 0.8561407826729018,
"grad_norm": 0.9198765158653259,
"learning_rate": 4.770632750788685e-06,
"loss": 0.7492,
"step": 2319
},
{
"epoch": 0.856509968003938,
"grad_norm": 0.8527234196662903,
"learning_rate": 4.770429198252806e-06,
"loss": 0.7013,
"step": 2320
},
{
"epoch": 0.8568791533349741,
"grad_norm": 0.8671658635139465,
"learning_rate": 4.770225559781899e-06,
"loss": 0.6557,
"step": 2321
},
{
"epoch": 0.8572483386660104,
"grad_norm": 0.9001504778862,
"learning_rate": 4.77002183538367e-06,
"loss": 0.7024,
"step": 2322
},
{
"epoch": 0.8576175239970465,
"grad_norm": 0.8563042879104614,
"learning_rate": 4.769818025065832e-06,
"loss": 0.6986,
"step": 2323
},
{
"epoch": 0.8579867093280827,
"grad_norm": 0.8947487473487854,
"learning_rate": 4.769614128836098e-06,
"loss": 0.696,
"step": 2324
},
{
"epoch": 0.8583558946591189,
"grad_norm": 0.8475277423858643,
"learning_rate": 4.769410146702186e-06,
"loss": 0.6987,
"step": 2325
},
{
"epoch": 0.858725079990155,
"grad_norm": 0.8660178184509277,
"learning_rate": 4.769206078671815e-06,
"loss": 0.7162,
"step": 2326
},
{
"epoch": 0.8590942653211913,
"grad_norm": 0.9072052836418152,
"learning_rate": 4.769001924752711e-06,
"loss": 0.7051,
"step": 2327
},
{
"epoch": 0.8594634506522274,
"grad_norm": 0.893517255783081,
"learning_rate": 4.7687976849526e-06,
"loss": 0.6847,
"step": 2328
},
{
"epoch": 0.8598326359832636,
"grad_norm": 0.8827802538871765,
"learning_rate": 4.768593359279212e-06,
"loss": 0.732,
"step": 2329
},
{
"epoch": 0.8602018213142998,
"grad_norm": 0.8519994616508484,
"learning_rate": 4.768388947740282e-06,
"loss": 0.7077,
"step": 2330
},
{
"epoch": 0.8605710066453359,
"grad_norm": 0.8811690211296082,
"learning_rate": 4.768184450343546e-06,
"loss": 0.724,
"step": 2331
},
{
"epoch": 0.8609401919763722,
"grad_norm": 0.8675876259803772,
"learning_rate": 4.7679798670967446e-06,
"loss": 0.723,
"step": 2332
},
{
"epoch": 0.8613093773074083,
"grad_norm": 0.8784676194190979,
"learning_rate": 4.767775198007621e-06,
"loss": 0.7162,
"step": 2333
},
{
"epoch": 0.8616785626384444,
"grad_norm": 0.904371440410614,
"learning_rate": 4.767570443083922e-06,
"loss": 0.746,
"step": 2334
},
{
"epoch": 0.8620477479694807,
"grad_norm": 0.9030969738960266,
"learning_rate": 4.767365602333397e-06,
"loss": 0.7272,
"step": 2335
},
{
"epoch": 0.8624169333005168,
"grad_norm": 0.9071660041809082,
"learning_rate": 4.7671606757638e-06,
"loss": 0.7358,
"step": 2336
},
{
"epoch": 0.8627861186315531,
"grad_norm": 0.8928775191307068,
"learning_rate": 4.766955663382887e-06,
"loss": 0.7025,
"step": 2337
},
{
"epoch": 0.8631553039625892,
"grad_norm": 0.8183935880661011,
"learning_rate": 4.766750565198417e-06,
"loss": 0.6496,
"step": 2338
},
{
"epoch": 0.8635244892936254,
"grad_norm": 0.9024012088775635,
"learning_rate": 4.7665453812181535e-06,
"loss": 0.7378,
"step": 2339
},
{
"epoch": 0.8638936746246616,
"grad_norm": 0.8578570485115051,
"learning_rate": 4.766340111449863e-06,
"loss": 0.7127,
"step": 2340
},
{
"epoch": 0.8642628599556977,
"grad_norm": 0.8736898303031921,
"learning_rate": 4.766134755901315e-06,
"loss": 0.7021,
"step": 2341
},
{
"epoch": 0.864632045286734,
"grad_norm": 0.8850502371788025,
"learning_rate": 4.765929314580281e-06,
"loss": 0.7092,
"step": 2342
},
{
"epoch": 0.8650012306177701,
"grad_norm": 0.8683915734291077,
"learning_rate": 4.765723787494538e-06,
"loss": 0.6966,
"step": 2343
},
{
"epoch": 0.8653704159488063,
"grad_norm": 0.8678285479545593,
"learning_rate": 4.765518174651864e-06,
"loss": 0.6836,
"step": 2344
},
{
"epoch": 0.8657396012798425,
"grad_norm": 0.8723390698432922,
"learning_rate": 4.7653124760600435e-06,
"loss": 0.6934,
"step": 2345
},
{
"epoch": 0.8661087866108786,
"grad_norm": 0.8977248072624207,
"learning_rate": 4.7651066917268595e-06,
"loss": 0.6904,
"step": 2346
},
{
"epoch": 0.8664779719419149,
"grad_norm": 0.8875908255577087,
"learning_rate": 4.764900821660102e-06,
"loss": 0.7268,
"step": 2347
},
{
"epoch": 0.866847157272951,
"grad_norm": 0.8535262942314148,
"learning_rate": 4.764694865867564e-06,
"loss": 0.6876,
"step": 2348
},
{
"epoch": 0.8672163426039872,
"grad_norm": 0.8672966361045837,
"learning_rate": 4.76448882435704e-06,
"loss": 0.7274,
"step": 2349
},
{
"epoch": 0.8675855279350234,
"grad_norm": 0.8733333945274353,
"learning_rate": 4.764282697136328e-06,
"loss": 0.7304,
"step": 2350
},
{
"epoch": 0.8679547132660596,
"grad_norm": 0.8675313591957092,
"learning_rate": 4.764076484213232e-06,
"loss": 0.73,
"step": 2351
},
{
"epoch": 0.8683238985970957,
"grad_norm": 0.9445770382881165,
"learning_rate": 4.763870185595554e-06,
"loss": 0.7039,
"step": 2352
},
{
"epoch": 0.8686930839281319,
"grad_norm": 0.8547798991203308,
"learning_rate": 4.763663801291104e-06,
"loss": 0.6863,
"step": 2353
},
{
"epoch": 0.8690622692591681,
"grad_norm": 0.8876418471336365,
"learning_rate": 4.763457331307695e-06,
"loss": 0.7051,
"step": 2354
},
{
"epoch": 0.8694314545902043,
"grad_norm": 0.8787064552307129,
"learning_rate": 4.763250775653139e-06,
"loss": 0.667,
"step": 2355
},
{
"epoch": 0.8698006399212405,
"grad_norm": 0.8645234704017639,
"learning_rate": 4.763044134335256e-06,
"loss": 0.7211,
"step": 2356
},
{
"epoch": 0.8701698252522766,
"grad_norm": 0.8852728605270386,
"learning_rate": 4.762837407361866e-06,
"loss": 0.6929,
"step": 2357
},
{
"epoch": 0.8705390105833128,
"grad_norm": 0.8681321144104004,
"learning_rate": 4.7626305947407944e-06,
"loss": 0.6783,
"step": 2358
},
{
"epoch": 0.870908195914349,
"grad_norm": 0.8449599146842957,
"learning_rate": 4.7624236964798695e-06,
"loss": 0.6916,
"step": 2359
},
{
"epoch": 0.8712773812453852,
"grad_norm": 0.8922649621963501,
"learning_rate": 4.762216712586922e-06,
"loss": 0.7218,
"step": 2360
},
{
"epoch": 0.8716465665764214,
"grad_norm": 0.8748049736022949,
"learning_rate": 4.762009643069786e-06,
"loss": 0.705,
"step": 2361
},
{
"epoch": 0.8720157519074575,
"grad_norm": 0.90827476978302,
"learning_rate": 4.761802487936298e-06,
"loss": 0.7234,
"step": 2362
},
{
"epoch": 0.8723849372384938,
"grad_norm": 0.8977128863334656,
"learning_rate": 4.7615952471943006e-06,
"loss": 0.7423,
"step": 2363
},
{
"epoch": 0.8727541225695299,
"grad_norm": 0.8443285822868347,
"learning_rate": 4.761387920851636e-06,
"loss": 0.6924,
"step": 2364
},
{
"epoch": 0.8731233079005661,
"grad_norm": 0.8841611742973328,
"learning_rate": 4.761180508916152e-06,
"loss": 0.6902,
"step": 2365
},
{
"epoch": 0.8734924932316023,
"grad_norm": 0.8554913401603699,
"learning_rate": 4.760973011395701e-06,
"loss": 0.6946,
"step": 2366
},
{
"epoch": 0.8738616785626384,
"grad_norm": 0.8738844990730286,
"learning_rate": 4.760765428298134e-06,
"loss": 0.6728,
"step": 2367
},
{
"epoch": 0.8742308638936747,
"grad_norm": 0.8898583650588989,
"learning_rate": 4.760557759631309e-06,
"loss": 0.7027,
"step": 2368
},
{
"epoch": 0.8746000492247108,
"grad_norm": 0.8664471507072449,
"learning_rate": 4.760350005403086e-06,
"loss": 0.7114,
"step": 2369
},
{
"epoch": 0.8749692345557469,
"grad_norm": 0.902643084526062,
"learning_rate": 4.76014216562133e-06,
"loss": 0.7317,
"step": 2370
},
{
"epoch": 0.8753384198867832,
"grad_norm": 0.8991020917892456,
"learning_rate": 4.759934240293906e-06,
"loss": 0.7036,
"step": 2371
},
{
"epoch": 0.8757076052178193,
"grad_norm": 0.8848997950553894,
"learning_rate": 4.759726229428683e-06,
"loss": 0.7416,
"step": 2372
},
{
"epoch": 0.8760767905488556,
"grad_norm": 0.8591799736022949,
"learning_rate": 4.759518133033536e-06,
"loss": 0.6713,
"step": 2373
},
{
"epoch": 0.8764459758798917,
"grad_norm": 0.8584043979644775,
"learning_rate": 4.7593099511163405e-06,
"loss": 0.6793,
"step": 2374
},
{
"epoch": 0.8768151612109278,
"grad_norm": 0.8347330689430237,
"learning_rate": 4.759101683684977e-06,
"loss": 0.6585,
"step": 2375
},
{
"epoch": 0.8771843465419641,
"grad_norm": 0.8343052864074707,
"learning_rate": 4.7588933307473275e-06,
"loss": 0.6982,
"step": 2376
},
{
"epoch": 0.8775535318730002,
"grad_norm": 0.8740595579147339,
"learning_rate": 4.758684892311278e-06,
"loss": 0.6836,
"step": 2377
},
{
"epoch": 0.8779227172040365,
"grad_norm": 0.8705688118934631,
"learning_rate": 4.758476368384719e-06,
"loss": 0.6859,
"step": 2378
},
{
"epoch": 0.8782919025350726,
"grad_norm": 1.0071030855178833,
"learning_rate": 4.758267758975541e-06,
"loss": 0.7224,
"step": 2379
},
{
"epoch": 0.8786610878661087,
"grad_norm": 0.8606764078140259,
"learning_rate": 4.758059064091642e-06,
"loss": 0.6912,
"step": 2380
},
{
"epoch": 0.879030273197145,
"grad_norm": 0.9006412625312805,
"learning_rate": 4.75785028374092e-06,
"loss": 0.7044,
"step": 2381
},
{
"epoch": 0.8793994585281811,
"grad_norm": 0.8861314058303833,
"learning_rate": 4.757641417931278e-06,
"loss": 0.6825,
"step": 2382
},
{
"epoch": 0.8797686438592174,
"grad_norm": 0.8772760033607483,
"learning_rate": 4.75743246667062e-06,
"loss": 0.6987,
"step": 2383
},
{
"epoch": 0.8801378291902535,
"grad_norm": 0.8669841289520264,
"learning_rate": 4.757223429966855e-06,
"loss": 0.6761,
"step": 2384
},
{
"epoch": 0.8805070145212897,
"grad_norm": 0.8585817813873291,
"learning_rate": 4.757014307827897e-06,
"loss": 0.6793,
"step": 2385
},
{
"epoch": 0.8808761998523259,
"grad_norm": 0.8737130165100098,
"learning_rate": 4.756805100261658e-06,
"loss": 0.6956,
"step": 2386
},
{
"epoch": 0.881245385183362,
"grad_norm": 0.8613405823707581,
"learning_rate": 4.75659580727606e-06,
"loss": 0.6852,
"step": 2387
},
{
"epoch": 0.8816145705143982,
"grad_norm": 0.8970152735710144,
"learning_rate": 4.756386428879022e-06,
"loss": 0.7446,
"step": 2388
},
{
"epoch": 0.8819837558454344,
"grad_norm": 0.8712779879570007,
"learning_rate": 4.75617696507847e-06,
"loss": 0.7216,
"step": 2389
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.8699203133583069,
"learning_rate": 4.755967415882331e-06,
"loss": 0.7225,
"step": 2390
},
{
"epoch": 0.8827221265075068,
"grad_norm": 0.8482356071472168,
"learning_rate": 4.755757781298539e-06,
"loss": 0.7013,
"step": 2391
},
{
"epoch": 0.883091311838543,
"grad_norm": 0.8718639016151428,
"learning_rate": 4.7555480613350255e-06,
"loss": 0.7017,
"step": 2392
},
{
"epoch": 0.8834604971695791,
"grad_norm": 0.886998176574707,
"learning_rate": 4.7553382559997305e-06,
"loss": 0.7333,
"step": 2393
},
{
"epoch": 0.8838296825006153,
"grad_norm": 0.9135888814926147,
"learning_rate": 4.755128365300594e-06,
"loss": 0.7033,
"step": 2394
},
{
"epoch": 0.8841988678316515,
"grad_norm": 0.8773205280303955,
"learning_rate": 4.7549183892455605e-06,
"loss": 0.71,
"step": 2395
},
{
"epoch": 0.8845680531626877,
"grad_norm": 0.8733408451080322,
"learning_rate": 4.754708327842579e-06,
"loss": 0.6728,
"step": 2396
},
{
"epoch": 0.8849372384937239,
"grad_norm": 0.86775141954422,
"learning_rate": 4.7544981810995976e-06,
"loss": 0.6991,
"step": 2397
},
{
"epoch": 0.88530642382476,
"grad_norm": 0.8773441314697266,
"learning_rate": 4.7542879490245705e-06,
"loss": 0.7403,
"step": 2398
},
{
"epoch": 0.8856756091557962,
"grad_norm": 0.8608452677726746,
"learning_rate": 4.754077631625457e-06,
"loss": 0.6506,
"step": 2399
},
{
"epoch": 0.8860447944868324,
"grad_norm": 0.866370677947998,
"learning_rate": 4.753867228910217e-06,
"loss": 0.7008,
"step": 2400
},
{
"epoch": 0.8864139798178686,
"grad_norm": 0.8670253753662109,
"learning_rate": 4.753656740886814e-06,
"loss": 0.7363,
"step": 2401
},
{
"epoch": 0.8867831651489048,
"grad_norm": 0.8610836267471313,
"learning_rate": 4.753446167563214e-06,
"loss": 0.6961,
"step": 2402
},
{
"epoch": 0.8871523504799409,
"grad_norm": 0.8401498198509216,
"learning_rate": 4.753235508947388e-06,
"loss": 0.6867,
"step": 2403
},
{
"epoch": 0.8875215358109771,
"grad_norm": 0.8600946068763733,
"learning_rate": 4.753024765047309e-06,
"loss": 0.6911,
"step": 2404
},
{
"epoch": 0.8878907211420133,
"grad_norm": 0.8780009150505066,
"learning_rate": 4.752813935870954e-06,
"loss": 0.7171,
"step": 2405
},
{
"epoch": 0.8882599064730494,
"grad_norm": 0.8487856984138489,
"learning_rate": 4.752603021426302e-06,
"loss": 0.6779,
"step": 2406
},
{
"epoch": 0.8886290918040857,
"grad_norm": 0.8483532071113586,
"learning_rate": 4.752392021721337e-06,
"loss": 0.6661,
"step": 2407
},
{
"epoch": 0.8889982771351218,
"grad_norm": 0.858508825302124,
"learning_rate": 4.752180936764044e-06,
"loss": 0.715,
"step": 2408
},
{
"epoch": 0.889367462466158,
"grad_norm": 0.850054919719696,
"learning_rate": 4.751969766562414e-06,
"loss": 0.6773,
"step": 2409
},
{
"epoch": 0.8897366477971942,
"grad_norm": 0.8784133195877075,
"learning_rate": 4.751758511124439e-06,
"loss": 0.6924,
"step": 2410
},
{
"epoch": 0.8901058331282303,
"grad_norm": 0.9067288041114807,
"learning_rate": 4.751547170458115e-06,
"loss": 0.698,
"step": 2411
},
{
"epoch": 0.8904750184592666,
"grad_norm": 0.8615005016326904,
"learning_rate": 4.751335744571441e-06,
"loss": 0.6891,
"step": 2412
},
{
"epoch": 0.8908442037903027,
"grad_norm": 0.8885693550109863,
"learning_rate": 4.75112423347242e-06,
"loss": 0.7179,
"step": 2413
},
{
"epoch": 0.891213389121339,
"grad_norm": 0.8749710321426392,
"learning_rate": 4.750912637169057e-06,
"loss": 0.7343,
"step": 2414
},
{
"epoch": 0.8915825744523751,
"grad_norm": 0.8808736205101013,
"learning_rate": 4.750700955669362e-06,
"loss": 0.7388,
"step": 2415
},
{
"epoch": 0.8919517597834112,
"grad_norm": 0.8593876957893372,
"learning_rate": 4.750489188981345e-06,
"loss": 0.6887,
"step": 2416
},
{
"epoch": 0.8923209451144475,
"grad_norm": 0.8734657168388367,
"learning_rate": 4.7502773371130225e-06,
"loss": 0.6933,
"step": 2417
},
{
"epoch": 0.8926901304454836,
"grad_norm": 0.8681308031082153,
"learning_rate": 4.750065400072413e-06,
"loss": 0.6763,
"step": 2418
},
{
"epoch": 0.8930593157765198,
"grad_norm": 0.8781231641769409,
"learning_rate": 4.7498533778675386e-06,
"loss": 0.6863,
"step": 2419
},
{
"epoch": 0.893428501107556,
"grad_norm": 1.038848876953125,
"learning_rate": 4.749641270506424e-06,
"loss": 0.6943,
"step": 2420
},
{
"epoch": 0.8937976864385921,
"grad_norm": 0.8760762214660645,
"learning_rate": 4.749429077997098e-06,
"loss": 0.7224,
"step": 2421
},
{
"epoch": 0.8941668717696284,
"grad_norm": 0.8649592995643616,
"learning_rate": 4.7492168003475894e-06,
"loss": 0.7052,
"step": 2422
},
{
"epoch": 0.8945360571006645,
"grad_norm": 0.8721056580543518,
"learning_rate": 4.749004437565936e-06,
"loss": 0.7032,
"step": 2423
},
{
"epoch": 0.8949052424317007,
"grad_norm": 0.9054531455039978,
"learning_rate": 4.748791989660174e-06,
"loss": 0.7161,
"step": 2424
},
{
"epoch": 0.8952744277627369,
"grad_norm": 0.8350791335105896,
"learning_rate": 4.748579456638346e-06,
"loss": 0.6682,
"step": 2425
},
{
"epoch": 0.895643613093773,
"grad_norm": 0.8753942251205444,
"learning_rate": 4.748366838508494e-06,
"loss": 0.6904,
"step": 2426
},
{
"epoch": 0.8960127984248093,
"grad_norm": 0.9068854451179504,
"learning_rate": 4.748154135278667e-06,
"loss": 0.7247,
"step": 2427
},
{
"epoch": 0.8963819837558454,
"grad_norm": 0.9158604145050049,
"learning_rate": 4.747941346956916e-06,
"loss": 0.7043,
"step": 2428
},
{
"epoch": 0.8967511690868816,
"grad_norm": 0.8779573440551758,
"learning_rate": 4.747728473551294e-06,
"loss": 0.7254,
"step": 2429
},
{
"epoch": 0.8971203544179178,
"grad_norm": 0.8369114398956299,
"learning_rate": 4.7475155150698585e-06,
"loss": 0.687,
"step": 2430
},
{
"epoch": 0.897489539748954,
"grad_norm": 0.8734737038612366,
"learning_rate": 4.747302471520671e-06,
"loss": 0.7011,
"step": 2431
},
{
"epoch": 0.8978587250799902,
"grad_norm": 0.8805413246154785,
"learning_rate": 4.747089342911793e-06,
"loss": 0.6706,
"step": 2432
},
{
"epoch": 0.8982279104110263,
"grad_norm": 0.8578121662139893,
"learning_rate": 4.746876129251293e-06,
"loss": 0.666,
"step": 2433
},
{
"epoch": 0.8985970957420625,
"grad_norm": 0.8652933835983276,
"learning_rate": 4.746662830547242e-06,
"loss": 0.6837,
"step": 2434
},
{
"epoch": 0.8989662810730987,
"grad_norm": 0.8619236350059509,
"learning_rate": 4.74644944680771e-06,
"loss": 0.7021,
"step": 2435
},
{
"epoch": 0.8993354664041349,
"grad_norm": 0.9408307075500488,
"learning_rate": 4.746235978040776e-06,
"loss": 0.7223,
"step": 2436
},
{
"epoch": 0.899704651735171,
"grad_norm": 0.8929667472839355,
"learning_rate": 4.74602242425452e-06,
"loss": 0.7132,
"step": 2437
},
{
"epoch": 0.9000738370662072,
"grad_norm": 0.8759931921958923,
"learning_rate": 4.745808785457023e-06,
"loss": 0.7078,
"step": 2438
},
{
"epoch": 0.9004430223972434,
"grad_norm": 0.8628905415534973,
"learning_rate": 4.745595061656372e-06,
"loss": 0.685,
"step": 2439
},
{
"epoch": 0.9008122077282796,
"grad_norm": 0.8453993797302246,
"learning_rate": 4.745381252860658e-06,
"loss": 0.6745,
"step": 2440
},
{
"epoch": 0.9011813930593158,
"grad_norm": 0.8623847365379333,
"learning_rate": 4.745167359077971e-06,
"loss": 0.6999,
"step": 2441
},
{
"epoch": 0.9015505783903519,
"grad_norm": 0.8913419246673584,
"learning_rate": 4.7449533803164085e-06,
"loss": 0.7087,
"step": 2442
},
{
"epoch": 0.9019197637213882,
"grad_norm": 0.8496670722961426,
"learning_rate": 4.744739316584069e-06,
"loss": 0.7265,
"step": 2443
},
{
"epoch": 0.9022889490524243,
"grad_norm": 0.8533394932746887,
"learning_rate": 4.7445251678890555e-06,
"loss": 0.7205,
"step": 2444
},
{
"epoch": 0.9026581343834605,
"grad_norm": 0.886719286441803,
"learning_rate": 4.744310934239472e-06,
"loss": 0.7136,
"step": 2445
},
{
"epoch": 0.9030273197144967,
"grad_norm": 0.8670817613601685,
"learning_rate": 4.744096615643428e-06,
"loss": 0.7349,
"step": 2446
},
{
"epoch": 0.9033965050455328,
"grad_norm": 0.8604776263237,
"learning_rate": 4.743882212109036e-06,
"loss": 0.6867,
"step": 2447
},
{
"epoch": 0.9037656903765691,
"grad_norm": 0.8604888916015625,
"learning_rate": 4.74366772364441e-06,
"loss": 0.6832,
"step": 2448
},
{
"epoch": 0.9041348757076052,
"grad_norm": 0.9036632776260376,
"learning_rate": 4.743453150257668e-06,
"loss": 0.7155,
"step": 2449
},
{
"epoch": 0.9045040610386414,
"grad_norm": 0.8700803518295288,
"learning_rate": 4.743238491956934e-06,
"loss": 0.6916,
"step": 2450
},
{
"epoch": 0.9048732463696776,
"grad_norm": 0.8630246520042419,
"learning_rate": 4.74302374875033e-06,
"loss": 0.6927,
"step": 2451
},
{
"epoch": 0.9052424317007137,
"grad_norm": 0.8841362595558167,
"learning_rate": 4.7428089206459845e-06,
"loss": 0.7137,
"step": 2452
},
{
"epoch": 0.90561161703175,
"grad_norm": 0.8694362044334412,
"learning_rate": 4.742594007652031e-06,
"loss": 0.6853,
"step": 2453
},
{
"epoch": 0.9059808023627861,
"grad_norm": 0.8494770526885986,
"learning_rate": 4.7423790097766006e-06,
"loss": 0.6711,
"step": 2454
},
{
"epoch": 0.9063499876938222,
"grad_norm": 0.8646619915962219,
"learning_rate": 4.742163927027833e-06,
"loss": 0.7179,
"step": 2455
},
{
"epoch": 0.9067191730248585,
"grad_norm": 0.877597987651825,
"learning_rate": 4.741948759413868e-06,
"loss": 0.7234,
"step": 2456
},
{
"epoch": 0.9070883583558946,
"grad_norm": 0.850517988204956,
"learning_rate": 4.741733506942849e-06,
"loss": 0.7318,
"step": 2457
},
{
"epoch": 0.9074575436869309,
"grad_norm": 0.8301242589950562,
"learning_rate": 4.741518169622926e-06,
"loss": 0.694,
"step": 2458
},
{
"epoch": 0.907826729017967,
"grad_norm": 0.8391947150230408,
"learning_rate": 4.741302747462248e-06,
"loss": 0.7091,
"step": 2459
},
{
"epoch": 0.9081959143490032,
"grad_norm": 0.8886072039604187,
"learning_rate": 4.741087240468967e-06,
"loss": 0.7058,
"step": 2460
},
{
"epoch": 0.9085650996800394,
"grad_norm": 0.8848855495452881,
"learning_rate": 4.7408716486512416e-06,
"loss": 0.7176,
"step": 2461
},
{
"epoch": 0.9089342850110755,
"grad_norm": 0.8657143712043762,
"learning_rate": 4.740655972017232e-06,
"loss": 0.72,
"step": 2462
},
{
"epoch": 0.9093034703421118,
"grad_norm": 0.879115104675293,
"learning_rate": 4.7404402105751e-06,
"loss": 0.7008,
"step": 2463
},
{
"epoch": 0.9096726556731479,
"grad_norm": 0.8512206673622131,
"learning_rate": 4.740224364333013e-06,
"loss": 0.7051,
"step": 2464
},
{
"epoch": 0.9100418410041841,
"grad_norm": 0.8688360452651978,
"learning_rate": 4.740008433299142e-06,
"loss": 0.7031,
"step": 2465
},
{
"epoch": 0.9104110263352203,
"grad_norm": 0.8307510614395142,
"learning_rate": 4.739792417481659e-06,
"loss": 0.6484,
"step": 2466
},
{
"epoch": 0.9107802116662564,
"grad_norm": 0.8932550549507141,
"learning_rate": 4.7395763168887395e-06,
"loss": 0.7204,
"step": 2467
},
{
"epoch": 0.9111493969972927,
"grad_norm": 0.8596043586730957,
"learning_rate": 4.739360131528563e-06,
"loss": 0.7266,
"step": 2468
},
{
"epoch": 0.9115185823283288,
"grad_norm": 0.8368245959281921,
"learning_rate": 4.739143861409312e-06,
"loss": 0.6906,
"step": 2469
},
{
"epoch": 0.911887767659365,
"grad_norm": 0.8623902797698975,
"learning_rate": 4.738927506539173e-06,
"loss": 0.6946,
"step": 2470
},
{
"epoch": 0.9122569529904012,
"grad_norm": 0.8611836433410645,
"learning_rate": 4.738711066926335e-06,
"loss": 0.6655,
"step": 2471
},
{
"epoch": 0.9126261383214374,
"grad_norm": 0.8788560032844543,
"learning_rate": 4.738494542578989e-06,
"loss": 0.7327,
"step": 2472
},
{
"epoch": 0.9129953236524735,
"grad_norm": 0.9105246663093567,
"learning_rate": 4.73827793350533e-06,
"loss": 0.7424,
"step": 2473
},
{
"epoch": 0.9133645089835097,
"grad_norm": 0.8795875310897827,
"learning_rate": 4.738061239713559e-06,
"loss": 0.7092,
"step": 2474
},
{
"epoch": 0.9137336943145459,
"grad_norm": 0.8684108257293701,
"learning_rate": 4.737844461211876e-06,
"loss": 0.6988,
"step": 2475
},
{
"epoch": 0.9141028796455821,
"grad_norm": 0.8691182136535645,
"learning_rate": 4.737627598008486e-06,
"loss": 0.7363,
"step": 2476
},
{
"epoch": 0.9144720649766183,
"grad_norm": 0.8747261166572571,
"learning_rate": 4.737410650111599e-06,
"loss": 0.6877,
"step": 2477
},
{
"epoch": 0.9148412503076544,
"grad_norm": 0.8895252346992493,
"learning_rate": 4.7371936175294246e-06,
"loss": 0.7078,
"step": 2478
},
{
"epoch": 0.9152104356386906,
"grad_norm": 0.877252459526062,
"learning_rate": 4.736976500270177e-06,
"loss": 0.7118,
"step": 2479
},
{
"epoch": 0.9155796209697268,
"grad_norm": 0.8511465787887573,
"learning_rate": 4.736759298342075e-06,
"loss": 0.6753,
"step": 2480
},
{
"epoch": 0.915948806300763,
"grad_norm": 0.8913504481315613,
"learning_rate": 4.7365420117533404e-06,
"loss": 0.6777,
"step": 2481
},
{
"epoch": 0.9163179916317992,
"grad_norm": 0.8873251080513,
"learning_rate": 4.736324640512195e-06,
"loss": 0.7191,
"step": 2482
},
{
"epoch": 0.9166871769628353,
"grad_norm": 0.8914928436279297,
"learning_rate": 4.736107184626869e-06,
"loss": 0.7094,
"step": 2483
},
{
"epoch": 0.9170563622938716,
"grad_norm": 0.8406426310539246,
"learning_rate": 4.735889644105591e-06,
"loss": 0.7082,
"step": 2484
},
{
"epoch": 0.9174255476249077,
"grad_norm": 0.9501475095748901,
"learning_rate": 4.735672018956596e-06,
"loss": 0.7158,
"step": 2485
},
{
"epoch": 0.9177947329559439,
"grad_norm": 0.8835409283638,
"learning_rate": 4.735454309188121e-06,
"loss": 0.6742,
"step": 2486
},
{
"epoch": 0.9181639182869801,
"grad_norm": 0.883091926574707,
"learning_rate": 4.735236514808406e-06,
"loss": 0.6994,
"step": 2487
},
{
"epoch": 0.9185331036180162,
"grad_norm": 0.8853040337562561,
"learning_rate": 4.735018635825693e-06,
"loss": 0.7197,
"step": 2488
},
{
"epoch": 0.9189022889490525,
"grad_norm": 0.8868618011474609,
"learning_rate": 4.734800672248231e-06,
"loss": 0.6744,
"step": 2489
},
{
"epoch": 0.9192714742800886,
"grad_norm": 0.9185392260551453,
"learning_rate": 4.73458262408427e-06,
"loss": 0.6616,
"step": 2490
},
{
"epoch": 0.9196406596111247,
"grad_norm": 0.8852180242538452,
"learning_rate": 4.734364491342061e-06,
"loss": 0.6921,
"step": 2491
},
{
"epoch": 0.920009844942161,
"grad_norm": 0.8949165344238281,
"learning_rate": 4.7341462740298605e-06,
"loss": 0.7063,
"step": 2492
},
{
"epoch": 0.9203790302731971,
"grad_norm": 0.8711039423942566,
"learning_rate": 4.7339279721559285e-06,
"loss": 0.7267,
"step": 2493
},
{
"epoch": 0.9207482156042334,
"grad_norm": 0.8843966126441956,
"learning_rate": 4.733709585728528e-06,
"loss": 0.7179,
"step": 2494
},
{
"epoch": 0.9211174009352695,
"grad_norm": 0.93259596824646,
"learning_rate": 4.733491114755926e-06,
"loss": 0.7065,
"step": 2495
},
{
"epoch": 0.9214865862663056,
"grad_norm": 0.9091447591781616,
"learning_rate": 4.733272559246389e-06,
"loss": 0.7224,
"step": 2496
},
{
"epoch": 0.9218557715973419,
"grad_norm": 0.8795716762542725,
"learning_rate": 4.73305391920819e-06,
"loss": 0.7119,
"step": 2497
},
{
"epoch": 0.922224956928378,
"grad_norm": 0.8672060966491699,
"learning_rate": 4.732835194649607e-06,
"loss": 0.7081,
"step": 2498
},
{
"epoch": 0.9225941422594143,
"grad_norm": 0.9056263566017151,
"learning_rate": 4.732616385578914e-06,
"loss": 0.7254,
"step": 2499
},
{
"epoch": 0.9229633275904504,
"grad_norm": 0.9208519458770752,
"learning_rate": 4.7323974920043965e-06,
"loss": 0.7183,
"step": 2500
},
{
"epoch": 0.9233325129214865,
"grad_norm": 0.8479480743408203,
"learning_rate": 4.732178513934339e-06,
"loss": 0.7086,
"step": 2501
},
{
"epoch": 0.9237016982525228,
"grad_norm": 0.84977126121521,
"learning_rate": 4.73195945137703e-06,
"loss": 0.701,
"step": 2502
},
{
"epoch": 0.9240708835835589,
"grad_norm": 0.8850725889205933,
"learning_rate": 4.7317403043407584e-06,
"loss": 0.6984,
"step": 2503
},
{
"epoch": 0.9244400689145952,
"grad_norm": 0.8803204298019409,
"learning_rate": 4.7315210728338215e-06,
"loss": 0.7318,
"step": 2504
},
{
"epoch": 0.9248092542456313,
"grad_norm": 0.8582308888435364,
"learning_rate": 4.731301756864516e-06,
"loss": 0.7015,
"step": 2505
},
{
"epoch": 0.9251784395766675,
"grad_norm": 0.8383595943450928,
"learning_rate": 4.731082356441143e-06,
"loss": 0.6385,
"step": 2506
},
{
"epoch": 0.9255476249077037,
"grad_norm": 0.8716678619384766,
"learning_rate": 4.730862871572008e-06,
"loss": 0.6989,
"step": 2507
},
{
"epoch": 0.9259168102387398,
"grad_norm": 0.8813826441764832,
"learning_rate": 4.730643302265416e-06,
"loss": 0.6775,
"step": 2508
},
{
"epoch": 0.926285995569776,
"grad_norm": 0.8515213131904602,
"learning_rate": 4.730423648529679e-06,
"loss": 0.7048,
"step": 2509
},
{
"epoch": 0.9266551809008122,
"grad_norm": 0.8710786700248718,
"learning_rate": 4.730203910373112e-06,
"loss": 0.7262,
"step": 2510
},
{
"epoch": 0.9270243662318484,
"grad_norm": 0.8492597341537476,
"learning_rate": 4.729984087804031e-06,
"loss": 0.7031,
"step": 2511
},
{
"epoch": 0.9273935515628846,
"grad_norm": 0.8640585541725159,
"learning_rate": 4.729764180830754e-06,
"loss": 0.683,
"step": 2512
},
{
"epoch": 0.9277627368939207,
"grad_norm": 0.85969078540802,
"learning_rate": 4.729544189461608e-06,
"loss": 0.6709,
"step": 2513
},
{
"epoch": 0.9281319222249569,
"grad_norm": 0.8955800533294678,
"learning_rate": 4.729324113704918e-06,
"loss": 0.7304,
"step": 2514
},
{
"epoch": 0.9285011075559931,
"grad_norm": 0.882088303565979,
"learning_rate": 4.729103953569014e-06,
"loss": 0.7078,
"step": 2515
},
{
"epoch": 0.9288702928870293,
"grad_norm": 0.8810111880302429,
"learning_rate": 4.728883709062229e-06,
"loss": 0.7412,
"step": 2516
},
{
"epoch": 0.9292394782180655,
"grad_norm": 0.9056873321533203,
"learning_rate": 4.728663380192898e-06,
"loss": 0.7202,
"step": 2517
},
{
"epoch": 0.9296086635491017,
"grad_norm": 0.8942249417304993,
"learning_rate": 4.728442966969363e-06,
"loss": 0.6895,
"step": 2518
},
{
"epoch": 0.9299778488801378,
"grad_norm": 0.8909201622009277,
"learning_rate": 4.728222469399964e-06,
"loss": 0.7394,
"step": 2519
},
{
"epoch": 0.930347034211174,
"grad_norm": 0.8551014065742493,
"learning_rate": 4.728001887493048e-06,
"loss": 0.6906,
"step": 2520
},
{
"epoch": 0.9307162195422102,
"grad_norm": 0.8875094652175903,
"learning_rate": 4.727781221256963e-06,
"loss": 0.7134,
"step": 2521
},
{
"epoch": 0.9310854048732464,
"grad_norm": 0.8559072613716125,
"learning_rate": 4.727560470700064e-06,
"loss": 0.6838,
"step": 2522
},
{
"epoch": 0.9314545902042826,
"grad_norm": 0.8632515668869019,
"learning_rate": 4.727339635830702e-06,
"loss": 0.6783,
"step": 2523
},
{
"epoch": 0.9318237755353187,
"grad_norm": 0.8152856230735779,
"learning_rate": 4.727118716657239e-06,
"loss": 0.6142,
"step": 2524
},
{
"epoch": 0.932192960866355,
"grad_norm": 0.9158169031143188,
"learning_rate": 4.726897713188035e-06,
"loss": 0.7147,
"step": 2525
},
{
"epoch": 0.9325621461973911,
"grad_norm": 0.8849809765815735,
"learning_rate": 4.726676625431454e-06,
"loss": 0.6753,
"step": 2526
},
{
"epoch": 0.9329313315284272,
"grad_norm": 0.8620732426643372,
"learning_rate": 4.726455453395867e-06,
"loss": 0.6979,
"step": 2527
},
{
"epoch": 0.9333005168594635,
"grad_norm": 0.8598245978355408,
"learning_rate": 4.726234197089644e-06,
"loss": 0.7447,
"step": 2528
},
{
"epoch": 0.9336697021904996,
"grad_norm": 0.8678368330001831,
"learning_rate": 4.726012856521158e-06,
"loss": 0.693,
"step": 2529
},
{
"epoch": 0.9340388875215359,
"grad_norm": 0.8540114164352417,
"learning_rate": 4.72579143169879e-06,
"loss": 0.6663,
"step": 2530
},
{
"epoch": 0.934408072852572,
"grad_norm": 0.8417086601257324,
"learning_rate": 4.725569922630917e-06,
"loss": 0.689,
"step": 2531
},
{
"epoch": 0.9347772581836081,
"grad_norm": 0.8707804679870605,
"learning_rate": 4.725348329325925e-06,
"loss": 0.7039,
"step": 2532
},
{
"epoch": 0.9351464435146444,
"grad_norm": 0.8656783103942871,
"learning_rate": 4.725126651792202e-06,
"loss": 0.7056,
"step": 2533
},
{
"epoch": 0.9355156288456805,
"grad_norm": 0.8286459445953369,
"learning_rate": 4.724904890038137e-06,
"loss": 0.6859,
"step": 2534
},
{
"epoch": 0.9358848141767168,
"grad_norm": 0.8686536550521851,
"learning_rate": 4.724683044072124e-06,
"loss": 0.6817,
"step": 2535
},
{
"epoch": 0.9362539995077529,
"grad_norm": 0.8872658014297485,
"learning_rate": 4.7244611139025595e-06,
"loss": 0.7093,
"step": 2536
},
{
"epoch": 0.936623184838789,
"grad_norm": 0.8674843907356262,
"learning_rate": 4.724239099537845e-06,
"loss": 0.7042,
"step": 2537
},
{
"epoch": 0.9369923701698253,
"grad_norm": 0.8639495968818665,
"learning_rate": 4.7240170009863816e-06,
"loss": 0.7111,
"step": 2538
},
{
"epoch": 0.9373615555008614,
"grad_norm": 0.8369062542915344,
"learning_rate": 4.7237948182565765e-06,
"loss": 0.6811,
"step": 2539
},
{
"epoch": 0.9377307408318976,
"grad_norm": 0.8846485614776611,
"learning_rate": 4.72357255135684e-06,
"loss": 0.6932,
"step": 2540
},
{
"epoch": 0.9380999261629338,
"grad_norm": 0.8939961194992065,
"learning_rate": 4.723350200295584e-06,
"loss": 0.7092,
"step": 2541
},
{
"epoch": 0.9384691114939699,
"grad_norm": 0.864433228969574,
"learning_rate": 4.723127765081225e-06,
"loss": 0.6878,
"step": 2542
},
{
"epoch": 0.9388382968250062,
"grad_norm": 0.8481778502464294,
"learning_rate": 4.7229052457221816e-06,
"loss": 0.6775,
"step": 2543
},
{
"epoch": 0.9392074821560423,
"grad_norm": 0.8581600785255432,
"learning_rate": 4.722682642226875e-06,
"loss": 0.6961,
"step": 2544
},
{
"epoch": 0.9395766674870785,
"grad_norm": 0.8689659833908081,
"learning_rate": 4.722459954603733e-06,
"loss": 0.6986,
"step": 2545
},
{
"epoch": 0.9399458528181147,
"grad_norm": 0.8631998896598816,
"learning_rate": 4.722237182861183e-06,
"loss": 0.6756,
"step": 2546
},
{
"epoch": 0.9403150381491509,
"grad_norm": 0.8720018863677979,
"learning_rate": 4.722014327007657e-06,
"loss": 0.7085,
"step": 2547
},
{
"epoch": 0.9406842234801871,
"grad_norm": 0.8763655424118042,
"learning_rate": 4.72179138705159e-06,
"loss": 0.6919,
"step": 2548
},
{
"epoch": 0.9410534088112232,
"grad_norm": 0.8881059288978577,
"learning_rate": 4.72156836300142e-06,
"loss": 0.6924,
"step": 2549
},
{
"epoch": 0.9414225941422594,
"grad_norm": 0.8810322284698486,
"learning_rate": 4.721345254865589e-06,
"loss": 0.7164,
"step": 2550
},
{
"epoch": 0.9417917794732956,
"grad_norm": 0.8332381248474121,
"learning_rate": 4.721122062652541e-06,
"loss": 0.7418,
"step": 2551
},
{
"epoch": 0.9421609648043318,
"grad_norm": 0.8442291617393494,
"learning_rate": 4.720898786370723e-06,
"loss": 0.6884,
"step": 2552
},
{
"epoch": 0.942530150135368,
"grad_norm": 0.8065565824508667,
"learning_rate": 4.720675426028588e-06,
"loss": 0.6191,
"step": 2553
},
{
"epoch": 0.9428993354664041,
"grad_norm": 0.8640344142913818,
"learning_rate": 4.720451981634589e-06,
"loss": 0.7072,
"step": 2554
},
{
"epoch": 0.9432685207974403,
"grad_norm": 0.8636446595191956,
"learning_rate": 4.720228453197183e-06,
"loss": 0.6901,
"step": 2555
},
{
"epoch": 0.9436377061284765,
"grad_norm": 0.8631062507629395,
"learning_rate": 4.720004840724831e-06,
"loss": 0.7035,
"step": 2556
},
{
"epoch": 0.9440068914595127,
"grad_norm": 0.8414477705955505,
"learning_rate": 4.7197811442259955e-06,
"loss": 0.6541,
"step": 2557
},
{
"epoch": 0.9443760767905488,
"grad_norm": 0.8783524632453918,
"learning_rate": 4.719557363709145e-06,
"loss": 0.6971,
"step": 2558
},
{
"epoch": 0.944745262121585,
"grad_norm": 0.8537473678588867,
"learning_rate": 4.7193334991827486e-06,
"loss": 0.6927,
"step": 2559
},
{
"epoch": 0.9451144474526212,
"grad_norm": 0.9084930419921875,
"learning_rate": 4.7191095506552795e-06,
"loss": 0.6974,
"step": 2560
},
{
"epoch": 0.9454836327836574,
"grad_norm": 0.9035632610321045,
"learning_rate": 4.718885518135215e-06,
"loss": 0.746,
"step": 2561
},
{
"epoch": 0.9458528181146936,
"grad_norm": 0.8823369741439819,
"learning_rate": 4.718661401631033e-06,
"loss": 0.6662,
"step": 2562
},
{
"epoch": 0.9462220034457297,
"grad_norm": 0.8840197324752808,
"learning_rate": 4.718437201151218e-06,
"loss": 0.692,
"step": 2563
},
{
"epoch": 0.946591188776766,
"grad_norm": 0.865744948387146,
"learning_rate": 4.718212916704254e-06,
"loss": 0.7252,
"step": 2564
},
{
"epoch": 0.9469603741078021,
"grad_norm": 0.8541647791862488,
"learning_rate": 4.717988548298633e-06,
"loss": 0.69,
"step": 2565
},
{
"epoch": 0.9473295594388383,
"grad_norm": 0.9258102774620056,
"learning_rate": 4.717764095942844e-06,
"loss": 0.7265,
"step": 2566
},
{
"epoch": 0.9476987447698745,
"grad_norm": 0.8850582242012024,
"learning_rate": 4.717539559645384e-06,
"loss": 0.7063,
"step": 2567
},
{
"epoch": 0.9480679301009106,
"grad_norm": 0.8705160021781921,
"learning_rate": 4.717314939414752e-06,
"loss": 0.713,
"step": 2568
},
{
"epoch": 0.9484371154319469,
"grad_norm": 0.8894596099853516,
"learning_rate": 4.717090235259449e-06,
"loss": 0.6615,
"step": 2569
},
{
"epoch": 0.948806300762983,
"grad_norm": 0.8761441707611084,
"learning_rate": 4.7168654471879806e-06,
"loss": 0.6738,
"step": 2570
},
{
"epoch": 0.9491754860940192,
"grad_norm": 0.9032109975814819,
"learning_rate": 4.716640575208855e-06,
"loss": 0.7081,
"step": 2571
},
{
"epoch": 0.9495446714250554,
"grad_norm": 0.8425540924072266,
"learning_rate": 4.716415619330582e-06,
"loss": 0.6321,
"step": 2572
},
{
"epoch": 0.9499138567560915,
"grad_norm": 0.87245112657547,
"learning_rate": 4.716190579561678e-06,
"loss": 0.7024,
"step": 2573
},
{
"epoch": 0.9502830420871278,
"grad_norm": 0.8776750564575195,
"learning_rate": 4.71596545591066e-06,
"loss": 0.6897,
"step": 2574
},
{
"epoch": 0.9506522274181639,
"grad_norm": 0.8822212815284729,
"learning_rate": 4.7157402483860496e-06,
"loss": 0.6829,
"step": 2575
},
{
"epoch": 0.9510214127492,
"grad_norm": 0.8839916586875916,
"learning_rate": 4.7155149569963696e-06,
"loss": 0.6821,
"step": 2576
},
{
"epoch": 0.9513905980802363,
"grad_norm": 0.8598265647888184,
"learning_rate": 4.715289581750147e-06,
"loss": 0.714,
"step": 2577
},
{
"epoch": 0.9517597834112724,
"grad_norm": 0.8639992475509644,
"learning_rate": 4.7150641226559136e-06,
"loss": 0.7089,
"step": 2578
},
{
"epoch": 0.9521289687423087,
"grad_norm": 0.8500651717185974,
"learning_rate": 4.714838579722202e-06,
"loss": 0.6681,
"step": 2579
},
{
"epoch": 0.9524981540733448,
"grad_norm": 0.8705741763114929,
"learning_rate": 4.714612952957549e-06,
"loss": 0.6966,
"step": 2580
},
{
"epoch": 0.952867339404381,
"grad_norm": 0.8744728565216064,
"learning_rate": 4.714387242370495e-06,
"loss": 0.7105,
"step": 2581
},
{
"epoch": 0.9532365247354172,
"grad_norm": 0.844121515750885,
"learning_rate": 4.714161447969583e-06,
"loss": 0.6552,
"step": 2582
},
{
"epoch": 0.9536057100664533,
"grad_norm": 0.8838944435119629,
"learning_rate": 4.7139355697633584e-06,
"loss": 0.6735,
"step": 2583
},
{
"epoch": 0.9539748953974896,
"grad_norm": 0.8674659132957458,
"learning_rate": 4.713709607760371e-06,
"loss": 0.6755,
"step": 2584
},
{
"epoch": 0.9543440807285257,
"grad_norm": 0.8746424913406372,
"learning_rate": 4.713483561969175e-06,
"loss": 0.7173,
"step": 2585
},
{
"epoch": 0.9547132660595619,
"grad_norm": 0.8750790357589722,
"learning_rate": 4.713257432398324e-06,
"loss": 0.7156,
"step": 2586
},
{
"epoch": 0.9550824513905981,
"grad_norm": 0.9194653034210205,
"learning_rate": 4.713031219056377e-06,
"loss": 0.7156,
"step": 2587
},
{
"epoch": 0.9554516367216342,
"grad_norm": 0.8654546737670898,
"learning_rate": 4.712804921951898e-06,
"loss": 0.6755,
"step": 2588
},
{
"epoch": 0.9558208220526705,
"grad_norm": 0.8833775520324707,
"learning_rate": 4.71257854109345e-06,
"loss": 0.6777,
"step": 2589
},
{
"epoch": 0.9561900073837066,
"grad_norm": 0.8929355144500732,
"learning_rate": 4.712352076489603e-06,
"loss": 0.7201,
"step": 2590
},
{
"epoch": 0.9565591927147428,
"grad_norm": 0.8803203105926514,
"learning_rate": 4.7121255281489275e-06,
"loss": 0.7037,
"step": 2591
},
{
"epoch": 0.956928378045779,
"grad_norm": 3.147451877593994,
"learning_rate": 4.71189889608e-06,
"loss": 0.7059,
"step": 2592
},
{
"epoch": 0.9572975633768152,
"grad_norm": 0.8626553416252136,
"learning_rate": 4.711672180291397e-06,
"loss": 0.7017,
"step": 2593
},
{
"epoch": 0.9576667487078513,
"grad_norm": 0.8505844473838806,
"learning_rate": 4.711445380791699e-06,
"loss": 0.6857,
"step": 2594
},
{
"epoch": 0.9580359340388875,
"grad_norm": 0.8895529508590698,
"learning_rate": 4.711218497589493e-06,
"loss": 0.7022,
"step": 2595
},
{
"epoch": 0.9584051193699237,
"grad_norm": 0.8883222341537476,
"learning_rate": 4.710991530693364e-06,
"loss": 0.6423,
"step": 2596
},
{
"epoch": 0.9587743047009599,
"grad_norm": 0.8487628102302551,
"learning_rate": 4.710764480111903e-06,
"loss": 0.6979,
"step": 2597
},
{
"epoch": 0.9591434900319961,
"grad_norm": 0.8244770765304565,
"learning_rate": 4.710537345853704e-06,
"loss": 0.6925,
"step": 2598
},
{
"epoch": 0.9595126753630322,
"grad_norm": 0.9382370710372925,
"learning_rate": 4.710310127927364e-06,
"loss": 0.7106,
"step": 2599
},
{
"epoch": 0.9598818606940684,
"grad_norm": 0.8556570410728455,
"learning_rate": 4.710082826341484e-06,
"loss": 0.6918,
"step": 2600
},
{
"epoch": 0.9602510460251046,
"grad_norm": 0.8350138068199158,
"learning_rate": 4.709855441104667e-06,
"loss": 0.6908,
"step": 2601
},
{
"epoch": 0.9606202313561408,
"grad_norm": 0.8622647523880005,
"learning_rate": 4.7096279722255175e-06,
"loss": 0.7214,
"step": 2602
},
{
"epoch": 0.960989416687177,
"grad_norm": 0.8928597569465637,
"learning_rate": 4.709400419712648e-06,
"loss": 0.7067,
"step": 2603
},
{
"epoch": 0.9613586020182131,
"grad_norm": 0.8485970497131348,
"learning_rate": 4.709172783574669e-06,
"loss": 0.691,
"step": 2604
},
{
"epoch": 0.9617277873492494,
"grad_norm": 0.8822405934333801,
"learning_rate": 4.708945063820198e-06,
"loss": 0.684,
"step": 2605
},
{
"epoch": 0.9620969726802855,
"grad_norm": 0.854975700378418,
"learning_rate": 4.708717260457853e-06,
"loss": 0.6718,
"step": 2606
},
{
"epoch": 0.9624661580113217,
"grad_norm": 0.86021488904953,
"learning_rate": 4.7084893734962565e-06,
"loss": 0.6677,
"step": 2607
},
{
"epoch": 0.9628353433423579,
"grad_norm": 0.9012849926948547,
"learning_rate": 4.708261402944036e-06,
"loss": 0.7292,
"step": 2608
},
{
"epoch": 0.963204528673394,
"grad_norm": 0.864490270614624,
"learning_rate": 4.708033348809816e-06,
"loss": 0.7166,
"step": 2609
},
{
"epoch": 0.9635737140044303,
"grad_norm": 0.8407897353172302,
"learning_rate": 4.707805211102232e-06,
"loss": 0.7277,
"step": 2610
},
{
"epoch": 0.9639428993354664,
"grad_norm": 0.8841310739517212,
"learning_rate": 4.707576989829917e-06,
"loss": 0.6973,
"step": 2611
},
{
"epoch": 0.9643120846665025,
"grad_norm": 0.887823760509491,
"learning_rate": 4.7073486850015095e-06,
"loss": 0.71,
"step": 2612
},
{
"epoch": 0.9646812699975388,
"grad_norm": 0.8758772015571594,
"learning_rate": 4.707120296625651e-06,
"loss": 0.6605,
"step": 2613
},
{
"epoch": 0.9650504553285749,
"grad_norm": 0.8583731055259705,
"learning_rate": 4.7068918247109865e-06,
"loss": 0.6964,
"step": 2614
},
{
"epoch": 0.9654196406596112,
"grad_norm": 0.8955153822898865,
"learning_rate": 4.706663269266163e-06,
"loss": 0.7276,
"step": 2615
},
{
"epoch": 0.9657888259906473,
"grad_norm": 0.8411804437637329,
"learning_rate": 4.70643463029983e-06,
"loss": 0.6319,
"step": 2616
},
{
"epoch": 0.9661580113216834,
"grad_norm": 0.8701980710029602,
"learning_rate": 4.706205907820643e-06,
"loss": 0.6796,
"step": 2617
},
{
"epoch": 0.9665271966527197,
"grad_norm": 0.8797522783279419,
"learning_rate": 4.705977101837259e-06,
"loss": 0.7139,
"step": 2618
},
{
"epoch": 0.9668963819837558,
"grad_norm": 0.8831002712249756,
"learning_rate": 4.705748212358339e-06,
"loss": 0.6813,
"step": 2619
},
{
"epoch": 0.9672655673147921,
"grad_norm": 0.8374364376068115,
"learning_rate": 4.705519239392544e-06,
"loss": 0.6509,
"step": 2620
},
{
"epoch": 0.9676347526458282,
"grad_norm": 0.8649409413337708,
"learning_rate": 4.705290182948542e-06,
"loss": 0.6692,
"step": 2621
},
{
"epoch": 0.9680039379768643,
"grad_norm": 0.9725558757781982,
"learning_rate": 4.705061043035002e-06,
"loss": 0.6921,
"step": 2622
},
{
"epoch": 0.9683731233079006,
"grad_norm": 1.5726717710494995,
"learning_rate": 4.704831819660598e-06,
"loss": 0.73,
"step": 2623
},
{
"epoch": 0.9687423086389367,
"grad_norm": 0.8848654627799988,
"learning_rate": 4.704602512834006e-06,
"loss": 0.692,
"step": 2624
},
{
"epoch": 0.969111493969973,
"grad_norm": 0.8604928255081177,
"learning_rate": 4.7043731225639045e-06,
"loss": 0.6849,
"step": 2625
},
{
"epoch": 0.9694806793010091,
"grad_norm": 0.9043660759925842,
"learning_rate": 4.704143648858976e-06,
"loss": 0.7108,
"step": 2626
},
{
"epoch": 0.9698498646320453,
"grad_norm": 0.8489352464675903,
"learning_rate": 4.703914091727906e-06,
"loss": 0.6897,
"step": 2627
},
{
"epoch": 0.9702190499630815,
"grad_norm": 0.8445559144020081,
"learning_rate": 4.703684451179382e-06,
"loss": 0.7079,
"step": 2628
},
{
"epoch": 0.9705882352941176,
"grad_norm": 0.8575627207756042,
"learning_rate": 4.7034547272220985e-06,
"loss": 0.7166,
"step": 2629
},
{
"epoch": 0.9709574206251538,
"grad_norm": 0.8811299800872803,
"learning_rate": 4.703224919864748e-06,
"loss": 0.6726,
"step": 2630
},
{
"epoch": 0.97132660595619,
"grad_norm": 0.8279196619987488,
"learning_rate": 4.702995029116031e-06,
"loss": 0.6713,
"step": 2631
},
{
"epoch": 0.9716957912872262,
"grad_norm": 0.8848355412483215,
"learning_rate": 4.702765054984646e-06,
"loss": 0.6671,
"step": 2632
},
{
"epoch": 0.9720649766182624,
"grad_norm": 0.8621148467063904,
"learning_rate": 4.7025349974793e-06,
"loss": 0.6558,
"step": 2633
},
{
"epoch": 0.9724341619492985,
"grad_norm": 0.8555501699447632,
"learning_rate": 4.702304856608698e-06,
"loss": 0.7102,
"step": 2634
},
{
"epoch": 0.9728033472803347,
"grad_norm": 0.8470124006271362,
"learning_rate": 4.702074632381553e-06,
"loss": 0.6579,
"step": 2635
},
{
"epoch": 0.9731725326113709,
"grad_norm": 0.8950283527374268,
"learning_rate": 4.701844324806579e-06,
"loss": 0.7557,
"step": 2636
},
{
"epoch": 0.9735417179424071,
"grad_norm": 0.8791554570198059,
"learning_rate": 4.701613933892491e-06,
"loss": 0.7336,
"step": 2637
},
{
"epoch": 0.9739109032734433,
"grad_norm": 0.8484707474708557,
"learning_rate": 4.701383459648011e-06,
"loss": 0.6983,
"step": 2638
},
{
"epoch": 0.9742800886044795,
"grad_norm": 0.8885069489479065,
"learning_rate": 4.701152902081863e-06,
"loss": 0.6982,
"step": 2639
},
{
"epoch": 0.9746492739355156,
"grad_norm": 0.8474615216255188,
"learning_rate": 4.700922261202771e-06,
"loss": 0.6982,
"step": 2640
},
{
"epoch": 0.9750184592665518,
"grad_norm": 0.8749250769615173,
"learning_rate": 4.7006915370194655e-06,
"loss": 0.6771,
"step": 2641
},
{
"epoch": 0.975387644597588,
"grad_norm": 0.8137475848197937,
"learning_rate": 4.70046072954068e-06,
"loss": 0.6859,
"step": 2642
},
{
"epoch": 0.9757568299286241,
"grad_norm": 0.8566490411758423,
"learning_rate": 4.700229838775151e-06,
"loss": 0.7159,
"step": 2643
},
{
"epoch": 0.9761260152596604,
"grad_norm": 0.8723783493041992,
"learning_rate": 4.699998864731617e-06,
"loss": 0.7309,
"step": 2644
},
{
"epoch": 0.9764952005906965,
"grad_norm": 0.8612366914749146,
"learning_rate": 4.699767807418821e-06,
"loss": 0.6921,
"step": 2645
},
{
"epoch": 0.9768643859217327,
"grad_norm": 0.8468891382217407,
"learning_rate": 4.699536666845507e-06,
"loss": 0.6677,
"step": 2646
},
{
"epoch": 0.9772335712527689,
"grad_norm": 0.8550125956535339,
"learning_rate": 4.699305443020424e-06,
"loss": 0.6939,
"step": 2647
},
{
"epoch": 0.977602756583805,
"grad_norm": 0.9459806680679321,
"learning_rate": 4.699074135952324e-06,
"loss": 0.6429,
"step": 2648
},
{
"epoch": 0.9779719419148413,
"grad_norm": 0.8765318989753723,
"learning_rate": 4.6988427456499624e-06,
"loss": 0.6997,
"step": 2649
},
{
"epoch": 0.9783411272458774,
"grad_norm": 0.8627922534942627,
"learning_rate": 4.698611272122097e-06,
"loss": 0.6853,
"step": 2650
},
{
"epoch": 0.9787103125769137,
"grad_norm": 0.8556888103485107,
"learning_rate": 4.698379715377489e-06,
"loss": 0.6969,
"step": 2651
},
{
"epoch": 0.9790794979079498,
"grad_norm": 0.8721787929534912,
"learning_rate": 4.698148075424902e-06,
"loss": 0.6976,
"step": 2652
},
{
"epoch": 0.9794486832389859,
"grad_norm": 0.8606870770454407,
"learning_rate": 4.697916352273104e-06,
"loss": 0.6786,
"step": 2653
},
{
"epoch": 0.9798178685700222,
"grad_norm": 0.8361164927482605,
"learning_rate": 4.6976845459308664e-06,
"loss": 0.6847,
"step": 2654
},
{
"epoch": 0.9801870539010583,
"grad_norm": 0.8861737847328186,
"learning_rate": 4.697452656406963e-06,
"loss": 0.721,
"step": 2655
},
{
"epoch": 0.9805562392320946,
"grad_norm": 0.8805361390113831,
"learning_rate": 4.697220683710168e-06,
"loss": 0.681,
"step": 2656
},
{
"epoch": 0.9809254245631307,
"grad_norm": 0.8715226650238037,
"learning_rate": 4.696988627849265e-06,
"loss": 0.7021,
"step": 2657
},
{
"epoch": 0.9812946098941668,
"grad_norm": 0.866218090057373,
"learning_rate": 4.696756488833035e-06,
"loss": 0.6957,
"step": 2658
},
{
"epoch": 0.9816637952252031,
"grad_norm": 0.8797804713249207,
"learning_rate": 4.696524266670266e-06,
"loss": 0.7064,
"step": 2659
},
{
"epoch": 0.9820329805562392,
"grad_norm": 0.9008505940437317,
"learning_rate": 4.696291961369747e-06,
"loss": 0.6895,
"step": 2660
},
{
"epoch": 0.9824021658872754,
"grad_norm": 0.8940179944038391,
"learning_rate": 4.696059572940269e-06,
"loss": 0.6735,
"step": 2661
},
{
"epoch": 0.9827713512183116,
"grad_norm": 0.8762912750244141,
"learning_rate": 4.69582710139063e-06,
"loss": 0.7487,
"step": 2662
},
{
"epoch": 0.9831405365493477,
"grad_norm": 0.8566141724586487,
"learning_rate": 4.695594546729629e-06,
"loss": 0.7056,
"step": 2663
},
{
"epoch": 0.983509721880384,
"grad_norm": 0.8390647172927856,
"learning_rate": 4.695361908966066e-06,
"loss": 0.6885,
"step": 2664
},
{
"epoch": 0.9838789072114201,
"grad_norm": 0.8426521420478821,
"learning_rate": 4.6951291881087495e-06,
"loss": 0.7038,
"step": 2665
},
{
"epoch": 0.9842480925424563,
"grad_norm": 0.8740254640579224,
"learning_rate": 4.6948963841664845e-06,
"loss": 0.7153,
"step": 2666
},
{
"epoch": 0.9846172778734925,
"grad_norm": 0.8205375075340271,
"learning_rate": 4.694663497148084e-06,
"loss": 0.7011,
"step": 2667
},
{
"epoch": 0.9849864632045287,
"grad_norm": 0.8569832444190979,
"learning_rate": 4.6944305270623634e-06,
"loss": 0.6939,
"step": 2668
},
{
"epoch": 0.9853556485355649,
"grad_norm": 0.8959291577339172,
"learning_rate": 4.694197473918139e-06,
"loss": 0.7057,
"step": 2669
},
{
"epoch": 0.985724833866601,
"grad_norm": 0.8417198657989502,
"learning_rate": 4.6939643377242335e-06,
"loss": 0.6881,
"step": 2670
},
{
"epoch": 0.9860940191976372,
"grad_norm": 0.8587856888771057,
"learning_rate": 4.693731118489471e-06,
"loss": 0.7073,
"step": 2671
},
{
"epoch": 0.9864632045286734,
"grad_norm": 0.8757145404815674,
"learning_rate": 4.693497816222676e-06,
"loss": 0.7277,
"step": 2672
},
{
"epoch": 0.9868323898597096,
"grad_norm": 0.8414481282234192,
"learning_rate": 4.693264430932683e-06,
"loss": 0.68,
"step": 2673
},
{
"epoch": 0.9872015751907458,
"grad_norm": 0.8073148131370544,
"learning_rate": 4.693030962628322e-06,
"loss": 0.6434,
"step": 2674
},
{
"epoch": 0.9875707605217819,
"grad_norm": 0.8849506378173828,
"learning_rate": 4.692797411318432e-06,
"loss": 0.7039,
"step": 2675
},
{
"epoch": 0.9879399458528181,
"grad_norm": 0.829002857208252,
"learning_rate": 4.692563777011852e-06,
"loss": 0.6811,
"step": 2676
},
{
"epoch": 0.9883091311838543,
"grad_norm": 0.8336803317070007,
"learning_rate": 4.692330059717425e-06,
"loss": 0.6915,
"step": 2677
},
{
"epoch": 0.9886783165148905,
"grad_norm": 0.8502470850944519,
"learning_rate": 4.6920962594439965e-06,
"loss": 0.6678,
"step": 2678
},
{
"epoch": 0.9890475018459266,
"grad_norm": 0.8810563683509827,
"learning_rate": 4.691862376200418e-06,
"loss": 0.702,
"step": 2679
},
{
"epoch": 0.9894166871769629,
"grad_norm": 0.8533656597137451,
"learning_rate": 4.691628409995539e-06,
"loss": 0.6748,
"step": 2680
},
{
"epoch": 0.989785872507999,
"grad_norm": 0.8637438416481018,
"learning_rate": 4.6913943608382166e-06,
"loss": 0.7256,
"step": 2681
},
{
"epoch": 0.9901550578390352,
"grad_norm": 0.8353433609008789,
"learning_rate": 4.691160228737309e-06,
"loss": 0.6965,
"step": 2682
},
{
"epoch": 0.9905242431700714,
"grad_norm": 0.8654287457466125,
"learning_rate": 4.690926013701678e-06,
"loss": 0.6864,
"step": 2683
},
{
"epoch": 0.9908934285011075,
"grad_norm": 0.8387684226036072,
"learning_rate": 4.69069171574019e-06,
"loss": 0.6389,
"step": 2684
},
{
"epoch": 0.9912626138321438,
"grad_norm": 0.908822774887085,
"learning_rate": 4.690457334861711e-06,
"loss": 0.7105,
"step": 2685
},
{
"epoch": 0.9916317991631799,
"grad_norm": 0.8545365333557129,
"learning_rate": 4.690222871075114e-06,
"loss": 0.722,
"step": 2686
},
{
"epoch": 0.9920009844942161,
"grad_norm": 0.8538405299186707,
"learning_rate": 4.689988324389272e-06,
"loss": 0.6503,
"step": 2687
},
{
"epoch": 0.9923701698252523,
"grad_norm": 0.8731146454811096,
"learning_rate": 4.689753694813063e-06,
"loss": 0.6982,
"step": 2688
},
{
"epoch": 0.9927393551562884,
"grad_norm": 0.8791890144348145,
"learning_rate": 4.689518982355369e-06,
"loss": 0.6837,
"step": 2689
},
{
"epoch": 0.9931085404873247,
"grad_norm": 0.8614819049835205,
"learning_rate": 4.68928418702507e-06,
"loss": 0.7137,
"step": 2690
},
{
"epoch": 0.9934777258183608,
"grad_norm": 0.8743491768836975,
"learning_rate": 4.689049308831057e-06,
"loss": 0.7015,
"step": 2691
},
{
"epoch": 0.993846911149397,
"grad_norm": 0.8765535354614258,
"learning_rate": 4.688814347782219e-06,
"loss": 0.6774,
"step": 2692
},
{
"epoch": 0.9942160964804332,
"grad_norm": 0.8722323775291443,
"learning_rate": 4.6885793038874486e-06,
"loss": 0.6862,
"step": 2693
},
{
"epoch": 0.9945852818114693,
"grad_norm": 0.8379456400871277,
"learning_rate": 4.688344177155642e-06,
"loss": 0.6752,
"step": 2694
},
{
"epoch": 0.9949544671425056,
"grad_norm": 0.8696368932723999,
"learning_rate": 4.688108967595699e-06,
"loss": 0.6786,
"step": 2695
},
{
"epoch": 0.9953236524735417,
"grad_norm": 0.8123127818107605,
"learning_rate": 4.687873675216522e-06,
"loss": 0.6704,
"step": 2696
},
{
"epoch": 0.9956928378045778,
"grad_norm": 0.8309715390205383,
"learning_rate": 4.687638300027016e-06,
"loss": 0.6552,
"step": 2697
},
{
"epoch": 0.9960620231356141,
"grad_norm": 0.8737226724624634,
"learning_rate": 4.687402842036092e-06,
"loss": 0.7223,
"step": 2698
},
{
"epoch": 0.9964312084666502,
"grad_norm": 0.8684542179107666,
"learning_rate": 4.687167301252661e-06,
"loss": 0.6992,
"step": 2699
},
{
"epoch": 0.9968003937976865,
"grad_norm": 0.848534345626831,
"learning_rate": 4.686931677685637e-06,
"loss": 0.7061,
"step": 2700
},
{
"epoch": 0.9971695791287226,
"grad_norm": 0.862679123878479,
"learning_rate": 4.68669597134394e-06,
"loss": 0.7094,
"step": 2701
},
{
"epoch": 0.9975387644597588,
"grad_norm": 0.8820730447769165,
"learning_rate": 4.68646018223649e-06,
"loss": 0.6943,
"step": 2702
},
{
"epoch": 0.997907949790795,
"grad_norm": 0.8731999397277832,
"learning_rate": 4.686224310372213e-06,
"loss": 0.7011,
"step": 2703
},
{
"epoch": 0.9982771351218311,
"grad_norm": 0.852350115776062,
"learning_rate": 4.685988355760035e-06,
"loss": 0.6635,
"step": 2704
},
{
"epoch": 0.9986463204528674,
"grad_norm": 0.8917254209518433,
"learning_rate": 4.6857523184088875e-06,
"loss": 0.7044,
"step": 2705
},
{
"epoch": 0.9990155057839035,
"grad_norm": 0.900327742099762,
"learning_rate": 4.685516198327705e-06,
"loss": 0.7145,
"step": 2706
},
{
"epoch": 0.9993846911149397,
"grad_norm": 0.9128772020339966,
"learning_rate": 4.685279995525424e-06,
"loss": 0.7169,
"step": 2707
},
{
"epoch": 0.9997538764459759,
"grad_norm": 0.8816156983375549,
"learning_rate": 4.685043710010985e-06,
"loss": 0.7372,
"step": 2708
}
],
"logging_steps": 1,
"max_steps": 16248,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 2708,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.840507374826947e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}