{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.2525597269624573,
"eval_steps": 500,
"global_step": 1110,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00022753128555176336,
"grad_norm": 12.3071932545212,
"learning_rate": 1.25e-06,
"loss": 0.3186,
"step": 1
},
{
"epoch": 0.0004550625711035267,
"grad_norm": 11.959825961880057,
"learning_rate": 1.2499999936130725e-06,
"loss": 0.3776,
"step": 2
},
{
"epoch": 0.0006825938566552901,
"grad_norm": 5.4315221034586365,
"learning_rate": 1.2499999744522896e-06,
"loss": 0.4755,
"step": 3
},
{
"epoch": 0.0009101251422070534,
"grad_norm": 21.003860231065644,
"learning_rate": 1.2499999425176518e-06,
"loss": 0.3334,
"step": 4
},
{
"epoch": 0.0011376564277588168,
"grad_norm": 9.549170994835775,
"learning_rate": 1.2499998978091598e-06,
"loss": 0.375,
"step": 5
},
{
"epoch": 0.0013651877133105802,
"grad_norm": 3.400827392368318,
"learning_rate": 1.2499998403268147e-06,
"loss": 0.2286,
"step": 6
},
{
"epoch": 0.0015927189988623437,
"grad_norm": 8.451175634489234,
"learning_rate": 1.2499997700706173e-06,
"loss": 0.3216,
"step": 7
},
{
"epoch": 0.0018202502844141069,
"grad_norm": 7.494987211346803,
"learning_rate": 1.2499996870405692e-06,
"loss": 0.2339,
"step": 8
},
{
"epoch": 0.0020477815699658703,
"grad_norm": 9.138399201835718,
"learning_rate": 1.2499995912366722e-06,
"loss": 0.326,
"step": 9
},
{
"epoch": 0.0022753128555176336,
"grad_norm": 3.2188295955534123,
"learning_rate": 1.2499994826589282e-06,
"loss": 0.2514,
"step": 10
},
{
"epoch": 0.002502844141069397,
"grad_norm": 22.66663249526738,
"learning_rate": 1.2499993613073393e-06,
"loss": 0.4005,
"step": 11
},
{
"epoch": 0.0027303754266211604,
"grad_norm": 21.954799290073527,
"learning_rate": 1.2499992271819083e-06,
"loss": 0.1492,
"step": 12
},
{
"epoch": 0.0029579067121729237,
"grad_norm": 6.624298118045555,
"learning_rate": 1.2499990802826377e-06,
"loss": 0.3024,
"step": 13
},
{
"epoch": 0.0031854379977246873,
"grad_norm": 3.923975454400113,
"learning_rate": 1.2499989206095304e-06,
"loss": 0.2411,
"step": 14
},
{
"epoch": 0.0034129692832764505,
"grad_norm": 1.9398605915746092,
"learning_rate": 1.2499987481625899e-06,
"loss": 0.1849,
"step": 15
},
{
"epoch": 0.0036405005688282138,
"grad_norm": 5.482695785208493,
"learning_rate": 1.2499985629418195e-06,
"loss": 0.3122,
"step": 16
},
{
"epoch": 0.0038680318543799774,
"grad_norm": 5.552109300872593,
"learning_rate": 1.2499983649472233e-06,
"loss": 0.3393,
"step": 17
},
{
"epoch": 0.004095563139931741,
"grad_norm": 4.610318891370888,
"learning_rate": 1.249998154178805e-06,
"loss": 0.3,
"step": 18
},
{
"epoch": 0.004323094425483504,
"grad_norm": 8.793267315718285,
"learning_rate": 1.2499979306365692e-06,
"loss": 0.2266,
"step": 19
},
{
"epoch": 0.004550625711035267,
"grad_norm": 11.785540460314868,
"learning_rate": 1.2499976943205202e-06,
"loss": 0.258,
"step": 20
},
{
"epoch": 0.00477815699658703,
"grad_norm": 7.848333910468807,
"learning_rate": 1.249997445230663e-06,
"loss": 0.3733,
"step": 21
},
{
"epoch": 0.005005688282138794,
"grad_norm": 11.509651474854413,
"learning_rate": 1.2499971833670026e-06,
"loss": 0.3606,
"step": 22
},
{
"epoch": 0.005233219567690558,
"grad_norm": 8.662973783002895,
"learning_rate": 1.2499969087295443e-06,
"loss": 0.3884,
"step": 23
},
{
"epoch": 0.005460750853242321,
"grad_norm": 5.341258812295752,
"learning_rate": 1.249996621318294e-06,
"loss": 0.2677,
"step": 24
},
{
"epoch": 0.005688282138794084,
"grad_norm": 4.742018594757072,
"learning_rate": 1.2499963211332573e-06,
"loss": 0.3253,
"step": 25
},
{
"epoch": 0.005915813424345847,
"grad_norm": 2.4536573603250624,
"learning_rate": 1.2499960081744405e-06,
"loss": 0.2393,
"step": 26
},
{
"epoch": 0.0061433447098976105,
"grad_norm": 6.34705088291597,
"learning_rate": 1.24999568244185e-06,
"loss": 0.4326,
"step": 27
},
{
"epoch": 0.006370875995449375,
"grad_norm": 9.775833264439491,
"learning_rate": 1.249995343935492e-06,
"loss": 0.4252,
"step": 28
},
{
"epoch": 0.006598407281001138,
"grad_norm": 6.064212735225404,
"learning_rate": 1.2499949926553743e-06,
"loss": 0.2988,
"step": 29
},
{
"epoch": 0.006825938566552901,
"grad_norm": 4.4254830237015845,
"learning_rate": 1.2499946286015032e-06,
"loss": 0.2988,
"step": 30
},
{
"epoch": 0.007053469852104664,
"grad_norm": 4.883047495609927,
"learning_rate": 1.2499942517738867e-06,
"loss": 0.2285,
"step": 31
},
{
"epoch": 0.0072810011376564275,
"grad_norm": 8.135398866699179,
"learning_rate": 1.2499938621725322e-06,
"loss": 0.1529,
"step": 32
},
{
"epoch": 0.007508532423208191,
"grad_norm": 2.973365765084456,
"learning_rate": 1.2499934597974478e-06,
"loss": 0.2436,
"step": 33
},
{
"epoch": 0.007736063708759955,
"grad_norm": 5.612693729952574,
"learning_rate": 1.2499930446486416e-06,
"loss": 0.3466,
"step": 34
},
{
"epoch": 0.007963594994311717,
"grad_norm": 3.022290156639827,
"learning_rate": 1.2499926167261224e-06,
"loss": 0.2728,
"step": 35
},
{
"epoch": 0.008191126279863481,
"grad_norm": 3.1279992715224467,
"learning_rate": 1.2499921760298987e-06,
"loss": 0.2469,
"step": 36
},
{
"epoch": 0.008418657565415245,
"grad_norm": 14.845448376418034,
"learning_rate": 1.2499917225599796e-06,
"loss": 0.5145,
"step": 37
},
{
"epoch": 0.008646188850967008,
"grad_norm": 14.138433401115075,
"learning_rate": 1.2499912563163742e-06,
"loss": 0.2705,
"step": 38
},
{
"epoch": 0.008873720136518772,
"grad_norm": 4.324563647824762,
"learning_rate": 1.249990777299092e-06,
"loss": 0.1563,
"step": 39
},
{
"epoch": 0.009101251422070534,
"grad_norm": 11.315529959215173,
"learning_rate": 1.249990285508143e-06,
"loss": 0.4123,
"step": 40
},
{
"epoch": 0.009328782707622298,
"grad_norm": 6.3112839729366765,
"learning_rate": 1.2499897809435374e-06,
"loss": 0.1742,
"step": 41
},
{
"epoch": 0.00955631399317406,
"grad_norm": 8.25726966946455,
"learning_rate": 1.249989263605285e-06,
"loss": 0.3229,
"step": 42
},
{
"epoch": 0.009783845278725825,
"grad_norm": 6.3545712967505334,
"learning_rate": 1.249988733493397e-06,
"loss": 0.3055,
"step": 43
},
{
"epoch": 0.010011376564277589,
"grad_norm": 5.356373706603287,
"learning_rate": 1.2499881906078836e-06,
"loss": 0.2601,
"step": 44
},
{
"epoch": 0.010238907849829351,
"grad_norm": 1.9215795165819936,
"learning_rate": 1.2499876349487564e-06,
"loss": 0.1517,
"step": 45
},
{
"epoch": 0.010466439135381115,
"grad_norm": 8.506503892761648,
"learning_rate": 1.2499870665160262e-06,
"loss": 0.2831,
"step": 46
},
{
"epoch": 0.010693970420932878,
"grad_norm": 5.909503420571465,
"learning_rate": 1.2499864853097054e-06,
"loss": 0.2252,
"step": 47
},
{
"epoch": 0.010921501706484642,
"grad_norm": 5.488265194188453,
"learning_rate": 1.2499858913298053e-06,
"loss": 0.3466,
"step": 48
},
{
"epoch": 0.011149032992036406,
"grad_norm": 12.162427245650075,
"learning_rate": 1.249985284576338e-06,
"loss": 0.2426,
"step": 49
},
{
"epoch": 0.011376564277588168,
"grad_norm": 9.969211407495816,
"learning_rate": 1.2499846650493164e-06,
"loss": 0.2801,
"step": 50
},
{
"epoch": 0.011604095563139932,
"grad_norm": 5.741578552447352,
"learning_rate": 1.2499840327487528e-06,
"loss": 0.2664,
"step": 51
},
{
"epoch": 0.011831626848691695,
"grad_norm": 2.937767840084915,
"learning_rate": 1.24998338767466e-06,
"loss": 0.1834,
"step": 52
},
{
"epoch": 0.012059158134243459,
"grad_norm": 4.130655112830682,
"learning_rate": 1.2499827298270515e-06,
"loss": 0.2675,
"step": 53
},
{
"epoch": 0.012286689419795221,
"grad_norm": 4.5227789119131625,
"learning_rate": 1.2499820592059405e-06,
"loss": 0.3205,
"step": 54
},
{
"epoch": 0.012514220705346985,
"grad_norm": 4.653850683576537,
"learning_rate": 1.2499813758113409e-06,
"loss": 0.1921,
"step": 55
},
{
"epoch": 0.01274175199089875,
"grad_norm": 6.204991552012506,
"learning_rate": 1.2499806796432665e-06,
"loss": 0.1989,
"step": 56
},
{
"epoch": 0.012969283276450512,
"grad_norm": 7.81696538748595,
"learning_rate": 1.2499799707017315e-06,
"loss": 0.1301,
"step": 57
},
{
"epoch": 0.013196814562002276,
"grad_norm": 6.427887275035889,
"learning_rate": 1.2499792489867508e-06,
"loss": 0.3376,
"step": 58
},
{
"epoch": 0.013424345847554038,
"grad_norm": 4.713573539887475,
"learning_rate": 1.2499785144983386e-06,
"loss": 0.1673,
"step": 59
},
{
"epoch": 0.013651877133105802,
"grad_norm": 6.7169275734426055,
"learning_rate": 1.24997776723651e-06,
"loss": 0.2501,
"step": 60
},
{
"epoch": 0.013879408418657566,
"grad_norm": 11.702392641770421,
"learning_rate": 1.2499770072012809e-06,
"loss": 0.293,
"step": 61
},
{
"epoch": 0.014106939704209329,
"grad_norm": 5.86563350345107,
"learning_rate": 1.2499762343926661e-06,
"loss": 0.2346,
"step": 62
},
{
"epoch": 0.014334470989761093,
"grad_norm": 4.562933746130791,
"learning_rate": 1.2499754488106817e-06,
"loss": 0.1349,
"step": 63
},
{
"epoch": 0.014562002275312855,
"grad_norm": 16.935870758573948,
"learning_rate": 1.2499746504553436e-06,
"loss": 0.2869,
"step": 64
},
{
"epoch": 0.01478953356086462,
"grad_norm": 3.252674290241083,
"learning_rate": 1.2499738393266684e-06,
"loss": 0.2125,
"step": 65
},
{
"epoch": 0.015017064846416382,
"grad_norm": 3.767321260449828,
"learning_rate": 1.2499730154246726e-06,
"loss": 0.2049,
"step": 66
},
{
"epoch": 0.015244596131968146,
"grad_norm": 7.264091175555215,
"learning_rate": 1.2499721787493726e-06,
"loss": 0.2521,
"step": 67
},
{
"epoch": 0.01547212741751991,
"grad_norm": 2.846384337735166,
"learning_rate": 1.2499713293007862e-06,
"loss": 0.1745,
"step": 68
},
{
"epoch": 0.015699658703071672,
"grad_norm": 30.829215228751778,
"learning_rate": 1.2499704670789301e-06,
"loss": 0.1514,
"step": 69
},
{
"epoch": 0.015927189988623434,
"grad_norm": 7.168923083631056,
"learning_rate": 1.2499695920838225e-06,
"loss": 0.2393,
"step": 70
},
{
"epoch": 0.0161547212741752,
"grad_norm": 3.418723817035884,
"learning_rate": 1.2499687043154809e-06,
"loss": 0.1342,
"step": 71
},
{
"epoch": 0.016382252559726963,
"grad_norm": 6.316537441364383,
"learning_rate": 1.2499678037739235e-06,
"loss": 0.1698,
"step": 72
},
{
"epoch": 0.016609783845278725,
"grad_norm": 3.8561981086650596,
"learning_rate": 1.2499668904591688e-06,
"loss": 0.3104,
"step": 73
},
{
"epoch": 0.01683731513083049,
"grad_norm": 4.679806938064617,
"learning_rate": 1.2499659643712356e-06,
"loss": 0.2139,
"step": 74
},
{
"epoch": 0.017064846416382253,
"grad_norm": 4.26137230837329,
"learning_rate": 1.2499650255101425e-06,
"loss": 0.2433,
"step": 75
},
{
"epoch": 0.017292377701934016,
"grad_norm": 3.7227188471827914,
"learning_rate": 1.2499640738759088e-06,
"loss": 0.2334,
"step": 76
},
{
"epoch": 0.017519908987485778,
"grad_norm": 6.044525591826923,
"learning_rate": 1.249963109468554e-06,
"loss": 0.3106,
"step": 77
},
{
"epoch": 0.017747440273037544,
"grad_norm": 6.248705646938244,
"learning_rate": 1.2499621322880979e-06,
"loss": 0.2025,
"step": 78
},
{
"epoch": 0.017974971558589306,
"grad_norm": 2.8368621495357313,
"learning_rate": 1.2499611423345604e-06,
"loss": 0.1492,
"step": 79
},
{
"epoch": 0.01820250284414107,
"grad_norm": 5.049736361542706,
"learning_rate": 1.2499601396079617e-06,
"loss": 0.1341,
"step": 80
},
{
"epoch": 0.018430034129692834,
"grad_norm": 6.760221850362585,
"learning_rate": 1.2499591241083222e-06,
"loss": 0.2092,
"step": 81
},
{
"epoch": 0.018657565415244597,
"grad_norm": 6.630540720646431,
"learning_rate": 1.2499580958356628e-06,
"loss": 0.2181,
"step": 82
},
{
"epoch": 0.01888509670079636,
"grad_norm": 3.8482585047631863,
"learning_rate": 1.2499570547900045e-06,
"loss": 0.1613,
"step": 83
},
{
"epoch": 0.01911262798634812,
"grad_norm": 6.605304588968454,
"learning_rate": 1.2499560009713684e-06,
"loss": 0.2959,
"step": 84
},
{
"epoch": 0.019340159271899887,
"grad_norm": 6.012809221970948,
"learning_rate": 1.2499549343797764e-06,
"loss": 0.2393,
"step": 85
},
{
"epoch": 0.01956769055745165,
"grad_norm": 6.254621323206641,
"learning_rate": 1.24995385501525e-06,
"loss": 0.2285,
"step": 86
},
{
"epoch": 0.019795221843003412,
"grad_norm": 3.4046999226542733,
"learning_rate": 1.2499527628778116e-06,
"loss": 0.1187,
"step": 87
},
{
"epoch": 0.020022753128555178,
"grad_norm": 7.419781715158706,
"learning_rate": 1.2499516579674831e-06,
"loss": 0.2817,
"step": 88
},
{
"epoch": 0.02025028441410694,
"grad_norm": 21.819719933471735,
"learning_rate": 1.2499505402842872e-06,
"loss": 0.2469,
"step": 89
},
{
"epoch": 0.020477815699658702,
"grad_norm": 2.8418419055080766,
"learning_rate": 1.2499494098282469e-06,
"loss": 0.2955,
"step": 90
},
{
"epoch": 0.020705346985210465,
"grad_norm": 7.066317637431583,
"learning_rate": 1.2499482665993851e-06,
"loss": 0.2044,
"step": 91
},
{
"epoch": 0.02093287827076223,
"grad_norm": 5.925737098985834,
"learning_rate": 1.2499471105977252e-06,
"loss": 0.2335,
"step": 92
},
{
"epoch": 0.021160409556313993,
"grad_norm": 3.0480275776898473,
"learning_rate": 1.249945941823291e-06,
"loss": 0.3633,
"step": 93
},
{
"epoch": 0.021387940841865755,
"grad_norm": 2.946352549362824,
"learning_rate": 1.2499447602761063e-06,
"loss": 0.2011,
"step": 94
},
{
"epoch": 0.02161547212741752,
"grad_norm": 6.07129225638081,
"learning_rate": 1.2499435659561954e-06,
"loss": 0.2585,
"step": 95
},
{
"epoch": 0.021843003412969283,
"grad_norm": 4.592794032374342,
"learning_rate": 1.2499423588635823e-06,
"loss": 0.2336,
"step": 96
},
{
"epoch": 0.022070534698521046,
"grad_norm": 19.61835193566366,
"learning_rate": 1.2499411389982919e-06,
"loss": 0.2438,
"step": 97
},
{
"epoch": 0.02229806598407281,
"grad_norm": 4.697964666160796,
"learning_rate": 1.2499399063603492e-06,
"loss": 0.26,
"step": 98
},
{
"epoch": 0.022525597269624574,
"grad_norm": 6.831528796415563,
"learning_rate": 1.2499386609497793e-06,
"loss": 0.1291,
"step": 99
},
{
"epoch": 0.022753128555176336,
"grad_norm": 3.3770537551655653,
"learning_rate": 1.2499374027666078e-06,
"loss": 0.1919,
"step": 100
},
{
"epoch": 0.0229806598407281,
"grad_norm": 10.54402988548413,
"learning_rate": 1.2499361318108602e-06,
"loss": 0.2695,
"step": 101
},
{
"epoch": 0.023208191126279865,
"grad_norm": 6.4464740357818116,
"learning_rate": 1.2499348480825627e-06,
"loss": 0.1883,
"step": 102
},
{
"epoch": 0.023435722411831627,
"grad_norm": 5.7228283849137895,
"learning_rate": 1.2499335515817413e-06,
"loss": 0.225,
"step": 103
},
{
"epoch": 0.02366325369738339,
"grad_norm": 8.575195167369158,
"learning_rate": 1.2499322423084226e-06,
"loss": 0.1988,
"step": 104
},
{
"epoch": 0.023890784982935155,
"grad_norm": 5.524822469569831,
"learning_rate": 1.2499309202626336e-06,
"loss": 0.1362,
"step": 105
},
{
"epoch": 0.024118316268486917,
"grad_norm": 1.4259194554286314,
"learning_rate": 1.249929585444401e-06,
"loss": 0.1341,
"step": 106
},
{
"epoch": 0.02434584755403868,
"grad_norm": 5.569399731315438,
"learning_rate": 1.2499282378537522e-06,
"loss": 0.1823,
"step": 107
},
{
"epoch": 0.024573378839590442,
"grad_norm": 5.131038290322419,
"learning_rate": 1.2499268774907144e-06,
"loss": 0.1674,
"step": 108
},
{
"epoch": 0.024800910125142208,
"grad_norm": 2.9740215362829368,
"learning_rate": 1.249925504355316e-06,
"loss": 0.1443,
"step": 109
},
{
"epoch": 0.02502844141069397,
"grad_norm": 7.125610878241638,
"learning_rate": 1.2499241184475848e-06,
"loss": 0.1993,
"step": 110
},
{
"epoch": 0.025255972696245733,
"grad_norm": 3.5104920582246284,
"learning_rate": 1.249922719767549e-06,
"loss": 0.1387,
"step": 111
},
{
"epoch": 0.0254835039817975,
"grad_norm": 15.180689323576399,
"learning_rate": 1.2499213083152374e-06,
"loss": 0.1609,
"step": 112
},
{
"epoch": 0.02571103526734926,
"grad_norm": 2.6467486780240077,
"learning_rate": 1.2499198840906787e-06,
"loss": 0.0766,
"step": 113
},
{
"epoch": 0.025938566552901023,
"grad_norm": 6.947833673299234,
"learning_rate": 1.249918447093902e-06,
"loss": 0.1988,
"step": 114
},
{
"epoch": 0.026166097838452786,
"grad_norm": 3.236155694827761,
"learning_rate": 1.249916997324937e-06,
"loss": 0.2822,
"step": 115
},
{
"epoch": 0.02639362912400455,
"grad_norm": 4.424229361394889,
"learning_rate": 1.2499155347838129e-06,
"loss": 0.2639,
"step": 116
},
{
"epoch": 0.026621160409556314,
"grad_norm": 6.7125880752306,
"learning_rate": 1.2499140594705596e-06,
"loss": 0.1758,
"step": 117
},
{
"epoch": 0.026848691695108076,
"grad_norm": 12.978485247890044,
"learning_rate": 1.2499125713852076e-06,
"loss": 0.2966,
"step": 118
},
{
"epoch": 0.027076222980659842,
"grad_norm": 2.4562187666064297,
"learning_rate": 1.2499110705277869e-06,
"loss": 0.1317,
"step": 119
},
{
"epoch": 0.027303754266211604,
"grad_norm": 2.450514697648912,
"learning_rate": 1.2499095568983284e-06,
"loss": 0.2491,
"step": 120
},
{
"epoch": 0.027531285551763367,
"grad_norm": 2.962900989508568,
"learning_rate": 1.2499080304968634e-06,
"loss": 0.1782,
"step": 121
},
{
"epoch": 0.027758816837315133,
"grad_norm": 4.706451675787787,
"learning_rate": 1.2499064913234222e-06,
"loss": 0.2063,
"step": 122
},
{
"epoch": 0.027986348122866895,
"grad_norm": 4.848247166198472,
"learning_rate": 1.249904939378037e-06,
"loss": 0.1873,
"step": 123
},
{
"epoch": 0.028213879408418657,
"grad_norm": 5.57275566955423,
"learning_rate": 1.2499033746607395e-06,
"loss": 0.2362,
"step": 124
},
{
"epoch": 0.02844141069397042,
"grad_norm": 4.528761927217566,
"learning_rate": 1.2499017971715614e-06,
"loss": 0.2686,
"step": 125
},
{
"epoch": 0.028668941979522185,
"grad_norm": 7.35859467900191,
"learning_rate": 1.2499002069105348e-06,
"loss": 0.275,
"step": 126
},
{
"epoch": 0.028896473265073948,
"grad_norm": 4.494727686955716,
"learning_rate": 1.2498986038776926e-06,
"loss": 0.1759,
"step": 127
},
{
"epoch": 0.02912400455062571,
"grad_norm": 7.273216392666622,
"learning_rate": 1.2498969880730671e-06,
"loss": 0.2159,
"step": 128
},
{
"epoch": 0.029351535836177476,
"grad_norm": 4.955227920384567,
"learning_rate": 1.249895359496692e-06,
"loss": 0.1888,
"step": 129
},
{
"epoch": 0.02957906712172924,
"grad_norm": 6.321445200949685,
"learning_rate": 1.2498937181486e-06,
"loss": 0.3007,
"step": 130
},
{
"epoch": 0.029806598407281,
"grad_norm": 2.76312902269676,
"learning_rate": 1.2498920640288248e-06,
"loss": 0.2442,
"step": 131
},
{
"epoch": 0.030034129692832763,
"grad_norm": 56.774720129580295,
"learning_rate": 1.2498903971374005e-06,
"loss": 0.223,
"step": 132
},
{
"epoch": 0.03026166097838453,
"grad_norm": 3.9468490187056324,
"learning_rate": 1.2498887174743606e-06,
"loss": 0.2504,
"step": 133
},
{
"epoch": 0.03048919226393629,
"grad_norm": 3.9118814976883542,
"learning_rate": 1.24988702503974e-06,
"loss": 0.1939,
"step": 134
},
{
"epoch": 0.030716723549488054,
"grad_norm": 3.7837188268010506,
"learning_rate": 1.2498853198335728e-06,
"loss": 0.2199,
"step": 135
},
{
"epoch": 0.03094425483503982,
"grad_norm": 4.0297942240817175,
"learning_rate": 1.2498836018558942e-06,
"loss": 0.1566,
"step": 136
},
{
"epoch": 0.031171786120591582,
"grad_norm": 3.4754550482446698,
"learning_rate": 1.2498818711067392e-06,
"loss": 0.2666,
"step": 137
},
{
"epoch": 0.031399317406143344,
"grad_norm": 3.864651244769,
"learning_rate": 1.2498801275861433e-06,
"loss": 0.1173,
"step": 138
},
{
"epoch": 0.03162684869169511,
"grad_norm": 8.216814820623972,
"learning_rate": 1.2498783712941418e-06,
"loss": 0.1879,
"step": 139
},
{
"epoch": 0.03185437997724687,
"grad_norm": 3.637457358045326,
"learning_rate": 1.2498766022307709e-06,
"loss": 0.2047,
"step": 140
},
{
"epoch": 0.032081911262798635,
"grad_norm": 2.58051980801193,
"learning_rate": 1.2498748203960665e-06,
"loss": 0.1008,
"step": 141
},
{
"epoch": 0.0323094425483504,
"grad_norm": 3.8775724824241764,
"learning_rate": 1.2498730257900655e-06,
"loss": 0.2042,
"step": 142
},
{
"epoch": 0.03253697383390216,
"grad_norm": 5.772591680829651,
"learning_rate": 1.249871218412804e-06,
"loss": 0.2352,
"step": 143
},
{
"epoch": 0.032764505119453925,
"grad_norm": 2.210254874393301,
"learning_rate": 1.2498693982643192e-06,
"loss": 0.1803,
"step": 144
},
{
"epoch": 0.03299203640500569,
"grad_norm": 6.540771980552272,
"learning_rate": 1.2498675653446485e-06,
"loss": 0.2304,
"step": 145
},
{
"epoch": 0.03321956769055745,
"grad_norm": 2.904522388367919,
"learning_rate": 1.249865719653829e-06,
"loss": 0.1707,
"step": 146
},
{
"epoch": 0.033447098976109216,
"grad_norm": 9.318986716894935,
"learning_rate": 1.2498638611918985e-06,
"loss": 0.2038,
"step": 147
},
{
"epoch": 0.03367463026166098,
"grad_norm": 9.58516027118141,
"learning_rate": 1.249861989958895e-06,
"loss": 0.2357,
"step": 148
},
{
"epoch": 0.03390216154721274,
"grad_norm": 3.559770501878285,
"learning_rate": 1.2498601059548572e-06,
"loss": 0.1613,
"step": 149
},
{
"epoch": 0.034129692832764506,
"grad_norm": 3.348814329958542,
"learning_rate": 1.2498582091798228e-06,
"loss": 0.2016,
"step": 150
},
{
"epoch": 0.034357224118316265,
"grad_norm": 6.375342543891093,
"learning_rate": 1.2498562996338312e-06,
"loss": 0.2231,
"step": 151
},
{
"epoch": 0.03458475540386803,
"grad_norm": 7.488809251815451,
"learning_rate": 1.249854377316921e-06,
"loss": 0.1819,
"step": 152
},
{
"epoch": 0.0348122866894198,
"grad_norm": 2.508487580474721,
"learning_rate": 1.2498524422291319e-06,
"loss": 0.182,
"step": 153
},
{
"epoch": 0.035039817974971556,
"grad_norm": 3.656563964135558,
"learning_rate": 1.2498504943705033e-06,
"loss": 0.165,
"step": 154
},
{
"epoch": 0.03526734926052332,
"grad_norm": 2.771070563762278,
"learning_rate": 1.249848533741075e-06,
"loss": 0.2569,
"step": 155
},
{
"epoch": 0.03549488054607509,
"grad_norm": 5.610529774003187,
"learning_rate": 1.2498465603408865e-06,
"loss": 0.2873,
"step": 156
},
{
"epoch": 0.035722411831626846,
"grad_norm": 3.6657793262286638,
"learning_rate": 1.2498445741699792e-06,
"loss": 0.1086,
"step": 157
},
{
"epoch": 0.03594994311717861,
"grad_norm": 11.136381961854878,
"learning_rate": 1.249842575228393e-06,
"loss": 0.1653,
"step": 158
},
{
"epoch": 0.03617747440273038,
"grad_norm": 4.607920317694178,
"learning_rate": 1.249840563516169e-06,
"loss": 0.1816,
"step": 159
},
{
"epoch": 0.03640500568828214,
"grad_norm": 4.765507333684582,
"learning_rate": 1.249838539033348e-06,
"loss": 0.1735,
"step": 160
},
{
"epoch": 0.0366325369738339,
"grad_norm": 3.024559515436515,
"learning_rate": 1.2498365017799715e-06,
"loss": 0.0997,
"step": 161
},
{
"epoch": 0.03686006825938567,
"grad_norm": 3.0006086205585594,
"learning_rate": 1.2498344517560815e-06,
"loss": 0.2742,
"step": 162
},
{
"epoch": 0.03708759954493743,
"grad_norm": 4.390575337778858,
"learning_rate": 1.2498323889617198e-06,
"loss": 0.2112,
"step": 163
},
{
"epoch": 0.03731513083048919,
"grad_norm": 4.987032274568943,
"learning_rate": 1.2498303133969281e-06,
"loss": 0.2282,
"step": 164
},
{
"epoch": 0.03754266211604096,
"grad_norm": 3.813775711394782,
"learning_rate": 1.2498282250617492e-06,
"loss": 0.1944,
"step": 165
},
{
"epoch": 0.03777019340159272,
"grad_norm": 3.361678763128891,
"learning_rate": 1.2498261239562257e-06,
"loss": 0.2018,
"step": 166
},
{
"epoch": 0.037997724687144484,
"grad_norm": 4.992072192203259,
"learning_rate": 1.2498240100804005e-06,
"loss": 0.2089,
"step": 167
},
{
"epoch": 0.03822525597269624,
"grad_norm": 8.050790934059092,
"learning_rate": 1.249821883434317e-06,
"loss": 0.2696,
"step": 168
},
{
"epoch": 0.03845278725824801,
"grad_norm": 2.642297340192281,
"learning_rate": 1.2498197440180182e-06,
"loss": 0.2691,
"step": 169
},
{
"epoch": 0.038680318543799774,
"grad_norm": 3.35790306734272,
"learning_rate": 1.2498175918315484e-06,
"loss": 0.1851,
"step": 170
},
{
"epoch": 0.03890784982935153,
"grad_norm": 3.524642269348137,
"learning_rate": 1.2498154268749513e-06,
"loss": 0.2276,
"step": 171
},
{
"epoch": 0.0391353811149033,
"grad_norm": 2.188667506818875,
"learning_rate": 1.249813249148271e-06,
"loss": 0.1616,
"step": 172
},
{
"epoch": 0.039362912400455065,
"grad_norm": 5.1958946099491845,
"learning_rate": 1.2498110586515525e-06,
"loss": 0.1987,
"step": 173
},
{
"epoch": 0.039590443686006824,
"grad_norm": 5.09328084896296,
"learning_rate": 1.2498088553848398e-06,
"loss": 0.195,
"step": 174
},
{
"epoch": 0.03981797497155859,
"grad_norm": 2.8290595777512952,
"learning_rate": 1.2498066393481787e-06,
"loss": 0.1568,
"step": 175
},
{
"epoch": 0.040045506257110355,
"grad_norm": 2.360697357040943,
"learning_rate": 1.249804410541614e-06,
"loss": 0.2065,
"step": 176
},
{
"epoch": 0.040273037542662114,
"grad_norm": 4.718810327826489,
"learning_rate": 1.2498021689651916e-06,
"loss": 0.2003,
"step": 177
},
{
"epoch": 0.04050056882821388,
"grad_norm": 2.6458436624930237,
"learning_rate": 1.249799914618957e-06,
"loss": 0.1589,
"step": 178
},
{
"epoch": 0.040728100113765646,
"grad_norm": 3.289621635927127,
"learning_rate": 1.2497976475029566e-06,
"loss": 0.1905,
"step": 179
},
{
"epoch": 0.040955631399317405,
"grad_norm": 2.7547654896260028,
"learning_rate": 1.2497953676172364e-06,
"loss": 0.1538,
"step": 180
},
{
"epoch": 0.04118316268486917,
"grad_norm": 4.715970073162376,
"learning_rate": 1.2497930749618431e-06,
"loss": 0.1297,
"step": 181
},
{
"epoch": 0.04141069397042093,
"grad_norm": 13.147614048372157,
"learning_rate": 1.2497907695368238e-06,
"loss": 0.164,
"step": 182
},
{
"epoch": 0.041638225255972695,
"grad_norm": 2.692225418023433,
"learning_rate": 1.2497884513422253e-06,
"loss": 0.2537,
"step": 183
},
{
"epoch": 0.04186575654152446,
"grad_norm": 5.166049507007355,
"learning_rate": 1.249786120378095e-06,
"loss": 0.074,
"step": 184
},
{
"epoch": 0.04209328782707622,
"grad_norm": 3.0648916024092596,
"learning_rate": 1.2497837766444806e-06,
"loss": 0.1639,
"step": 185
},
{
"epoch": 0.042320819112627986,
"grad_norm": 4.567688921451397,
"learning_rate": 1.2497814201414304e-06,
"loss": 0.2905,
"step": 186
},
{
"epoch": 0.04254835039817975,
"grad_norm": 3.970377559361967,
"learning_rate": 1.249779050868992e-06,
"loss": 0.2001,
"step": 187
},
{
"epoch": 0.04277588168373151,
"grad_norm": 2.2768846909587763,
"learning_rate": 1.249776668827214e-06,
"loss": 0.0951,
"step": 188
},
{
"epoch": 0.043003412969283276,
"grad_norm": 6.438142708090974,
"learning_rate": 1.249774274016145e-06,
"loss": 0.203,
"step": 189
},
{
"epoch": 0.04323094425483504,
"grad_norm": 2.4175466744317977,
"learning_rate": 1.2497718664358341e-06,
"loss": 0.1713,
"step": 190
},
{
"epoch": 0.0434584755403868,
"grad_norm": 4.37204480901975,
"learning_rate": 1.2497694460863307e-06,
"loss": 0.2986,
"step": 191
},
{
"epoch": 0.04368600682593857,
"grad_norm": 3.2046762676937255,
"learning_rate": 1.2497670129676838e-06,
"loss": 0.1288,
"step": 192
},
{
"epoch": 0.04391353811149033,
"grad_norm": 3.901472238917995,
"learning_rate": 1.2497645670799436e-06,
"loss": 0.1291,
"step": 193
},
{
"epoch": 0.04414106939704209,
"grad_norm": 3.891177273974114,
"learning_rate": 1.2497621084231595e-06,
"loss": 0.1165,
"step": 194
},
{
"epoch": 0.04436860068259386,
"grad_norm": 3.831124951630966,
"learning_rate": 1.2497596369973823e-06,
"loss": 0.175,
"step": 195
},
{
"epoch": 0.04459613196814562,
"grad_norm": 7.137497588920377,
"learning_rate": 1.2497571528026623e-06,
"loss": 0.2319,
"step": 196
},
{
"epoch": 0.04482366325369738,
"grad_norm": 2.9787063992991256,
"learning_rate": 1.2497546558390503e-06,
"loss": 0.2044,
"step": 197
},
{
"epoch": 0.04505119453924915,
"grad_norm": 2.5728244375494413,
"learning_rate": 1.2497521461065973e-06,
"loss": 0.1395,
"step": 198
},
{
"epoch": 0.04527872582480091,
"grad_norm": 7.102221321561537,
"learning_rate": 1.2497496236053547e-06,
"loss": 0.1969,
"step": 199
},
{
"epoch": 0.04550625711035267,
"grad_norm": 2.579422809989494,
"learning_rate": 1.2497470883353738e-06,
"loss": 0.1019,
"step": 200
},
{
"epoch": 0.04573378839590444,
"grad_norm": 4.340132040430137,
"learning_rate": 1.2497445402967068e-06,
"loss": 0.241,
"step": 201
},
{
"epoch": 0.0459613196814562,
"grad_norm": 2.2195665044126276,
"learning_rate": 1.2497419794894053e-06,
"loss": 0.2059,
"step": 202
},
{
"epoch": 0.04618885096700796,
"grad_norm": 3.274345001247324,
"learning_rate": 1.249739405913522e-06,
"loss": 0.1328,
"step": 203
},
{
"epoch": 0.04641638225255973,
"grad_norm": 2.527264534705696,
"learning_rate": 1.2497368195691095e-06,
"loss": 0.1408,
"step": 204
},
{
"epoch": 0.04664391353811149,
"grad_norm": 3.306757570747259,
"learning_rate": 1.2497342204562205e-06,
"loss": 0.2233,
"step": 205
},
{
"epoch": 0.046871444823663254,
"grad_norm": 3.6647451852915336,
"learning_rate": 1.2497316085749081e-06,
"loss": 0.1239,
"step": 206
},
{
"epoch": 0.04709897610921502,
"grad_norm": 4.68508784917087,
"learning_rate": 1.249728983925226e-06,
"loss": 0.1707,
"step": 207
},
{
"epoch": 0.04732650739476678,
"grad_norm": 3.18438034976801,
"learning_rate": 1.2497263465072274e-06,
"loss": 0.1325,
"step": 208
},
{
"epoch": 0.047554038680318544,
"grad_norm": 2.665536371480516,
"learning_rate": 1.2497236963209663e-06,
"loss": 0.247,
"step": 209
},
{
"epoch": 0.04778156996587031,
"grad_norm": 3.6305897675111822,
"learning_rate": 1.2497210333664972e-06,
"loss": 0.1399,
"step": 210
},
{
"epoch": 0.04800910125142207,
"grad_norm": 3.427786312260657,
"learning_rate": 1.2497183576438743e-06,
"loss": 0.1595,
"step": 211
},
{
"epoch": 0.048236632536973835,
"grad_norm": 3.501593030667954,
"learning_rate": 1.2497156691531523e-06,
"loss": 0.1895,
"step": 212
},
{
"epoch": 0.048464163822525594,
"grad_norm": 2.29399983953313,
"learning_rate": 1.249712967894386e-06,
"loss": 0.1273,
"step": 213
},
{
"epoch": 0.04869169510807736,
"grad_norm": 4.248497703608046,
"learning_rate": 1.2497102538676308e-06,
"loss": 0.2118,
"step": 214
},
{
"epoch": 0.048919226393629126,
"grad_norm": 5.009911727752511,
"learning_rate": 1.249707527072942e-06,
"loss": 0.1533,
"step": 215
},
{
"epoch": 0.049146757679180884,
"grad_norm": 3.254064879259487,
"learning_rate": 1.2497047875103757e-06,
"loss": 0.3042,
"step": 216
},
{
"epoch": 0.04937428896473265,
"grad_norm": 2.700363753095535,
"learning_rate": 1.2497020351799875e-06,
"loss": 0.1933,
"step": 217
},
{
"epoch": 0.049601820250284416,
"grad_norm": 2.2159854350533763,
"learning_rate": 1.2496992700818335e-06,
"loss": 0.1733,
"step": 218
},
{
"epoch": 0.049829351535836175,
"grad_norm": 6.438623712108173,
"learning_rate": 1.249696492215971e-06,
"loss": 0.2233,
"step": 219
},
{
"epoch": 0.05005688282138794,
"grad_norm": 3.6403163135182552,
"learning_rate": 1.249693701582456e-06,
"loss": 0.1542,
"step": 220
},
{
"epoch": 0.05028441410693971,
"grad_norm": 3.280631643810882,
"learning_rate": 1.2496908981813458e-06,
"loss": 0.1799,
"step": 221
},
{
"epoch": 0.050511945392491465,
"grad_norm": 2.5684306853319687,
"learning_rate": 1.2496880820126977e-06,
"loss": 0.2051,
"step": 222
},
{
"epoch": 0.05073947667804323,
"grad_norm": 2.7401430199461108,
"learning_rate": 1.2496852530765695e-06,
"loss": 0.1828,
"step": 223
},
{
"epoch": 0.050967007963595,
"grad_norm": 2.95485123311806,
"learning_rate": 1.2496824113730186e-06,
"loss": 0.2602,
"step": 224
},
{
"epoch": 0.051194539249146756,
"grad_norm": 2.5679914292312738,
"learning_rate": 1.2496795569021033e-06,
"loss": 0.1838,
"step": 225
},
{
"epoch": 0.05142207053469852,
"grad_norm": 4.2106953289503055,
"learning_rate": 1.2496766896638819e-06,
"loss": 0.1831,
"step": 226
},
{
"epoch": 0.05164960182025029,
"grad_norm": 2.4133590857510603,
"learning_rate": 1.249673809658413e-06,
"loss": 0.1869,
"step": 227
},
{
"epoch": 0.05187713310580205,
"grad_norm": 2.009672236932174,
"learning_rate": 1.2496709168857555e-06,
"loss": 0.1297,
"step": 228
},
{
"epoch": 0.05210466439135381,
"grad_norm": 2.57569428799923,
"learning_rate": 1.2496680113459683e-06,
"loss": 0.1887,
"step": 229
},
{
"epoch": 0.05233219567690557,
"grad_norm": 3.3094428680937464,
"learning_rate": 1.2496650930391113e-06,
"loss": 0.2654,
"step": 230
},
{
"epoch": 0.05255972696245734,
"grad_norm": 2.847650693015463,
"learning_rate": 1.2496621619652435e-06,
"loss": 0.1704,
"step": 231
},
{
"epoch": 0.0527872582480091,
"grad_norm": 2.9888611972362167,
"learning_rate": 1.2496592181244253e-06,
"loss": 0.1601,
"step": 232
},
{
"epoch": 0.05301478953356086,
"grad_norm": 2.08648737949565,
"learning_rate": 1.249656261516717e-06,
"loss": 0.1953,
"step": 233
},
{
"epoch": 0.05324232081911263,
"grad_norm": 2.531082669247976,
"learning_rate": 1.2496532921421781e-06,
"loss": 0.1717,
"step": 234
},
{
"epoch": 0.053469852104664393,
"grad_norm": 2.7509933573597896,
"learning_rate": 1.2496503100008704e-06,
"loss": 0.2469,
"step": 235
},
{
"epoch": 0.05369738339021615,
"grad_norm": 3.5155091690123923,
"learning_rate": 1.249647315092854e-06,
"loss": 0.1314,
"step": 236
},
{
"epoch": 0.05392491467576792,
"grad_norm": 3.2336581137529135,
"learning_rate": 1.2496443074181905e-06,
"loss": 0.1479,
"step": 237
},
{
"epoch": 0.054152445961319684,
"grad_norm": 1.9727228995954271,
"learning_rate": 1.2496412869769415e-06,
"loss": 0.1072,
"step": 238
},
{
"epoch": 0.05437997724687144,
"grad_norm": 9.030280638699303,
"learning_rate": 1.2496382537691686e-06,
"loss": 0.1993,
"step": 239
},
{
"epoch": 0.05460750853242321,
"grad_norm": 2.012237999972146,
"learning_rate": 1.2496352077949336e-06,
"loss": 0.2021,
"step": 240
},
{
"epoch": 0.054835039817974975,
"grad_norm": 2.875480352440569,
"learning_rate": 1.249632149054299e-06,
"loss": 0.1071,
"step": 241
},
{
"epoch": 0.05506257110352673,
"grad_norm": 3.027078266755971,
"learning_rate": 1.249629077547327e-06,
"loss": 0.2081,
"step": 242
},
{
"epoch": 0.0552901023890785,
"grad_norm": 3.212706521917931,
"learning_rate": 1.2496259932740813e-06,
"loss": 0.235,
"step": 243
},
{
"epoch": 0.055517633674630265,
"grad_norm": 1.5899391805286471,
"learning_rate": 1.2496228962346236e-06,
"loss": 0.1498,
"step": 244
},
{
"epoch": 0.055745164960182024,
"grad_norm": 2.252897408154709,
"learning_rate": 1.249619786429018e-06,
"loss": 0.0875,
"step": 245
},
{
"epoch": 0.05597269624573379,
"grad_norm": 1.7851217439709355,
"learning_rate": 1.2496166638573278e-06,
"loss": 0.163,
"step": 246
},
{
"epoch": 0.05620022753128555,
"grad_norm": 4.076208180076855,
"learning_rate": 1.2496135285196172e-06,
"loss": 0.1298,
"step": 247
},
{
"epoch": 0.056427758816837315,
"grad_norm": 8.235783447081577,
"learning_rate": 1.2496103804159497e-06,
"loss": 0.1994,
"step": 248
},
{
"epoch": 0.05665529010238908,
"grad_norm": 4.224863516307238,
"learning_rate": 1.2496072195463904e-06,
"loss": 0.1917,
"step": 249
},
{
"epoch": 0.05688282138794084,
"grad_norm": 2.600108393969465,
"learning_rate": 1.249604045911003e-06,
"loss": 0.1728,
"step": 250
},
{
"epoch": 0.057110352673492605,
"grad_norm": 4.193154020881599,
"learning_rate": 1.249600859509853e-06,
"loss": 0.1469,
"step": 251
},
{
"epoch": 0.05733788395904437,
"grad_norm": 3.3023049454358957,
"learning_rate": 1.2495976603430054e-06,
"loss": 0.3015,
"step": 252
},
{
"epoch": 0.05756541524459613,
"grad_norm": 2.1335803404002815,
"learning_rate": 1.2495944484105254e-06,
"loss": 0.1237,
"step": 253
},
{
"epoch": 0.057792946530147896,
"grad_norm": 5.342229724882705,
"learning_rate": 1.2495912237124787e-06,
"loss": 0.1134,
"step": 254
},
{
"epoch": 0.05802047781569966,
"grad_norm": 4.8799722775641765,
"learning_rate": 1.2495879862489312e-06,
"loss": 0.1865,
"step": 255
},
{
"epoch": 0.05824800910125142,
"grad_norm": 5.731543371657422,
"learning_rate": 1.2495847360199495e-06,
"loss": 0.2008,
"step": 256
},
{
"epoch": 0.058475540386803186,
"grad_norm": 2.313924736001694,
"learning_rate": 1.2495814730255993e-06,
"loss": 0.1361,
"step": 257
},
{
"epoch": 0.05870307167235495,
"grad_norm": 1.3942403935107488,
"learning_rate": 1.2495781972659479e-06,
"loss": 0.1103,
"step": 258
},
{
"epoch": 0.05893060295790671,
"grad_norm": 1.8635600367271647,
"learning_rate": 1.2495749087410618e-06,
"loss": 0.1736,
"step": 259
},
{
"epoch": 0.05915813424345848,
"grad_norm": 3.934800507138662,
"learning_rate": 1.2495716074510087e-06,
"loss": 0.1706,
"step": 260
},
{
"epoch": 0.059385665529010236,
"grad_norm": 7.067913001607123,
"learning_rate": 1.2495682933958555e-06,
"loss": 0.1963,
"step": 261
},
{
"epoch": 0.059613196814562,
"grad_norm": 2.692944909371077,
"learning_rate": 1.2495649665756705e-06,
"loss": 0.2486,
"step": 262
},
{
"epoch": 0.05984072810011377,
"grad_norm": 2.4930462253175305,
"learning_rate": 1.2495616269905212e-06,
"loss": 0.1447,
"step": 263
},
{
"epoch": 0.060068259385665526,
"grad_norm": 1.7948148568482771,
"learning_rate": 1.2495582746404762e-06,
"loss": 0.0994,
"step": 264
},
{
"epoch": 0.06029579067121729,
"grad_norm": 2.021876252112372,
"learning_rate": 1.249554909525604e-06,
"loss": 0.1386,
"step": 265
},
{
"epoch": 0.06052332195676906,
"grad_norm": 2.069960058640526,
"learning_rate": 1.249551531645973e-06,
"loss": 0.1866,
"step": 266
},
{
"epoch": 0.06075085324232082,
"grad_norm": 8.549797598789278,
"learning_rate": 1.2495481410016527e-06,
"loss": 0.3426,
"step": 267
},
{
"epoch": 0.06097838452787258,
"grad_norm": 6.033524800668443,
"learning_rate": 1.2495447375927122e-06,
"loss": 0.2039,
"step": 268
},
{
"epoch": 0.06120591581342435,
"grad_norm": 3.3984019223631656,
"learning_rate": 1.2495413214192209e-06,
"loss": 0.1562,
"step": 269
},
{
"epoch": 0.06143344709897611,
"grad_norm": 2.78909231360363,
"learning_rate": 1.2495378924812486e-06,
"loss": 0.2056,
"step": 270
},
{
"epoch": 0.06166097838452787,
"grad_norm": 5.781877877875473,
"learning_rate": 1.2495344507788662e-06,
"loss": 0.2293,
"step": 271
},
{
"epoch": 0.06188850967007964,
"grad_norm": 2.3180826263300607,
"learning_rate": 1.249530996312143e-06,
"loss": 0.1489,
"step": 272
},
{
"epoch": 0.0621160409556314,
"grad_norm": 7.2617460886104475,
"learning_rate": 1.2495275290811499e-06,
"loss": 0.2172,
"step": 273
},
{
"epoch": 0.062343572241183164,
"grad_norm": 2.1316035699431173,
"learning_rate": 1.2495240490859581e-06,
"loss": 0.2176,
"step": 274
},
{
"epoch": 0.06257110352673492,
"grad_norm": 2.5542857532037235,
"learning_rate": 1.2495205563266384e-06,
"loss": 0.1521,
"step": 275
},
{
"epoch": 0.06279863481228669,
"grad_norm": 3.5696131149812644,
"learning_rate": 1.2495170508032624e-06,
"loss": 0.2817,
"step": 276
},
{
"epoch": 0.06302616609783845,
"grad_norm": 4.055804927691344,
"learning_rate": 1.2495135325159015e-06,
"loss": 0.1484,
"step": 277
},
{
"epoch": 0.06325369738339022,
"grad_norm": 2.830287596995614,
"learning_rate": 1.2495100014646277e-06,
"loss": 0.1714,
"step": 278
},
{
"epoch": 0.06348122866894199,
"grad_norm": 5.2323794095215685,
"learning_rate": 1.2495064576495134e-06,
"loss": 0.3121,
"step": 279
},
{
"epoch": 0.06370875995449374,
"grad_norm": 2.500465425444752,
"learning_rate": 1.2495029010706306e-06,
"loss": 0.1005,
"step": 280
},
{
"epoch": 0.0639362912400455,
"grad_norm": 2.7474098845449433,
"learning_rate": 1.2494993317280524e-06,
"loss": 0.1755,
"step": 281
},
{
"epoch": 0.06416382252559727,
"grad_norm": 3.1110646620479967,
"learning_rate": 1.2494957496218516e-06,
"loss": 0.194,
"step": 282
},
{
"epoch": 0.06439135381114904,
"grad_norm": 1.162926170243262,
"learning_rate": 1.2494921547521013e-06,
"loss": 0.1667,
"step": 283
},
{
"epoch": 0.0646188850967008,
"grad_norm": 2.034958588386092,
"learning_rate": 1.249488547118875e-06,
"loss": 0.1031,
"step": 284
},
{
"epoch": 0.06484641638225255,
"grad_norm": 2.8585727096596214,
"learning_rate": 1.2494849267222466e-06,
"loss": 0.1199,
"step": 285
},
{
"epoch": 0.06507394766780432,
"grad_norm": 2.3756686418598916,
"learning_rate": 1.24948129356229e-06,
"loss": 0.203,
"step": 286
},
{
"epoch": 0.06530147895335608,
"grad_norm": 6.080154909085321,
"learning_rate": 1.2494776476390793e-06,
"loss": 0.2723,
"step": 287
},
{
"epoch": 0.06552901023890785,
"grad_norm": 3.1578927707769684,
"learning_rate": 1.2494739889526894e-06,
"loss": 0.1218,
"step": 288
},
{
"epoch": 0.06575654152445962,
"grad_norm": 2.7745317736308373,
"learning_rate": 1.2494703175031946e-06,
"loss": 0.194,
"step": 289
},
{
"epoch": 0.06598407281001138,
"grad_norm": 2.872306438815133,
"learning_rate": 1.2494666332906702e-06,
"loss": 0.143,
"step": 290
},
{
"epoch": 0.06621160409556313,
"grad_norm": 2.2661659384858277,
"learning_rate": 1.2494629363151916e-06,
"loss": 0.1497,
"step": 291
},
{
"epoch": 0.0664391353811149,
"grad_norm": 2.7978250826969586,
"learning_rate": 1.2494592265768343e-06,
"loss": 0.1817,
"step": 292
},
{
"epoch": 0.06666666666666667,
"grad_norm": 2.9435086338480496,
"learning_rate": 1.2494555040756737e-06,
"loss": 0.1195,
"step": 293
},
{
"epoch": 0.06689419795221843,
"grad_norm": 2.525871560805257,
"learning_rate": 1.2494517688117867e-06,
"loss": 0.2054,
"step": 294
},
{
"epoch": 0.0671217292377702,
"grad_norm": 3.3530486331117126,
"learning_rate": 1.2494480207852489e-06,
"loss": 0.1186,
"step": 295
},
{
"epoch": 0.06734926052332196,
"grad_norm": 3.791549905681902,
"learning_rate": 1.249444259996137e-06,
"loss": 0.1616,
"step": 296
},
{
"epoch": 0.06757679180887372,
"grad_norm": 2.3603348366809236,
"learning_rate": 1.2494404864445284e-06,
"loss": 0.1392,
"step": 297
},
{
"epoch": 0.06780432309442548,
"grad_norm": 2.161901751847752,
"learning_rate": 1.2494367001304996e-06,
"loss": 0.1548,
"step": 298
},
{
"epoch": 0.06803185437997725,
"grad_norm": 2.3978175716297634,
"learning_rate": 1.2494329010541284e-06,
"loss": 0.1634,
"step": 299
},
{
"epoch": 0.06825938566552901,
"grad_norm": 5.413503442113624,
"learning_rate": 1.2494290892154922e-06,
"loss": 0.2876,
"step": 300
},
{
"epoch": 0.06848691695108078,
"grad_norm": 1.904095426332445,
"learning_rate": 1.2494252646146692e-06,
"loss": 0.1942,
"step": 301
},
{
"epoch": 0.06871444823663253,
"grad_norm": 2.0091735504190504,
"learning_rate": 1.249421427251737e-06,
"loss": 0.1403,
"step": 302
},
{
"epoch": 0.0689419795221843,
"grad_norm": 2.6001586830103123,
"learning_rate": 1.2494175771267748e-06,
"loss": 0.2376,
"step": 303
},
{
"epoch": 0.06916951080773606,
"grad_norm": 2.8009063420794265,
"learning_rate": 1.2494137142398607e-06,
"loss": 0.1877,
"step": 304
},
{
"epoch": 0.06939704209328783,
"grad_norm": 2.0648464255318517,
"learning_rate": 1.249409838591074e-06,
"loss": 0.1462,
"step": 305
},
{
"epoch": 0.0696245733788396,
"grad_norm": 2.6396516124770657,
"learning_rate": 1.2494059501804937e-06,
"loss": 0.256,
"step": 306
},
{
"epoch": 0.06985210466439136,
"grad_norm": 2.9901343092043837,
"learning_rate": 1.249402049008199e-06,
"loss": 0.1483,
"step": 307
},
{
"epoch": 0.07007963594994311,
"grad_norm": 3.0343546498099356,
"learning_rate": 1.2493981350742704e-06,
"loss": 0.1561,
"step": 308
},
{
"epoch": 0.07030716723549488,
"grad_norm": 3.2148889672864636,
"learning_rate": 1.2493942083787872e-06,
"loss": 0.1856,
"step": 309
},
{
"epoch": 0.07053469852104664,
"grad_norm": 2.795539793994042,
"learning_rate": 1.2493902689218299e-06,
"loss": 0.1294,
"step": 310
},
{
"epoch": 0.07076222980659841,
"grad_norm": 2.1866434219410307,
"learning_rate": 1.249386316703479e-06,
"loss": 0.1789,
"step": 311
},
{
"epoch": 0.07098976109215017,
"grad_norm": 4.93386744278198,
"learning_rate": 1.2493823517238154e-06,
"loss": 0.1529,
"step": 312
},
{
"epoch": 0.07121729237770194,
"grad_norm": 2.127480030167813,
"learning_rate": 1.2493783739829202e-06,
"loss": 0.1593,
"step": 313
},
{
"epoch": 0.07144482366325369,
"grad_norm": 2.565861378561538,
"learning_rate": 1.2493743834808741e-06,
"loss": 0.1442,
"step": 314
},
{
"epoch": 0.07167235494880546,
"grad_norm": 3.129314599970171,
"learning_rate": 1.2493703802177594e-06,
"loss": 0.1936,
"step": 315
},
{
"epoch": 0.07189988623435722,
"grad_norm": 4.26603531282599,
"learning_rate": 1.2493663641936576e-06,
"loss": 0.1343,
"step": 316
},
{
"epoch": 0.07212741751990899,
"grad_norm": 1.778626655821605,
"learning_rate": 1.2493623354086507e-06,
"loss": 0.1751,
"step": 317
},
{
"epoch": 0.07235494880546076,
"grad_norm": 2.576979617695665,
"learning_rate": 1.2493582938628213e-06,
"loss": 0.1405,
"step": 318
},
{
"epoch": 0.07258248009101251,
"grad_norm": 2.528946823784448,
"learning_rate": 1.2493542395562516e-06,
"loss": 0.1207,
"step": 319
},
{
"epoch": 0.07281001137656427,
"grad_norm": 1.7105561186222351,
"learning_rate": 1.2493501724890247e-06,
"loss": 0.1067,
"step": 320
},
{
"epoch": 0.07303754266211604,
"grad_norm": 3.0021555230652144,
"learning_rate": 1.249346092661224e-06,
"loss": 0.1769,
"step": 321
},
{
"epoch": 0.0732650739476678,
"grad_norm": 3.2473648686733787,
"learning_rate": 1.2493420000729322e-06,
"loss": 0.1797,
"step": 322
},
{
"epoch": 0.07349260523321957,
"grad_norm": 2.9141882965376644,
"learning_rate": 1.2493378947242336e-06,
"loss": 0.1936,
"step": 323
},
{
"epoch": 0.07372013651877134,
"grad_norm": 2.139000059452357,
"learning_rate": 1.2493337766152119e-06,
"loss": 0.1323,
"step": 324
},
{
"epoch": 0.07394766780432309,
"grad_norm": 3.7562365963393773,
"learning_rate": 1.249329645745951e-06,
"loss": 0.1521,
"step": 325
},
{
"epoch": 0.07417519908987485,
"grad_norm": 3.1427328506374343,
"learning_rate": 1.2493255021165357e-06,
"loss": 0.1426,
"step": 326
},
{
"epoch": 0.07440273037542662,
"grad_norm": 2.5928821859504225,
"learning_rate": 1.2493213457270504e-06,
"loss": 0.1492,
"step": 327
},
{
"epoch": 0.07463026166097839,
"grad_norm": 2.6116349350740773,
"learning_rate": 1.2493171765775804e-06,
"loss": 0.1079,
"step": 328
},
{
"epoch": 0.07485779294653015,
"grad_norm": 2.5063754100070796,
"learning_rate": 1.2493129946682107e-06,
"loss": 0.1449,
"step": 329
},
{
"epoch": 0.07508532423208192,
"grad_norm": 2.7029390289735247,
"learning_rate": 1.2493087999990263e-06,
"loss": 0.2012,
"step": 330
},
{
"epoch": 0.07531285551763367,
"grad_norm": 3.168250561710959,
"learning_rate": 1.249304592570114e-06,
"loss": 0.135,
"step": 331
},
{
"epoch": 0.07554038680318544,
"grad_norm": 3.358825282989208,
"learning_rate": 1.2493003723815588e-06,
"loss": 0.202,
"step": 332
},
{
"epoch": 0.0757679180887372,
"grad_norm": 3.4712230061099367,
"learning_rate": 1.2492961394334474e-06,
"loss": 0.1796,
"step": 333
},
{
"epoch": 0.07599544937428897,
"grad_norm": 2.7447934095202586,
"learning_rate": 1.2492918937258663e-06,
"loss": 0.1529,
"step": 334
},
{
"epoch": 0.07622298065984073,
"grad_norm": 4.884489478774658,
"learning_rate": 1.2492876352589024e-06,
"loss": 0.1983,
"step": 335
},
{
"epoch": 0.07645051194539249,
"grad_norm": 11.840111431867928,
"learning_rate": 1.2492833640326424e-06,
"loss": 0.1701,
"step": 336
},
{
"epoch": 0.07667804323094425,
"grad_norm": 3.6493332372043032,
"learning_rate": 1.2492790800471738e-06,
"loss": 0.1894,
"step": 337
},
{
"epoch": 0.07690557451649602,
"grad_norm": 2.2273861687776657,
"learning_rate": 1.249274783302584e-06,
"loss": 0.1168,
"step": 338
},
{
"epoch": 0.07713310580204778,
"grad_norm": 3.0155968100929016,
"learning_rate": 1.249270473798961e-06,
"loss": 0.1877,
"step": 339
},
{
"epoch": 0.07736063708759955,
"grad_norm": 3.6811309004263197,
"learning_rate": 1.249266151536393e-06,
"loss": 0.1841,
"step": 340
},
{
"epoch": 0.07758816837315131,
"grad_norm": 3.3318670131929355,
"learning_rate": 1.249261816514968e-06,
"loss": 0.1425,
"step": 341
},
{
"epoch": 0.07781569965870307,
"grad_norm": 1.542707864707429,
"learning_rate": 1.2492574687347747e-06,
"loss": 0.0954,
"step": 342
},
{
"epoch": 0.07804323094425483,
"grad_norm": 5.219514434003638,
"learning_rate": 1.249253108195902e-06,
"loss": 0.1523,
"step": 343
},
{
"epoch": 0.0782707622298066,
"grad_norm": 2.685054702258556,
"learning_rate": 1.249248734898439e-06,
"loss": 0.1932,
"step": 344
},
{
"epoch": 0.07849829351535836,
"grad_norm": 3.782143044532345,
"learning_rate": 1.2492443488424753e-06,
"loss": 0.1782,
"step": 345
},
{
"epoch": 0.07872582480091013,
"grad_norm": 2.987081909452687,
"learning_rate": 1.2492399500281002e-06,
"loss": 0.1174,
"step": 346
},
{
"epoch": 0.07895335608646188,
"grad_norm": 2.4163752446451667,
"learning_rate": 1.2492355384554039e-06,
"loss": 0.1864,
"step": 347
},
{
"epoch": 0.07918088737201365,
"grad_norm": 2.881696468020635,
"learning_rate": 1.2492311141244764e-06,
"loss": 0.1509,
"step": 348
},
{
"epoch": 0.07940841865756541,
"grad_norm": 4.2425549257036925,
"learning_rate": 1.249226677035408e-06,
"loss": 0.1384,
"step": 349
},
{
"epoch": 0.07963594994311718,
"grad_norm": 2.999886291999185,
"learning_rate": 1.2492222271882896e-06,
"loss": 0.1631,
"step": 350
},
{
"epoch": 0.07986348122866894,
"grad_norm": 4.681484131322112,
"learning_rate": 1.2492177645832121e-06,
"loss": 0.1752,
"step": 351
},
{
"epoch": 0.08009101251422071,
"grad_norm": 2.921704965075288,
"learning_rate": 1.2492132892202668e-06,
"loss": 0.1486,
"step": 352
},
{
"epoch": 0.08031854379977246,
"grad_norm": 5.592595582830648,
"learning_rate": 1.2492088010995449e-06,
"loss": 0.2707,
"step": 353
},
{
"epoch": 0.08054607508532423,
"grad_norm": 2.9440013961704823,
"learning_rate": 1.2492043002211385e-06,
"loss": 0.2054,
"step": 354
},
{
"epoch": 0.080773606370876,
"grad_norm": 2.2221784159000006,
"learning_rate": 1.2491997865851392e-06,
"loss": 0.1373,
"step": 355
},
{
"epoch": 0.08100113765642776,
"grad_norm": 1.7381570114572884,
"learning_rate": 1.2491952601916395e-06,
"loss": 0.0858,
"step": 356
},
{
"epoch": 0.08122866894197953,
"grad_norm": 2.930524510809462,
"learning_rate": 1.2491907210407319e-06,
"loss": 0.2179,
"step": 357
},
{
"epoch": 0.08145620022753129,
"grad_norm": 1.329914120982883,
"learning_rate": 1.249186169132509e-06,
"loss": 0.1839,
"step": 358
},
{
"epoch": 0.08168373151308304,
"grad_norm": 4.774637200381304,
"learning_rate": 1.2491816044670641e-06,
"loss": 0.1266,
"step": 359
},
{
"epoch": 0.08191126279863481,
"grad_norm": 3.0085506218930442,
"learning_rate": 1.24917702704449e-06,
"loss": 0.1813,
"step": 360
},
{
"epoch": 0.08213879408418658,
"grad_norm": 2.683588571853357,
"learning_rate": 1.2491724368648808e-06,
"loss": 0.1182,
"step": 361
},
{
"epoch": 0.08236632536973834,
"grad_norm": 4.142859587264675,
"learning_rate": 1.2491678339283303e-06,
"loss": 0.1213,
"step": 362
},
{
"epoch": 0.08259385665529011,
"grad_norm": 2.266538556877378,
"learning_rate": 1.249163218234932e-06,
"loss": 0.1669,
"step": 363
},
{
"epoch": 0.08282138794084186,
"grad_norm": 3.340308786527698,
"learning_rate": 1.249158589784781e-06,
"loss": 0.1449,
"step": 364
},
{
"epoch": 0.08304891922639362,
"grad_norm": 3.600922134824311,
"learning_rate": 1.2491539485779713e-06,
"loss": 0.1934,
"step": 365
},
{
"epoch": 0.08327645051194539,
"grad_norm": 2.5603148777390796,
"learning_rate": 1.2491492946145981e-06,
"loss": 0.1215,
"step": 366
},
{
"epoch": 0.08350398179749716,
"grad_norm": 1.4306937563740754,
"learning_rate": 1.2491446278947563e-06,
"loss": 0.1218,
"step": 367
},
{
"epoch": 0.08373151308304892,
"grad_norm": 6.514691076015768,
"learning_rate": 1.2491399484185413e-06,
"loss": 0.1723,
"step": 368
},
{
"epoch": 0.08395904436860069,
"grad_norm": 2.1513333963844214,
"learning_rate": 1.249135256186049e-06,
"loss": 0.242,
"step": 369
},
{
"epoch": 0.08418657565415244,
"grad_norm": 1.697947937157404,
"learning_rate": 1.249130551197375e-06,
"loss": 0.1045,
"step": 370
},
{
"epoch": 0.0844141069397042,
"grad_norm": 1.4338559958770856,
"learning_rate": 1.2491258334526155e-06,
"loss": 0.1671,
"step": 371
},
{
"epoch": 0.08464163822525597,
"grad_norm": 2.7532236684188773,
"learning_rate": 1.2491211029518672e-06,
"loss": 0.1034,
"step": 372
},
{
"epoch": 0.08486916951080774,
"grad_norm": 2.665642318134447,
"learning_rate": 1.2491163596952264e-06,
"loss": 0.1737,
"step": 373
},
{
"epoch": 0.0850967007963595,
"grad_norm": 1.5130437493435105,
"learning_rate": 1.2491116036827902e-06,
"loss": 0.0804,
"step": 374
},
{
"epoch": 0.08532423208191127,
"grad_norm": 1.3642320073282543,
"learning_rate": 1.2491068349146559e-06,
"loss": 0.1428,
"step": 375
},
{
"epoch": 0.08555176336746302,
"grad_norm": 2.1006895230964444,
"learning_rate": 1.249102053390921e-06,
"loss": 0.2759,
"step": 376
},
{
"epoch": 0.08577929465301479,
"grad_norm": 1.5335225229109515,
"learning_rate": 1.249097259111683e-06,
"loss": 0.1836,
"step": 377
},
{
"epoch": 0.08600682593856655,
"grad_norm": 4.09523641946509,
"learning_rate": 1.24909245207704e-06,
"loss": 0.2771,
"step": 378
},
{
"epoch": 0.08623435722411832,
"grad_norm": 2.2658393838403477,
"learning_rate": 1.2490876322870904e-06,
"loss": 0.1815,
"step": 379
},
{
"epoch": 0.08646188850967008,
"grad_norm": 3.053596441038967,
"learning_rate": 1.2490827997419325e-06,
"loss": 0.1183,
"step": 380
},
{
"epoch": 0.08668941979522184,
"grad_norm": 2.9366601199125153,
"learning_rate": 1.249077954441665e-06,
"loss": 0.1738,
"step": 381
},
{
"epoch": 0.0869169510807736,
"grad_norm": 1.9726593738442935,
"learning_rate": 1.249073096386387e-06,
"loss": 0.1427,
"step": 382
},
{
"epoch": 0.08714448236632537,
"grad_norm": 2.8452874204285985,
"learning_rate": 1.249068225576198e-06,
"loss": 0.2767,
"step": 383
},
{
"epoch": 0.08737201365187713,
"grad_norm": 4.292343700500067,
"learning_rate": 1.2490633420111974e-06,
"loss": 0.127,
"step": 384
},
{
"epoch": 0.0875995449374289,
"grad_norm": 4.105827667785258,
"learning_rate": 1.249058445691485e-06,
"loss": 0.1639,
"step": 385
},
{
"epoch": 0.08782707622298067,
"grad_norm": 4.310698395146462,
"learning_rate": 1.2490535366171607e-06,
"loss": 0.1289,
"step": 386
},
{
"epoch": 0.08805460750853242,
"grad_norm": 3.5788743602832795,
"learning_rate": 1.249048614788325e-06,
"loss": 0.1804,
"step": 387
},
{
"epoch": 0.08828213879408418,
"grad_norm": 2.6616942664445413,
"learning_rate": 1.249043680205079e-06,
"loss": 0.144,
"step": 388
},
{
"epoch": 0.08850967007963595,
"grad_norm": 2.989163897960478,
"learning_rate": 1.2490387328675226e-06,
"loss": 0.2016,
"step": 389
},
{
"epoch": 0.08873720136518772,
"grad_norm": 4.587176162210019,
"learning_rate": 1.2490337727757576e-06,
"loss": 0.2284,
"step": 390
},
{
"epoch": 0.08896473265073948,
"grad_norm": 2.794747809075531,
"learning_rate": 1.249028799929885e-06,
"loss": 0.2002,
"step": 391
},
{
"epoch": 0.08919226393629125,
"grad_norm": 2.0197262567230276,
"learning_rate": 1.2490238143300066e-06,
"loss": 0.1143,
"step": 392
},
{
"epoch": 0.089419795221843,
"grad_norm": 3.184614553894442,
"learning_rate": 1.2490188159762243e-06,
"loss": 0.1913,
"step": 393
},
{
"epoch": 0.08964732650739476,
"grad_norm": 2.518010477046937,
"learning_rate": 1.2490138048686405e-06,
"loss": 0.1981,
"step": 394
},
{
"epoch": 0.08987485779294653,
"grad_norm": 5.010077865699377,
"learning_rate": 1.249008781007357e-06,
"loss": 0.1423,
"step": 395
},
{
"epoch": 0.0901023890784983,
"grad_norm": 1.420461399090385,
"learning_rate": 1.2490037443924768e-06,
"loss": 0.1363,
"step": 396
},
{
"epoch": 0.09032992036405006,
"grad_norm": 2.5810652557759863,
"learning_rate": 1.2489986950241032e-06,
"loss": 0.1002,
"step": 397
},
{
"epoch": 0.09055745164960181,
"grad_norm": 1.8725706501255737,
"learning_rate": 1.2489936329023387e-06,
"loss": 0.1974,
"step": 398
},
{
"epoch": 0.09078498293515358,
"grad_norm": 3.2869147678539554,
"learning_rate": 1.2489885580272874e-06,
"loss": 0.1629,
"step": 399
},
{
"epoch": 0.09101251422070535,
"grad_norm": 1.7546095764098488,
"learning_rate": 1.2489834703990527e-06,
"loss": 0.1326,
"step": 400
},
{
"epoch": 0.09124004550625711,
"grad_norm": 3.0930989898336407,
"learning_rate": 1.2489783700177385e-06,
"loss": 0.2565,
"step": 401
},
{
"epoch": 0.09146757679180888,
"grad_norm": 4.363886237065706,
"learning_rate": 1.2489732568834492e-06,
"loss": 0.1425,
"step": 402
},
{
"epoch": 0.09169510807736064,
"grad_norm": 2.141413419957395,
"learning_rate": 1.2489681309962895e-06,
"loss": 0.1458,
"step": 403
},
{
"epoch": 0.0919226393629124,
"grad_norm": 4.5478526718009205,
"learning_rate": 1.2489629923563637e-06,
"loss": 0.1655,
"step": 404
},
{
"epoch": 0.09215017064846416,
"grad_norm": 5.253865415098631,
"learning_rate": 1.2489578409637774e-06,
"loss": 0.2702,
"step": 405
},
{
"epoch": 0.09237770193401593,
"grad_norm": 6.114423825591168,
"learning_rate": 1.2489526768186352e-06,
"loss": 0.1364,
"step": 406
},
{
"epoch": 0.09260523321956769,
"grad_norm": 2.4260049242900505,
"learning_rate": 1.2489474999210434e-06,
"loss": 0.1573,
"step": 407
},
{
"epoch": 0.09283276450511946,
"grad_norm": 6.696614155480106,
"learning_rate": 1.2489423102711068e-06,
"loss": 0.2365,
"step": 408
},
{
"epoch": 0.09306029579067122,
"grad_norm": 3.4093511525509848,
"learning_rate": 1.2489371078689326e-06,
"loss": 0.1552,
"step": 409
},
{
"epoch": 0.09328782707622298,
"grad_norm": 3.512014449058475,
"learning_rate": 1.2489318927146263e-06,
"loss": 0.1392,
"step": 410
},
{
"epoch": 0.09351535836177474,
"grad_norm": 4.385040034701264,
"learning_rate": 1.2489266648082951e-06,
"loss": 0.1184,
"step": 411
},
{
"epoch": 0.09374288964732651,
"grad_norm": 11.030038016242493,
"learning_rate": 1.2489214241500453e-06,
"loss": 0.2445,
"step": 412
},
{
"epoch": 0.09397042093287827,
"grad_norm": 3.8160488235069487,
"learning_rate": 1.2489161707399843e-06,
"loss": 0.2422,
"step": 413
},
{
"epoch": 0.09419795221843004,
"grad_norm": 2.5154081754915554,
"learning_rate": 1.2489109045782194e-06,
"loss": 0.1284,
"step": 414
},
{
"epoch": 0.09442548350398179,
"grad_norm": 2.186602019326803,
"learning_rate": 1.2489056256648582e-06,
"loss": 0.1387,
"step": 415
},
{
"epoch": 0.09465301478953356,
"grad_norm": 3.1244704898712223,
"learning_rate": 1.2489003340000089e-06,
"loss": 0.2695,
"step": 416
},
{
"epoch": 0.09488054607508532,
"grad_norm": 1.9015703147093774,
"learning_rate": 1.2488950295837792e-06,
"loss": 0.2029,
"step": 417
},
{
"epoch": 0.09510807736063709,
"grad_norm": 3.2255120343889523,
"learning_rate": 1.2488897124162777e-06,
"loss": 0.1708,
"step": 418
},
{
"epoch": 0.09533560864618885,
"grad_norm": 2.4361554392110354,
"learning_rate": 1.248884382497613e-06,
"loss": 0.237,
"step": 419
},
{
"epoch": 0.09556313993174062,
"grad_norm": 5.44904137240634,
"learning_rate": 1.2488790398278941e-06,
"loss": 0.2259,
"step": 420
},
{
"epoch": 0.09579067121729237,
"grad_norm": 2.5542725247665725,
"learning_rate": 1.2488736844072304e-06,
"loss": 0.1706,
"step": 421
},
{
"epoch": 0.09601820250284414,
"grad_norm": 3.3440828684749837,
"learning_rate": 1.248868316235731e-06,
"loss": 0.166,
"step": 422
},
{
"epoch": 0.0962457337883959,
"grad_norm": 2.837980086891423,
"learning_rate": 1.2488629353135059e-06,
"loss": 0.1974,
"step": 423
},
{
"epoch": 0.09647326507394767,
"grad_norm": 3.0821716156484413,
"learning_rate": 1.2488575416406649e-06,
"loss": 0.2029,
"step": 424
},
{
"epoch": 0.09670079635949944,
"grad_norm": 4.11082660525738,
"learning_rate": 1.2488521352173183e-06,
"loss": 0.1288,
"step": 425
},
{
"epoch": 0.09692832764505119,
"grad_norm": 2.792375492899653,
"learning_rate": 1.2488467160435765e-06,
"loss": 0.1318,
"step": 426
},
{
"epoch": 0.09715585893060295,
"grad_norm": 2.54978143800456,
"learning_rate": 1.2488412841195505e-06,
"loss": 0.2235,
"step": 427
},
{
"epoch": 0.09738339021615472,
"grad_norm": 1.8685713785223814,
"learning_rate": 1.2488358394453512e-06,
"loss": 0.1018,
"step": 428
},
{
"epoch": 0.09761092150170649,
"grad_norm": 2.19856597261874,
"learning_rate": 1.2488303820210897e-06,
"loss": 0.0955,
"step": 429
},
{
"epoch": 0.09783845278725825,
"grad_norm": 2.756460140283964,
"learning_rate": 1.2488249118468776e-06,
"loss": 0.161,
"step": 430
},
{
"epoch": 0.09806598407281002,
"grad_norm": 3.1658885878432446,
"learning_rate": 1.248819428922827e-06,
"loss": 0.1707,
"step": 431
},
{
"epoch": 0.09829351535836177,
"grad_norm": 3.574624372801338,
"learning_rate": 1.2488139332490495e-06,
"loss": 0.2412,
"step": 432
},
{
"epoch": 0.09852104664391353,
"grad_norm": 2.63473599121384,
"learning_rate": 1.248808424825658e-06,
"loss": 0.1195,
"step": 433
},
{
"epoch": 0.0987485779294653,
"grad_norm": 3.928170371490413,
"learning_rate": 1.2488029036527645e-06,
"loss": 0.1478,
"step": 434
},
{
"epoch": 0.09897610921501707,
"grad_norm": 2.0459697190569583,
"learning_rate": 1.2487973697304822e-06,
"loss": 0.0868,
"step": 435
},
{
"epoch": 0.09920364050056883,
"grad_norm": 2.2037192709560283,
"learning_rate": 1.248791823058924e-06,
"loss": 0.1911,
"step": 436
},
{
"epoch": 0.0994311717861206,
"grad_norm": 3.549121049187713,
"learning_rate": 1.2487862636382034e-06,
"loss": 0.1218,
"step": 437
},
{
"epoch": 0.09965870307167235,
"grad_norm": 1.4303061363329783,
"learning_rate": 1.248780691468434e-06,
"loss": 0.1116,
"step": 438
},
{
"epoch": 0.09988623435722412,
"grad_norm": 3.8141735085769746,
"learning_rate": 1.2487751065497296e-06,
"loss": 0.2179,
"step": 439
},
{
"epoch": 0.10011376564277588,
"grad_norm": 2.6329169063924986,
"learning_rate": 1.2487695088822044e-06,
"loss": 0.1492,
"step": 440
},
{
"epoch": 0.10034129692832765,
"grad_norm": 2.8773216855185635,
"learning_rate": 1.2487638984659729e-06,
"loss": 0.0988,
"step": 441
},
{
"epoch": 0.10056882821387941,
"grad_norm": 2.5448731857786284,
"learning_rate": 1.2487582753011496e-06,
"loss": 0.1023,
"step": 442
},
{
"epoch": 0.10079635949943117,
"grad_norm": 2.4399816480891445,
"learning_rate": 1.2487526393878497e-06,
"loss": 0.2015,
"step": 443
},
{
"epoch": 0.10102389078498293,
"grad_norm": 2.056202357783669,
"learning_rate": 1.248746990726188e-06,
"loss": 0.1376,
"step": 444
},
{
"epoch": 0.1012514220705347,
"grad_norm": 2.489946255383071,
"learning_rate": 1.2487413293162803e-06,
"loss": 0.1389,
"step": 445
},
{
"epoch": 0.10147895335608646,
"grad_norm": 2.3660691937468807,
"learning_rate": 1.2487356551582421e-06,
"loss": 0.2235,
"step": 446
},
{
"epoch": 0.10170648464163823,
"grad_norm": 2.5030375037996575,
"learning_rate": 1.2487299682521893e-06,
"loss": 0.2156,
"step": 447
},
{
"epoch": 0.10193401592719,
"grad_norm": 2.210721856008811,
"learning_rate": 1.2487242685982384e-06,
"loss": 0.1101,
"step": 448
},
{
"epoch": 0.10216154721274175,
"grad_norm": 2.250420318734035,
"learning_rate": 1.2487185561965057e-06,
"loss": 0.1241,
"step": 449
},
{
"epoch": 0.10238907849829351,
"grad_norm": 2.019413043508561,
"learning_rate": 1.248712831047108e-06,
"loss": 0.1217,
"step": 450
},
{
"epoch": 0.10261660978384528,
"grad_norm": 3.2295330442493713,
"learning_rate": 1.2487070931501624e-06,
"loss": 0.2304,
"step": 451
},
{
"epoch": 0.10284414106939704,
"grad_norm": 2.444299385213433,
"learning_rate": 1.2487013425057858e-06,
"loss": 0.2084,
"step": 452
},
{
"epoch": 0.10307167235494881,
"grad_norm": 2.8966369631126367,
"learning_rate": 1.2486955791140964e-06,
"loss": 0.1838,
"step": 453
},
{
"epoch": 0.10329920364050058,
"grad_norm": 2.0941566856763387,
"learning_rate": 1.2486898029752113e-06,
"loss": 0.1043,
"step": 454
},
{
"epoch": 0.10352673492605233,
"grad_norm": 2.3019250022426925,
"learning_rate": 1.248684014089249e-06,
"loss": 0.1189,
"step": 455
},
{
"epoch": 0.1037542662116041,
"grad_norm": 2.1349092143720387,
"learning_rate": 1.2486782124563277e-06,
"loss": 0.1708,
"step": 456
},
{
"epoch": 0.10398179749715586,
"grad_norm": 3.101054381668985,
"learning_rate": 1.2486723980765659e-06,
"loss": 0.1796,
"step": 457
},
{
"epoch": 0.10420932878270762,
"grad_norm": 1.9574694651381292,
"learning_rate": 1.2486665709500826e-06,
"loss": 0.1762,
"step": 458
},
{
"epoch": 0.10443686006825939,
"grad_norm": 1.9997685220641748,
"learning_rate": 1.2486607310769965e-06,
"loss": 0.1626,
"step": 459
},
{
"epoch": 0.10466439135381114,
"grad_norm": 1.4987645243428842,
"learning_rate": 1.2486548784574275e-06,
"loss": 0.1104,
"step": 460
},
{
"epoch": 0.10489192263936291,
"grad_norm": 3.0056305765303857,
"learning_rate": 1.2486490130914948e-06,
"loss": 0.1526,
"step": 461
},
{
"epoch": 0.10511945392491467,
"grad_norm": 1.6498658926200307,
"learning_rate": 1.2486431349793185e-06,
"loss": 0.1158,
"step": 462
},
{
"epoch": 0.10534698521046644,
"grad_norm": 2.8097802744351035,
"learning_rate": 1.2486372441210188e-06,
"loss": 0.174,
"step": 463
},
{
"epoch": 0.1055745164960182,
"grad_norm": 2.2295425114906955,
"learning_rate": 1.248631340516716e-06,
"loss": 0.0993,
"step": 464
},
{
"epoch": 0.10580204778156997,
"grad_norm": 1.7352971105344217,
"learning_rate": 1.2486254241665302e-06,
"loss": 0.1799,
"step": 465
},
{
"epoch": 0.10602957906712172,
"grad_norm": 3.37890451450669,
"learning_rate": 1.2486194950705831e-06,
"loss": 0.1456,
"step": 466
},
{
"epoch": 0.10625711035267349,
"grad_norm": 4.485196875503332,
"learning_rate": 1.248613553228996e-06,
"loss": 0.1509,
"step": 467
},
{
"epoch": 0.10648464163822526,
"grad_norm": 3.8128664414272833,
"learning_rate": 1.2486075986418896e-06,
"loss": 0.1217,
"step": 468
},
{
"epoch": 0.10671217292377702,
"grad_norm": 1.9049325746647565,
"learning_rate": 1.248601631309386e-06,
"loss": 0.1973,
"step": 469
},
{
"epoch": 0.10693970420932879,
"grad_norm": 1.9433225744575688,
"learning_rate": 1.2485956512316072e-06,
"loss": 0.1422,
"step": 470
},
{
"epoch": 0.10716723549488055,
"grad_norm": 1.7542185976103952,
"learning_rate": 1.2485896584086754e-06,
"loss": 0.1187,
"step": 471
},
{
"epoch": 0.1073947667804323,
"grad_norm": 0.985585738392577,
"learning_rate": 1.248583652840713e-06,
"loss": 0.1116,
"step": 472
},
{
"epoch": 0.10762229806598407,
"grad_norm": 6.520293791736507,
"learning_rate": 1.2485776345278427e-06,
"loss": 0.1634,
"step": 473
},
{
"epoch": 0.10784982935153584,
"grad_norm": 2.9958165676640935,
"learning_rate": 1.2485716034701876e-06,
"loss": 0.1468,
"step": 474
},
{
"epoch": 0.1080773606370876,
"grad_norm": 3.496540224028896,
"learning_rate": 1.2485655596678712e-06,
"loss": 0.1444,
"step": 475
},
{
"epoch": 0.10830489192263937,
"grad_norm": 2.6887910577996603,
"learning_rate": 1.2485595031210164e-06,
"loss": 0.2257,
"step": 476
},
{
"epoch": 0.10853242320819112,
"grad_norm": 2.210859712757279,
"learning_rate": 1.2485534338297475e-06,
"loss": 0.0858,
"step": 477
},
{
"epoch": 0.10875995449374289,
"grad_norm": 1.5912288577365465,
"learning_rate": 1.2485473517941884e-06,
"loss": 0.1021,
"step": 478
},
{
"epoch": 0.10898748577929465,
"grad_norm": 2.162920899638659,
"learning_rate": 1.2485412570144633e-06,
"loss": 0.2051,
"step": 479
},
{
"epoch": 0.10921501706484642,
"grad_norm": 2.3337569161162186,
"learning_rate": 1.2485351494906969e-06,
"loss": 0.1726,
"step": 480
},
{
"epoch": 0.10944254835039818,
"grad_norm": 1.6587972530161754,
"learning_rate": 1.2485290292230142e-06,
"loss": 0.1589,
"step": 481
},
{
"epoch": 0.10967007963594995,
"grad_norm": 2.549443212629399,
"learning_rate": 1.24852289621154e-06,
"loss": 0.1107,
"step": 482
},
{
"epoch": 0.1098976109215017,
"grad_norm": 1.9600173744992218,
"learning_rate": 1.2485167504563995e-06,
"loss": 0.1497,
"step": 483
},
{
"epoch": 0.11012514220705347,
"grad_norm": 2.914488733886043,
"learning_rate": 1.2485105919577187e-06,
"loss": 0.2242,
"step": 484
},
{
"epoch": 0.11035267349260523,
"grad_norm": 2.4334592724633475,
"learning_rate": 1.2485044207156233e-06,
"loss": 0.1326,
"step": 485
},
{
"epoch": 0.110580204778157,
"grad_norm": 2.1918094312708374,
"learning_rate": 1.2484982367302395e-06,
"loss": 0.1611,
"step": 486
},
{
"epoch": 0.11080773606370876,
"grad_norm": 2.2072766100880843,
"learning_rate": 1.2484920400016936e-06,
"loss": 0.1402,
"step": 487
},
{
"epoch": 0.11103526734926053,
"grad_norm": 1.6859469474720183,
"learning_rate": 1.2484858305301122e-06,
"loss": 0.1472,
"step": 488
},
{
"epoch": 0.11126279863481228,
"grad_norm": 1.590244696061809,
"learning_rate": 1.2484796083156222e-06,
"loss": 0.0824,
"step": 489
},
{
"epoch": 0.11149032992036405,
"grad_norm": 4.525638347888733,
"learning_rate": 1.2484733733583511e-06,
"loss": 0.1257,
"step": 490
},
{
"epoch": 0.11171786120591581,
"grad_norm": 2.6721724669454723,
"learning_rate": 1.248467125658426e-06,
"loss": 0.2084,
"step": 491
},
{
"epoch": 0.11194539249146758,
"grad_norm": 2.300055245713483,
"learning_rate": 1.2484608652159746e-06,
"loss": 0.1053,
"step": 492
},
{
"epoch": 0.11217292377701935,
"grad_norm": 3.273977920110333,
"learning_rate": 1.248454592031125e-06,
"loss": 0.1176,
"step": 493
},
{
"epoch": 0.1124004550625711,
"grad_norm": 2.101057790899636,
"learning_rate": 1.2484483061040054e-06,
"loss": 0.1277,
"step": 494
},
{
"epoch": 0.11262798634812286,
"grad_norm": 3.6133620556599984,
"learning_rate": 1.2484420074347441e-06,
"loss": 0.1845,
"step": 495
},
{
"epoch": 0.11285551763367463,
"grad_norm": 1.9619725915027257,
"learning_rate": 1.24843569602347e-06,
"loss": 0.1894,
"step": 496
},
{
"epoch": 0.1130830489192264,
"grad_norm": 2.636905846270966,
"learning_rate": 1.2484293718703119e-06,
"loss": 0.1874,
"step": 497
},
{
"epoch": 0.11331058020477816,
"grad_norm": 2.5593822043936125,
"learning_rate": 1.2484230349753994e-06,
"loss": 0.0927,
"step": 498
},
{
"epoch": 0.11353811149032993,
"grad_norm": 2.2440609982402715,
"learning_rate": 1.2484166853388617e-06,
"loss": 0.1381,
"step": 499
},
{
"epoch": 0.11376564277588168,
"grad_norm": 2.7232866925160506,
"learning_rate": 1.2484103229608288e-06,
"loss": 0.1758,
"step": 500
},
{
"epoch": 0.11399317406143344,
"grad_norm": 2.6484317978572816,
"learning_rate": 1.2484039478414305e-06,
"loss": 0.1259,
"step": 501
},
{
"epoch": 0.11422070534698521,
"grad_norm": 2.1058374053464464,
"learning_rate": 1.2483975599807972e-06,
"loss": 0.1369,
"step": 502
},
{
"epoch": 0.11444823663253698,
"grad_norm": 2.1458925241645903,
"learning_rate": 1.2483911593790595e-06,
"loss": 0.1004,
"step": 503
},
{
"epoch": 0.11467576791808874,
"grad_norm": 3.031837353586065,
"learning_rate": 1.2483847460363482e-06,
"loss": 0.154,
"step": 504
},
{
"epoch": 0.1149032992036405,
"grad_norm": 3.1297621875057544,
"learning_rate": 1.2483783199527943e-06,
"loss": 0.1071,
"step": 505
},
{
"epoch": 0.11513083048919226,
"grad_norm": 2.5407911203085787,
"learning_rate": 1.2483718811285296e-06,
"loss": 0.1744,
"step": 506
},
{
"epoch": 0.11535836177474403,
"grad_norm": 3.1175064627764377,
"learning_rate": 1.2483654295636848e-06,
"loss": 0.1072,
"step": 507
},
{
"epoch": 0.11558589306029579,
"grad_norm": 3.0988741009535667,
"learning_rate": 1.2483589652583924e-06,
"loss": 0.1753,
"step": 508
},
{
"epoch": 0.11581342434584756,
"grad_norm": 1.8808814641931946,
"learning_rate": 1.2483524882127846e-06,
"loss": 0.0859,
"step": 509
},
{
"epoch": 0.11604095563139932,
"grad_norm": 2.8937543802568158,
"learning_rate": 1.2483459984269933e-06,
"loss": 0.1816,
"step": 510
},
{
"epoch": 0.11626848691695107,
"grad_norm": 2.186370885841539,
"learning_rate": 1.2483394959011514e-06,
"loss": 0.0819,
"step": 511
},
{
"epoch": 0.11649601820250284,
"grad_norm": 1.8650801779387822,
"learning_rate": 1.248332980635392e-06,
"loss": 0.1436,
"step": 512
},
{
"epoch": 0.1167235494880546,
"grad_norm": 2.9270321544640994,
"learning_rate": 1.2483264526298478e-06,
"loss": 0.1308,
"step": 513
},
{
"epoch": 0.11695108077360637,
"grad_norm": 1.9942689645578024,
"learning_rate": 1.2483199118846525e-06,
"loss": 0.1656,
"step": 514
},
{
"epoch": 0.11717861205915814,
"grad_norm": 2.8104633311436116,
"learning_rate": 1.2483133583999399e-06,
"loss": 0.1681,
"step": 515
},
{
"epoch": 0.1174061433447099,
"grad_norm": 2.546169206593085,
"learning_rate": 1.2483067921758439e-06,
"loss": 0.0925,
"step": 516
},
{
"epoch": 0.11763367463026166,
"grad_norm": 2.0758430805982178,
"learning_rate": 1.2483002132124983e-06,
"loss": 0.203,
"step": 517
},
{
"epoch": 0.11786120591581342,
"grad_norm": 2.1497459150584386,
"learning_rate": 1.2482936215100382e-06,
"loss": 0.1056,
"step": 518
},
{
"epoch": 0.11808873720136519,
"grad_norm": 2.197584956184683,
"learning_rate": 1.2482870170685978e-06,
"loss": 0.0933,
"step": 519
},
{
"epoch": 0.11831626848691695,
"grad_norm": 4.944962250057973,
"learning_rate": 1.2482803998883122e-06,
"loss": 0.2129,
"step": 520
},
{
"epoch": 0.11854379977246872,
"grad_norm": 1.5333537239736301,
"learning_rate": 1.2482737699693168e-06,
"loss": 0.1729,
"step": 521
},
{
"epoch": 0.11877133105802047,
"grad_norm": 2.5556570479037948,
"learning_rate": 1.248267127311747e-06,
"loss": 0.1607,
"step": 522
},
{
"epoch": 0.11899886234357224,
"grad_norm": 2.0949542782407398,
"learning_rate": 1.2482604719157386e-06,
"loss": 0.1857,
"step": 523
},
{
"epoch": 0.119226393629124,
"grad_norm": 2.2586097350216385,
"learning_rate": 1.2482538037814277e-06,
"loss": 0.1258,
"step": 524
},
{
"epoch": 0.11945392491467577,
"grad_norm": 3.036602602741407,
"learning_rate": 1.2482471229089502e-06,
"loss": 0.161,
"step": 525
},
{
"epoch": 0.11968145620022753,
"grad_norm": 3.382002996482515,
"learning_rate": 1.2482404292984431e-06,
"loss": 0.1784,
"step": 526
},
{
"epoch": 0.1199089874857793,
"grad_norm": 1.571226708630226,
"learning_rate": 1.248233722950043e-06,
"loss": 0.1605,
"step": 527
},
{
"epoch": 0.12013651877133105,
"grad_norm": 3.0053996402943737,
"learning_rate": 1.2482270038638872e-06,
"loss": 0.1201,
"step": 528
},
{
"epoch": 0.12036405005688282,
"grad_norm": 4.663906907753179,
"learning_rate": 1.2482202720401128e-06,
"loss": 0.203,
"step": 529
},
{
"epoch": 0.12059158134243458,
"grad_norm": 2.107107186527039,
"learning_rate": 1.248213527478857e-06,
"loss": 0.1933,
"step": 530
},
{
"epoch": 0.12081911262798635,
"grad_norm": 2.191569921182264,
"learning_rate": 1.2482067701802583e-06,
"loss": 0.1735,
"step": 531
},
{
"epoch": 0.12104664391353812,
"grad_norm": 1.611611034864374,
"learning_rate": 1.2482000001444547e-06,
"loss": 0.1299,
"step": 532
},
{
"epoch": 0.12127417519908988,
"grad_norm": 1.9644367618752439,
"learning_rate": 1.2481932173715845e-06,
"loss": 0.0868,
"step": 533
},
{
"epoch": 0.12150170648464163,
"grad_norm": 1.7597689357542332,
"learning_rate": 1.2481864218617859e-06,
"loss": 0.1977,
"step": 534
},
{
"epoch": 0.1217292377701934,
"grad_norm": 1.0455766882042379,
"learning_rate": 1.2481796136151984e-06,
"loss": 0.0856,
"step": 535
},
{
"epoch": 0.12195676905574517,
"grad_norm": 3.2419347761543684,
"learning_rate": 1.2481727926319609e-06,
"loss": 0.2399,
"step": 536
},
{
"epoch": 0.12218430034129693,
"grad_norm": 3.339873316715719,
"learning_rate": 1.2481659589122127e-06,
"loss": 0.186,
"step": 537
},
{
"epoch": 0.1224118316268487,
"grad_norm": 3.4453888669974146,
"learning_rate": 1.2481591124560934e-06,
"loss": 0.2007,
"step": 538
},
{
"epoch": 0.12263936291240045,
"grad_norm": 3.4700673703521736,
"learning_rate": 1.2481522532637435e-06,
"loss": 0.1632,
"step": 539
},
{
"epoch": 0.12286689419795221,
"grad_norm": 2.355397510374851,
"learning_rate": 1.2481453813353026e-06,
"loss": 0.1212,
"step": 540
},
{
"epoch": 0.12309442548350398,
"grad_norm": 5.338957920220655,
"learning_rate": 1.2481384966709116e-06,
"loss": 0.1592,
"step": 541
},
{
"epoch": 0.12332195676905575,
"grad_norm": 2.990026650956376,
"learning_rate": 1.2481315992707104e-06,
"loss": 0.2656,
"step": 542
},
{
"epoch": 0.12354948805460751,
"grad_norm": 1.8798810865858828,
"learning_rate": 1.248124689134841e-06,
"loss": 0.1125,
"step": 543
},
{
"epoch": 0.12377701934015928,
"grad_norm": 1.6104299610891197,
"learning_rate": 1.2481177662634438e-06,
"loss": 0.1557,
"step": 544
},
{
"epoch": 0.12400455062571103,
"grad_norm": 3.302283676048537,
"learning_rate": 1.2481108306566609e-06,
"loss": 0.1799,
"step": 545
},
{
"epoch": 0.1242320819112628,
"grad_norm": 2.0532951352869513,
"learning_rate": 1.2481038823146338e-06,
"loss": 0.0815,
"step": 546
},
{
"epoch": 0.12445961319681456,
"grad_norm": 1.4326913794879275,
"learning_rate": 1.2480969212375043e-06,
"loss": 0.177,
"step": 547
},
{
"epoch": 0.12468714448236633,
"grad_norm": 3.5494676426295286,
"learning_rate": 1.2480899474254151e-06,
"loss": 0.136,
"step": 548
},
{
"epoch": 0.12491467576791809,
"grad_norm": 1.3410455744599155,
"learning_rate": 1.2480829608785085e-06,
"loss": 0.1078,
"step": 549
},
{
"epoch": 0.12514220705346984,
"grad_norm": 1.7709434217848017,
"learning_rate": 1.2480759615969273e-06,
"loss": 0.1114,
"step": 550
},
{
"epoch": 0.12536973833902162,
"grad_norm": 1.4865770903343614,
"learning_rate": 1.2480689495808144e-06,
"loss": 0.1377,
"step": 551
},
{
"epoch": 0.12559726962457338,
"grad_norm": 1.6211826207402742,
"learning_rate": 1.2480619248303133e-06,
"loss": 0.1873,
"step": 552
},
{
"epoch": 0.12582480091012513,
"grad_norm": 3.1755876159758794,
"learning_rate": 1.2480548873455675e-06,
"loss": 0.2135,
"step": 553
},
{
"epoch": 0.1260523321956769,
"grad_norm": 3.6986046315140952,
"learning_rate": 1.248047837126721e-06,
"loss": 0.3549,
"step": 554
},
{
"epoch": 0.12627986348122866,
"grad_norm": 2.782290781984551,
"learning_rate": 1.248040774173918e-06,
"loss": 0.1936,
"step": 555
},
{
"epoch": 0.12650739476678044,
"grad_norm": 2.329760734261347,
"learning_rate": 1.248033698487302e-06,
"loss": 0.1395,
"step": 556
},
{
"epoch": 0.1267349260523322,
"grad_norm": 2.258554836923121,
"learning_rate": 1.2480266100670189e-06,
"loss": 0.1605,
"step": 557
},
{
"epoch": 0.12696245733788397,
"grad_norm": 3.058041285297341,
"learning_rate": 1.2480195089132125e-06,
"loss": 0.1975,
"step": 558
},
{
"epoch": 0.12718998862343572,
"grad_norm": 2.406042057945949,
"learning_rate": 1.2480123950260284e-06,
"loss": 0.1405,
"step": 559
},
{
"epoch": 0.12741751990898748,
"grad_norm": 1.4634033865621767,
"learning_rate": 1.248005268405612e-06,
"loss": 0.0686,
"step": 560
},
{
"epoch": 0.12764505119453926,
"grad_norm": 1.1470288222889338,
"learning_rate": 1.2479981290521087e-06,
"loss": 0.0649,
"step": 561
},
{
"epoch": 0.127872582480091,
"grad_norm": 3.357158703331078,
"learning_rate": 1.2479909769656648e-06,
"loss": 0.1684,
"step": 562
},
{
"epoch": 0.1281001137656428,
"grad_norm": 2.4363436867877595,
"learning_rate": 1.2479838121464263e-06,
"loss": 0.2155,
"step": 563
},
{
"epoch": 0.12832764505119454,
"grad_norm": 4.051636355021599,
"learning_rate": 1.2479766345945395e-06,
"loss": 0.1853,
"step": 564
},
{
"epoch": 0.1285551763367463,
"grad_norm": 1.6707836764627593,
"learning_rate": 1.2479694443101513e-06,
"loss": 0.2261,
"step": 565
},
{
"epoch": 0.12878270762229807,
"grad_norm": 1.3008647546251737,
"learning_rate": 1.2479622412934087e-06,
"loss": 0.1606,
"step": 566
},
{
"epoch": 0.12901023890784982,
"grad_norm": 3.421202381350775,
"learning_rate": 1.2479550255444586e-06,
"loss": 0.147,
"step": 567
},
{
"epoch": 0.1292377701934016,
"grad_norm": 1.5157864652280186,
"learning_rate": 1.2479477970634487e-06,
"loss": 0.1536,
"step": 568
},
{
"epoch": 0.12946530147895335,
"grad_norm": 3.27856184412377,
"learning_rate": 1.2479405558505267e-06,
"loss": 0.1931,
"step": 569
},
{
"epoch": 0.1296928327645051,
"grad_norm": 2.5943823025048474,
"learning_rate": 1.247933301905841e-06,
"loss": 0.1384,
"step": 570
},
{
"epoch": 0.12992036405005689,
"grad_norm": 4.278003846990416,
"learning_rate": 1.2479260352295388e-06,
"loss": 0.1771,
"step": 571
},
{
"epoch": 0.13014789533560864,
"grad_norm": 3.446486195671729,
"learning_rate": 1.2479187558217697e-06,
"loss": 0.1323,
"step": 572
},
{
"epoch": 0.13037542662116042,
"grad_norm": 1.5099352019896337,
"learning_rate": 1.247911463682682e-06,
"loss": 0.1444,
"step": 573
},
{
"epoch": 0.13060295790671217,
"grad_norm": 3.798908546439363,
"learning_rate": 1.2479041588124247e-06,
"loss": 0.1504,
"step": 574
},
{
"epoch": 0.13083048919226395,
"grad_norm": 3.7532424433768754,
"learning_rate": 1.2478968412111471e-06,
"loss": 0.1518,
"step": 575
},
{
"epoch": 0.1310580204778157,
"grad_norm": 2.056630545760187,
"learning_rate": 1.247889510878999e-06,
"loss": 0.2708,
"step": 576
},
{
"epoch": 0.13128555176336745,
"grad_norm": 2.303355999452058,
"learning_rate": 1.24788216781613e-06,
"loss": 0.1662,
"step": 577
},
{
"epoch": 0.13151308304891923,
"grad_norm": 2.269104241548175,
"learning_rate": 1.2478748120226902e-06,
"loss": 0.1337,
"step": 578
},
{
"epoch": 0.13174061433447098,
"grad_norm": 3.0692597907642862,
"learning_rate": 1.2478674434988299e-06,
"loss": 0.1326,
"step": 579
},
{
"epoch": 0.13196814562002276,
"grad_norm": 1.6865202158454742,
"learning_rate": 1.2478600622447001e-06,
"loss": 0.1647,
"step": 580
},
{
"epoch": 0.13219567690557452,
"grad_norm": 2.939283703136826,
"learning_rate": 1.2478526682604512e-06,
"loss": 0.1303,
"step": 581
},
{
"epoch": 0.13242320819112627,
"grad_norm": 3.1064926411391713,
"learning_rate": 1.2478452615462345e-06,
"loss": 0.1409,
"step": 582
},
{
"epoch": 0.13265073947667805,
"grad_norm": 2.5571749562826485,
"learning_rate": 1.247837842102201e-06,
"loss": 0.1791,
"step": 583
},
{
"epoch": 0.1328782707622298,
"grad_norm": 2.795629539563545,
"learning_rate": 1.2478304099285031e-06,
"loss": 0.1567,
"step": 584
},
{
"epoch": 0.13310580204778158,
"grad_norm": 2.0832780528771466,
"learning_rate": 1.2478229650252921e-06,
"loss": 0.1639,
"step": 585
},
{
"epoch": 0.13333333333333333,
"grad_norm": 2.9969798024524117,
"learning_rate": 1.2478155073927204e-06,
"loss": 0.2444,
"step": 586
},
{
"epoch": 0.13356086461888508,
"grad_norm": 1.9274087851448982,
"learning_rate": 1.2478080370309404e-06,
"loss": 0.105,
"step": 587
},
{
"epoch": 0.13378839590443686,
"grad_norm": 4.021015627831867,
"learning_rate": 1.2478005539401046e-06,
"loss": 0.1734,
"step": 588
},
{
"epoch": 0.13401592718998862,
"grad_norm": 2.9342976021528027,
"learning_rate": 1.2477930581203663e-06,
"loss": 0.1465,
"step": 589
},
{
"epoch": 0.1342434584755404,
"grad_norm": 2.3242426333780632,
"learning_rate": 1.2477855495718782e-06,
"loss": 0.2241,
"step": 590
},
{
"epoch": 0.13447098976109215,
"grad_norm": 2.957504561813871,
"learning_rate": 1.2477780282947942e-06,
"loss": 0.1734,
"step": 591
},
{
"epoch": 0.13469852104664393,
"grad_norm": 1.8788696793522301,
"learning_rate": 1.2477704942892677e-06,
"loss": 0.1469,
"step": 592
},
{
"epoch": 0.13492605233219568,
"grad_norm": 2.339527187323086,
"learning_rate": 1.2477629475554532e-06,
"loss": 0.1312,
"step": 593
},
{
"epoch": 0.13515358361774743,
"grad_norm": 3.707567497860105,
"learning_rate": 1.2477553880935043e-06,
"loss": 0.1916,
"step": 594
},
{
"epoch": 0.1353811149032992,
"grad_norm": 3.2750827489523022,
"learning_rate": 1.2477478159035758e-06,
"loss": 0.1774,
"step": 595
},
{
"epoch": 0.13560864618885096,
"grad_norm": 2.777476705753077,
"learning_rate": 1.2477402309858226e-06,
"loss": 0.1789,
"step": 596
},
{
"epoch": 0.13583617747440274,
"grad_norm": 2.144596195630353,
"learning_rate": 1.2477326333403995e-06,
"loss": 0.147,
"step": 597
},
{
"epoch": 0.1360637087599545,
"grad_norm": 2.3685083837175935,
"learning_rate": 1.2477250229674618e-06,
"loss": 0.1831,
"step": 598
},
{
"epoch": 0.13629124004550625,
"grad_norm": 1.9843295041761948,
"learning_rate": 1.2477173998671653e-06,
"loss": 0.178,
"step": 599
},
{
"epoch": 0.13651877133105803,
"grad_norm": 3.434039497211011,
"learning_rate": 1.2477097640396655e-06,
"loss": 0.1235,
"step": 600
},
{
"epoch": 0.13674630261660978,
"grad_norm": 1.4586285890850859,
"learning_rate": 1.2477021154851185e-06,
"loss": 0.0977,
"step": 601
},
{
"epoch": 0.13697383390216156,
"grad_norm": 3.3913304667052198,
"learning_rate": 1.2476944542036806e-06,
"loss": 0.1786,
"step": 602
},
{
"epoch": 0.1372013651877133,
"grad_norm": 2.667804003182341,
"learning_rate": 1.2476867801955086e-06,
"loss": 0.1204,
"step": 603
},
{
"epoch": 0.13742889647326506,
"grad_norm": 2.4655446209984033,
"learning_rate": 1.247679093460759e-06,
"loss": 0.2298,
"step": 604
},
{
"epoch": 0.13765642775881684,
"grad_norm": 3.1521634114958816,
"learning_rate": 1.2476713939995895e-06,
"loss": 0.1264,
"step": 605
},
{
"epoch": 0.1378839590443686,
"grad_norm": 1.8219187381761075,
"learning_rate": 1.2476636818121568e-06,
"loss": 0.1028,
"step": 606
},
{
"epoch": 0.13811149032992037,
"grad_norm": 2.337156447435568,
"learning_rate": 1.247655956898619e-06,
"loss": 0.1946,
"step": 607
},
{
"epoch": 0.13833902161547212,
"grad_norm": 3.2562899945752966,
"learning_rate": 1.2476482192591335e-06,
"loss": 0.1465,
"step": 608
},
{
"epoch": 0.1385665529010239,
"grad_norm": 1.8250022998173558,
"learning_rate": 1.247640468893859e-06,
"loss": 0.1467,
"step": 609
},
{
"epoch": 0.13879408418657566,
"grad_norm": 3.5242803865119603,
"learning_rate": 1.2476327058029534e-06,
"loss": 0.1225,
"step": 610
},
{
"epoch": 0.1390216154721274,
"grad_norm": 3.027013883019154,
"learning_rate": 1.2476249299865757e-06,
"loss": 0.1595,
"step": 611
},
{
"epoch": 0.1392491467576792,
"grad_norm": 2.3807833370240843,
"learning_rate": 1.2476171414448847e-06,
"loss": 0.0984,
"step": 612
},
{
"epoch": 0.13947667804323094,
"grad_norm": 3.1119739781274416,
"learning_rate": 1.2476093401780397e-06,
"loss": 0.154,
"step": 613
},
{
"epoch": 0.13970420932878272,
"grad_norm": 3.4567643287811958,
"learning_rate": 1.2476015261861998e-06,
"loss": 0.1405,
"step": 614
},
{
"epoch": 0.13993174061433447,
"grad_norm": 2.6730132596017504,
"learning_rate": 1.247593699469525e-06,
"loss": 0.117,
"step": 615
},
{
"epoch": 0.14015927189988622,
"grad_norm": 2.78286071664722,
"learning_rate": 1.2475858600281754e-06,
"loss": 0.1504,
"step": 616
},
{
"epoch": 0.140386803185438,
"grad_norm": 2.0905809356248803,
"learning_rate": 1.247578007862311e-06,
"loss": 0.1221,
"step": 617
},
{
"epoch": 0.14061433447098975,
"grad_norm": 2.307570493464016,
"learning_rate": 1.2475701429720923e-06,
"loss": 0.1166,
"step": 618
},
{
"epoch": 0.14084186575654153,
"grad_norm": 1.2783682538203782,
"learning_rate": 1.24756226535768e-06,
"loss": 0.1346,
"step": 619
},
{
"epoch": 0.1410693970420933,
"grad_norm": 1.497656716954093,
"learning_rate": 1.2475543750192352e-06,
"loss": 0.2064,
"step": 620
},
{
"epoch": 0.14129692832764504,
"grad_norm": 3.79056695480817,
"learning_rate": 1.2475464719569192e-06,
"loss": 0.2673,
"step": 621
},
{
"epoch": 0.14152445961319682,
"grad_norm": 1.4805750856049538,
"learning_rate": 1.2475385561708934e-06,
"loss": 0.1992,
"step": 622
},
{
"epoch": 0.14175199089874857,
"grad_norm": 1.6748002073239907,
"learning_rate": 1.2475306276613194e-06,
"loss": 0.0979,
"step": 623
},
{
"epoch": 0.14197952218430035,
"grad_norm": 2.5674392190565736,
"learning_rate": 1.2475226864283596e-06,
"loss": 0.1337,
"step": 624
},
{
"epoch": 0.1422070534698521,
"grad_norm": 2.656075374063454,
"learning_rate": 1.2475147324721764e-06,
"loss": 0.2501,
"step": 625
},
{
"epoch": 0.14243458475540388,
"grad_norm": 2.03707084801983,
"learning_rate": 1.2475067657929319e-06,
"loss": 0.1673,
"step": 626
},
{
"epoch": 0.14266211604095563,
"grad_norm": 2.975904435297751,
"learning_rate": 1.2474987863907894e-06,
"loss": 0.135,
"step": 627
},
{
"epoch": 0.14288964732650739,
"grad_norm": 2.2205623276633295,
"learning_rate": 1.2474907942659116e-06,
"loss": 0.2149,
"step": 628
},
{
"epoch": 0.14311717861205916,
"grad_norm": 2.271865927518249,
"learning_rate": 1.247482789418462e-06,
"loss": 0.1519,
"step": 629
},
{
"epoch": 0.14334470989761092,
"grad_norm": 6.542697842484103,
"learning_rate": 1.2474747718486044e-06,
"loss": 0.1757,
"step": 630
},
{
"epoch": 0.1435722411831627,
"grad_norm": 1.8493295758356152,
"learning_rate": 1.2474667415565022e-06,
"loss": 0.096,
"step": 631
},
{
"epoch": 0.14379977246871445,
"grad_norm": 4.567549869753572,
"learning_rate": 1.24745869854232e-06,
"loss": 0.1745,
"step": 632
},
{
"epoch": 0.1440273037542662,
"grad_norm": 3.104479250541457,
"learning_rate": 1.2474506428062219e-06,
"loss": 0.14,
"step": 633
},
{
"epoch": 0.14425483503981798,
"grad_norm": 2.9519743566943464,
"learning_rate": 1.2474425743483726e-06,
"loss": 0.237,
"step": 634
},
{
"epoch": 0.14448236632536973,
"grad_norm": 1.4814831832284159,
"learning_rate": 1.2474344931689371e-06,
"loss": 0.0873,
"step": 635
},
{
"epoch": 0.1447098976109215,
"grad_norm": 2.0222816327136712,
"learning_rate": 1.2474263992680805e-06,
"loss": 0.155,
"step": 636
},
{
"epoch": 0.14493742889647326,
"grad_norm": 2.0590304829666914,
"learning_rate": 1.247418292645968e-06,
"loss": 0.107,
"step": 637
},
{
"epoch": 0.14516496018202502,
"grad_norm": 2.5562023131920633,
"learning_rate": 1.2474101733027659e-06,
"loss": 0.2256,
"step": 638
},
{
"epoch": 0.1453924914675768,
"grad_norm": 2.3833084873555195,
"learning_rate": 1.2474020412386395e-06,
"loss": 0.1087,
"step": 639
},
{
"epoch": 0.14562002275312855,
"grad_norm": 1.5076273114920544,
"learning_rate": 1.2473938964537551e-06,
"loss": 0.0893,
"step": 640
},
{
"epoch": 0.14584755403868033,
"grad_norm": 2.3708066851044887,
"learning_rate": 1.2473857389482797e-06,
"loss": 0.1247,
"step": 641
},
{
"epoch": 0.14607508532423208,
"grad_norm": 1.5590215080673084,
"learning_rate": 1.2473775687223794e-06,
"loss": 0.1504,
"step": 642
},
{
"epoch": 0.14630261660978386,
"grad_norm": 1.6107910166409294,
"learning_rate": 1.2473693857762215e-06,
"loss": 0.149,
"step": 643
},
{
"epoch": 0.1465301478953356,
"grad_norm": 1.7918533159116738,
"learning_rate": 1.247361190109973e-06,
"loss": 0.1104,
"step": 644
},
{
"epoch": 0.14675767918088736,
"grad_norm": 2.8984966135096566,
"learning_rate": 1.2473529817238016e-06,
"loss": 0.1755,
"step": 645
},
{
"epoch": 0.14698521046643914,
"grad_norm": 1.9091822418599347,
"learning_rate": 1.2473447606178754e-06,
"loss": 0.1077,
"step": 646
},
{
"epoch": 0.1472127417519909,
"grad_norm": 4.199288030915391,
"learning_rate": 1.2473365267923617e-06,
"loss": 0.2124,
"step": 647
},
{
"epoch": 0.14744027303754267,
"grad_norm": 2.331859473332942,
"learning_rate": 1.2473282802474293e-06,
"loss": 0.1576,
"step": 648
},
{
"epoch": 0.14766780432309443,
"grad_norm": 3.5722786659910577,
"learning_rate": 1.2473200209832465e-06,
"loss": 0.2027,
"step": 649
},
{
"epoch": 0.14789533560864618,
"grad_norm": 1.5390826591189062,
"learning_rate": 1.2473117489999823e-06,
"loss": 0.161,
"step": 650
},
{
"epoch": 0.14812286689419796,
"grad_norm": 2.741044883004237,
"learning_rate": 1.2473034642978057e-06,
"loss": 0.1656,
"step": 651
},
{
"epoch": 0.1483503981797497,
"grad_norm": 2.2681711762464034,
"learning_rate": 1.247295166876886e-06,
"loss": 0.1254,
"step": 652
},
{
"epoch": 0.1485779294653015,
"grad_norm": 2.2254637289761194,
"learning_rate": 1.2472868567373924e-06,
"loss": 0.1291,
"step": 653
},
{
"epoch": 0.14880546075085324,
"grad_norm": 2.213517163461755,
"learning_rate": 1.2472785338794953e-06,
"loss": 0.1541,
"step": 654
},
{
"epoch": 0.149032992036405,
"grad_norm": 1.6789308605390307,
"learning_rate": 1.247270198303365e-06,
"loss": 0.1316,
"step": 655
},
{
"epoch": 0.14926052332195677,
"grad_norm": 2.179149997459725,
"learning_rate": 1.247261850009171e-06,
"loss": 0.2437,
"step": 656
},
{
"epoch": 0.14948805460750852,
"grad_norm": 2.910894270371587,
"learning_rate": 1.2472534889970848e-06,
"loss": 0.2038,
"step": 657
},
{
"epoch": 0.1497155858930603,
"grad_norm": 1.751607816792672,
"learning_rate": 1.2472451152672766e-06,
"loss": 0.1164,
"step": 658
},
{
"epoch": 0.14994311717861206,
"grad_norm": 1.6602009490349432,
"learning_rate": 1.2472367288199177e-06,
"loss": 0.1193,
"step": 659
},
{
"epoch": 0.15017064846416384,
"grad_norm": 2.038150970938399,
"learning_rate": 1.2472283296551798e-06,
"loss": 0.102,
"step": 660
},
{
"epoch": 0.1503981797497156,
"grad_norm": 2.1439804373776936,
"learning_rate": 1.2472199177732346e-06,
"loss": 0.1502,
"step": 661
},
{
"epoch": 0.15062571103526734,
"grad_norm": 2.5777822840030358,
"learning_rate": 1.2472114931742537e-06,
"loss": 0.1168,
"step": 662
},
{
"epoch": 0.15085324232081912,
"grad_norm": 2.4175964563163177,
"learning_rate": 1.2472030558584093e-06,
"loss": 0.1035,
"step": 663
},
{
"epoch": 0.15108077360637087,
"grad_norm": 2.635267423704016,
"learning_rate": 1.2471946058258742e-06,
"loss": 0.1701,
"step": 664
},
{
"epoch": 0.15130830489192265,
"grad_norm": 1.9337561786859772,
"learning_rate": 1.2471861430768205e-06,
"loss": 0.1075,
"step": 665
},
{
"epoch": 0.1515358361774744,
"grad_norm": 1.7937795679496227,
"learning_rate": 1.2471776676114217e-06,
"loss": 0.1785,
"step": 666
},
{
"epoch": 0.15176336746302616,
"grad_norm": 3.0588710289274816,
"learning_rate": 1.2471691794298508e-06,
"loss": 0.1798,
"step": 667
},
{
"epoch": 0.15199089874857794,
"grad_norm": 2.638986072752188,
"learning_rate": 1.2471606785322814e-06,
"loss": 0.0878,
"step": 668
},
{
"epoch": 0.1522184300341297,
"grad_norm": 2.732712357601826,
"learning_rate": 1.247152164918887e-06,
"loss": 0.1267,
"step": 669
},
{
"epoch": 0.15244596131968147,
"grad_norm": 1.7481991977105777,
"learning_rate": 1.247143638589842e-06,
"loss": 0.1584,
"step": 670
},
{
"epoch": 0.15267349260523322,
"grad_norm": 2.794672743532085,
"learning_rate": 1.2471350995453203e-06,
"loss": 0.1584,
"step": 671
},
{
"epoch": 0.15290102389078497,
"grad_norm": 3.1279366528301633,
"learning_rate": 1.2471265477854966e-06,
"loss": 0.148,
"step": 672
},
{
"epoch": 0.15312855517633675,
"grad_norm": 3.920575109905724,
"learning_rate": 1.2471179833105454e-06,
"loss": 0.1732,
"step": 673
},
{
"epoch": 0.1533560864618885,
"grad_norm": 1.7916571238390178,
"learning_rate": 1.2471094061206422e-06,
"loss": 0.2336,
"step": 674
},
{
"epoch": 0.15358361774744028,
"grad_norm": 1.7363850632393116,
"learning_rate": 1.247100816215962e-06,
"loss": 0.1244,
"step": 675
},
{
"epoch": 0.15381114903299203,
"grad_norm": 2.504377712379844,
"learning_rate": 1.2470922135966806e-06,
"loss": 0.1674,
"step": 676
},
{
"epoch": 0.1540386803185438,
"grad_norm": 2.43043947984636,
"learning_rate": 1.2470835982629736e-06,
"loss": 0.1249,
"step": 677
},
{
"epoch": 0.15426621160409557,
"grad_norm": 3.950497364660697,
"learning_rate": 1.247074970215017e-06,
"loss": 0.2401,
"step": 678
},
{
"epoch": 0.15449374288964732,
"grad_norm": 3.1492013494233846,
"learning_rate": 1.2470663294529873e-06,
"loss": 0.1605,
"step": 679
},
{
"epoch": 0.1547212741751991,
"grad_norm": 1.80598204305421,
"learning_rate": 1.2470576759770612e-06,
"loss": 0.113,
"step": 680
},
{
"epoch": 0.15494880546075085,
"grad_norm": 2.0454054940402506,
"learning_rate": 1.2470490097874155e-06,
"loss": 0.1453,
"step": 681
},
{
"epoch": 0.15517633674630263,
"grad_norm": 3.6952564849548053,
"learning_rate": 1.247040330884227e-06,
"loss": 0.1581,
"step": 682
},
{
"epoch": 0.15540386803185438,
"grad_norm": 2.3655397835651075,
"learning_rate": 1.2470316392676738e-06,
"loss": 0.169,
"step": 683
},
{
"epoch": 0.15563139931740613,
"grad_norm": 3.416348712472315,
"learning_rate": 1.2470229349379326e-06,
"loss": 0.1347,
"step": 684
},
{
"epoch": 0.1558589306029579,
"grad_norm": 2.618995350775909,
"learning_rate": 1.2470142178951822e-06,
"loss": 0.1924,
"step": 685
},
{
"epoch": 0.15608646188850966,
"grad_norm": 1.344663220923034,
"learning_rate": 1.2470054881396002e-06,
"loss": 0.2013,
"step": 686
},
{
"epoch": 0.15631399317406144,
"grad_norm": 1.1568986493989724,
"learning_rate": 1.246996745671365e-06,
"loss": 0.131,
"step": 687
},
{
"epoch": 0.1565415244596132,
"grad_norm": 3.0558312091963473,
"learning_rate": 1.2469879904906556e-06,
"loss": 0.14,
"step": 688
},
{
"epoch": 0.15676905574516495,
"grad_norm": 4.767157427966137,
"learning_rate": 1.2469792225976507e-06,
"loss": 0.156,
"step": 689
},
{
"epoch": 0.15699658703071673,
"grad_norm": 1.9971770266956603,
"learning_rate": 1.2469704419925296e-06,
"loss": 0.1413,
"step": 690
},
{
"epoch": 0.15722411831626848,
"grad_norm": 3.560138993273607,
"learning_rate": 1.246961648675472e-06,
"loss": 0.2274,
"step": 691
},
{
"epoch": 0.15745164960182026,
"grad_norm": 1.8091873297743188,
"learning_rate": 1.246952842646657e-06,
"loss": 0.2606,
"step": 692
},
{
"epoch": 0.157679180887372,
"grad_norm": 1.9524492716137443,
"learning_rate": 1.2469440239062653e-06,
"loss": 0.1888,
"step": 693
},
{
"epoch": 0.15790671217292376,
"grad_norm": 1.978419283294589,
"learning_rate": 1.2469351924544766e-06,
"loss": 0.168,
"step": 694
},
{
"epoch": 0.15813424345847554,
"grad_norm": 1.909977232991382,
"learning_rate": 1.2469263482914716e-06,
"loss": 0.1302,
"step": 695
},
{
"epoch": 0.1583617747440273,
"grad_norm": 2.786836009335205,
"learning_rate": 1.246917491417431e-06,
"loss": 0.1603,
"step": 696
},
{
"epoch": 0.15858930602957907,
"grad_norm": 2.700038379786115,
"learning_rate": 1.246908621832536e-06,
"loss": 0.2268,
"step": 697
},
{
"epoch": 0.15881683731513083,
"grad_norm": 1.4116863857464026,
"learning_rate": 1.2468997395369677e-06,
"loss": 0.1761,
"step": 698
},
{
"epoch": 0.1590443686006826,
"grad_norm": 2.8928190492615133,
"learning_rate": 1.2468908445309077e-06,
"loss": 0.1789,
"step": 699
},
{
"epoch": 0.15927189988623436,
"grad_norm": 1.650749552825084,
"learning_rate": 1.2468819368145376e-06,
"loss": 0.1324,
"step": 700
},
{
"epoch": 0.1594994311717861,
"grad_norm": 2.3722473947353677,
"learning_rate": 1.2468730163880398e-06,
"loss": 0.1116,
"step": 701
},
{
"epoch": 0.1597269624573379,
"grad_norm": 2.879822957568519,
"learning_rate": 1.2468640832515962e-06,
"loss": 0.0564,
"step": 702
},
{
"epoch": 0.15995449374288964,
"grad_norm": 2.162764734574199,
"learning_rate": 1.24685513740539e-06,
"loss": 0.1739,
"step": 703
},
{
"epoch": 0.16018202502844142,
"grad_norm": 2.8968364936480206,
"learning_rate": 1.2468461788496036e-06,
"loss": 0.2091,
"step": 704
},
{
"epoch": 0.16040955631399317,
"grad_norm": 1.8559610510087743,
"learning_rate": 1.24683720758442e-06,
"loss": 0.1533,
"step": 705
},
{
"epoch": 0.16063708759954493,
"grad_norm": 2.184281056476426,
"learning_rate": 1.2468282236100226e-06,
"loss": 0.1582,
"step": 706
},
{
"epoch": 0.1608646188850967,
"grad_norm": 1.3209438595657337,
"learning_rate": 1.2468192269265955e-06,
"loss": 0.1914,
"step": 707
},
{
"epoch": 0.16109215017064846,
"grad_norm": 2.1470386790088174,
"learning_rate": 1.246810217534322e-06,
"loss": 0.0831,
"step": 708
},
{
"epoch": 0.16131968145620024,
"grad_norm": 1.594792083731403,
"learning_rate": 1.2468011954333864e-06,
"loss": 0.1349,
"step": 709
},
{
"epoch": 0.161547212741752,
"grad_norm": 1.9899900139983586,
"learning_rate": 1.2467921606239734e-06,
"loss": 0.1406,
"step": 710
},
{
"epoch": 0.16177474402730374,
"grad_norm": 2.161056989124219,
"learning_rate": 1.2467831131062672e-06,
"loss": 0.1186,
"step": 711
},
{
"epoch": 0.16200227531285552,
"grad_norm": 3.2786168252573438,
"learning_rate": 1.2467740528804528e-06,
"loss": 0.1525,
"step": 712
},
{
"epoch": 0.16222980659840727,
"grad_norm": 2.152367629184536,
"learning_rate": 1.2467649799467156e-06,
"loss": 0.1403,
"step": 713
},
{
"epoch": 0.16245733788395905,
"grad_norm": 2.658644939282435,
"learning_rate": 1.246755894305241e-06,
"loss": 0.1287,
"step": 714
},
{
"epoch": 0.1626848691695108,
"grad_norm": 1.8320157906526173,
"learning_rate": 1.2467467959562143e-06,
"loss": 0.1489,
"step": 715
},
{
"epoch": 0.16291240045506258,
"grad_norm": 3.0792158572997526,
"learning_rate": 1.2467376848998221e-06,
"loss": 0.1929,
"step": 716
},
{
"epoch": 0.16313993174061434,
"grad_norm": 2.592666663523021,
"learning_rate": 1.2467285611362501e-06,
"loss": 0.1198,
"step": 717
},
{
"epoch": 0.1633674630261661,
"grad_norm": 2.3270639642215123,
"learning_rate": 1.2467194246656851e-06,
"loss": 0.119,
"step": 718
},
{
"epoch": 0.16359499431171787,
"grad_norm": 1.5662096056295784,
"learning_rate": 1.2467102754883136e-06,
"loss": 0.1488,
"step": 719
},
{
"epoch": 0.16382252559726962,
"grad_norm": 2.0754259992407174,
"learning_rate": 1.2467011136043228e-06,
"loss": 0.1206,
"step": 720
},
{
"epoch": 0.1640500568828214,
"grad_norm": 2.377809704915352,
"learning_rate": 1.2466919390138995e-06,
"loss": 0.2349,
"step": 721
},
{
"epoch": 0.16427758816837315,
"grad_norm": 2.1373727350700205,
"learning_rate": 1.246682751717232e-06,
"loss": 0.1333,
"step": 722
},
{
"epoch": 0.1645051194539249,
"grad_norm": 3.8601459911234697,
"learning_rate": 1.2466735517145074e-06,
"loss": 0.3259,
"step": 723
},
{
"epoch": 0.16473265073947668,
"grad_norm": 2.1273982856593614,
"learning_rate": 1.2466643390059138e-06,
"loss": 0.199,
"step": 724
},
{
"epoch": 0.16496018202502843,
"grad_norm": 2.274158988300012,
"learning_rate": 1.2466551135916398e-06,
"loss": 0.1351,
"step": 725
},
{
"epoch": 0.16518771331058021,
"grad_norm": 2.1566789936379287,
"learning_rate": 1.2466458754718737e-06,
"loss": 0.219,
"step": 726
},
{
"epoch": 0.16541524459613197,
"grad_norm": 3.388462178150055,
"learning_rate": 1.2466366246468045e-06,
"loss": 0.1456,
"step": 727
},
{
"epoch": 0.16564277588168372,
"grad_norm": 2.792548754369155,
"learning_rate": 1.246627361116621e-06,
"loss": 0.2178,
"step": 728
},
{
"epoch": 0.1658703071672355,
"grad_norm": 1.7787275123381943,
"learning_rate": 1.246618084881513e-06,
"loss": 0.2584,
"step": 729
},
{
"epoch": 0.16609783845278725,
"grad_norm": 2.150845029279013,
"learning_rate": 1.2466087959416695e-06,
"loss": 0.1474,
"step": 730
},
{
"epoch": 0.16632536973833903,
"grad_norm": 3.4162019984229213,
"learning_rate": 1.2465994942972805e-06,
"loss": 0.1415,
"step": 731
},
{
"epoch": 0.16655290102389078,
"grad_norm": 3.5172418167047743,
"learning_rate": 1.2465901799485366e-06,
"loss": 0.2267,
"step": 732
},
{
"epoch": 0.16678043230944256,
"grad_norm": 1.9664520821504867,
"learning_rate": 1.2465808528956277e-06,
"loss": 0.1027,
"step": 733
},
{
"epoch": 0.1670079635949943,
"grad_norm": 2.053925645911197,
"learning_rate": 1.2465715131387446e-06,
"loss": 0.1405,
"step": 734
},
{
"epoch": 0.16723549488054607,
"grad_norm": 1.6417683696863474,
"learning_rate": 1.2465621606780778e-06,
"loss": 0.1804,
"step": 735
},
{
"epoch": 0.16746302616609784,
"grad_norm": 1.9532511665276102,
"learning_rate": 1.2465527955138191e-06,
"loss": 0.1438,
"step": 736
},
{
"epoch": 0.1676905574516496,
"grad_norm": 2.7978077296538295,
"learning_rate": 1.2465434176461596e-06,
"loss": 0.1806,
"step": 737
},
{
"epoch": 0.16791808873720138,
"grad_norm": 1.7861222447513503,
"learning_rate": 1.2465340270752908e-06,
"loss": 0.0953,
"step": 738
},
{
"epoch": 0.16814562002275313,
"grad_norm": 1.2545980680473232,
"learning_rate": 1.2465246238014047e-06,
"loss": 0.0881,
"step": 739
},
{
"epoch": 0.16837315130830488,
"grad_norm": 2.49195685975364,
"learning_rate": 1.2465152078246936e-06,
"loss": 0.1643,
"step": 740
},
{
"epoch": 0.16860068259385666,
"grad_norm": 2.0211233157427637,
"learning_rate": 1.24650577914535e-06,
"loss": 0.1263,
"step": 741
},
{
"epoch": 0.1688282138794084,
"grad_norm": 2.7858317155477317,
"learning_rate": 1.2464963377635667e-06,
"loss": 0.1547,
"step": 742
},
{
"epoch": 0.1690557451649602,
"grad_norm": 1.7097291360774547,
"learning_rate": 1.246486883679536e-06,
"loss": 0.2516,
"step": 743
},
{
"epoch": 0.16928327645051194,
"grad_norm": 3.9137648292026737,
"learning_rate": 1.246477416893452e-06,
"loss": 0.2036,
"step": 744
},
{
"epoch": 0.1695108077360637,
"grad_norm": 3.005605654107358,
"learning_rate": 1.2464679374055074e-06,
"loss": 0.1481,
"step": 745
},
{
"epoch": 0.16973833902161548,
"grad_norm": 3.401532765227879,
"learning_rate": 1.2464584452158968e-06,
"loss": 0.1841,
"step": 746
},
{
"epoch": 0.16996587030716723,
"grad_norm": 2.843140048954733,
"learning_rate": 1.2464489403248133e-06,
"loss": 0.184,
"step": 747
},
{
"epoch": 0.170193401592719,
"grad_norm": 1.515779223289782,
"learning_rate": 1.246439422732452e-06,
"loss": 0.1262,
"step": 748
},
{
"epoch": 0.17042093287827076,
"grad_norm": 2.618293101772126,
"learning_rate": 1.2464298924390066e-06,
"loss": 0.1415,
"step": 749
},
{
"epoch": 0.17064846416382254,
"grad_norm": 2.248269138511338,
"learning_rate": 1.2464203494446725e-06,
"loss": 0.185,
"step": 750
},
{
"epoch": 0.1708759954493743,
"grad_norm": 1.3558978429200024,
"learning_rate": 1.2464107937496444e-06,
"loss": 0.096,
"step": 751
},
{
"epoch": 0.17110352673492604,
"grad_norm": 1.8355286869437153,
"learning_rate": 1.246401225354118e-06,
"loss": 0.0936,
"step": 752
},
{
"epoch": 0.17133105802047782,
"grad_norm": 2.611386377303649,
"learning_rate": 1.2463916442582883e-06,
"loss": 0.2058,
"step": 753
},
{
"epoch": 0.17155858930602957,
"grad_norm": 1.81511526173022,
"learning_rate": 1.2463820504623516e-06,
"loss": 0.0722,
"step": 754
},
{
"epoch": 0.17178612059158135,
"grad_norm": 1.6836561465138316,
"learning_rate": 1.246372443966504e-06,
"loss": 0.1419,
"step": 755
},
{
"epoch": 0.1720136518771331,
"grad_norm": 3.189715404864015,
"learning_rate": 1.246362824770941e-06,
"loss": 0.1604,
"step": 756
},
{
"epoch": 0.17224118316268486,
"grad_norm": 2.8556456489625193,
"learning_rate": 1.2463531928758605e-06,
"loss": 0.1793,
"step": 757
},
{
"epoch": 0.17246871444823664,
"grad_norm": 2.1490228034084344,
"learning_rate": 1.2463435482814585e-06,
"loss": 0.1928,
"step": 758
},
{
"epoch": 0.1726962457337884,
"grad_norm": 1.866877451814791,
"learning_rate": 1.246333890987932e-06,
"loss": 0.2064,
"step": 759
},
{
"epoch": 0.17292377701934017,
"grad_norm": 2.7361601673612284,
"learning_rate": 1.246324220995479e-06,
"loss": 0.1024,
"step": 760
},
{
"epoch": 0.17315130830489192,
"grad_norm": 3.6715173407277004,
"learning_rate": 1.2463145383042966e-06,
"loss": 0.1741,
"step": 761
},
{
"epoch": 0.17337883959044367,
"grad_norm": 4.388914943676026,
"learning_rate": 1.2463048429145832e-06,
"loss": 0.2951,
"step": 762
},
{
"epoch": 0.17360637087599545,
"grad_norm": 3.0864567661578075,
"learning_rate": 1.2462951348265364e-06,
"loss": 0.1681,
"step": 763
},
{
"epoch": 0.1738339021615472,
"grad_norm": 2.2429137189515487,
"learning_rate": 1.2462854140403553e-06,
"loss": 0.1698,
"step": 764
},
{
"epoch": 0.17406143344709898,
"grad_norm": 3.7655750343422487,
"learning_rate": 1.2462756805562378e-06,
"loss": 0.1972,
"step": 765
},
{
"epoch": 0.17428896473265074,
"grad_norm": 1.4821109763148475,
"learning_rate": 1.2462659343743832e-06,
"loss": 0.1144,
"step": 766
},
{
"epoch": 0.17451649601820252,
"grad_norm": 2.9261323093043234,
"learning_rate": 1.2462561754949908e-06,
"loss": 0.1354,
"step": 767
},
{
"epoch": 0.17474402730375427,
"grad_norm": 2.021278631174851,
"learning_rate": 1.2462464039182598e-06,
"loss": 0.1158,
"step": 768
},
{
"epoch": 0.17497155858930602,
"grad_norm": 2.189903163956334,
"learning_rate": 1.2462366196443903e-06,
"loss": 0.1587,
"step": 769
},
{
"epoch": 0.1751990898748578,
"grad_norm": 3.7285174958892364,
"learning_rate": 1.246226822673582e-06,
"loss": 0.2024,
"step": 770
},
{
"epoch": 0.17542662116040955,
"grad_norm": 1.9007743093993184,
"learning_rate": 1.2462170130060351e-06,
"loss": 0.1025,
"step": 771
},
{
"epoch": 0.17565415244596133,
"grad_norm": 3.3341124392840134,
"learning_rate": 1.24620719064195e-06,
"loss": 0.1718,
"step": 772
},
{
"epoch": 0.17588168373151308,
"grad_norm": 2.271177623744295,
"learning_rate": 1.246197355581528e-06,
"loss": 0.1713,
"step": 773
},
{
"epoch": 0.17610921501706484,
"grad_norm": 2.631276315974309,
"learning_rate": 1.2461875078249694e-06,
"loss": 0.1769,
"step": 774
},
{
"epoch": 0.17633674630261661,
"grad_norm": 2.2924143983188765,
"learning_rate": 1.246177647372476e-06,
"loss": 0.1155,
"step": 775
},
{
"epoch": 0.17656427758816837,
"grad_norm": 4.145219852575127,
"learning_rate": 1.246167774224249e-06,
"loss": 0.1997,
"step": 776
},
{
"epoch": 0.17679180887372015,
"grad_norm": 3.5955716696986237,
"learning_rate": 1.2461578883804903e-06,
"loss": 0.1434,
"step": 777
},
{
"epoch": 0.1770193401592719,
"grad_norm": 3.5823237759342477,
"learning_rate": 1.246147989841402e-06,
"loss": 0.131,
"step": 778
},
{
"epoch": 0.17724687144482365,
"grad_norm": 1.7885388560764315,
"learning_rate": 1.2461380786071863e-06,
"loss": 0.0755,
"step": 779
},
{
"epoch": 0.17747440273037543,
"grad_norm": 2.362853335883513,
"learning_rate": 1.246128154678046e-06,
"loss": 0.1285,
"step": 780
},
{
"epoch": 0.17770193401592718,
"grad_norm": 2.826403481752188,
"learning_rate": 1.2461182180541835e-06,
"loss": 0.0898,
"step": 781
},
{
"epoch": 0.17792946530147896,
"grad_norm": 5.793503549962082,
"learning_rate": 1.2461082687358022e-06,
"loss": 0.0971,
"step": 782
},
{
"epoch": 0.1781569965870307,
"grad_norm": 1.8035940463938722,
"learning_rate": 1.2460983067231055e-06,
"loss": 0.1105,
"step": 783
},
{
"epoch": 0.1783845278725825,
"grad_norm": 2.3286047675537613,
"learning_rate": 1.246088332016297e-06,
"loss": 0.0997,
"step": 784
},
{
"epoch": 0.17861205915813425,
"grad_norm": 2.4331158536688067,
"learning_rate": 1.2460783446155802e-06,
"loss": 0.2145,
"step": 785
},
{
"epoch": 0.178839590443686,
"grad_norm": 2.4301917574272234,
"learning_rate": 1.2460683445211596e-06,
"loss": 0.1826,
"step": 786
},
{
"epoch": 0.17906712172923778,
"grad_norm": 3.191042960124482,
"learning_rate": 1.2460583317332395e-06,
"loss": 0.2224,
"step": 787
},
{
"epoch": 0.17929465301478953,
"grad_norm": 1.9281932990563415,
"learning_rate": 1.2460483062520246e-06,
"loss": 0.1012,
"step": 788
},
{
"epoch": 0.1795221843003413,
"grad_norm": 1.9401318974845003,
"learning_rate": 1.2460382680777196e-06,
"loss": 0.0761,
"step": 789
},
{
"epoch": 0.17974971558589306,
"grad_norm": 13.086161362963225,
"learning_rate": 1.2460282172105298e-06,
"loss": 0.2088,
"step": 790
},
{
"epoch": 0.1799772468714448,
"grad_norm": 1.4783130702588718,
"learning_rate": 1.2460181536506608e-06,
"loss": 0.2126,
"step": 791
},
{
"epoch": 0.1802047781569966,
"grad_norm": 2.4964786740518763,
"learning_rate": 1.2460080773983177e-06,
"loss": 0.1385,
"step": 792
},
{
"epoch": 0.18043230944254834,
"grad_norm": 2.7778972521749545,
"learning_rate": 1.2459979884537072e-06,
"loss": 0.1448,
"step": 793
},
{
"epoch": 0.18065984072810012,
"grad_norm": 2.167813491126184,
"learning_rate": 1.2459878868170348e-06,
"loss": 0.1379,
"step": 794
},
{
"epoch": 0.18088737201365188,
"grad_norm": 1.9654699615947284,
"learning_rate": 1.2459777724885075e-06,
"loss": 0.1314,
"step": 795
},
{
"epoch": 0.18111490329920363,
"grad_norm": 2.293952257528565,
"learning_rate": 1.2459676454683318e-06,
"loss": 0.1695,
"step": 796
},
{
"epoch": 0.1813424345847554,
"grad_norm": 3.9215044200778144,
"learning_rate": 1.2459575057567144e-06,
"loss": 0.2204,
"step": 797
},
{
"epoch": 0.18156996587030716,
"grad_norm": 2.8214133097210117,
"learning_rate": 1.245947353353863e-06,
"loss": 0.1558,
"step": 798
},
{
"epoch": 0.18179749715585894,
"grad_norm": 5.317020653859289,
"learning_rate": 1.245937188259985e-06,
"loss": 0.2603,
"step": 799
},
{
"epoch": 0.1820250284414107,
"grad_norm": 4.004955818619992,
"learning_rate": 1.245927010475288e-06,
"loss": 0.1196,
"step": 800
},
{
"epoch": 0.18225255972696247,
"grad_norm": 3.792524464667178,
"learning_rate": 1.24591681999998e-06,
"loss": 0.1821,
"step": 801
},
{
"epoch": 0.18248009101251422,
"grad_norm": 2.813011742342484,
"learning_rate": 1.2459066168342693e-06,
"loss": 0.1513,
"step": 802
},
{
"epoch": 0.18270762229806597,
"grad_norm": 3.511510747002315,
"learning_rate": 1.2458964009783646e-06,
"loss": 0.2163,
"step": 803
},
{
"epoch": 0.18293515358361775,
"grad_norm": 2.802158661308834,
"learning_rate": 1.2458861724324745e-06,
"loss": 0.1963,
"step": 804
},
{
"epoch": 0.1831626848691695,
"grad_norm": 3.64850186041969,
"learning_rate": 1.2458759311968084e-06,
"loss": 0.303,
"step": 805
},
{
"epoch": 0.1833902161547213,
"grad_norm": 2.6182595326596725,
"learning_rate": 1.245865677271575e-06,
"loss": 0.1456,
"step": 806
},
{
"epoch": 0.18361774744027304,
"grad_norm": 2.399741320725503,
"learning_rate": 1.2458554106569844e-06,
"loss": 0.2288,
"step": 807
},
{
"epoch": 0.1838452787258248,
"grad_norm": 1.252106549654472,
"learning_rate": 1.2458451313532463e-06,
"loss": 0.0801,
"step": 808
},
{
"epoch": 0.18407281001137657,
"grad_norm": 3.696224132577839,
"learning_rate": 1.2458348393605708e-06,
"loss": 0.2059,
"step": 809
},
{
"epoch": 0.18430034129692832,
"grad_norm": 1.3783330613855644,
"learning_rate": 1.2458245346791678e-06,
"loss": 0.1164,
"step": 810
},
{
"epoch": 0.1845278725824801,
"grad_norm": 1.5623432135982267,
"learning_rate": 1.2458142173092486e-06,
"loss": 0.176,
"step": 811
},
{
"epoch": 0.18475540386803185,
"grad_norm": 6.552053967433837,
"learning_rate": 1.2458038872510237e-06,
"loss": 0.118,
"step": 812
},
{
"epoch": 0.1849829351535836,
"grad_norm": 3.2237210845046964,
"learning_rate": 1.2457935445047042e-06,
"loss": 0.1875,
"step": 813
},
{
"epoch": 0.18521046643913538,
"grad_norm": 1.7463109516387256,
"learning_rate": 1.2457831890705018e-06,
"loss": 0.1945,
"step": 814
},
{
"epoch": 0.18543799772468714,
"grad_norm": 2.8292409598595953,
"learning_rate": 1.2457728209486279e-06,
"loss": 0.1711,
"step": 815
},
{
"epoch": 0.18566552901023892,
"grad_norm": 3.198074487753419,
"learning_rate": 1.2457624401392943e-06,
"loss": 0.2552,
"step": 816
},
{
"epoch": 0.18589306029579067,
"grad_norm": 3.2293783551138278,
"learning_rate": 1.2457520466427135e-06,
"loss": 0.1955,
"step": 817
},
{
"epoch": 0.18612059158134245,
"grad_norm": 2.5604778410965383,
"learning_rate": 1.2457416404590974e-06,
"loss": 0.1689,
"step": 818
},
{
"epoch": 0.1863481228668942,
"grad_norm": 2.4475267016374427,
"learning_rate": 1.2457312215886592e-06,
"loss": 0.1165,
"step": 819
},
{
"epoch": 0.18657565415244595,
"grad_norm": 1.9856047790588058,
"learning_rate": 1.2457207900316115e-06,
"loss": 0.195,
"step": 820
},
{
"epoch": 0.18680318543799773,
"grad_norm": 3.030251865029441,
"learning_rate": 1.245710345788168e-06,
"loss": 0.2233,
"step": 821
},
{
"epoch": 0.18703071672354948,
"grad_norm": 6.914472069589314,
"learning_rate": 1.2456998888585414e-06,
"loss": 0.1294,
"step": 822
},
{
"epoch": 0.18725824800910126,
"grad_norm": 1.5392801223632877,
"learning_rate": 1.245689419242946e-06,
"loss": 0.1031,
"step": 823
},
{
"epoch": 0.18748577929465302,
"grad_norm": 1.5563008585328006,
"learning_rate": 1.2456789369415955e-06,
"loss": 0.1233,
"step": 824
},
{
"epoch": 0.18771331058020477,
"grad_norm": 1.5005319006316646,
"learning_rate": 1.2456684419547044e-06,
"loss": 0.1698,
"step": 825
},
{
"epoch": 0.18794084186575655,
"grad_norm": 2.5311436309198245,
"learning_rate": 1.245657934282487e-06,
"loss": 0.1242,
"step": 826
},
{
"epoch": 0.1881683731513083,
"grad_norm": 1.3382771790085715,
"learning_rate": 1.245647413925158e-06,
"loss": 0.1173,
"step": 827
},
{
"epoch": 0.18839590443686008,
"grad_norm": 2.455502403566395,
"learning_rate": 1.2456368808829327e-06,
"loss": 0.0912,
"step": 828
},
{
"epoch": 0.18862343572241183,
"grad_norm": 2.9752303589937212,
"learning_rate": 1.2456263351560261e-06,
"loss": 0.2599,
"step": 829
},
{
"epoch": 0.18885096700796358,
"grad_norm": 5.043835077918359,
"learning_rate": 1.2456157767446538e-06,
"loss": 0.1609,
"step": 830
},
{
"epoch": 0.18907849829351536,
"grad_norm": 2.756359704558054,
"learning_rate": 1.245605205649032e-06,
"loss": 0.1323,
"step": 831
},
{
"epoch": 0.18930602957906711,
"grad_norm": 1.835440265718024,
"learning_rate": 1.245594621869376e-06,
"loss": 0.2094,
"step": 832
},
{
"epoch": 0.1895335608646189,
"grad_norm": 1.2880237601014817,
"learning_rate": 1.2455840254059026e-06,
"loss": 0.1085,
"step": 833
},
{
"epoch": 0.18976109215017065,
"grad_norm": 1.4808086873300856,
"learning_rate": 1.2455734162588282e-06,
"loss": 0.1067,
"step": 834
},
{
"epoch": 0.1899886234357224,
"grad_norm": 2.3351983872627597,
"learning_rate": 1.2455627944283697e-06,
"loss": 0.1493,
"step": 835
},
{
"epoch": 0.19021615472127418,
"grad_norm": 2.422722379821762,
"learning_rate": 1.245552159914744e-06,
"loss": 0.1387,
"step": 836
},
{
"epoch": 0.19044368600682593,
"grad_norm": 2.2005548282870477,
"learning_rate": 1.245541512718169e-06,
"loss": 0.1047,
"step": 837
},
{
"epoch": 0.1906712172923777,
"grad_norm": 2.379475571028047,
"learning_rate": 1.245530852838862e-06,
"loss": 0.1524,
"step": 838
},
{
"epoch": 0.19089874857792946,
"grad_norm": 1.669935289366072,
"learning_rate": 1.2455201802770405e-06,
"loss": 0.157,
"step": 839
},
{
"epoch": 0.19112627986348124,
"grad_norm": 2.357020791051429,
"learning_rate": 1.245509495032923e-06,
"loss": 0.2156,
"step": 840
},
{
"epoch": 0.191353811149033,
"grad_norm": 3.871602599108809,
"learning_rate": 1.2454987971067278e-06,
"loss": 0.1557,
"step": 841
},
{
"epoch": 0.19158134243458474,
"grad_norm": 2.5332197020943887,
"learning_rate": 1.2454880864986737e-06,
"loss": 0.1644,
"step": 842
},
{
"epoch": 0.19180887372013652,
"grad_norm": 3.1286962973408596,
"learning_rate": 1.2454773632089795e-06,
"loss": 0.0794,
"step": 843
},
{
"epoch": 0.19203640500568828,
"grad_norm": 2.3210649274985666,
"learning_rate": 1.2454666272378644e-06,
"loss": 0.129,
"step": 844
},
{
"epoch": 0.19226393629124006,
"grad_norm": 3.000200402253768,
"learning_rate": 1.2454558785855475e-06,
"loss": 0.1628,
"step": 845
},
{
"epoch": 0.1924914675767918,
"grad_norm": 2.3643323080869902,
"learning_rate": 1.245445117252249e-06,
"loss": 0.1345,
"step": 846
},
{
"epoch": 0.19271899886234356,
"grad_norm": 2.532625203594351,
"learning_rate": 1.2454343432381886e-06,
"loss": 0.2082,
"step": 847
},
{
"epoch": 0.19294653014789534,
"grad_norm": 1.9628657145639428,
"learning_rate": 1.2454235565435862e-06,
"loss": 0.0782,
"step": 848
},
{
"epoch": 0.1931740614334471,
"grad_norm": 1.609178421923729,
"learning_rate": 1.2454127571686629e-06,
"loss": 0.1405,
"step": 849
},
{
"epoch": 0.19340159271899887,
"grad_norm": 1.7728115247069527,
"learning_rate": 1.245401945113639e-06,
"loss": 0.203,
"step": 850
},
{
"epoch": 0.19362912400455062,
"grad_norm": 3.2450475274049118,
"learning_rate": 1.2453911203787355e-06,
"loss": 0.1524,
"step": 851
},
{
"epoch": 0.19385665529010238,
"grad_norm": 22.097060091469434,
"learning_rate": 1.2453802829641736e-06,
"loss": 0.2636,
"step": 852
},
{
"epoch": 0.19408418657565416,
"grad_norm": 2.5365065820289496,
"learning_rate": 1.2453694328701752e-06,
"loss": 0.1019,
"step": 853
},
{
"epoch": 0.1943117178612059,
"grad_norm": 2.090322149834491,
"learning_rate": 1.2453585700969614e-06,
"loss": 0.1498,
"step": 854
},
{
"epoch": 0.1945392491467577,
"grad_norm": 2.6606765925685787,
"learning_rate": 1.2453476946447547e-06,
"loss": 0.1398,
"step": 855
},
{
"epoch": 0.19476678043230944,
"grad_norm": 3.56083888144899,
"learning_rate": 1.2453368065137772e-06,
"loss": 0.1463,
"step": 856
},
{
"epoch": 0.19499431171786122,
"grad_norm": 2.1276836242796793,
"learning_rate": 1.2453259057042514e-06,
"loss": 0.1753,
"step": 857
},
{
"epoch": 0.19522184300341297,
"grad_norm": 2.5690977004159805,
"learning_rate": 1.2453149922164003e-06,
"loss": 0.1292,
"step": 858
},
{
"epoch": 0.19544937428896472,
"grad_norm": 4.345742784369693,
"learning_rate": 1.2453040660504468e-06,
"loss": 0.15,
"step": 859
},
{
"epoch": 0.1956769055745165,
"grad_norm": 3.118246879884093,
"learning_rate": 1.2452931272066141e-06,
"loss": 0.169,
"step": 860
},
{
"epoch": 0.19590443686006825,
"grad_norm": 2.68254786515319,
"learning_rate": 1.245282175685126e-06,
"loss": 0.157,
"step": 861
},
{
"epoch": 0.19613196814562003,
"grad_norm": 2.088476673647213,
"learning_rate": 1.2452712114862063e-06,
"loss": 0.1782,
"step": 862
},
{
"epoch": 0.19635949943117179,
"grad_norm": 1.568141769132608,
"learning_rate": 1.245260234610079e-06,
"loss": 0.1295,
"step": 863
},
{
"epoch": 0.19658703071672354,
"grad_norm": 2.186319656948205,
"learning_rate": 1.2452492450569682e-06,
"loss": 0.1734,
"step": 864
},
{
"epoch": 0.19681456200227532,
"grad_norm": 2.7655739546712135,
"learning_rate": 1.245238242827099e-06,
"loss": 0.1694,
"step": 865
},
{
"epoch": 0.19704209328782707,
"grad_norm": 3.0373302408208196,
"learning_rate": 1.245227227920696e-06,
"loss": 0.1356,
"step": 866
},
{
"epoch": 0.19726962457337885,
"grad_norm": 2.1820099415146914,
"learning_rate": 1.2452162003379842e-06,
"loss": 0.2082,
"step": 867
},
{
"epoch": 0.1974971558589306,
"grad_norm": 3.6721625065681827,
"learning_rate": 1.2452051600791891e-06,
"loss": 0.1915,
"step": 868
},
{
"epoch": 0.19772468714448235,
"grad_norm": 6.490462296454016,
"learning_rate": 1.2451941071445367e-06,
"loss": 0.1815,
"step": 869
},
{
"epoch": 0.19795221843003413,
"grad_norm": 3.246518762107006,
"learning_rate": 1.2451830415342524e-06,
"loss": 0.137,
"step": 870
},
{
"epoch": 0.19817974971558588,
"grad_norm": 2.7033364330836873,
"learning_rate": 1.2451719632485627e-06,
"loss": 0.1317,
"step": 871
},
{
"epoch": 0.19840728100113766,
"grad_norm": 3.30778551761739,
"learning_rate": 1.2451608722876938e-06,
"loss": 0.1099,
"step": 872
},
{
"epoch": 0.19863481228668942,
"grad_norm": 2.2687509460631294,
"learning_rate": 1.2451497686518722e-06,
"loss": 0.1361,
"step": 873
},
{
"epoch": 0.1988623435722412,
"grad_norm": 1.641721237453431,
"learning_rate": 1.2451386523413252e-06,
"loss": 0.1052,
"step": 874
},
{
"epoch": 0.19908987485779295,
"grad_norm": 2.206444085506852,
"learning_rate": 1.24512752335628e-06,
"loss": 0.1018,
"step": 875
},
{
"epoch": 0.1993174061433447,
"grad_norm": 2.210652731669232,
"learning_rate": 1.2451163816969639e-06,
"loss": 0.1879,
"step": 876
},
{
"epoch": 0.19954493742889648,
"grad_norm": 2.085600222270482,
"learning_rate": 1.2451052273636045e-06,
"loss": 0.127,
"step": 877
},
{
"epoch": 0.19977246871444823,
"grad_norm": 2.6309536592299705,
"learning_rate": 1.24509406035643e-06,
"loss": 0.1678,
"step": 878
},
{
"epoch": 0.2,
"grad_norm": 4.158698099165945,
"learning_rate": 1.2450828806756685e-06,
"loss": 0.2095,
"step": 879
},
{
"epoch": 0.20022753128555176,
"grad_norm": 2.602198490586786,
"learning_rate": 1.245071688321549e-06,
"loss": 0.1436,
"step": 880
},
{
"epoch": 0.20045506257110352,
"grad_norm": 2.252594865848713,
"learning_rate": 1.2450604832942991e-06,
"loss": 0.1231,
"step": 881
},
{
"epoch": 0.2006825938566553,
"grad_norm": 1.912453352899942,
"learning_rate": 1.245049265594149e-06,
"loss": 0.1408,
"step": 882
},
{
"epoch": 0.20091012514220705,
"grad_norm": 3.264942350461524,
"learning_rate": 1.2450380352213271e-06,
"loss": 0.1697,
"step": 883
},
{
"epoch": 0.20113765642775883,
"grad_norm": 2.415399674888119,
"learning_rate": 1.2450267921760636e-06,
"loss": 0.1331,
"step": 884
},
{
"epoch": 0.20136518771331058,
"grad_norm": 2.62867521080006,
"learning_rate": 1.2450155364585878e-06,
"loss": 0.1217,
"step": 885
},
{
"epoch": 0.20159271899886233,
"grad_norm": 2.3552959017058477,
"learning_rate": 1.2450042680691301e-06,
"loss": 0.1216,
"step": 886
},
{
"epoch": 0.2018202502844141,
"grad_norm": 1.4369969713280852,
"learning_rate": 1.2449929870079206e-06,
"loss": 0.1282,
"step": 887
},
{
"epoch": 0.20204778156996586,
"grad_norm": 2.305787931213179,
"learning_rate": 1.24498169327519e-06,
"loss": 0.1076,
"step": 888
},
{
"epoch": 0.20227531285551764,
"grad_norm": 1.7868835912702514,
"learning_rate": 1.2449703868711688e-06,
"loss": 0.1225,
"step": 889
},
{
"epoch": 0.2025028441410694,
"grad_norm": 2.1124657583403494,
"learning_rate": 1.2449590677960886e-06,
"loss": 0.1765,
"step": 890
},
{
"epoch": 0.20273037542662117,
"grad_norm": 1.6102832172606196,
"learning_rate": 1.2449477360501802e-06,
"loss": 0.0719,
"step": 891
},
{
"epoch": 0.20295790671217293,
"grad_norm": 3.8988824882283843,
"learning_rate": 1.2449363916336756e-06,
"loss": 0.1854,
"step": 892
},
{
"epoch": 0.20318543799772468,
"grad_norm": 3.2116126604298882,
"learning_rate": 1.2449250345468065e-06,
"loss": 0.2028,
"step": 893
},
{
"epoch": 0.20341296928327646,
"grad_norm": 2.083882159988442,
"learning_rate": 1.244913664789805e-06,
"loss": 0.1337,
"step": 894
},
{
"epoch": 0.2036405005688282,
"grad_norm": 1.8394649372022975,
"learning_rate": 1.2449022823629036e-06,
"loss": 0.1205,
"step": 895
},
{
"epoch": 0.20386803185438,
"grad_norm": 2.6323013014057004,
"learning_rate": 1.2448908872663347e-06,
"loss": 0.1133,
"step": 896
},
{
"epoch": 0.20409556313993174,
"grad_norm": 1.8291857038844686,
"learning_rate": 1.2448794795003313e-06,
"loss": 0.1142,
"step": 897
},
{
"epoch": 0.2043230944254835,
"grad_norm": 1.7184606914815217,
"learning_rate": 1.2448680590651269e-06,
"loss": 0.1222,
"step": 898
},
{
"epoch": 0.20455062571103527,
"grad_norm": 2.7034652156706716,
"learning_rate": 1.2448566259609543e-06,
"loss": 0.1991,
"step": 899
},
{
"epoch": 0.20477815699658702,
"grad_norm": 2.5930455129642653,
"learning_rate": 1.2448451801880476e-06,
"loss": 0.1085,
"step": 900
},
{
"epoch": 0.2050056882821388,
"grad_norm": 2.44560677998223,
"learning_rate": 1.2448337217466404e-06,
"loss": 0.1735,
"step": 901
},
{
"epoch": 0.20523321956769056,
"grad_norm": 2.257000828394708,
"learning_rate": 1.2448222506369675e-06,
"loss": 0.1118,
"step": 902
},
{
"epoch": 0.2054607508532423,
"grad_norm": 2.5459054260546323,
"learning_rate": 1.2448107668592626e-06,
"loss": 0.1975,
"step": 903
},
{
"epoch": 0.2056882821387941,
"grad_norm": 5.093888329917388,
"learning_rate": 1.244799270413761e-06,
"loss": 0.2277,
"step": 904
},
{
"epoch": 0.20591581342434584,
"grad_norm": 4.116266489839909,
"learning_rate": 1.2447877613006972e-06,
"loss": 0.2004,
"step": 905
},
{
"epoch": 0.20614334470989762,
"grad_norm": 1.8199951318249294,
"learning_rate": 1.244776239520307e-06,
"loss": 0.2131,
"step": 906
},
{
"epoch": 0.20637087599544937,
"grad_norm": 2.7663340604707267,
"learning_rate": 1.244764705072825e-06,
"loss": 0.2145,
"step": 907
},
{
"epoch": 0.20659840728100115,
"grad_norm": 1.8748872621346087,
"learning_rate": 1.2447531579584878e-06,
"loss": 0.1327,
"step": 908
},
{
"epoch": 0.2068259385665529,
"grad_norm": 3.4272822632320237,
"learning_rate": 1.2447415981775312e-06,
"loss": 0.2198,
"step": 909
},
{
"epoch": 0.20705346985210465,
"grad_norm": 3.1215491420073396,
"learning_rate": 1.2447300257301912e-06,
"loss": 0.1342,
"step": 910
},
{
"epoch": 0.20728100113765643,
"grad_norm": 2.5239722345332396,
"learning_rate": 1.2447184406167045e-06,
"loss": 0.1868,
"step": 911
},
{
"epoch": 0.2075085324232082,
"grad_norm": 1.9655955083845185,
"learning_rate": 1.2447068428373077e-06,
"loss": 0.1769,
"step": 912
},
{
"epoch": 0.20773606370875997,
"grad_norm": 3.157478086474276,
"learning_rate": 1.244695232392238e-06,
"loss": 0.1824,
"step": 913
},
{
"epoch": 0.20796359499431172,
"grad_norm": 1.9386984879122342,
"learning_rate": 1.2446836092817328e-06,
"loss": 0.1036,
"step": 914
},
{
"epoch": 0.20819112627986347,
"grad_norm": 2.2587342441489997,
"learning_rate": 1.2446719735060293e-06,
"loss": 0.2175,
"step": 915
},
{
"epoch": 0.20841865756541525,
"grad_norm": 2.3841098586953846,
"learning_rate": 1.2446603250653658e-06,
"loss": 0.1917,
"step": 916
},
{
"epoch": 0.208646188850967,
"grad_norm": 2.0643080194861496,
"learning_rate": 1.24464866395998e-06,
"loss": 0.1276,
"step": 917
},
{
"epoch": 0.20887372013651878,
"grad_norm": 1.1445975014034748,
"learning_rate": 1.2446369901901102e-06,
"loss": 0.0884,
"step": 918
},
{
"epoch": 0.20910125142207053,
"grad_norm": 3.359267538919808,
"learning_rate": 1.2446253037559952e-06,
"loss": 0.1214,
"step": 919
},
{
"epoch": 0.20932878270762229,
"grad_norm": 2.1583486474112927,
"learning_rate": 1.2446136046578739e-06,
"loss": 0.1093,
"step": 920
},
{
"epoch": 0.20955631399317406,
"grad_norm": 2.692763960200507,
"learning_rate": 1.2446018928959853e-06,
"loss": 0.2289,
"step": 921
},
{
"epoch": 0.20978384527872582,
"grad_norm": 2.356276890733175,
"learning_rate": 1.2445901684705685e-06,
"loss": 0.2222,
"step": 922
},
{
"epoch": 0.2100113765642776,
"grad_norm": 2.596476104334523,
"learning_rate": 1.2445784313818638e-06,
"loss": 0.1574,
"step": 923
},
{
"epoch": 0.21023890784982935,
"grad_norm": 2.788233818738729,
"learning_rate": 1.2445666816301102e-06,
"loss": 0.1303,
"step": 924
},
{
"epoch": 0.21046643913538113,
"grad_norm": 2.3013258694625245,
"learning_rate": 1.2445549192155487e-06,
"loss": 0.2232,
"step": 925
},
{
"epoch": 0.21069397042093288,
"grad_norm": 2.364410552617768,
"learning_rate": 1.244543144138419e-06,
"loss": 0.1967,
"step": 926
},
{
"epoch": 0.21092150170648463,
"grad_norm": 1.4320620142185012,
"learning_rate": 1.2445313563989624e-06,
"loss": 0.1533,
"step": 927
},
{
"epoch": 0.2111490329920364,
"grad_norm": 1.8979786639459473,
"learning_rate": 1.2445195559974194e-06,
"loss": 0.1494,
"step": 928
},
{
"epoch": 0.21137656427758816,
"grad_norm": 2.1174466003626446,
"learning_rate": 1.244507742934031e-06,
"loss": 0.1973,
"step": 929
},
{
"epoch": 0.21160409556313994,
"grad_norm": 2.164188059326067,
"learning_rate": 1.2444959172090393e-06,
"loss": 0.1336,
"step": 930
},
{
"epoch": 0.2118316268486917,
"grad_norm": 1.5503789009056947,
"learning_rate": 1.2444840788226854e-06,
"loss": 0.1948,
"step": 931
},
{
"epoch": 0.21205915813424345,
"grad_norm": 1.8654319466920093,
"learning_rate": 1.2444722277752114e-06,
"loss": 0.2043,
"step": 932
},
{
"epoch": 0.21228668941979523,
"grad_norm": 2.020474941013341,
"learning_rate": 1.2444603640668596e-06,
"loss": 0.2211,
"step": 933
},
{
"epoch": 0.21251422070534698,
"grad_norm": 2.0138343922511206,
"learning_rate": 1.2444484876978725e-06,
"loss": 0.1402,
"step": 934
},
{
"epoch": 0.21274175199089876,
"grad_norm": 1.5804379894073013,
"learning_rate": 1.2444365986684929e-06,
"loss": 0.1311,
"step": 935
},
{
"epoch": 0.2129692832764505,
"grad_norm": 2.2151819679335367,
"learning_rate": 1.2444246969789633e-06,
"loss": 0.0884,
"step": 936
},
{
"epoch": 0.21319681456200226,
"grad_norm": 2.4707341962723834,
"learning_rate": 1.2444127826295277e-06,
"loss": 0.1138,
"step": 937
},
{
"epoch": 0.21342434584755404,
"grad_norm": 2.142646726979162,
"learning_rate": 1.244400855620429e-06,
"loss": 0.1234,
"step": 938
},
{
"epoch": 0.2136518771331058,
"grad_norm": 1.3461044168942922,
"learning_rate": 1.2443889159519113e-06,
"loss": 0.0966,
"step": 939
},
{
"epoch": 0.21387940841865757,
"grad_norm": 2.824705608850421,
"learning_rate": 1.2443769636242185e-06,
"loss": 0.1736,
"step": 940
},
{
"epoch": 0.21410693970420933,
"grad_norm": 3.3926592270656526,
"learning_rate": 1.244364998637595e-06,
"loss": 0.102,
"step": 941
},
{
"epoch": 0.2143344709897611,
"grad_norm": 2.1478829302272278,
"learning_rate": 1.2443530209922848e-06,
"loss": 0.0958,
"step": 942
},
{
"epoch": 0.21456200227531286,
"grad_norm": 2.084791701381943,
"learning_rate": 1.2443410306885337e-06,
"loss": 0.128,
"step": 943
},
{
"epoch": 0.2147895335608646,
"grad_norm": 2.667044034523646,
"learning_rate": 1.244329027726586e-06,
"loss": 0.2088,
"step": 944
},
{
"epoch": 0.2150170648464164,
"grad_norm": 1.4354076627961647,
"learning_rate": 1.2443170121066872e-06,
"loss": 0.1295,
"step": 945
},
{
"epoch": 0.21524459613196814,
"grad_norm": 3.608014557262876,
"learning_rate": 1.2443049838290827e-06,
"loss": 0.1479,
"step": 946
},
{
"epoch": 0.21547212741751992,
"grad_norm": 2.4907426669888424,
"learning_rate": 1.2442929428940186e-06,
"loss": 0.2094,
"step": 947
},
{
"epoch": 0.21569965870307167,
"grad_norm": 1.889292577370491,
"learning_rate": 1.2442808893017414e-06,
"loss": 0.1182,
"step": 948
},
{
"epoch": 0.21592718998862342,
"grad_norm": 1.295703999044032,
"learning_rate": 1.2442688230524965e-06,
"loss": 0.1493,
"step": 949
},
{
"epoch": 0.2161547212741752,
"grad_norm": 3.010053578949512,
"learning_rate": 1.244256744146531e-06,
"loss": 0.1837,
"step": 950
},
{
"epoch": 0.21638225255972696,
"grad_norm": 2.2542440250817357,
"learning_rate": 1.244244652584092e-06,
"loss": 0.2011,
"step": 951
},
{
"epoch": 0.21660978384527874,
"grad_norm": 1.8471360091007536,
"learning_rate": 1.2442325483654263e-06,
"loss": 0.1529,
"step": 952
},
{
"epoch": 0.2168373151308305,
"grad_norm": 3.360264898638295,
"learning_rate": 1.2442204314907812e-06,
"loss": 0.1952,
"step": 953
},
{
"epoch": 0.21706484641638224,
"grad_norm": 2.2836983418694308,
"learning_rate": 1.2442083019604047e-06,
"loss": 0.2068,
"step": 954
},
{
"epoch": 0.21729237770193402,
"grad_norm": 2.534259478561885,
"learning_rate": 1.2441961597745447e-06,
"loss": 0.131,
"step": 955
},
{
"epoch": 0.21751990898748577,
"grad_norm": 2.116332324988344,
"learning_rate": 1.244184004933449e-06,
"loss": 0.1433,
"step": 956
},
{
"epoch": 0.21774744027303755,
"grad_norm": 1.9239447267712195,
"learning_rate": 1.2441718374373662e-06,
"loss": 0.1296,
"step": 957
},
{
"epoch": 0.2179749715585893,
"grad_norm": 3.11283517907892,
"learning_rate": 1.244159657286545e-06,
"loss": 0.1556,
"step": 958
},
{
"epoch": 0.21820250284414108,
"grad_norm": 2.1030310163998,
"learning_rate": 1.2441474644812345e-06,
"loss": 0.1398,
"step": 959
},
{
"epoch": 0.21843003412969283,
"grad_norm": 2.6301386027385734,
"learning_rate": 1.2441352590216836e-06,
"loss": 0.1328,
"step": 960
},
{
"epoch": 0.2186575654152446,
"grad_norm": 1.6843043929069075,
"learning_rate": 1.244123040908142e-06,
"loss": 0.2169,
"step": 961
},
{
"epoch": 0.21888509670079637,
"grad_norm": 2.021371056385805,
"learning_rate": 1.2441108101408592e-06,
"loss": 0.105,
"step": 962
},
{
"epoch": 0.21911262798634812,
"grad_norm": 2.932640255317413,
"learning_rate": 1.2440985667200853e-06,
"loss": 0.1186,
"step": 963
},
{
"epoch": 0.2193401592718999,
"grad_norm": 2.287879466073487,
"learning_rate": 1.2440863106460705e-06,
"loss": 0.1418,
"step": 964
},
{
"epoch": 0.21956769055745165,
"grad_norm": 2.4323172112890807,
"learning_rate": 1.2440740419190655e-06,
"loss": 0.2116,
"step": 965
},
{
"epoch": 0.2197952218430034,
"grad_norm": 2.906286752213052,
"learning_rate": 1.2440617605393208e-06,
"loss": 0.2029,
"step": 966
},
{
"epoch": 0.22002275312855518,
"grad_norm": 2.420234503572233,
"learning_rate": 1.2440494665070874e-06,
"loss": 0.2227,
"step": 967
},
{
"epoch": 0.22025028441410693,
"grad_norm": 2.1531642600457874,
"learning_rate": 1.2440371598226165e-06,
"loss": 0.1565,
"step": 968
},
{
"epoch": 0.2204778156996587,
"grad_norm": 1.7851844835265829,
"learning_rate": 1.2440248404861598e-06,
"loss": 0.1132,
"step": 969
},
{
"epoch": 0.22070534698521047,
"grad_norm": 2.2253443799094605,
"learning_rate": 1.2440125084979693e-06,
"loss": 0.1141,
"step": 970
},
{
"epoch": 0.22093287827076222,
"grad_norm": 3.491367387042196,
"learning_rate": 1.2440001638582965e-06,
"loss": 0.1678,
"step": 971
},
{
"epoch": 0.221160409556314,
"grad_norm": 2.6799332639547297,
"learning_rate": 1.2439878065673944e-06,
"loss": 0.1791,
"step": 972
},
{
"epoch": 0.22138794084186575,
"grad_norm": 0.9028117739016462,
"learning_rate": 1.2439754366255149e-06,
"loss": 0.0794,
"step": 973
},
{
"epoch": 0.22161547212741753,
"grad_norm": 1.6629358802939667,
"learning_rate": 1.2439630540329111e-06,
"loss": 0.1328,
"step": 974
},
{
"epoch": 0.22184300341296928,
"grad_norm": 2.734953415687441,
"learning_rate": 1.2439506587898358e-06,
"loss": 0.1168,
"step": 975
},
{
"epoch": 0.22207053469852106,
"grad_norm": 2.0986779517624745,
"learning_rate": 1.243938250896543e-06,
"loss": 0.1288,
"step": 976
},
{
"epoch": 0.2222980659840728,
"grad_norm": 2.4554262769941766,
"learning_rate": 1.2439258303532858e-06,
"loss": 0.1545,
"step": 977
},
{
"epoch": 0.22252559726962456,
"grad_norm": 1.7628888954012072,
"learning_rate": 1.243913397160318e-06,
"loss": 0.0967,
"step": 978
},
{
"epoch": 0.22275312855517634,
"grad_norm": 1.8371409568342896,
"learning_rate": 1.2439009513178938e-06,
"loss": 0.1184,
"step": 979
},
{
"epoch": 0.2229806598407281,
"grad_norm": 3.4838138279645103,
"learning_rate": 1.2438884928262678e-06,
"loss": 0.1686,
"step": 980
},
{
"epoch": 0.22320819112627988,
"grad_norm": 1.743212643613601,
"learning_rate": 1.2438760216856944e-06,
"loss": 0.1005,
"step": 981
},
{
"epoch": 0.22343572241183163,
"grad_norm": 2.2940811110233135,
"learning_rate": 1.2438635378964284e-06,
"loss": 0.1261,
"step": 982
},
{
"epoch": 0.22366325369738338,
"grad_norm": 3.306786589733754,
"learning_rate": 1.2438510414587251e-06,
"loss": 0.1057,
"step": 983
},
{
"epoch": 0.22389078498293516,
"grad_norm": 1.8312197926008273,
"learning_rate": 1.24383853237284e-06,
"loss": 0.1121,
"step": 984
},
{
"epoch": 0.2241183162684869,
"grad_norm": 1.375951456745173,
"learning_rate": 1.2438260106390285e-06,
"loss": 0.1137,
"step": 985
},
{
"epoch": 0.2243458475540387,
"grad_norm": 2.2850475547846507,
"learning_rate": 1.2438134762575467e-06,
"loss": 0.1528,
"step": 986
},
{
"epoch": 0.22457337883959044,
"grad_norm": 1.7811601291763544,
"learning_rate": 1.243800929228651e-06,
"loss": 0.114,
"step": 987
},
{
"epoch": 0.2248009101251422,
"grad_norm": 2.175503500486742,
"learning_rate": 1.2437883695525974e-06,
"loss": 0.2246,
"step": 988
},
{
"epoch": 0.22502844141069397,
"grad_norm": 2.5853887611675375,
"learning_rate": 1.2437757972296427e-06,
"loss": 0.2126,
"step": 989
},
{
"epoch": 0.22525597269624573,
"grad_norm": 2.4622729490723065,
"learning_rate": 1.2437632122600442e-06,
"loss": 0.1806,
"step": 990
},
{
"epoch": 0.2254835039817975,
"grad_norm": 2.2336859931017794,
"learning_rate": 1.2437506146440587e-06,
"loss": 0.1948,
"step": 991
},
{
"epoch": 0.22571103526734926,
"grad_norm": 2.388802906376772,
"learning_rate": 1.243738004381944e-06,
"loss": 0.1028,
"step": 992
},
{
"epoch": 0.225938566552901,
"grad_norm": 2.526457136508687,
"learning_rate": 1.2437253814739572e-06,
"loss": 0.1394,
"step": 993
},
{
"epoch": 0.2261660978384528,
"grad_norm": 2.282347439516019,
"learning_rate": 1.2437127459203572e-06,
"loss": 0.1678,
"step": 994
},
{
"epoch": 0.22639362912400454,
"grad_norm": 1.3050466119815518,
"learning_rate": 1.2437000977214015e-06,
"loss": 0.0753,
"step": 995
},
{
"epoch": 0.22662116040955632,
"grad_norm": 2.159334429482828,
"learning_rate": 1.243687436877349e-06,
"loss": 0.2767,
"step": 996
},
{
"epoch": 0.22684869169510807,
"grad_norm": 2.4741243617261617,
"learning_rate": 1.2436747633884583e-06,
"loss": 0.167,
"step": 997
},
{
"epoch": 0.22707622298065985,
"grad_norm": 2.522130011756034,
"learning_rate": 1.2436620772549885e-06,
"loss": 0.2229,
"step": 998
},
{
"epoch": 0.2273037542662116,
"grad_norm": 2.2654639871535873,
"learning_rate": 1.243649378477199e-06,
"loss": 0.1376,
"step": 999
},
{
"epoch": 0.22753128555176336,
"grad_norm": 2.737389406083516,
"learning_rate": 1.2436366670553491e-06,
"loss": 0.1672,
"step": 1000
},
{
"epoch": 0.22775881683731514,
"grad_norm": 2.497999857751637,
"learning_rate": 1.2436239429896988e-06,
"loss": 0.2831,
"step": 1001
},
{
"epoch": 0.2279863481228669,
"grad_norm": 2.3986139069373125,
"learning_rate": 1.2436112062805081e-06,
"loss": 0.1413,
"step": 1002
},
{
"epoch": 0.22821387940841867,
"grad_norm": 1.63194618315687,
"learning_rate": 1.2435984569280372e-06,
"loss": 0.1509,
"step": 1003
},
{
"epoch": 0.22844141069397042,
"grad_norm": 1.9884735218546312,
"learning_rate": 1.2435856949325467e-06,
"loss": 0.0909,
"step": 1004
},
{
"epoch": 0.22866894197952217,
"grad_norm": 3.7364717574130877,
"learning_rate": 1.2435729202942972e-06,
"loss": 0.1362,
"step": 1005
},
{
"epoch": 0.22889647326507395,
"grad_norm": 4.3498400339740595,
"learning_rate": 1.2435601330135506e-06,
"loss": 0.1364,
"step": 1006
},
{
"epoch": 0.2291240045506257,
"grad_norm": 1.468486521047109,
"learning_rate": 1.2435473330905674e-06,
"loss": 0.1902,
"step": 1007
},
{
"epoch": 0.22935153583617748,
"grad_norm": 2.602985360302298,
"learning_rate": 1.2435345205256097e-06,
"loss": 0.0947,
"step": 1008
},
{
"epoch": 0.22957906712172924,
"grad_norm": 2.117002790495142,
"learning_rate": 1.243521695318939e-06,
"loss": 0.1228,
"step": 1009
},
{
"epoch": 0.229806598407281,
"grad_norm": 2.0012843231226034,
"learning_rate": 1.2435088574708178e-06,
"loss": 0.1156,
"step": 1010
},
{
"epoch": 0.23003412969283277,
"grad_norm": 2.490148339748286,
"learning_rate": 1.2434960069815083e-06,
"loss": 0.164,
"step": 1011
},
{
"epoch": 0.23026166097838452,
"grad_norm": 2.450730689081713,
"learning_rate": 1.243483143851273e-06,
"loss": 0.138,
"step": 1012
},
{
"epoch": 0.2304891922639363,
"grad_norm": 2.892744061430906,
"learning_rate": 1.2434702680803751e-06,
"loss": 0.1061,
"step": 1013
},
{
"epoch": 0.23071672354948805,
"grad_norm": 2.790226387512928,
"learning_rate": 1.2434573796690774e-06,
"loss": 0.1957,
"step": 1014
},
{
"epoch": 0.23094425483503983,
"grad_norm": 2.4036726186705972,
"learning_rate": 1.2434444786176435e-06,
"loss": 0.1544,
"step": 1015
},
{
"epoch": 0.23117178612059158,
"grad_norm": 1.3271746602955339,
"learning_rate": 1.2434315649263372e-06,
"loss": 0.061,
"step": 1016
},
{
"epoch": 0.23139931740614333,
"grad_norm": 1.4063593684445947,
"learning_rate": 1.2434186385954225e-06,
"loss": 0.1068,
"step": 1017
},
{
"epoch": 0.23162684869169511,
"grad_norm": 2.9525793198909724,
"learning_rate": 1.243405699625163e-06,
"loss": 0.1067,
"step": 1018
},
{
"epoch": 0.23185437997724687,
"grad_norm": 2.7846219600282747,
"learning_rate": 1.243392748015824e-06,
"loss": 0.1435,
"step": 1019
},
{
"epoch": 0.23208191126279865,
"grad_norm": 1.5658061687677385,
"learning_rate": 1.2433797837676694e-06,
"loss": 0.1492,
"step": 1020
},
{
"epoch": 0.2323094425483504,
"grad_norm": 4.123388323133236,
"learning_rate": 1.2433668068809648e-06,
"loss": 0.1699,
"step": 1021
},
{
"epoch": 0.23253697383390215,
"grad_norm": 2.0976126762166403,
"learning_rate": 1.243353817355975e-06,
"loss": 0.1257,
"step": 1022
},
{
"epoch": 0.23276450511945393,
"grad_norm": 2.4116621601065296,
"learning_rate": 1.2433408151929655e-06,
"loss": 0.133,
"step": 1023
},
{
"epoch": 0.23299203640500568,
"grad_norm": 1.395623834578789,
"learning_rate": 1.2433278003922026e-06,
"loss": 0.0936,
"step": 1024
},
{
"epoch": 0.23321956769055746,
"grad_norm": 1.7768669244027402,
"learning_rate": 1.2433147729539514e-06,
"loss": 0.1264,
"step": 1025
},
{
"epoch": 0.2334470989761092,
"grad_norm": 2.489847520949891,
"learning_rate": 1.2433017328784788e-06,
"loss": 0.1714,
"step": 1026
},
{
"epoch": 0.23367463026166096,
"grad_norm": 1.722648702759186,
"learning_rate": 1.2432886801660513e-06,
"loss": 0.122,
"step": 1027
},
{
"epoch": 0.23390216154721274,
"grad_norm": 1.3061284883014919,
"learning_rate": 1.2432756148169354e-06,
"loss": 0.0726,
"step": 1028
},
{
"epoch": 0.2341296928327645,
"grad_norm": 2.807955909764041,
"learning_rate": 1.2432625368313983e-06,
"loss": 0.1667,
"step": 1029
},
{
"epoch": 0.23435722411831628,
"grad_norm": 1.9724601313774524,
"learning_rate": 1.2432494462097072e-06,
"loss": 0.1995,
"step": 1030
},
{
"epoch": 0.23458475540386803,
"grad_norm": 2.3943947067430895,
"learning_rate": 1.2432363429521295e-06,
"loss": 0.1625,
"step": 1031
},
{
"epoch": 0.2348122866894198,
"grad_norm": 1.5436408096888365,
"learning_rate": 1.2432232270589335e-06,
"loss": 0.076,
"step": 1032
},
{
"epoch": 0.23503981797497156,
"grad_norm": 1.1938881747627557,
"learning_rate": 1.2432100985303868e-06,
"loss": 0.1002,
"step": 1033
},
{
"epoch": 0.2352673492605233,
"grad_norm": 2.0446974564823304,
"learning_rate": 1.243196957366758e-06,
"loss": 0.1721,
"step": 1034
},
{
"epoch": 0.2354948805460751,
"grad_norm": 1.079879180238331,
"learning_rate": 1.2431838035683155e-06,
"loss": 0.1257,
"step": 1035
},
{
"epoch": 0.23572241183162684,
"grad_norm": 1.8378535292320874,
"learning_rate": 1.2431706371353282e-06,
"loss": 0.1821,
"step": 1036
},
{
"epoch": 0.23594994311717862,
"grad_norm": 1.969855842746801,
"learning_rate": 1.2431574580680653e-06,
"loss": 0.1436,
"step": 1037
},
{
"epoch": 0.23617747440273038,
"grad_norm": 3.058757707801488,
"learning_rate": 1.2431442663667958e-06,
"loss": 0.1605,
"step": 1038
},
{
"epoch": 0.23640500568828213,
"grad_norm": 1.2648716547694445,
"learning_rate": 1.2431310620317898e-06,
"loss": 0.1614,
"step": 1039
},
{
"epoch": 0.2366325369738339,
"grad_norm": 1.9610877034271015,
"learning_rate": 1.2431178450633168e-06,
"loss": 0.139,
"step": 1040
},
{
"epoch": 0.23686006825938566,
"grad_norm": 1.5919631273318544,
"learning_rate": 1.2431046154616473e-06,
"loss": 0.0888,
"step": 1041
},
{
"epoch": 0.23708759954493744,
"grad_norm": 1.791707313865184,
"learning_rate": 1.2430913732270512e-06,
"loss": 0.1087,
"step": 1042
},
{
"epoch": 0.2373151308304892,
"grad_norm": 3.1377911678690666,
"learning_rate": 1.2430781183597995e-06,
"loss": 0.1565,
"step": 1043
},
{
"epoch": 0.23754266211604094,
"grad_norm": 2.2837991793589607,
"learning_rate": 1.243064850860163e-06,
"loss": 0.1126,
"step": 1044
},
{
"epoch": 0.23777019340159272,
"grad_norm": 2.6823412767535246,
"learning_rate": 1.243051570728413e-06,
"loss": 0.2083,
"step": 1045
},
{
"epoch": 0.23799772468714447,
"grad_norm": 4.365244516577561,
"learning_rate": 1.2430382779648208e-06,
"loss": 0.1904,
"step": 1046
},
{
"epoch": 0.23822525597269625,
"grad_norm": 2.434739692035364,
"learning_rate": 1.243024972569658e-06,
"loss": 0.1347,
"step": 1047
},
{
"epoch": 0.238452787258248,
"grad_norm": 2.1595986496307384,
"learning_rate": 1.2430116545431966e-06,
"loss": 0.1926,
"step": 1048
},
{
"epoch": 0.23868031854379979,
"grad_norm": 2.2542031412662573,
"learning_rate": 1.2429983238857088e-06,
"loss": 0.1667,
"step": 1049
},
{
"epoch": 0.23890784982935154,
"grad_norm": 2.0405926385207787,
"learning_rate": 1.2429849805974673e-06,
"loss": 0.1872,
"step": 1050
},
{
"epoch": 0.2391353811149033,
"grad_norm": 2.2037085916589043,
"learning_rate": 1.2429716246787444e-06,
"loss": 0.0775,
"step": 1051
},
{
"epoch": 0.23936291240045507,
"grad_norm": 0.9628371959013814,
"learning_rate": 1.242958256129813e-06,
"loss": 0.1378,
"step": 1052
},
{
"epoch": 0.23959044368600682,
"grad_norm": 2.1187588487355424,
"learning_rate": 1.242944874950947e-06,
"loss": 0.159,
"step": 1053
},
{
"epoch": 0.2398179749715586,
"grad_norm": 1.9961766997876433,
"learning_rate": 1.2429314811424192e-06,
"loss": 0.1568,
"step": 1054
},
{
"epoch": 0.24004550625711035,
"grad_norm": 1.935471261024473,
"learning_rate": 1.242918074704504e-06,
"loss": 0.1596,
"step": 1055
},
{
"epoch": 0.2402730375426621,
"grad_norm": 1.4988665110908368,
"learning_rate": 1.2429046556374747e-06,
"loss": 0.0987,
"step": 1056
},
{
"epoch": 0.24050056882821388,
"grad_norm": 2.4283216098462015,
"learning_rate": 1.2428912239416057e-06,
"loss": 0.1127,
"step": 1057
},
{
"epoch": 0.24072810011376564,
"grad_norm": 2.3264824459084448,
"learning_rate": 1.242877779617172e-06,
"loss": 0.1274,
"step": 1058
},
{
"epoch": 0.24095563139931742,
"grad_norm": 2.159687331291489,
"learning_rate": 1.242864322664448e-06,
"loss": 0.1399,
"step": 1059
},
{
"epoch": 0.24118316268486917,
"grad_norm": 2.3632421336063087,
"learning_rate": 1.2428508530837088e-06,
"loss": 0.1751,
"step": 1060
},
{
"epoch": 0.24141069397042092,
"grad_norm": 4.564054038887482,
"learning_rate": 1.2428373708752298e-06,
"loss": 0.1623,
"step": 1061
},
{
"epoch": 0.2416382252559727,
"grad_norm": 2.913968751293169,
"learning_rate": 1.2428238760392862e-06,
"loss": 0.2404,
"step": 1062
},
{
"epoch": 0.24186575654152445,
"grad_norm": 2.375864551832549,
"learning_rate": 1.2428103685761543e-06,
"loss": 0.1551,
"step": 1063
},
{
"epoch": 0.24209328782707623,
"grad_norm": 2.773326434228427,
"learning_rate": 1.2427968484861097e-06,
"loss": 0.1129,
"step": 1064
},
{
"epoch": 0.24232081911262798,
"grad_norm": 3.440322207371564,
"learning_rate": 1.2427833157694292e-06,
"loss": 0.2312,
"step": 1065
},
{
"epoch": 0.24254835039817976,
"grad_norm": 2.09362609958651,
"learning_rate": 1.2427697704263892e-06,
"loss": 0.1047,
"step": 1066
},
{
"epoch": 0.24277588168373151,
"grad_norm": 2.0696892695320432,
"learning_rate": 1.2427562124572663e-06,
"loss": 0.1156,
"step": 1067
},
{
"epoch": 0.24300341296928327,
"grad_norm": 1.923568801452821,
"learning_rate": 1.2427426418623377e-06,
"loss": 0.1609,
"step": 1068
},
{
"epoch": 0.24323094425483505,
"grad_norm": 1.5158781630471698,
"learning_rate": 1.242729058641881e-06,
"loss": 0.094,
"step": 1069
},
{
"epoch": 0.2434584755403868,
"grad_norm": 2.2258107327352037,
"learning_rate": 1.2427154627961737e-06,
"loss": 0.2017,
"step": 1070
},
{
"epoch": 0.24368600682593858,
"grad_norm": 2.3481688305100645,
"learning_rate": 1.2427018543254935e-06,
"loss": 0.1535,
"step": 1071
},
{
"epoch": 0.24391353811149033,
"grad_norm": 2.148375299510445,
"learning_rate": 1.2426882332301187e-06,
"loss": 0.1812,
"step": 1072
},
{
"epoch": 0.24414106939704208,
"grad_norm": 1.6816805152718777,
"learning_rate": 1.2426745995103277e-06,
"loss": 0.1341,
"step": 1073
},
{
"epoch": 0.24436860068259386,
"grad_norm": 2.651811251817173,
"learning_rate": 1.242660953166399e-06,
"loss": 0.1318,
"step": 1074
},
{
"epoch": 0.2445961319681456,
"grad_norm": 2.473544844662378,
"learning_rate": 1.2426472941986117e-06,
"loss": 0.1972,
"step": 1075
},
{
"epoch": 0.2448236632536974,
"grad_norm": 1.3274925024741444,
"learning_rate": 1.2426336226072449e-06,
"loss": 0.1497,
"step": 1076
},
{
"epoch": 0.24505119453924915,
"grad_norm": 2.1014804926130277,
"learning_rate": 1.242619938392578e-06,
"loss": 0.1186,
"step": 1077
},
{
"epoch": 0.2452787258248009,
"grad_norm": 3.0260303106049973,
"learning_rate": 1.2426062415548907e-06,
"loss": 0.2506,
"step": 1078
},
{
"epoch": 0.24550625711035268,
"grad_norm": 1.2327761741993546,
"learning_rate": 1.2425925320944628e-06,
"loss": 0.117,
"step": 1079
},
{
"epoch": 0.24573378839590443,
"grad_norm": 3.2155457599215036,
"learning_rate": 1.2425788100115747e-06,
"loss": 0.1412,
"step": 1080
},
{
"epoch": 0.2459613196814562,
"grad_norm": 1.6672046307721682,
"learning_rate": 1.2425650753065065e-06,
"loss": 0.148,
"step": 1081
},
{
"epoch": 0.24618885096700796,
"grad_norm": 4.323033908726176,
"learning_rate": 1.2425513279795395e-06,
"loss": 0.1685,
"step": 1082
},
{
"epoch": 0.24641638225255974,
"grad_norm": 2.4128743686143146,
"learning_rate": 1.2425375680309543e-06,
"loss": 0.0992,
"step": 1083
},
{
"epoch": 0.2466439135381115,
"grad_norm": 2.0582783253443497,
"learning_rate": 1.2425237954610322e-06,
"loss": 0.1263,
"step": 1084
},
{
"epoch": 0.24687144482366324,
"grad_norm": 2.5810033905990637,
"learning_rate": 1.2425100102700547e-06,
"loss": 0.2102,
"step": 1085
},
{
"epoch": 0.24709897610921502,
"grad_norm": 2.269665820869707,
"learning_rate": 1.2424962124583033e-06,
"loss": 0.105,
"step": 1086
},
{
"epoch": 0.24732650739476678,
"grad_norm": 2.706182109515585,
"learning_rate": 1.2424824020260603e-06,
"loss": 0.1596,
"step": 1087
},
{
"epoch": 0.24755403868031856,
"grad_norm": 3.0056026517839016,
"learning_rate": 1.2424685789736077e-06,
"loss": 0.1809,
"step": 1088
},
{
"epoch": 0.2477815699658703,
"grad_norm": 2.2230272708907513,
"learning_rate": 1.2424547433012284e-06,
"loss": 0.1187,
"step": 1089
},
{
"epoch": 0.24800910125142206,
"grad_norm": 2.271631978747539,
"learning_rate": 1.2424408950092049e-06,
"loss": 0.1478,
"step": 1090
},
{
"epoch": 0.24823663253697384,
"grad_norm": 2.485671272218175,
"learning_rate": 1.2424270340978204e-06,
"loss": 0.1595,
"step": 1091
},
{
"epoch": 0.2484641638225256,
"grad_norm": 2.5242524420773087,
"learning_rate": 1.2424131605673582e-06,
"loss": 0.2519,
"step": 1092
},
{
"epoch": 0.24869169510807737,
"grad_norm": 2.6439941529662025,
"learning_rate": 1.2423992744181015e-06,
"loss": 0.1389,
"step": 1093
},
{
"epoch": 0.24891922639362912,
"grad_norm": 2.1610086973465417,
"learning_rate": 1.2423853756503343e-06,
"loss": 0.1017,
"step": 1094
},
{
"epoch": 0.24914675767918087,
"grad_norm": 1.8954846688503157,
"learning_rate": 1.2423714642643408e-06,
"loss": 0.2796,
"step": 1095
},
{
"epoch": 0.24937428896473265,
"grad_norm": 1.3124277359799683,
"learning_rate": 1.2423575402604051e-06,
"loss": 0.12,
"step": 1096
},
{
"epoch": 0.2496018202502844,
"grad_norm": 2.5234695537617444,
"learning_rate": 1.2423436036388122e-06,
"loss": 0.1242,
"step": 1097
},
{
"epoch": 0.24982935153583619,
"grad_norm": 2.044792039361886,
"learning_rate": 1.2423296543998465e-06,
"loss": 0.1743,
"step": 1098
},
{
"epoch": 0.25005688282138794,
"grad_norm": 3.6767614291561492,
"learning_rate": 1.2423156925437932e-06,
"loss": 0.2584,
"step": 1099
},
{
"epoch": 0.2502844141069397,
"grad_norm": 2.1397151355216506,
"learning_rate": 1.2423017180709376e-06,
"loss": 0.1586,
"step": 1100
},
{
"epoch": 0.25051194539249144,
"grad_norm": 1.670738860931536,
"learning_rate": 1.2422877309815656e-06,
"loss": 0.0821,
"step": 1101
},
{
"epoch": 0.25073947667804325,
"grad_norm": 2.3733300367714185,
"learning_rate": 1.242273731275963e-06,
"loss": 0.1335,
"step": 1102
},
{
"epoch": 0.250967007963595,
"grad_norm": 2.6954093027320534,
"learning_rate": 1.2422597189544155e-06,
"loss": 0.1244,
"step": 1103
},
{
"epoch": 0.25119453924914675,
"grad_norm": 2.17330712431736,
"learning_rate": 1.2422456940172101e-06,
"loss": 0.1799,
"step": 1104
},
{
"epoch": 0.2514220705346985,
"grad_norm": 2.4883101223722397,
"learning_rate": 1.2422316564646331e-06,
"loss": 0.0881,
"step": 1105
},
{
"epoch": 0.25164960182025026,
"grad_norm": 2.4975644528149528,
"learning_rate": 1.2422176062969713e-06,
"loss": 0.2376,
"step": 1106
},
{
"epoch": 0.25187713310580206,
"grad_norm": 2.242874102497345,
"learning_rate": 1.2422035435145121e-06,
"loss": 0.1117,
"step": 1107
},
{
"epoch": 0.2521046643913538,
"grad_norm": 2.1430334401000994,
"learning_rate": 1.2421894681175428e-06,
"loss": 0.1937,
"step": 1108
},
{
"epoch": 0.25233219567690557,
"grad_norm": 2.8329522904929796,
"learning_rate": 1.2421753801063511e-06,
"loss": 0.2192,
"step": 1109
},
{
"epoch": 0.2525597269624573,
"grad_norm": 2.7185072984242016,
"learning_rate": 1.2421612794812248e-06,
"loss": 0.1612,
"step": 1110
}
],
"logging_steps": 1,
"max_steps": 21975,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 1110,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4896118628352.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}