27b-8-lora / trainer_state.json
furproxy's picture
Upload folder using huggingface_hub
b8cc33b verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1638,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003663003663003663,
"grad_norm": 2.9691946506500244,
"learning_rate": 6.000000000000001e-07,
"loss": 2.6877074241638184,
"step": 2
},
{
"epoch": 0.007326007326007326,
"grad_norm": 0.5672376751899719,
"learning_rate": 1.8e-06,
"loss": 1.666015625,
"step": 4
},
{
"epoch": 0.01098901098901099,
"grad_norm": 0.35880523920059204,
"learning_rate": 3e-06,
"loss": 1.877748727798462,
"step": 6
},
{
"epoch": 0.014652014652014652,
"grad_norm": 0.16696049273014069,
"learning_rate": 4.2000000000000004e-06,
"loss": 2.041140079498291,
"step": 8
},
{
"epoch": 0.018315018315018316,
"grad_norm": 0.21169808506965637,
"learning_rate": 5.4e-06,
"loss": 2.1642842292785645,
"step": 10
},
{
"epoch": 0.02197802197802198,
"grad_norm": 0.4977658689022064,
"learning_rate": 6.6e-06,
"loss": 1.9282701015472412,
"step": 12
},
{
"epoch": 0.02564102564102564,
"grad_norm": 0.18059898912906647,
"learning_rate": 7.8e-06,
"loss": 1.7203080654144287,
"step": 14
},
{
"epoch": 0.029304029304029304,
"grad_norm": 0.1946066915988922,
"learning_rate": 9e-06,
"loss": 1.6946827173233032,
"step": 16
},
{
"epoch": 0.03296703296703297,
"grad_norm": 0.45476168394088745,
"learning_rate": 1.02e-05,
"loss": 1.622821569442749,
"step": 18
},
{
"epoch": 0.03663003663003663,
"grad_norm": 0.10335284471511841,
"learning_rate": 1.1400000000000001e-05,
"loss": 1.6928024291992188,
"step": 20
},
{
"epoch": 0.040293040293040296,
"grad_norm": 0.4260774254798889,
"learning_rate": 1.26e-05,
"loss": 0.9692617654800415,
"step": 22
},
{
"epoch": 0.04395604395604396,
"grad_norm": 0.2110120803117752,
"learning_rate": 1.3800000000000002e-05,
"loss": 1.121842384338379,
"step": 24
},
{
"epoch": 0.047619047619047616,
"grad_norm": 0.13500770926475525,
"learning_rate": 1.5e-05,
"loss": 1.365687608718872,
"step": 26
},
{
"epoch": 0.05128205128205128,
"grad_norm": 0.31692269444465637,
"learning_rate": 1.62e-05,
"loss": 1.269879698753357,
"step": 28
},
{
"epoch": 0.054945054945054944,
"grad_norm": 0.2881113886833191,
"learning_rate": 1.74e-05,
"loss": 1.1352685689926147,
"step": 30
},
{
"epoch": 0.05860805860805861,
"grad_norm": 0.19420871138572693,
"learning_rate": 1.86e-05,
"loss": 1.066841959953308,
"step": 32
},
{
"epoch": 0.06227106227106227,
"grad_norm": 0.04560531675815582,
"learning_rate": 1.98e-05,
"loss": 1.214577555656433,
"step": 34
},
{
"epoch": 0.06593406593406594,
"grad_norm": 0.16944342851638794,
"learning_rate": 2.1e-05,
"loss": 1.5755853652954102,
"step": 36
},
{
"epoch": 0.0695970695970696,
"grad_norm": 0.21081246435642242,
"learning_rate": 2.22e-05,
"loss": 1.3488558530807495,
"step": 38
},
{
"epoch": 0.07326007326007326,
"grad_norm": 4.470643043518066,
"learning_rate": 2.3400000000000003e-05,
"loss": 1.384747862815857,
"step": 40
},
{
"epoch": 0.07692307692307693,
"grad_norm": 0.31521132588386536,
"learning_rate": 2.4599999999999998e-05,
"loss": 1.0727258920669556,
"step": 42
},
{
"epoch": 0.08058608058608059,
"grad_norm": 0.13335339725017548,
"learning_rate": 2.58e-05,
"loss": 1.4839975833892822,
"step": 44
},
{
"epoch": 0.08424908424908426,
"grad_norm": 0.2228817194700241,
"learning_rate": 2.7000000000000002e-05,
"loss": 1.5779204368591309,
"step": 46
},
{
"epoch": 0.08791208791208792,
"grad_norm": 1.1686429977416992,
"learning_rate": 2.8199999999999998e-05,
"loss": 0.9971361756324768,
"step": 48
},
{
"epoch": 0.09157509157509157,
"grad_norm": 0.11315350979566574,
"learning_rate": 2.94e-05,
"loss": 1.462868332862854,
"step": 50
},
{
"epoch": 0.09523809523809523,
"grad_norm": 0.3818332552909851,
"learning_rate": 2.99999735818513e-05,
"loss": 0.726470410823822,
"step": 52
},
{
"epoch": 0.0989010989010989,
"grad_norm": 0.2479761391878128,
"learning_rate": 2.9999762237282056e-05,
"loss": 0.9266619086265564,
"step": 54
},
{
"epoch": 0.10256410256410256,
"grad_norm": 0.184525266289711,
"learning_rate": 2.9999339551452214e-05,
"loss": 1.3103946447372437,
"step": 56
},
{
"epoch": 0.10622710622710622,
"grad_norm": 0.1514936238527298,
"learning_rate": 2.9998705530978993e-05,
"loss": 1.1557871103286743,
"step": 58
},
{
"epoch": 0.10989010989010989,
"grad_norm": 0.13591140508651733,
"learning_rate": 2.99978601857881e-05,
"loss": 1.3586221933364868,
"step": 60
},
{
"epoch": 0.11355311355311355,
"grad_norm": 0.3053394854068756,
"learning_rate": 2.999680352911357e-05,
"loss": 1.377090334892273,
"step": 62
},
{
"epoch": 0.11721611721611722,
"grad_norm": 0.1625453382730484,
"learning_rate": 2.9995535577497556e-05,
"loss": 1.3425214290618896,
"step": 64
},
{
"epoch": 0.12087912087912088,
"grad_norm": 0.11238668859004974,
"learning_rate": 2.999405635079008e-05,
"loss": 1.3106110095977783,
"step": 66
},
{
"epoch": 0.12454212454212454,
"grad_norm": 0.4167092740535736,
"learning_rate": 2.99923658721487e-05,
"loss": 1.2862937450408936,
"step": 68
},
{
"epoch": 0.1282051282051282,
"grad_norm": 0.8267874121665955,
"learning_rate": 2.9990464168038176e-05,
"loss": 1.134496808052063,
"step": 70
},
{
"epoch": 0.13186813186813187,
"grad_norm": 0.23497220873832703,
"learning_rate": 2.998835126823003e-05,
"loss": 1.3963985443115234,
"step": 72
},
{
"epoch": 0.13553113553113552,
"grad_norm": 0.33364740014076233,
"learning_rate": 2.99860272058021e-05,
"loss": 1.5833244323730469,
"step": 74
},
{
"epoch": 0.1391941391941392,
"grad_norm": 0.06165367364883423,
"learning_rate": 2.998349201713801e-05,
"loss": 1.062476396560669,
"step": 76
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.13317528367042542,
"learning_rate": 2.998074574192661e-05,
"loss": 1.4017423391342163,
"step": 78
},
{
"epoch": 0.14652014652014653,
"grad_norm": 0.20784278213977814,
"learning_rate": 2.9977788423161336e-05,
"loss": 1.5657904148101807,
"step": 80
},
{
"epoch": 0.15018315018315018,
"grad_norm": 0.3089655041694641,
"learning_rate": 2.997462010713957e-05,
"loss": 1.1773194074630737,
"step": 82
},
{
"epoch": 0.15384615384615385,
"grad_norm": 0.11661751568317413,
"learning_rate": 2.997124084346186e-05,
"loss": 1.3365869522094727,
"step": 84
},
{
"epoch": 0.1575091575091575,
"grad_norm": 0.2651246190071106,
"learning_rate": 2.9967650685031216e-05,
"loss": 1.4748975038528442,
"step": 86
},
{
"epoch": 0.16117216117216118,
"grad_norm": 0.28366532921791077,
"learning_rate": 2.9963849688052232e-05,
"loss": 0.7039165496826172,
"step": 88
},
{
"epoch": 0.16483516483516483,
"grad_norm": 0.15931299328804016,
"learning_rate": 2.9959837912030202e-05,
"loss": 0.8500699996948242,
"step": 90
},
{
"epoch": 0.1684981684981685,
"grad_norm": 0.18209676444530487,
"learning_rate": 2.9955615419770222e-05,
"loss": 1.2923749685287476,
"step": 92
},
{
"epoch": 0.17216117216117216,
"grad_norm": 0.1933967024087906,
"learning_rate": 2.9951182277376195e-05,
"loss": 1.3166249990463257,
"step": 94
},
{
"epoch": 0.17582417582417584,
"grad_norm": 0.33180925250053406,
"learning_rate": 2.9946538554249767e-05,
"loss": 1.2045100927352905,
"step": 96
},
{
"epoch": 0.1794871794871795,
"grad_norm": 0.1849534809589386,
"learning_rate": 2.994168432308928e-05,
"loss": 1.3024216890335083,
"step": 98
},
{
"epoch": 0.18315018315018314,
"grad_norm": 0.19000062346458435,
"learning_rate": 2.9936619659888623e-05,
"loss": 1.160252571105957,
"step": 100
},
{
"epoch": 0.18681318681318682,
"grad_norm": 1.3152861595153809,
"learning_rate": 2.993134464393602e-05,
"loss": 1.2116779088974,
"step": 102
},
{
"epoch": 0.19047619047619047,
"grad_norm": 0.6932211518287659,
"learning_rate": 2.9925859357812825e-05,
"loss": 0.8742986917495728,
"step": 104
},
{
"epoch": 0.19413919413919414,
"grad_norm": 0.3814776539802551,
"learning_rate": 2.9920163887392198e-05,
"loss": 1.071030616760254,
"step": 106
},
{
"epoch": 0.1978021978021978,
"grad_norm": 0.5205255746841431,
"learning_rate": 2.9914258321837772e-05,
"loss": 1.295392632484436,
"step": 108
},
{
"epoch": 0.20146520146520147,
"grad_norm": 0.23201794922351837,
"learning_rate": 2.9908142753602263e-05,
"loss": 1.1402907371520996,
"step": 110
},
{
"epoch": 0.20512820512820512,
"grad_norm": 0.17367003858089447,
"learning_rate": 2.990181727842602e-05,
"loss": 1.2087258100509644,
"step": 112
},
{
"epoch": 0.2087912087912088,
"grad_norm": 0.1665368527173996,
"learning_rate": 2.9895281995335517e-05,
"loss": 0.6189576983451843,
"step": 114
},
{
"epoch": 0.21245421245421245,
"grad_norm": 0.224325492978096,
"learning_rate": 2.9888537006641817e-05,
"loss": 1.282774806022644,
"step": 116
},
{
"epoch": 0.21611721611721613,
"grad_norm": 0.528431236743927,
"learning_rate": 2.9881582417938958e-05,
"loss": 1.2732535600662231,
"step": 118
},
{
"epoch": 0.21978021978021978,
"grad_norm": 0.2655428647994995,
"learning_rate": 2.9874418338102297e-05,
"loss": 1.3447692394256592,
"step": 120
},
{
"epoch": 0.22344322344322345,
"grad_norm": 0.3630436658859253,
"learning_rate": 2.9867044879286828e-05,
"loss": 1.2736270427703857,
"step": 122
},
{
"epoch": 0.2271062271062271,
"grad_norm": 0.8113774657249451,
"learning_rate": 2.985946215692541e-05,
"loss": 1.0067188739776611,
"step": 124
},
{
"epoch": 0.23076923076923078,
"grad_norm": 0.366671621799469,
"learning_rate": 2.9851670289726944e-05,
"loss": 0.8855844736099243,
"step": 126
},
{
"epoch": 0.23443223443223443,
"grad_norm": 0.2815873324871063,
"learning_rate": 2.9843669399674548e-05,
"loss": 1.1754907369613647,
"step": 128
},
{
"epoch": 0.23809523809523808,
"grad_norm": 0.25937798619270325,
"learning_rate": 2.9835459612023636e-05,
"loss": 1.2796354293823242,
"step": 130
},
{
"epoch": 0.24175824175824176,
"grad_norm": 0.26560601592063904,
"learning_rate": 2.9827041055299935e-05,
"loss": 1.3694589138031006,
"step": 132
},
{
"epoch": 0.2454212454212454,
"grad_norm": 0.3753569722175598,
"learning_rate": 2.98184138612975e-05,
"loss": 1.270322561264038,
"step": 134
},
{
"epoch": 0.2490842490842491,
"grad_norm": 0.20199541747570038,
"learning_rate": 2.9809578165076638e-05,
"loss": 1.2530642747879028,
"step": 136
},
{
"epoch": 0.25274725274725274,
"grad_norm": 0.4547716975212097,
"learning_rate": 2.9800534104961805e-05,
"loss": 1.0693120956420898,
"step": 138
},
{
"epoch": 0.2564102564102564,
"grad_norm": 0.3281041383743286,
"learning_rate": 2.979128182253942e-05,
"loss": 1.2550175189971924,
"step": 140
},
{
"epoch": 0.2600732600732601,
"grad_norm": 0.3992747366428375,
"learning_rate": 2.9781821462655665e-05,
"loss": 1.2704068422317505,
"step": 142
},
{
"epoch": 0.26373626373626374,
"grad_norm": 0.2010176181793213,
"learning_rate": 2.977215317341422e-05,
"loss": 1.2348976135253906,
"step": 144
},
{
"epoch": 0.2673992673992674,
"grad_norm": 0.54754638671875,
"learning_rate": 2.9762277106173925e-05,
"loss": 1.3938997983932495,
"step": 146
},
{
"epoch": 0.27106227106227104,
"grad_norm": 0.2334858775138855,
"learning_rate": 2.975219341554643e-05,
"loss": 1.29594087600708,
"step": 148
},
{
"epoch": 0.27472527472527475,
"grad_norm": 0.05523291975259781,
"learning_rate": 2.9741902259393773e-05,
"loss": 0.9128252863883972,
"step": 150
},
{
"epoch": 0.2783882783882784,
"grad_norm": 0.06265544146299362,
"learning_rate": 2.9731403798825883e-05,
"loss": 0.6248279809951782,
"step": 152
},
{
"epoch": 0.28205128205128205,
"grad_norm": 3.8201651573181152,
"learning_rate": 2.9720698198198106e-05,
"loss": 1.0864554643630981,
"step": 154
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.5186815857887268,
"learning_rate": 2.9709785625108577e-05,
"loss": 1.323602318763733,
"step": 156
},
{
"epoch": 0.2893772893772894,
"grad_norm": 0.14659367501735687,
"learning_rate": 2.969866625039564e-05,
"loss": 1.2595347166061401,
"step": 158
},
{
"epoch": 0.29304029304029305,
"grad_norm": 0.22754965722560883,
"learning_rate": 2.968734024813515e-05,
"loss": 0.9229455590248108,
"step": 160
},
{
"epoch": 0.2967032967032967,
"grad_norm": 0.30502358078956604,
"learning_rate": 2.9675807795637753e-05,
"loss": 1.5106159448623657,
"step": 162
},
{
"epoch": 0.30036630036630035,
"grad_norm": 0.16795893013477325,
"learning_rate": 2.9664069073446123e-05,
"loss": 1.0200681686401367,
"step": 164
},
{
"epoch": 0.304029304029304,
"grad_norm": 0.5340052247047424,
"learning_rate": 2.9652124265332104e-05,
"loss": 1.3727664947509766,
"step": 166
},
{
"epoch": 0.3076923076923077,
"grad_norm": 0.3535378575325012,
"learning_rate": 2.9639973558293873e-05,
"loss": 1.1149368286132812,
"step": 168
},
{
"epoch": 0.31135531135531136,
"grad_norm": 0.5865007638931274,
"learning_rate": 2.9627617142552972e-05,
"loss": 1.2415387630462646,
"step": 170
},
{
"epoch": 0.315018315018315,
"grad_norm": 0.3413506746292114,
"learning_rate": 2.9615055211551372e-05,
"loss": 0.9970681667327881,
"step": 172
},
{
"epoch": 0.31868131868131866,
"grad_norm": 0.6598376631736755,
"learning_rate": 2.9602287961948407e-05,
"loss": 0.8639789819717407,
"step": 174
},
{
"epoch": 0.32234432234432236,
"grad_norm": 0.21667271852493286,
"learning_rate": 2.958931559361772e-05,
"loss": 1.379815697669983,
"step": 176
},
{
"epoch": 0.326007326007326,
"grad_norm": 0.19014626741409302,
"learning_rate": 2.9576138309644126e-05,
"loss": 0.9096412658691406,
"step": 178
},
{
"epoch": 0.32967032967032966,
"grad_norm": 0.18454764783382416,
"learning_rate": 2.9562756316320423e-05,
"loss": 1.4692316055297852,
"step": 180
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.17303146421909332,
"learning_rate": 2.9549169823144186e-05,
"loss": 0.962703287601471,
"step": 182
},
{
"epoch": 0.336996336996337,
"grad_norm": 0.29088443517684937,
"learning_rate": 2.9535379042814454e-05,
"loss": 1.22477388381958,
"step": 184
},
{
"epoch": 0.34065934065934067,
"grad_norm": 0.5098505020141602,
"learning_rate": 2.9521384191228436e-05,
"loss": 1.271912932395935,
"step": 186
},
{
"epoch": 0.3443223443223443,
"grad_norm": 0.06021943688392639,
"learning_rate": 2.950718548747811e-05,
"loss": 1.1643396615982056,
"step": 188
},
{
"epoch": 0.34798534798534797,
"grad_norm": 0.20197078585624695,
"learning_rate": 2.9492783153846787e-05,
"loss": 1.25394868850708,
"step": 190
},
{
"epoch": 0.3516483516483517,
"grad_norm": 0.5978615880012512,
"learning_rate": 2.9478177415805647e-05,
"loss": 1.0921686887741089,
"step": 192
},
{
"epoch": 0.3553113553113553,
"grad_norm": 0.15152917802333832,
"learning_rate": 2.946336850201022e-05,
"loss": 1.2309471368789673,
"step": 194
},
{
"epoch": 0.358974358974359,
"grad_norm": 0.1544279307126999,
"learning_rate": 2.9448356644296764e-05,
"loss": 1.2391042709350586,
"step": 196
},
{
"epoch": 0.3626373626373626,
"grad_norm": 0.23729011416435242,
"learning_rate": 2.943314207767867e-05,
"loss": 0.8557896018028259,
"step": 198
},
{
"epoch": 0.3663003663003663,
"grad_norm": 0.18273133039474487,
"learning_rate": 2.9417725040342783e-05,
"loss": 1.0039644241333008,
"step": 200
},
{
"epoch": 0.36996336996337,
"grad_norm": 0.5063828825950623,
"learning_rate": 2.9402105773645648e-05,
"loss": 1.418663740158081,
"step": 202
},
{
"epoch": 0.37362637362637363,
"grad_norm": 0.6436253786087036,
"learning_rate": 2.9386284522109774e-05,
"loss": 1.2050065994262695,
"step": 204
},
{
"epoch": 0.3772893772893773,
"grad_norm": 0.30868470668792725,
"learning_rate": 2.937026153341975e-05,
"loss": 0.7637919187545776,
"step": 206
},
{
"epoch": 0.38095238095238093,
"grad_norm": 0.25211474299430847,
"learning_rate": 2.9354037058418424e-05,
"loss": 1.1266578435897827,
"step": 208
},
{
"epoch": 0.38461538461538464,
"grad_norm": 0.1760515570640564,
"learning_rate": 2.9337611351102914e-05,
"loss": 1.39286208152771,
"step": 210
},
{
"epoch": 0.3882783882783883,
"grad_norm": 0.26995450258255005,
"learning_rate": 2.932098466862071e-05,
"loss": 1.0554150342941284,
"step": 212
},
{
"epoch": 0.39194139194139194,
"grad_norm": 0.1435517966747284,
"learning_rate": 2.9304157271265576e-05,
"loss": 1.1168829202651978,
"step": 214
},
{
"epoch": 0.3956043956043956,
"grad_norm": 0.21202005445957184,
"learning_rate": 2.9287129422473514e-05,
"loss": 1.2109602689743042,
"step": 216
},
{
"epoch": 0.3992673992673993,
"grad_norm": 0.556124210357666,
"learning_rate": 2.9269901388818625e-05,
"loss": 1.2425227165222168,
"step": 218
},
{
"epoch": 0.40293040293040294,
"grad_norm": 0.08504249900579453,
"learning_rate": 2.9252473440008948e-05,
"loss": 0.610443651676178,
"step": 220
},
{
"epoch": 0.4065934065934066,
"grad_norm": 0.2557111084461212,
"learning_rate": 2.923484584888222e-05,
"loss": 1.3219108581542969,
"step": 222
},
{
"epoch": 0.41025641025641024,
"grad_norm": 0.1643073856830597,
"learning_rate": 2.9217018891401635e-05,
"loss": 1.0641902685165405,
"step": 224
},
{
"epoch": 0.4139194139194139,
"grad_norm": 0.34884706139564514,
"learning_rate": 2.9198992846651482e-05,
"loss": 1.2210471630096436,
"step": 226
},
{
"epoch": 0.4175824175824176,
"grad_norm": 0.14093200862407684,
"learning_rate": 2.9180767996832804e-05,
"loss": 1.289689302444458,
"step": 228
},
{
"epoch": 0.42124542124542125,
"grad_norm": 0.4363960921764374,
"learning_rate": 2.9162344627258984e-05,
"loss": 1.206679344177246,
"step": 230
},
{
"epoch": 0.4249084249084249,
"grad_norm": 0.3168153762817383,
"learning_rate": 2.9143723026351256e-05,
"loss": 0.9820088744163513,
"step": 232
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.5774416923522949,
"learning_rate": 2.9124903485634212e-05,
"loss": 1.4730240106582642,
"step": 234
},
{
"epoch": 0.43223443223443225,
"grad_norm": 0.3200729191303253,
"learning_rate": 2.9105886299731215e-05,
"loss": 1.0066744089126587,
"step": 236
},
{
"epoch": 0.4358974358974359,
"grad_norm": 0.3356908857822418,
"learning_rate": 2.9086671766359816e-05,
"loss": 1.0569260120391846,
"step": 238
},
{
"epoch": 0.43956043956043955,
"grad_norm": 0.37385445833206177,
"learning_rate": 2.9067260186327068e-05,
"loss": 0.8989033699035645,
"step": 240
},
{
"epoch": 0.4432234432234432,
"grad_norm": 0.1826363503932953,
"learning_rate": 2.904765186352482e-05,
"loss": 0.8609814047813416,
"step": 242
},
{
"epoch": 0.4468864468864469,
"grad_norm": 0.16987594962120056,
"learning_rate": 2.902784710492498e-05,
"loss": 0.5631662011146545,
"step": 244
},
{
"epoch": 0.45054945054945056,
"grad_norm": 0.10533586889505386,
"learning_rate": 2.9007846220574677e-05,
"loss": 1.2396587133407593,
"step": 246
},
{
"epoch": 0.4542124542124542,
"grad_norm": 0.25048568844795227,
"learning_rate": 2.8987649523591442e-05,
"loss": 0.8750399351119995,
"step": 248
},
{
"epoch": 0.45787545787545786,
"grad_norm": 0.6428999900817871,
"learning_rate": 2.8967257330158273e-05,
"loss": 1.2499359846115112,
"step": 250
},
{
"epoch": 0.46153846153846156,
"grad_norm": 0.21030965447425842,
"learning_rate": 2.8946669959518716e-05,
"loss": 1.1594537496566772,
"step": 252
},
{
"epoch": 0.4652014652014652,
"grad_norm": 0.21097975969314575,
"learning_rate": 2.892588773397184e-05,
"loss": 0.9518598914146423,
"step": 254
},
{
"epoch": 0.46886446886446886,
"grad_norm": 0.22967208921909332,
"learning_rate": 2.8904910978867214e-05,
"loss": 1.243324875831604,
"step": 256
},
{
"epoch": 0.4725274725274725,
"grad_norm": 0.28955715894699097,
"learning_rate": 2.888374002259979e-05,
"loss": 1.2640973329544067,
"step": 258
},
{
"epoch": 0.47619047619047616,
"grad_norm": 0.10788365453481674,
"learning_rate": 2.8862375196604782e-05,
"loss": 0.7154593467712402,
"step": 260
},
{
"epoch": 0.47985347985347987,
"grad_norm": 0.26385995745658875,
"learning_rate": 2.8840816835352475e-05,
"loss": 1.1346553564071655,
"step": 262
},
{
"epoch": 0.4835164835164835,
"grad_norm": 0.23100446164608002,
"learning_rate": 2.881906527634298e-05,
"loss": 1.213168740272522,
"step": 264
},
{
"epoch": 0.48717948717948717,
"grad_norm": 0.2399711012840271,
"learning_rate": 2.8797120860100952e-05,
"loss": 1.48377525806427,
"step": 266
},
{
"epoch": 0.4908424908424908,
"grad_norm": 0.2898120582103729,
"learning_rate": 2.8774983930170256e-05,
"loss": 0.79274582862854,
"step": 268
},
{
"epoch": 0.4945054945054945,
"grad_norm": 0.11057275533676147,
"learning_rate": 2.875265483310861e-05,
"loss": 1.18440580368042,
"step": 270
},
{
"epoch": 0.4981684981684982,
"grad_norm": 0.34004244208335876,
"learning_rate": 2.873013391848213e-05,
"loss": 0.8764394521713257,
"step": 272
},
{
"epoch": 0.5018315018315018,
"grad_norm": 1.7269130945205688,
"learning_rate": 2.8707421538859884e-05,
"loss": 1.2305411100387573,
"step": 274
},
{
"epoch": 0.5054945054945055,
"grad_norm": 0.4209914803504944,
"learning_rate": 2.8684518049808345e-05,
"loss": 0.7977259159088135,
"step": 276
},
{
"epoch": 0.5091575091575091,
"grad_norm": 0.2639271020889282,
"learning_rate": 2.8661423809885846e-05,
"loss": 0.8770049810409546,
"step": 278
},
{
"epoch": 0.5128205128205128,
"grad_norm": 0.2806454002857208,
"learning_rate": 2.8638139180636962e-05,
"loss": 1.2383899688720703,
"step": 280
},
{
"epoch": 0.5164835164835165,
"grad_norm": 0.19708214700222015,
"learning_rate": 2.861466452658685e-05,
"loss": 1.2301888465881348,
"step": 282
},
{
"epoch": 0.5201465201465202,
"grad_norm": 0.18919607996940613,
"learning_rate": 2.8591000215235535e-05,
"loss": 1.2272742986679077,
"step": 284
},
{
"epoch": 0.5238095238095238,
"grad_norm": 0.6468890309333801,
"learning_rate": 2.8567146617052157e-05,
"loss": 0.978451669216156,
"step": 286
},
{
"epoch": 0.5274725274725275,
"grad_norm": 0.12678763270378113,
"learning_rate": 2.854310410546919e-05,
"loss": 1.2404258251190186,
"step": 288
},
{
"epoch": 0.5311355311355311,
"grad_norm": 0.15650980174541473,
"learning_rate": 2.851887305687657e-05,
"loss": 1.204418420791626,
"step": 290
},
{
"epoch": 0.5347985347985348,
"grad_norm": 0.09721121937036514,
"learning_rate": 2.8494453850615823e-05,
"loss": 0.7288497090339661,
"step": 292
},
{
"epoch": 0.5384615384615384,
"grad_norm": 0.08145172148942947,
"learning_rate": 2.846984686897411e-05,
"loss": 0.9385504722595215,
"step": 294
},
{
"epoch": 0.5421245421245421,
"grad_norm": 0.3740465044975281,
"learning_rate": 2.8445052497178255e-05,
"loss": 1.0164296627044678,
"step": 296
},
{
"epoch": 0.5457875457875457,
"grad_norm": 0.6161367893218994,
"learning_rate": 2.8420071123388712e-05,
"loss": 1.286184549331665,
"step": 298
},
{
"epoch": 0.5494505494505495,
"grad_norm": 0.15520739555358887,
"learning_rate": 2.839490313869348e-05,
"loss": 1.189969778060913,
"step": 300
},
{
"epoch": 0.5531135531135531,
"grad_norm": 0.3342311382293701,
"learning_rate": 2.8369548937101984e-05,
"loss": 1.176458477973938,
"step": 302
},
{
"epoch": 0.5567765567765568,
"grad_norm": 0.1474112868309021,
"learning_rate": 2.8344008915538916e-05,
"loss": 0.9629110097885132,
"step": 304
},
{
"epoch": 0.5604395604395604,
"grad_norm": 0.26941052079200745,
"learning_rate": 2.831828347383802e-05,
"loss": 0.9698507189750671,
"step": 306
},
{
"epoch": 0.5641025641025641,
"grad_norm": 0.33016347885131836,
"learning_rate": 2.82923730147358e-05,
"loss": 1.382779836654663,
"step": 308
},
{
"epoch": 0.5677655677655677,
"grad_norm": 0.21179628372192383,
"learning_rate": 2.826627794386527e-05,
"loss": 0.9771309494972229,
"step": 310
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.19774630665779114,
"learning_rate": 2.823999866974956e-05,
"loss": 1.2072325944900513,
"step": 312
},
{
"epoch": 0.575091575091575,
"grad_norm": 0.21240785717964172,
"learning_rate": 2.821353560379554e-05,
"loss": 1.1255697011947632,
"step": 314
},
{
"epoch": 0.5787545787545788,
"grad_norm": 0.33391815423965454,
"learning_rate": 2.8186889160287368e-05,
"loss": 1.2749519348144531,
"step": 316
},
{
"epoch": 0.5824175824175825,
"grad_norm": 0.18730609118938446,
"learning_rate": 2.816005975638003e-05,
"loss": 1.1112217903137207,
"step": 318
},
{
"epoch": 0.5860805860805861,
"grad_norm": 0.12002283334732056,
"learning_rate": 2.8133047812092776e-05,
"loss": 1.3227157592773438,
"step": 320
},
{
"epoch": 0.5897435897435898,
"grad_norm": 0.2299957275390625,
"learning_rate": 2.810585375030255e-05,
"loss": 1.512547254562378,
"step": 322
},
{
"epoch": 0.5934065934065934,
"grad_norm": 0.14320939779281616,
"learning_rate": 2.8078477996737404e-05,
"loss": 1.2510945796966553,
"step": 324
},
{
"epoch": 0.5970695970695971,
"grad_norm": 0.15396593511104584,
"learning_rate": 2.805092097996979e-05,
"loss": 1.3484824895858765,
"step": 326
},
{
"epoch": 0.6007326007326007,
"grad_norm": 0.18156281113624573,
"learning_rate": 2.8023183131409867e-05,
"loss": 1.213308572769165,
"step": 328
},
{
"epoch": 0.6043956043956044,
"grad_norm": 0.23053434491157532,
"learning_rate": 2.799526488529877e-05,
"loss": 1.1017242670059204,
"step": 330
},
{
"epoch": 0.608058608058608,
"grad_norm": 0.17770767211914062,
"learning_rate": 2.7967166678701764e-05,
"loss": 1.169398546218872,
"step": 332
},
{
"epoch": 0.6117216117216118,
"grad_norm": 0.49059954285621643,
"learning_rate": 2.7938888951501446e-05,
"loss": 1.2648427486419678,
"step": 334
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.32419341802597046,
"learning_rate": 2.7910432146390835e-05,
"loss": 0.8742145895957947,
"step": 336
},
{
"epoch": 0.6190476190476191,
"grad_norm": 0.18223723769187927,
"learning_rate": 2.7881796708866444e-05,
"loss": 1.2242615222930908,
"step": 338
},
{
"epoch": 0.6227106227106227,
"grad_norm": 0.12653693556785583,
"learning_rate": 2.7852983087221323e-05,
"loss": 1.2155145406723022,
"step": 340
},
{
"epoch": 0.6263736263736264,
"grad_norm": 0.31820210814476013,
"learning_rate": 2.782399173253801e-05,
"loss": 0.5730471611022949,
"step": 342
},
{
"epoch": 0.63003663003663,
"grad_norm": 0.21934422850608826,
"learning_rate": 2.7794823098681503e-05,
"loss": 1.4268808364868164,
"step": 344
},
{
"epoch": 0.6336996336996337,
"grad_norm": 0.21997937560081482,
"learning_rate": 2.7765477642292122e-05,
"loss": 0.6927456855773926,
"step": 346
},
{
"epoch": 0.6373626373626373,
"grad_norm": 0.1465822160243988,
"learning_rate": 2.7735955822778383e-05,
"loss": 1.1500985622406006,
"step": 348
},
{
"epoch": 0.6410256410256411,
"grad_norm": 0.9232410192489624,
"learning_rate": 2.7706258102309807e-05,
"loss": 0.8731042742729187,
"step": 350
},
{
"epoch": 0.6446886446886447,
"grad_norm": 0.4088384807109833,
"learning_rate": 2.7676384945809665e-05,
"loss": 0.9138453602790833,
"step": 352
},
{
"epoch": 0.6483516483516484,
"grad_norm": 0.5485342144966125,
"learning_rate": 2.7646336820947716e-05,
"loss": 0.9523658156394958,
"step": 354
},
{
"epoch": 0.652014652014652,
"grad_norm": 0.24116860330104828,
"learning_rate": 2.7616114198132885e-05,
"loss": 1.1540048122406006,
"step": 356
},
{
"epoch": 0.6556776556776557,
"grad_norm": 0.32777583599090576,
"learning_rate": 2.7585717550505885e-05,
"loss": 1.1353778839111328,
"step": 358
},
{
"epoch": 0.6593406593406593,
"grad_norm": 0.4388955533504486,
"learning_rate": 2.7555147353931828e-05,
"loss": 0.6781994104385376,
"step": 360
},
{
"epoch": 0.663003663003663,
"grad_norm": 0.2568267285823822,
"learning_rate": 2.752440408699276e-05,
"loss": 0.9329240918159485,
"step": 362
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.3400379419326782,
"learning_rate": 2.7493488230980183e-05,
"loss": 0.9248878359794617,
"step": 364
},
{
"epoch": 0.6703296703296703,
"grad_norm": 0.21287308633327484,
"learning_rate": 2.746240026988751e-05,
"loss": 1.1757709980010986,
"step": 366
},
{
"epoch": 0.673992673992674,
"grad_norm": 0.17205439507961273,
"learning_rate": 2.7431140690402486e-05,
"loss": 1.0503566265106201,
"step": 368
},
{
"epoch": 0.6776556776556777,
"grad_norm": 0.2852046489715576,
"learning_rate": 2.7399709981899575e-05,
"loss": 1.0815939903259277,
"step": 370
},
{
"epoch": 0.6813186813186813,
"grad_norm": 0.3913589119911194,
"learning_rate": 2.7368108636432305e-05,
"loss": 1.1820636987686157,
"step": 372
},
{
"epoch": 0.684981684981685,
"grad_norm": 0.19117851555347443,
"learning_rate": 2.7336337148725544e-05,
"loss": 1.1824742555618286,
"step": 374
},
{
"epoch": 0.6886446886446886,
"grad_norm": 0.215864896774292,
"learning_rate": 2.7304396016167787e-05,
"loss": 0.8475154042243958,
"step": 376
},
{
"epoch": 0.6923076923076923,
"grad_norm": 0.13860984146595,
"learning_rate": 2.7272285738803325e-05,
"loss": 0.8204724192619324,
"step": 378
},
{
"epoch": 0.6959706959706959,
"grad_norm": 0.75872403383255,
"learning_rate": 2.7240006819324463e-05,
"loss": 1.033507227897644,
"step": 380
},
{
"epoch": 0.6996336996336996,
"grad_norm": 0.1523711383342743,
"learning_rate": 2.7207559763063615e-05,
"loss": 0.9690554738044739,
"step": 382
},
{
"epoch": 0.7032967032967034,
"grad_norm": 0.19733549654483795,
"learning_rate": 2.7174945077985425e-05,
"loss": 1.1682504415512085,
"step": 384
},
{
"epoch": 0.706959706959707,
"grad_norm": 0.28466567397117615,
"learning_rate": 2.7142163274678783e-05,
"loss": 1.0818616151809692,
"step": 386
},
{
"epoch": 0.7106227106227107,
"grad_norm": 0.35399043560028076,
"learning_rate": 2.7109214866348845e-05,
"loss": 0.8753510117530823,
"step": 388
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.17836952209472656,
"learning_rate": 2.7076100368809007e-05,
"loss": 1.0765563249588013,
"step": 390
},
{
"epoch": 0.717948717948718,
"grad_norm": 0.12893974781036377,
"learning_rate": 2.704282030047281e-05,
"loss": 1.163602352142334,
"step": 392
},
{
"epoch": 0.7216117216117216,
"grad_norm": 0.9552748203277588,
"learning_rate": 2.7009375182345852e-05,
"loss": 1.1078529357910156,
"step": 394
},
{
"epoch": 0.7252747252747253,
"grad_norm": 0.15003487467765808,
"learning_rate": 2.697576553801761e-05,
"loss": 1.2296016216278076,
"step": 396
},
{
"epoch": 0.7289377289377289,
"grad_norm": 0.2294345647096634,
"learning_rate": 2.6941991893653237e-05,
"loss": 0.9108463525772095,
"step": 398
},
{
"epoch": 0.7326007326007326,
"grad_norm": 0.1495809704065323,
"learning_rate": 2.6908054777985364e-05,
"loss": 0.8304123282432556,
"step": 400
},
{
"epoch": 0.7362637362637363,
"grad_norm": 0.1536511480808258,
"learning_rate": 2.6873954722305758e-05,
"loss": 1.23292076587677,
"step": 402
},
{
"epoch": 0.73992673992674,
"grad_norm": 0.2795121967792511,
"learning_rate": 2.6839692260457073e-05,
"loss": 0.8330604434013367,
"step": 404
},
{
"epoch": 0.7435897435897436,
"grad_norm": 0.3814455568790436,
"learning_rate": 2.6805267928824453e-05,
"loss": 1.0098503828048706,
"step": 406
},
{
"epoch": 0.7472527472527473,
"grad_norm": 0.17802539467811584,
"learning_rate": 2.6770682266327137e-05,
"loss": 1.0066304206848145,
"step": 408
},
{
"epoch": 0.7509157509157509,
"grad_norm": 0.3598598837852478,
"learning_rate": 2.6735935814410034e-05,
"loss": 0.8806478381156921,
"step": 410
},
{
"epoch": 0.7545787545787546,
"grad_norm": 0.27274230122566223,
"learning_rate": 2.6701029117035233e-05,
"loss": 1.163108229637146,
"step": 412
},
{
"epoch": 0.7582417582417582,
"grad_norm": 0.27305206656455994,
"learning_rate": 2.666596272067351e-05,
"loss": 1.0062353610992432,
"step": 414
},
{
"epoch": 0.7619047619047619,
"grad_norm": 0.219185933470726,
"learning_rate": 2.663073717429574e-05,
"loss": 1.2644356489181519,
"step": 416
},
{
"epoch": 0.7655677655677655,
"grad_norm": 0.11417064070701599,
"learning_rate": 2.6595353029364336e-05,
"loss": 0.6706827282905579,
"step": 418
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.1437530517578125,
"learning_rate": 2.6559810839824595e-05,
"loss": 1.2274192571640015,
"step": 420
},
{
"epoch": 0.7728937728937729,
"grad_norm": 0.18546123802661896,
"learning_rate": 2.6524111162096034e-05,
"loss": 0.9849696159362793,
"step": 422
},
{
"epoch": 0.7765567765567766,
"grad_norm": 0.5309081673622131,
"learning_rate": 2.648825455506366e-05,
"loss": 0.4850473403930664,
"step": 424
},
{
"epoch": 0.7802197802197802,
"grad_norm": 0.21896012127399445,
"learning_rate": 2.6452241580069266e-05,
"loss": 0.7771881818771362,
"step": 426
},
{
"epoch": 0.7838827838827839,
"grad_norm": 0.23113255202770233,
"learning_rate": 2.6416072800902587e-05,
"loss": 1.0487337112426758,
"step": 428
},
{
"epoch": 0.7875457875457875,
"grad_norm": 0.31200718879699707,
"learning_rate": 2.6379748783792524e-05,
"loss": 1.198923945426941,
"step": 430
},
{
"epoch": 0.7912087912087912,
"grad_norm": 0.4733012914657593,
"learning_rate": 2.6343270097398235e-05,
"loss": 1.2832245826721191,
"step": 432
},
{
"epoch": 0.7948717948717948,
"grad_norm": 0.19738741219043732,
"learning_rate": 2.630663731280027e-05,
"loss": 0.8760618567466736,
"step": 434
},
{
"epoch": 0.7985347985347986,
"grad_norm": 0.2763383984565735,
"learning_rate": 2.626985100349161e-05,
"loss": 0.9577842354774475,
"step": 436
},
{
"epoch": 0.8021978021978022,
"grad_norm": 0.2796023488044739,
"learning_rate": 2.6232911745368683e-05,
"loss": 0.8739151954650879,
"step": 438
},
{
"epoch": 0.8058608058608059,
"grad_norm": 0.1453395038843155,
"learning_rate": 2.619582011672238e-05,
"loss": 1.2987086772918701,
"step": 440
},
{
"epoch": 0.8095238095238095,
"grad_norm": 0.16335386037826538,
"learning_rate": 2.6158576698228962e-05,
"loss": 1.1930700540542603,
"step": 442
},
{
"epoch": 0.8131868131868132,
"grad_norm": 0.233575239777565,
"learning_rate": 2.6121182072941003e-05,
"loss": 1.189906120300293,
"step": 444
},
{
"epoch": 0.8168498168498168,
"grad_norm": 0.17749448120594025,
"learning_rate": 2.6083636826278228e-05,
"loss": 1.2090119123458862,
"step": 446
},
{
"epoch": 0.8205128205128205,
"grad_norm": 0.21508704125881195,
"learning_rate": 2.6045941546018393e-05,
"loss": 1.214590072631836,
"step": 448
},
{
"epoch": 0.8241758241758241,
"grad_norm": 0.2976554334163666,
"learning_rate": 2.600809682228803e-05,
"loss": 1.300894856452942,
"step": 450
},
{
"epoch": 0.8278388278388278,
"grad_norm": 0.242268368601799,
"learning_rate": 2.5970103247553255e-05,
"loss": 0.9925535321235657,
"step": 452
},
{
"epoch": 0.8315018315018315,
"grad_norm": 0.11002147197723389,
"learning_rate": 2.5931961416610467e-05,
"loss": 0.4743513762950897,
"step": 454
},
{
"epoch": 0.8351648351648352,
"grad_norm": 0.06710305064916611,
"learning_rate": 2.5893671926577045e-05,
"loss": 0.8895782232284546,
"step": 456
},
{
"epoch": 0.8388278388278388,
"grad_norm": 0.3933258354663849,
"learning_rate": 2.5855235376881992e-05,
"loss": 0.8245795369148254,
"step": 458
},
{
"epoch": 0.8424908424908425,
"grad_norm": 0.9087498188018799,
"learning_rate": 2.5816652369256575e-05,
"loss": 1.017600178718567,
"step": 460
},
{
"epoch": 0.8461538461538461,
"grad_norm": 0.31329649686813354,
"learning_rate": 2.5777923507724863e-05,
"loss": 1.2184432744979858,
"step": 462
},
{
"epoch": 0.8498168498168498,
"grad_norm": 0.29965758323669434,
"learning_rate": 2.5739049398594304e-05,
"loss": 1.1723891496658325,
"step": 464
},
{
"epoch": 0.8534798534798534,
"grad_norm": 0.12564675509929657,
"learning_rate": 2.5700030650446236e-05,
"loss": 1.1921778917312622,
"step": 466
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.1690017133951187,
"learning_rate": 2.5660867874126333e-05,
"loss": 0.8882995843887329,
"step": 468
},
{
"epoch": 0.8608058608058609,
"grad_norm": 0.3731120526790619,
"learning_rate": 2.562156168273506e-05,
"loss": 0.9048332571983337,
"step": 470
},
{
"epoch": 0.8644688644688645,
"grad_norm": 0.2866782248020172,
"learning_rate": 2.558211269161807e-05,
"loss": 1.1616424322128296,
"step": 472
},
{
"epoch": 0.8681318681318682,
"grad_norm": 0.1418561190366745,
"learning_rate": 2.554252151835658e-05,
"loss": 0.8175588250160217,
"step": 474
},
{
"epoch": 0.8717948717948718,
"grad_norm": 0.15560688078403473,
"learning_rate": 2.550278878275768e-05,
"loss": 1.0879602432250977,
"step": 476
},
{
"epoch": 0.8754578754578755,
"grad_norm": 0.3156808614730835,
"learning_rate": 2.5462915106844662e-05,
"loss": 1.0410772562026978,
"step": 478
},
{
"epoch": 0.8791208791208791,
"grad_norm": 2.6696999073028564,
"learning_rate": 2.5422901114847252e-05,
"loss": 1.067376971244812,
"step": 480
},
{
"epoch": 0.8827838827838828,
"grad_norm": 0.20099873840808868,
"learning_rate": 2.5382747433191855e-05,
"loss": 1.181754469871521,
"step": 482
},
{
"epoch": 0.8864468864468864,
"grad_norm": 0.22863909602165222,
"learning_rate": 2.5342454690491742e-05,
"loss": 1.2510448694229126,
"step": 484
},
{
"epoch": 0.8901098901098901,
"grad_norm": 0.18631558120250702,
"learning_rate": 2.5302023517537208e-05,
"loss": 0.8979169726371765,
"step": 486
},
{
"epoch": 0.8937728937728938,
"grad_norm": 0.32262253761291504,
"learning_rate": 2.52614545472857e-05,
"loss": 1.1463004350662231,
"step": 488
},
{
"epoch": 0.8974358974358975,
"grad_norm": 0.20295943319797516,
"learning_rate": 2.522074841485191e-05,
"loss": 1.1008638143539429,
"step": 490
},
{
"epoch": 0.9010989010989011,
"grad_norm": 0.18802852928638458,
"learning_rate": 2.517990575749784e-05,
"loss": 1.038072109222412,
"step": 492
},
{
"epoch": 0.9047619047619048,
"grad_norm": 0.2713935077190399,
"learning_rate": 2.513892721462278e-05,
"loss": 0.594063937664032,
"step": 494
},
{
"epoch": 0.9084249084249084,
"grad_norm": 0.4037923812866211,
"learning_rate": 2.5097813427753367e-05,
"loss": 1.2677081823349,
"step": 496
},
{
"epoch": 0.9120879120879121,
"grad_norm": 0.43999311327934265,
"learning_rate": 2.5056565040533502e-05,
"loss": 1.1207178831100464,
"step": 498
},
{
"epoch": 0.9157509157509157,
"grad_norm": 0.5690798163414001,
"learning_rate": 2.5015182698714257e-05,
"loss": 0.8040251731872559,
"step": 500
},
{
"epoch": 0.9194139194139194,
"grad_norm": 0.11312337219715118,
"learning_rate": 2.4973667050143826e-05,
"loss": 0.8545711636543274,
"step": 502
},
{
"epoch": 0.9230769230769231,
"grad_norm": 2.5619421005249023,
"learning_rate": 2.4932018744757304e-05,
"loss": 0.9387710094451904,
"step": 504
},
{
"epoch": 0.9267399267399268,
"grad_norm": 0.14426486194133759,
"learning_rate": 2.4890238434566572e-05,
"loss": 0.8429293036460876,
"step": 506
},
{
"epoch": 0.9304029304029304,
"grad_norm": 0.22939638793468475,
"learning_rate": 2.4848326773650073e-05,
"loss": 1.1522276401519775,
"step": 508
},
{
"epoch": 0.9340659340659341,
"grad_norm": 0.1552036851644516,
"learning_rate": 2.4806284418142578e-05,
"loss": 1.2124234437942505,
"step": 510
},
{
"epoch": 0.9377289377289377,
"grad_norm": 0.8236564993858337,
"learning_rate": 2.4764112026224884e-05,
"loss": 1.1633071899414062,
"step": 512
},
{
"epoch": 0.9413919413919414,
"grad_norm": 0.21365776658058167,
"learning_rate": 2.472181025811354e-05,
"loss": 0.939444363117218,
"step": 514
},
{
"epoch": 0.945054945054945,
"grad_norm": 0.1673533022403717,
"learning_rate": 2.467937977605051e-05,
"loss": 1.1668999195098877,
"step": 516
},
{
"epoch": 0.9487179487179487,
"grad_norm": 0.09891626238822937,
"learning_rate": 2.4636821244292798e-05,
"loss": 1.2052339315414429,
"step": 518
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.2742900252342224,
"learning_rate": 2.4594135329102042e-05,
"loss": 1.2038750648498535,
"step": 520
},
{
"epoch": 0.9560439560439561,
"grad_norm": 0.4109446406364441,
"learning_rate": 2.4551322698734087e-05,
"loss": 0.8789442181587219,
"step": 522
},
{
"epoch": 0.9597069597069597,
"grad_norm": 0.5696598291397095,
"learning_rate": 2.4508384023428545e-05,
"loss": 1.0179319381713867,
"step": 524
},
{
"epoch": 0.9633699633699634,
"grad_norm": 0.351968914270401,
"learning_rate": 2.446531997539828e-05,
"loss": 0.9564056992530823,
"step": 526
},
{
"epoch": 0.967032967032967,
"grad_norm": 0.9730896949768066,
"learning_rate": 2.4422131228818865e-05,
"loss": 0.6624069809913635,
"step": 528
},
{
"epoch": 0.9706959706959707,
"grad_norm": 0.09024892747402191,
"learning_rate": 2.437881845981809e-05,
"loss": 0.8948387503623962,
"step": 530
},
{
"epoch": 0.9743589743589743,
"grad_norm": 0.36814096570014954,
"learning_rate": 2.433538234646531e-05,
"loss": 1.3387359380722046,
"step": 532
},
{
"epoch": 0.978021978021978,
"grad_norm": 0.19982792437076569,
"learning_rate": 2.4291823568760872e-05,
"loss": 0.5992705225944519,
"step": 534
},
{
"epoch": 0.9816849816849816,
"grad_norm": 0.36355239152908325,
"learning_rate": 2.4248142808625442e-05,
"loss": 0.859024167060852,
"step": 536
},
{
"epoch": 0.9853479853479854,
"grad_norm": 0.2174113541841507,
"learning_rate": 2.420434074988937e-05,
"loss": 0.7908284664154053,
"step": 538
},
{
"epoch": 0.989010989010989,
"grad_norm": 0.44196808338165283,
"learning_rate": 2.4160418078281927e-05,
"loss": 1.1095813512802124,
"step": 540
},
{
"epoch": 0.9926739926739927,
"grad_norm": 0.44959282875061035,
"learning_rate": 2.411637548142062e-05,
"loss": 1.1603001356124878,
"step": 542
},
{
"epoch": 0.9963369963369964,
"grad_norm": 0.6111741065979004,
"learning_rate": 2.4072213648800402e-05,
"loss": 1.1754573583602905,
"step": 544
},
{
"epoch": 1.0,
"grad_norm": 0.16650645434856415,
"learning_rate": 2.4027933271782885e-05,
"loss": 1.3477964401245117,
"step": 546
},
{
"epoch": 1.0036630036630036,
"grad_norm": 0.3535718023777008,
"learning_rate": 2.398353504358552e-05,
"loss": 0.9175674319267273,
"step": 548
},
{
"epoch": 1.0073260073260073,
"grad_norm": 0.16583868861198425,
"learning_rate": 2.3939019659270728e-05,
"loss": 1.1587849855422974,
"step": 550
},
{
"epoch": 1.010989010989011,
"grad_norm": 0.12593019008636475,
"learning_rate": 2.389438781573504e-05,
"loss": 0.6544692516326904,
"step": 552
},
{
"epoch": 1.0146520146520146,
"grad_norm": 0.4962306320667267,
"learning_rate": 2.3849640211698174e-05,
"loss": 1.1096186637878418,
"step": 554
},
{
"epoch": 1.0183150183150182,
"grad_norm": 0.21355493366718292,
"learning_rate": 2.3804777547692103e-05,
"loss": 1.0150353908538818,
"step": 556
},
{
"epoch": 1.021978021978022,
"grad_norm": 0.13404490053653717,
"learning_rate": 2.3759800526050082e-05,
"loss": 1.1983932256698608,
"step": 558
},
{
"epoch": 1.0256410256410255,
"grad_norm": 0.3030662536621094,
"learning_rate": 2.371470985089565e-05,
"loss": 0.9012506008148193,
"step": 560
},
{
"epoch": 1.0293040293040292,
"grad_norm": 0.21635055541992188,
"learning_rate": 2.366950622813163e-05,
"loss": 1.145828366279602,
"step": 562
},
{
"epoch": 1.032967032967033,
"grad_norm": 0.21448099613189697,
"learning_rate": 2.362419036542904e-05,
"loss": 1.128821611404419,
"step": 564
},
{
"epoch": 1.0366300366300367,
"grad_norm": 0.22941601276397705,
"learning_rate": 2.357876297221606e-05,
"loss": 1.001961350440979,
"step": 566
},
{
"epoch": 1.0402930402930404,
"grad_norm": 0.34975817799568176,
"learning_rate": 2.3533224759666865e-05,
"loss": 1.019287347793579,
"step": 568
},
{
"epoch": 1.043956043956044,
"grad_norm": 0.17393207550048828,
"learning_rate": 2.348757644069056e-05,
"loss": 1.0827431678771973,
"step": 570
},
{
"epoch": 1.0476190476190477,
"grad_norm": 0.17333652079105377,
"learning_rate": 2.3441818729919975e-05,
"loss": 1.2207838296890259,
"step": 572
},
{
"epoch": 1.0512820512820513,
"grad_norm": 2.0395469665527344,
"learning_rate": 2.3395952343700484e-05,
"loss": 1.0374027490615845,
"step": 574
},
{
"epoch": 1.054945054945055,
"grad_norm": 0.22025133669376373,
"learning_rate": 2.33499780000788e-05,
"loss": 1.219722032546997,
"step": 576
},
{
"epoch": 1.0586080586080586,
"grad_norm": 0.15319080650806427,
"learning_rate": 2.3303896418791725e-05,
"loss": 1.2034533023834229,
"step": 578
},
{
"epoch": 1.0622710622710623,
"grad_norm": 0.18114957213401794,
"learning_rate": 2.3257708321254892e-05,
"loss": 1.2026004791259766,
"step": 580
},
{
"epoch": 1.065934065934066,
"grad_norm": 0.3112766444683075,
"learning_rate": 2.321141443055146e-05,
"loss": 0.9691221117973328,
"step": 582
},
{
"epoch": 1.0695970695970696,
"grad_norm": 0.21051643788814545,
"learning_rate": 2.3165015471420802e-05,
"loss": 1.1308506727218628,
"step": 584
},
{
"epoch": 1.0732600732600732,
"grad_norm": 0.1423751562833786,
"learning_rate": 2.3118512170247156e-05,
"loss": 1.0359041690826416,
"step": 586
},
{
"epoch": 1.0769230769230769,
"grad_norm": 0.9423340559005737,
"learning_rate": 2.3071905255048257e-05,
"loss": 0.7976118922233582,
"step": 588
},
{
"epoch": 1.0805860805860805,
"grad_norm": 0.1473110169172287,
"learning_rate": 2.3025195455463938e-05,
"loss": 1.181793212890625,
"step": 590
},
{
"epoch": 1.0842490842490842,
"grad_norm": 0.04453103244304657,
"learning_rate": 2.2978383502744693e-05,
"loss": 0.993567168712616,
"step": 592
},
{
"epoch": 1.0879120879120878,
"grad_norm": 0.5919273495674133,
"learning_rate": 2.2931470129740257e-05,
"loss": 0.9564979076385498,
"step": 594
},
{
"epoch": 1.0915750915750915,
"grad_norm": 0.2739202380180359,
"learning_rate": 2.2884456070888107e-05,
"loss": 0.9901763200759888,
"step": 596
},
{
"epoch": 1.0952380952380953,
"grad_norm": 0.28832197189331055,
"learning_rate": 2.2837342062201987e-05,
"loss": 1.0656261444091797,
"step": 598
},
{
"epoch": 1.098901098901099,
"grad_norm": 0.4255838990211487,
"learning_rate": 2.279012884126037e-05,
"loss": 1.1217824220657349,
"step": 600
},
{
"epoch": 1.1025641025641026,
"grad_norm": 0.2491135150194168,
"learning_rate": 2.2742817147194904e-05,
"loss": 0.8021670579910278,
"step": 602
},
{
"epoch": 1.1062271062271063,
"grad_norm": 0.29399213194847107,
"learning_rate": 2.2695407720678863e-05,
"loss": 0.9206523299217224,
"step": 604
},
{
"epoch": 1.10989010989011,
"grad_norm": 0.204596146941185,
"learning_rate": 2.2647901303915543e-05,
"loss": 0.8671106696128845,
"step": 606
},
{
"epoch": 1.1135531135531136,
"grad_norm": 10.455309867858887,
"learning_rate": 2.2600298640626633e-05,
"loss": 1.2365387678146362,
"step": 608
},
{
"epoch": 1.1172161172161172,
"grad_norm": 0.3023589253425598,
"learning_rate": 2.2552600476040578e-05,
"loss": 0.6999451518058777,
"step": 610
},
{
"epoch": 1.120879120879121,
"grad_norm": 0.47288236021995544,
"learning_rate": 2.2504807556880924e-05,
"loss": 1.1393994092941284,
"step": 612
},
{
"epoch": 1.1245421245421245,
"grad_norm": 0.3826245963573456,
"learning_rate": 2.2456920631354604e-05,
"loss": 0.9749789237976074,
"step": 614
},
{
"epoch": 1.1282051282051282,
"grad_norm": 0.20710138976573944,
"learning_rate": 2.2408940449140255e-05,
"loss": 1.1383905410766602,
"step": 616
},
{
"epoch": 1.1318681318681318,
"grad_norm": 0.4310348629951477,
"learning_rate": 2.2360867761376433e-05,
"loss": 0.3758849799633026,
"step": 618
},
{
"epoch": 1.1355311355311355,
"grad_norm": 0.4155263304710388,
"learning_rate": 2.231270332064993e-05,
"loss": 1.0655720233917236,
"step": 620
},
{
"epoch": 1.1391941391941391,
"grad_norm": 0.33659833669662476,
"learning_rate": 2.2264447880983903e-05,
"loss": 0.7230702042579651,
"step": 622
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.42456403374671936,
"learning_rate": 2.2216102197826152e-05,
"loss": 1.0588797330856323,
"step": 624
},
{
"epoch": 1.1465201465201464,
"grad_norm": 0.19433031976222992,
"learning_rate": 2.216766702803722e-05,
"loss": 1.3376067876815796,
"step": 626
},
{
"epoch": 1.15018315018315,
"grad_norm": 0.5079641342163086,
"learning_rate": 2.2119143129878612e-05,
"loss": 0.4252643287181854,
"step": 628
},
{
"epoch": 1.1538461538461537,
"grad_norm": 0.039314355701208115,
"learning_rate": 2.2070531263000877e-05,
"loss": 1.0956590175628662,
"step": 630
},
{
"epoch": 1.1575091575091574,
"grad_norm": 0.27123090624809265,
"learning_rate": 2.2021832188431726e-05,
"loss": 1.0597316026687622,
"step": 632
},
{
"epoch": 1.1611721611721613,
"grad_norm": 0.4172046184539795,
"learning_rate": 2.197304666856413e-05,
"loss": 0.8367774486541748,
"step": 634
},
{
"epoch": 1.164835164835165,
"grad_norm": 0.3932022154331207,
"learning_rate": 2.1924175467144374e-05,
"loss": 0.9543017745018005,
"step": 636
},
{
"epoch": 1.1684981684981686,
"grad_norm": 0.1553221344947815,
"learning_rate": 2.1875219349260103e-05,
"loss": 1.1636459827423096,
"step": 638
},
{
"epoch": 1.1721611721611722,
"grad_norm": 0.09826485067605972,
"learning_rate": 2.182617908132835e-05,
"loss": 0.7304913997650146,
"step": 640
},
{
"epoch": 1.1758241758241759,
"grad_norm": 0.2174447923898697,
"learning_rate": 2.1777055431083526e-05,
"loss": 1.1003189086914062,
"step": 642
},
{
"epoch": 1.1794871794871795,
"grad_norm": 0.272905170917511,
"learning_rate": 2.1727849167565417e-05,
"loss": 1.0145132541656494,
"step": 644
},
{
"epoch": 1.1831501831501832,
"grad_norm": 0.1712464690208435,
"learning_rate": 2.1678561061107114e-05,
"loss": 0.7309105396270752,
"step": 646
},
{
"epoch": 1.1868131868131868,
"grad_norm": 0.2222210019826889,
"learning_rate": 2.1629191883322998e-05,
"loss": 0.8648232221603394,
"step": 648
},
{
"epoch": 1.1904761904761905,
"grad_norm": 0.35955536365509033,
"learning_rate": 2.1579742407096626e-05,
"loss": 1.1244066953659058,
"step": 650
},
{
"epoch": 1.1941391941391941,
"grad_norm": 0.2085191309452057,
"learning_rate": 2.1530213406568637e-05,
"loss": 0.6541961431503296,
"step": 652
},
{
"epoch": 1.1978021978021978,
"grad_norm": 1.7351287603378296,
"learning_rate": 2.1480605657124656e-05,
"loss": 1.2610344886779785,
"step": 654
},
{
"epoch": 1.2014652014652014,
"grad_norm": 0.46581918001174927,
"learning_rate": 2.1430919935383112e-05,
"loss": 0.8106693029403687,
"step": 656
},
{
"epoch": 1.205128205128205,
"grad_norm": 0.7874848246574402,
"learning_rate": 2.138115701918312e-05,
"loss": 0.7381888628005981,
"step": 658
},
{
"epoch": 1.2087912087912087,
"grad_norm": 0.13786958158016205,
"learning_rate": 2.1331317687572286e-05,
"loss": 1.1397631168365479,
"step": 660
},
{
"epoch": 1.2124542124542124,
"grad_norm": 0.4511331021785736,
"learning_rate": 2.1281402720794512e-05,
"loss": 0.9875015616416931,
"step": 662
},
{
"epoch": 1.2161172161172162,
"grad_norm": 0.27370062470436096,
"learning_rate": 2.123141290027778e-05,
"loss": 0.8640010356903076,
"step": 664
},
{
"epoch": 1.2197802197802199,
"grad_norm": 0.3684799373149872,
"learning_rate": 2.1181349008621935e-05,
"loss": 0.7830217480659485,
"step": 666
},
{
"epoch": 1.2234432234432235,
"grad_norm": 0.2720448672771454,
"learning_rate": 2.1131211829586398e-05,
"loss": 0.8655436635017395,
"step": 668
},
{
"epoch": 1.2271062271062272,
"grad_norm": 0.11868753284215927,
"learning_rate": 2.1081002148077926e-05,
"loss": 0.6462550163269043,
"step": 670
},
{
"epoch": 1.2307692307692308,
"grad_norm": 0.2972869575023651,
"learning_rate": 2.1030720750138324e-05,
"loss": 1.1838464736938477,
"step": 672
},
{
"epoch": 1.2344322344322345,
"grad_norm": 0.37673911452293396,
"learning_rate": 2.0980368422932118e-05,
"loss": 1.1313834190368652,
"step": 674
},
{
"epoch": 1.2380952380952381,
"grad_norm": 0.18950484693050385,
"learning_rate": 2.092994595473426e-05,
"loss": 1.1562325954437256,
"step": 676
},
{
"epoch": 1.2417582417582418,
"grad_norm": 0.17918187379837036,
"learning_rate": 2.0879454134917752e-05,
"loss": 0.7566133141517639,
"step": 678
},
{
"epoch": 1.2454212454212454,
"grad_norm": 0.1301691085100174,
"learning_rate": 2.0828893753941327e-05,
"loss": 0.8113003373146057,
"step": 680
},
{
"epoch": 1.249084249084249,
"grad_norm": 0.3466956913471222,
"learning_rate": 2.0778265603337043e-05,
"loss": 1.162855625152588,
"step": 682
},
{
"epoch": 1.2527472527472527,
"grad_norm": 0.21489053964614868,
"learning_rate": 2.0727570475697917e-05,
"loss": 1.1384897232055664,
"step": 684
},
{
"epoch": 1.2564102564102564,
"grad_norm": 0.11646708101034164,
"learning_rate": 2.0676809164665485e-05,
"loss": 0.7990652322769165,
"step": 686
},
{
"epoch": 1.26007326007326,
"grad_norm": 0.18446595966815948,
"learning_rate": 2.0625982464917414e-05,
"loss": 1.0909119844436646,
"step": 688
},
{
"epoch": 1.2637362637362637,
"grad_norm": 0.3680472671985626,
"learning_rate": 2.0575091172155033e-05,
"loss": 1.1042051315307617,
"step": 690
},
{
"epoch": 1.2673992673992673,
"grad_norm": 0.13724733889102936,
"learning_rate": 2.052413608309089e-05,
"loss": 1.1563645601272583,
"step": 692
},
{
"epoch": 1.271062271062271,
"grad_norm": 0.33673760294914246,
"learning_rate": 2.0473117995436273e-05,
"loss": 0.9639385938644409,
"step": 694
},
{
"epoch": 1.2747252747252746,
"grad_norm": 0.31943702697753906,
"learning_rate": 2.0422037707888737e-05,
"loss": 0.9691139459609985,
"step": 696
},
{
"epoch": 1.2783882783882783,
"grad_norm": 0.20235387980937958,
"learning_rate": 2.0370896020119568e-05,
"loss": 0.8855386972427368,
"step": 698
},
{
"epoch": 1.282051282051282,
"grad_norm": 0.06310480833053589,
"learning_rate": 2.0319693732761296e-05,
"loss": 0.6270412802696228,
"step": 700
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.25082582235336304,
"learning_rate": 2.026843164739515e-05,
"loss": 0.5018911957740784,
"step": 702
},
{
"epoch": 1.2893772893772895,
"grad_norm": 0.39865395426750183,
"learning_rate": 2.0217110566538502e-05,
"loss": 1.236328363418579,
"step": 704
},
{
"epoch": 1.293040293040293,
"grad_norm": 0.40237605571746826,
"learning_rate": 2.016573129363231e-05,
"loss": 0.9728720784187317,
"step": 706
},
{
"epoch": 1.2967032967032968,
"grad_norm": 0.10428406298160553,
"learning_rate": 2.011429463302854e-05,
"loss": 0.9300292134284973,
"step": 708
},
{
"epoch": 1.3003663003663004,
"grad_norm": 0.3154103755950928,
"learning_rate": 2.0062801389977577e-05,
"loss": 0.5415329933166504,
"step": 710
},
{
"epoch": 1.304029304029304,
"grad_norm": 0.4799436032772064,
"learning_rate": 2.001125237061561e-05,
"loss": 0.8621584177017212,
"step": 712
},
{
"epoch": 1.3076923076923077,
"grad_norm": 0.21443766355514526,
"learning_rate": 1.9959648381952014e-05,
"loss": 1.1734957695007324,
"step": 714
},
{
"epoch": 1.3113553113553114,
"grad_norm": 0.5315412282943726,
"learning_rate": 1.9907990231856725e-05,
"loss": 1.1061367988586426,
"step": 716
},
{
"epoch": 1.315018315018315,
"grad_norm": 0.2280256748199463,
"learning_rate": 1.9856278729047588e-05,
"loss": 0.8948004245758057,
"step": 718
},
{
"epoch": 1.3186813186813187,
"grad_norm": 0.23351888358592987,
"learning_rate": 1.980451468307768e-05,
"loss": 0.9001222252845764,
"step": 720
},
{
"epoch": 1.3223443223443223,
"grad_norm": 0.26878610253334045,
"learning_rate": 1.975269890432267e-05,
"loss": 0.8802570104598999,
"step": 722
},
{
"epoch": 1.326007326007326,
"grad_norm": 0.709324300289154,
"learning_rate": 1.9700832203968095e-05,
"loss": 1.1894018650054932,
"step": 724
},
{
"epoch": 1.3296703296703296,
"grad_norm": 0.11914347857236862,
"learning_rate": 1.96489153939967e-05,
"loss": 1.0380804538726807,
"step": 726
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.2910784184932709,
"learning_rate": 1.9596949287175685e-05,
"loss": 1.1207884550094604,
"step": 728
},
{
"epoch": 1.3369963369963371,
"grad_norm": 0.20850665867328644,
"learning_rate": 1.9544934697044008e-05,
"loss": 0.71140056848526,
"step": 730
},
{
"epoch": 1.3406593406593408,
"grad_norm": 0.2311200648546219,
"learning_rate": 1.9492872437899646e-05,
"loss": 0.8012031316757202,
"step": 732
},
{
"epoch": 1.3443223443223444,
"grad_norm": 0.04407264664769173,
"learning_rate": 1.9440763324786843e-05,
"loss": 0.5958037972450256,
"step": 734
},
{
"epoch": 1.347985347985348,
"grad_norm": 0.22078517079353333,
"learning_rate": 1.9388608173483347e-05,
"loss": 1.0953325033187866,
"step": 736
},
{
"epoch": 1.3516483516483517,
"grad_norm": 0.1599600911140442,
"learning_rate": 1.9336407800487642e-05,
"loss": 0.737002432346344,
"step": 738
},
{
"epoch": 1.3553113553113554,
"grad_norm": 0.25335875153541565,
"learning_rate": 1.9284163023006173e-05,
"loss": 0.9944988489151001,
"step": 740
},
{
"epoch": 1.358974358974359,
"grad_norm": 0.2588794529438019,
"learning_rate": 1.923187465894053e-05,
"loss": 0.5732303261756897,
"step": 742
},
{
"epoch": 1.3626373626373627,
"grad_norm": 0.3721718490123749,
"learning_rate": 1.917954352687468e-05,
"loss": 1.2119386196136475,
"step": 744
},
{
"epoch": 1.3663003663003663,
"grad_norm": 0.11127312481403351,
"learning_rate": 1.9127170446062105e-05,
"loss": 1.0228148698806763,
"step": 746
},
{
"epoch": 1.36996336996337,
"grad_norm": 0.2088261842727661,
"learning_rate": 1.907475623641304e-05,
"loss": 1.0425444841384888,
"step": 748
},
{
"epoch": 1.3736263736263736,
"grad_norm": 0.35739022493362427,
"learning_rate": 1.9022301718481554e-05,
"loss": 0.7612188458442688,
"step": 750
},
{
"epoch": 1.3772893772893773,
"grad_norm": 0.24094922840595245,
"learning_rate": 1.8969807713452784e-05,
"loss": 1.0989696979522705,
"step": 752
},
{
"epoch": 1.380952380952381,
"grad_norm": 0.07760689407587051,
"learning_rate": 1.8917275043130034e-05,
"loss": 0.9655136466026306,
"step": 754
},
{
"epoch": 1.3846153846153846,
"grad_norm": 0.17032425105571747,
"learning_rate": 1.886470452992191e-05,
"loss": 1.2732794284820557,
"step": 756
},
{
"epoch": 1.3882783882783882,
"grad_norm": 0.1317293494939804,
"learning_rate": 1.8812096996829475e-05,
"loss": 1.1814535856246948,
"step": 758
},
{
"epoch": 1.3919413919413919,
"grad_norm": 0.13031212985515594,
"learning_rate": 1.875945326743333e-05,
"loss": 1.1256139278411865,
"step": 760
},
{
"epoch": 1.3956043956043955,
"grad_norm": 0.12500829994678497,
"learning_rate": 1.8706774165880748e-05,
"loss": 1.1223680973052979,
"step": 762
},
{
"epoch": 1.3992673992673992,
"grad_norm": 0.2847461700439453,
"learning_rate": 1.8654060516872734e-05,
"loss": 0.6682040691375732,
"step": 764
},
{
"epoch": 1.4029304029304028,
"grad_norm": 0.2852585017681122,
"learning_rate": 1.8601313145651178e-05,
"loss": 0.8641396164894104,
"step": 766
},
{
"epoch": 1.4065934065934065,
"grad_norm": 0.23734663426876068,
"learning_rate": 1.8548532877985863e-05,
"loss": 0.8661168813705444,
"step": 768
},
{
"epoch": 1.4102564102564101,
"grad_norm": 0.2618182599544525,
"learning_rate": 1.8495720540161592e-05,
"loss": 1.15311861038208,
"step": 770
},
{
"epoch": 1.4139194139194138,
"grad_norm": 0.16087937355041504,
"learning_rate": 1.8442876958965228e-05,
"loss": 1.1941028833389282,
"step": 772
},
{
"epoch": 1.4175824175824177,
"grad_norm": 0.3550221920013428,
"learning_rate": 1.8390002961672755e-05,
"loss": 0.9932708144187927,
"step": 774
},
{
"epoch": 1.4212454212454213,
"grad_norm": 0.22254636883735657,
"learning_rate": 1.8337099376036308e-05,
"loss": 1.1301343441009521,
"step": 776
},
{
"epoch": 1.424908424908425,
"grad_norm": 0.20522965490818024,
"learning_rate": 1.828416703027128e-05,
"loss": 0.8269311785697937,
"step": 778
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.15380977094173431,
"learning_rate": 1.8231206753043253e-05,
"loss": 1.3385837078094482,
"step": 780
},
{
"epoch": 1.4322344322344323,
"grad_norm": 0.4117550849914551,
"learning_rate": 1.8178219373455116e-05,
"loss": 1.2008609771728516,
"step": 782
},
{
"epoch": 1.435897435897436,
"grad_norm": 0.10867168009281158,
"learning_rate": 1.8125205721034043e-05,
"loss": 1.137458086013794,
"step": 784
},
{
"epoch": 1.4395604395604396,
"grad_norm": 0.6372032761573792,
"learning_rate": 1.8072166625718512e-05,
"loss": 0.9664891958236694,
"step": 786
},
{
"epoch": 1.4432234432234432,
"grad_norm": 0.3592172861099243,
"learning_rate": 1.8019102917845315e-05,
"loss": 1.104205846786499,
"step": 788
},
{
"epoch": 1.4468864468864469,
"grad_norm": 0.04871686175465584,
"learning_rate": 1.7966015428136552e-05,
"loss": 0.69446861743927,
"step": 790
},
{
"epoch": 1.4505494505494505,
"grad_norm": 0.4692245125770569,
"learning_rate": 1.791290498768665e-05,
"loss": 0.8986777663230896,
"step": 792
},
{
"epoch": 1.4542124542124542,
"grad_norm": 0.1752997487783432,
"learning_rate": 1.785977242794931e-05,
"loss": 0.784938633441925,
"step": 794
},
{
"epoch": 1.4578754578754578,
"grad_norm": 0.3675242066383362,
"learning_rate": 1.7806618580724534e-05,
"loss": 1.1240798234939575,
"step": 796
},
{
"epoch": 1.4615384615384617,
"grad_norm": 0.22384218871593475,
"learning_rate": 1.775344427814557e-05,
"loss": 0.7758211493492126,
"step": 798
},
{
"epoch": 1.4652014652014653,
"grad_norm": 0.15117856860160828,
"learning_rate": 1.770025035266591e-05,
"loss": 0.9979571104049683,
"step": 800
},
{
"epoch": 1.468864468864469,
"grad_norm": 0.39761456847190857,
"learning_rate": 1.7647037637046236e-05,
"loss": 1.5280622243881226,
"step": 802
},
{
"epoch": 1.4725274725274726,
"grad_norm": 1.469293236732483,
"learning_rate": 1.7593806964341397e-05,
"loss": 0.99940025806427,
"step": 804
},
{
"epoch": 1.4761904761904763,
"grad_norm": 0.1963527500629425,
"learning_rate": 1.7540559167887365e-05,
"loss": 1.171212077140808,
"step": 806
},
{
"epoch": 1.47985347985348,
"grad_norm": 0.8027493953704834,
"learning_rate": 1.748729508128819e-05,
"loss": 1.1501351594924927,
"step": 808
},
{
"epoch": 1.4835164835164836,
"grad_norm": 0.6224597692489624,
"learning_rate": 1.7434015538402948e-05,
"loss": 0.9734046459197998,
"step": 810
},
{
"epoch": 1.4871794871794872,
"grad_norm": 0.16662199795246124,
"learning_rate": 1.7380721373332664e-05,
"loss": 0.8137657642364502,
"step": 812
},
{
"epoch": 1.4908424908424909,
"grad_norm": 0.17651745676994324,
"learning_rate": 1.7327413420407312e-05,
"loss": 1.1850553750991821,
"step": 814
},
{
"epoch": 1.4945054945054945,
"grad_norm": 0.23540274798870087,
"learning_rate": 1.7274092514172685e-05,
"loss": 1.1188106536865234,
"step": 816
},
{
"epoch": 1.4981684981684982,
"grad_norm": 0.18694835901260376,
"learning_rate": 1.7220759489377392e-05,
"loss": 0.3806593418121338,
"step": 818
},
{
"epoch": 1.5018315018315018,
"grad_norm": 0.20619821548461914,
"learning_rate": 1.716741518095973e-05,
"loss": 1.1294645071029663,
"step": 820
},
{
"epoch": 1.5054945054945055,
"grad_norm": 0.17695501446723938,
"learning_rate": 1.7114060424034668e-05,
"loss": 0.8774275183677673,
"step": 822
},
{
"epoch": 1.5091575091575091,
"grad_norm": 0.24438972771167755,
"learning_rate": 1.7060696053880728e-05,
"loss": 0.8076912760734558,
"step": 824
},
{
"epoch": 1.5128205128205128,
"grad_norm": 0.2400708794593811,
"learning_rate": 1.700732290592695e-05,
"loss": 1.0248783826828003,
"step": 826
},
{
"epoch": 1.5164835164835164,
"grad_norm": 3.1497390270233154,
"learning_rate": 1.6953941815739775e-05,
"loss": 0.9810393452644348,
"step": 828
},
{
"epoch": 1.52014652014652,
"grad_norm": 0.2937082052230835,
"learning_rate": 1.6900553619009987e-05,
"loss": 0.978177547454834,
"step": 830
},
{
"epoch": 1.5238095238095237,
"grad_norm": 0.1443583220243454,
"learning_rate": 1.684715915153963e-05,
"loss": 1.2256354093551636,
"step": 832
},
{
"epoch": 1.5274725274725274,
"grad_norm": 0.16623452305793762,
"learning_rate": 1.6793759249228907e-05,
"loss": 0.796883225440979,
"step": 834
},
{
"epoch": 1.531135531135531,
"grad_norm": 0.19156572222709656,
"learning_rate": 1.6740354748063115e-05,
"loss": 1.1286262273788452,
"step": 836
},
{
"epoch": 1.5347985347985347,
"grad_norm": 1.8285589218139648,
"learning_rate": 1.6686946484099533e-05,
"loss": 1.1747335195541382,
"step": 838
},
{
"epoch": 1.5384615384615383,
"grad_norm": 0.38057994842529297,
"learning_rate": 1.6633535293454363e-05,
"loss": 1.0158257484436035,
"step": 840
},
{
"epoch": 1.542124542124542,
"grad_norm": 0.16089002788066864,
"learning_rate": 1.6580122012289612e-05,
"loss": 0.8429889678955078,
"step": 842
},
{
"epoch": 1.5457875457875456,
"grad_norm": 0.2632414400577545,
"learning_rate": 1.6526707476800024e-05,
"loss": 1.0157314538955688,
"step": 844
},
{
"epoch": 1.5494505494505495,
"grad_norm": 0.2916921079158783,
"learning_rate": 1.6473292523199978e-05,
"loss": 1.165480136871338,
"step": 846
},
{
"epoch": 1.5531135531135531,
"grad_norm": 0.1992214024066925,
"learning_rate": 1.6419877987710394e-05,
"loss": 0.5141555070877075,
"step": 848
},
{
"epoch": 1.5567765567765568,
"grad_norm": 0.30533066391944885,
"learning_rate": 1.636646470654564e-05,
"loss": 1.016823172569275,
"step": 850
},
{
"epoch": 1.5604395604395604,
"grad_norm": 0.2514702081680298,
"learning_rate": 1.6313053515900473e-05,
"loss": 1.1563243865966797,
"step": 852
},
{
"epoch": 1.564102564102564,
"grad_norm": 0.3340625464916229,
"learning_rate": 1.625964525193689e-05,
"loss": 0.8487980961799622,
"step": 854
},
{
"epoch": 1.5677655677655677,
"grad_norm": 1.5476115942001343,
"learning_rate": 1.6206240750771092e-05,
"loss": 0.787565290927887,
"step": 856
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.7004534006118774,
"learning_rate": 1.6152840848460376e-05,
"loss": 0.9129424095153809,
"step": 858
},
{
"epoch": 1.575091575091575,
"grad_norm": 0.289341539144516,
"learning_rate": 1.6099446380990015e-05,
"loss": 0.7192896604537964,
"step": 860
},
{
"epoch": 1.578754578754579,
"grad_norm": 0.18380023539066315,
"learning_rate": 1.604605818426023e-05,
"loss": 1.139159917831421,
"step": 862
},
{
"epoch": 1.5824175824175826,
"grad_norm": 0.30538052320480347,
"learning_rate": 1.5992677094073055e-05,
"loss": 0.8859811425209045,
"step": 864
},
{
"epoch": 1.5860805860805862,
"grad_norm": 0.19615601003170013,
"learning_rate": 1.5939303946119275e-05,
"loss": 1.1617034673690796,
"step": 866
},
{
"epoch": 1.5897435897435899,
"grad_norm": 0.18944290280342102,
"learning_rate": 1.588593957596534e-05,
"loss": 1.1625550985336304,
"step": 868
},
{
"epoch": 1.5934065934065935,
"grad_norm": 0.4998733401298523,
"learning_rate": 1.5832584819040275e-05,
"loss": 0.5713272094726562,
"step": 870
},
{
"epoch": 1.5970695970695972,
"grad_norm": 0.17538763582706451,
"learning_rate": 1.577924051062261e-05,
"loss": 1.1596871614456177,
"step": 872
},
{
"epoch": 1.6007326007326008,
"grad_norm": 0.3706125020980835,
"learning_rate": 1.5725907485827318e-05,
"loss": 1.043558120727539,
"step": 874
},
{
"epoch": 1.6043956043956045,
"grad_norm": 1.1928222179412842,
"learning_rate": 1.567258657959269e-05,
"loss": 0.7278342843055725,
"step": 876
},
{
"epoch": 1.6080586080586081,
"grad_norm": 0.3776850402355194,
"learning_rate": 1.5619278626667336e-05,
"loss": 0.9686797261238098,
"step": 878
},
{
"epoch": 1.6117216117216118,
"grad_norm": 0.2775964140892029,
"learning_rate": 1.556598446159706e-05,
"loss": 0.8903915882110596,
"step": 880
},
{
"epoch": 1.6153846153846154,
"grad_norm": 0.1804128885269165,
"learning_rate": 1.5512704918711812e-05,
"loss": 1.1147016286849976,
"step": 882
},
{
"epoch": 1.619047619047619,
"grad_norm": 0.5397176146507263,
"learning_rate": 1.5459440832112634e-05,
"loss": 0.9109297394752502,
"step": 884
},
{
"epoch": 1.6227106227106227,
"grad_norm": 0.16095860302448273,
"learning_rate": 1.5406193035658606e-05,
"loss": 1.1250665187835693,
"step": 886
},
{
"epoch": 1.6263736263736264,
"grad_norm": 0.10226105153560638,
"learning_rate": 1.535296236295377e-05,
"loss": 0.828086256980896,
"step": 888
},
{
"epoch": 1.63003663003663,
"grad_norm": 0.16585305333137512,
"learning_rate": 1.5299749647334097e-05,
"loss": 1.188000202178955,
"step": 890
},
{
"epoch": 1.6336996336996337,
"grad_norm": 0.2893577516078949,
"learning_rate": 1.5246555721854436e-05,
"loss": 0.8904086947441101,
"step": 892
},
{
"epoch": 1.6373626373626373,
"grad_norm": 0.23474594950675964,
"learning_rate": 1.519338141927547e-05,
"loss": 1.1712456941604614,
"step": 894
},
{
"epoch": 1.641025641025641,
"grad_norm": 0.24309809505939484,
"learning_rate": 1.5140227572050696e-05,
"loss": 0.728343665599823,
"step": 896
},
{
"epoch": 1.6446886446886446,
"grad_norm": 0.21496303379535675,
"learning_rate": 1.5087095012313355e-05,
"loss": 1.1218795776367188,
"step": 898
},
{
"epoch": 1.6483516483516483,
"grad_norm": 0.07185659557580948,
"learning_rate": 1.5033984571863445e-05,
"loss": 0.8113787174224854,
"step": 900
},
{
"epoch": 1.652014652014652,
"grad_norm": 0.3587903678417206,
"learning_rate": 1.498089708215469e-05,
"loss": 0.8660993576049805,
"step": 902
},
{
"epoch": 1.6556776556776556,
"grad_norm": 4.315833568572998,
"learning_rate": 1.4927833374281493e-05,
"loss": 1.0015366077423096,
"step": 904
},
{
"epoch": 1.6593406593406592,
"grad_norm": 0.2749607563018799,
"learning_rate": 1.4874794278965956e-05,
"loss": 1.1231218576431274,
"step": 906
},
{
"epoch": 1.6630036630036629,
"grad_norm": 0.1386031061410904,
"learning_rate": 1.4821780626544885e-05,
"loss": 0.7412285208702087,
"step": 908
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.7245529294013977,
"learning_rate": 1.476879324695675e-05,
"loss": 1.188240647315979,
"step": 910
},
{
"epoch": 1.6703296703296702,
"grad_norm": 0.14181075990200043,
"learning_rate": 1.4715832969728727e-05,
"loss": 1.2031238079071045,
"step": 912
},
{
"epoch": 1.673992673992674,
"grad_norm": 0.18646664917469025,
"learning_rate": 1.4662900623963691e-05,
"loss": 1.082230567932129,
"step": 914
},
{
"epoch": 1.6776556776556777,
"grad_norm": 0.22001439332962036,
"learning_rate": 1.4609997038327249e-05,
"loss": 1.1923408508300781,
"step": 916
},
{
"epoch": 1.6813186813186813,
"grad_norm": 0.3903493583202362,
"learning_rate": 1.4557123041034773e-05,
"loss": 0.8200342059135437,
"step": 918
},
{
"epoch": 1.684981684981685,
"grad_norm": 0.25271129608154297,
"learning_rate": 1.4504279459838412e-05,
"loss": 1.199188470840454,
"step": 920
},
{
"epoch": 1.6886446886446886,
"grad_norm": 0.24899667501449585,
"learning_rate": 1.4451467122014144e-05,
"loss": 0.9969640374183655,
"step": 922
},
{
"epoch": 1.6923076923076923,
"grad_norm": 0.13163816928863525,
"learning_rate": 1.439868685434883e-05,
"loss": 0.8267517685890198,
"step": 924
},
{
"epoch": 1.695970695970696,
"grad_norm": 0.7692777514457703,
"learning_rate": 1.4345939483127269e-05,
"loss": 0.9600090980529785,
"step": 926
},
{
"epoch": 1.6996336996336996,
"grad_norm": 0.3304704427719116,
"learning_rate": 1.4293225834119256e-05,
"loss": 0.7892555594444275,
"step": 928
},
{
"epoch": 1.7032967032967035,
"grad_norm": 0.03577682748436928,
"learning_rate": 1.4240546732566674e-05,
"loss": 0.8601844310760498,
"step": 930
},
{
"epoch": 1.7069597069597071,
"grad_norm": 1.6921963691711426,
"learning_rate": 1.4187903003170524e-05,
"loss": 0.9109703302383423,
"step": 932
},
{
"epoch": 1.7106227106227108,
"grad_norm": 0.24740178883075714,
"learning_rate": 1.413529547007809e-05,
"loss": 1.3766350746154785,
"step": 934
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.20183052122592926,
"learning_rate": 1.4082724956869973e-05,
"loss": 1.1772444248199463,
"step": 936
},
{
"epoch": 1.717948717948718,
"grad_norm": 0.31784018874168396,
"learning_rate": 1.4030192286547219e-05,
"loss": 0.4614800810813904,
"step": 938
},
{
"epoch": 1.7216117216117217,
"grad_norm": 0.10893711447715759,
"learning_rate": 1.3977698281518447e-05,
"loss": 0.49052393436431885,
"step": 940
},
{
"epoch": 1.7252747252747254,
"grad_norm": 0.27021560072898865,
"learning_rate": 1.3925243763586967e-05,
"loss": 1.111413598060608,
"step": 942
},
{
"epoch": 1.728937728937729,
"grad_norm": 0.21173328161239624,
"learning_rate": 1.3872829553937894e-05,
"loss": 0.9133450388908386,
"step": 944
},
{
"epoch": 1.7326007326007327,
"grad_norm": 0.24963396787643433,
"learning_rate": 1.3820456473125325e-05,
"loss": 1.1244279146194458,
"step": 946
},
{
"epoch": 1.7362637362637363,
"grad_norm": 0.1540175825357437,
"learning_rate": 1.3768125341059474e-05,
"loss": 1.0782681703567505,
"step": 948
},
{
"epoch": 1.73992673992674,
"grad_norm": 0.21711324155330658,
"learning_rate": 1.3715836976993831e-05,
"loss": 0.978225827217102,
"step": 950
},
{
"epoch": 1.7435897435897436,
"grad_norm": 0.21359801292419434,
"learning_rate": 1.3663592199512362e-05,
"loss": 0.9147098660469055,
"step": 952
},
{
"epoch": 1.7472527472527473,
"grad_norm": 0.22067268192768097,
"learning_rate": 1.3611391826516656e-05,
"loss": 1.1857064962387085,
"step": 954
},
{
"epoch": 1.750915750915751,
"grad_norm": 0.16340802609920502,
"learning_rate": 1.355923667521316e-05,
"loss": 0.7591355443000793,
"step": 956
},
{
"epoch": 1.7545787545787546,
"grad_norm": 0.2300925850868225,
"learning_rate": 1.3507127562100358e-05,
"loss": 1.1689962148666382,
"step": 958
},
{
"epoch": 1.7582417582417582,
"grad_norm": 0.12035728991031647,
"learning_rate": 1.3455065302955996e-05,
"loss": 0.5161279439926147,
"step": 960
},
{
"epoch": 1.7619047619047619,
"grad_norm": 0.2525497078895569,
"learning_rate": 1.340305071282432e-05,
"loss": 0.962215781211853,
"step": 962
},
{
"epoch": 1.7655677655677655,
"grad_norm": 0.23998844623565674,
"learning_rate": 1.3351084606003303e-05,
"loss": 0.9956289529800415,
"step": 964
},
{
"epoch": 1.7692307692307692,
"grad_norm": 0.23394513130187988,
"learning_rate": 1.3299167796031904e-05,
"loss": 1.1649928092956543,
"step": 966
},
{
"epoch": 1.7728937728937728,
"grad_norm": 0.18529178202152252,
"learning_rate": 1.3247301095677334e-05,
"loss": 0.5570883750915527,
"step": 968
},
{
"epoch": 1.7765567765567765,
"grad_norm": 0.44766953587532043,
"learning_rate": 1.3195485316922322e-05,
"loss": 0.796292781829834,
"step": 970
},
{
"epoch": 1.7802197802197801,
"grad_norm": 0.5177598595619202,
"learning_rate": 1.3143721270952416e-05,
"loss": 0.9089959859848022,
"step": 972
},
{
"epoch": 1.7838827838827838,
"grad_norm": 0.5847911238670349,
"learning_rate": 1.3092009768143276e-05,
"loss": 1.1963040828704834,
"step": 974
},
{
"epoch": 1.7875457875457874,
"grad_norm": 0.17191220819950104,
"learning_rate": 1.3040351618047987e-05,
"loss": 1.1532613039016724,
"step": 976
},
{
"epoch": 1.791208791208791,
"grad_norm": 0.1479983627796173,
"learning_rate": 1.2988747629384393e-05,
"loss": 0.8899999856948853,
"step": 978
},
{
"epoch": 1.7948717948717947,
"grad_norm": 0.1665453463792801,
"learning_rate": 1.2937198610022422e-05,
"loss": 0.9276444315910339,
"step": 980
},
{
"epoch": 1.7985347985347986,
"grad_norm": 0.2839411199092865,
"learning_rate": 1.2885705366971466e-05,
"loss": 1.1222878694534302,
"step": 982
},
{
"epoch": 1.8021978021978022,
"grad_norm": 0.29029586911201477,
"learning_rate": 1.2834268706367693e-05,
"loss": 1.0647618770599365,
"step": 984
},
{
"epoch": 1.8058608058608059,
"grad_norm": 0.14540861546993256,
"learning_rate": 1.2782889433461504e-05,
"loss": 0.6247038841247559,
"step": 986
},
{
"epoch": 1.8095238095238095,
"grad_norm": 0.27945929765701294,
"learning_rate": 1.273156835260485e-05,
"loss": 0.7977758646011353,
"step": 988
},
{
"epoch": 1.8131868131868132,
"grad_norm": 0.6269707679748535,
"learning_rate": 1.2680306267238703e-05,
"loss": 0.7228586077690125,
"step": 990
},
{
"epoch": 1.8168498168498168,
"grad_norm": 1.3407803773880005,
"learning_rate": 1.2629103979880435e-05,
"loss": 0.7684195041656494,
"step": 992
},
{
"epoch": 1.8205128205128205,
"grad_norm": 0.1937461942434311,
"learning_rate": 1.2577962292111268e-05,
"loss": 0.8265116214752197,
"step": 994
},
{
"epoch": 1.8241758241758241,
"grad_norm": 0.3656844198703766,
"learning_rate": 1.2526882004563725e-05,
"loss": 1.1013299226760864,
"step": 996
},
{
"epoch": 1.8278388278388278,
"grad_norm": 0.18260568380355835,
"learning_rate": 1.2475863916909116e-05,
"loss": 0.8281925320625305,
"step": 998
},
{
"epoch": 1.8315018315018317,
"grad_norm": 0.32141250371932983,
"learning_rate": 1.2424908827844971e-05,
"loss": 1.1040654182434082,
"step": 1000
},
{
"epoch": 1.8351648351648353,
"grad_norm": 0.22911974787712097,
"learning_rate": 1.2374017535082588e-05,
"loss": 1.1246166229248047,
"step": 1002
},
{
"epoch": 1.838827838827839,
"grad_norm": 0.29980891942977905,
"learning_rate": 1.232319083533452e-05,
"loss": 1.1219016313552856,
"step": 1004
},
{
"epoch": 1.8424908424908426,
"grad_norm": 0.22413678467273712,
"learning_rate": 1.2272429524302087e-05,
"loss": 1.1236064434051514,
"step": 1006
},
{
"epoch": 1.8461538461538463,
"grad_norm": 0.5109539031982422,
"learning_rate": 1.2221734396662956e-05,
"loss": 0.7749718427658081,
"step": 1008
},
{
"epoch": 1.84981684981685,
"grad_norm": 0.20865501463413239,
"learning_rate": 1.2171106246058676e-05,
"loss": 1.0838996171951294,
"step": 1010
},
{
"epoch": 1.8534798534798536,
"grad_norm": 0.19586823880672455,
"learning_rate": 1.212054586508225e-05,
"loss": 0.750687837600708,
"step": 1012
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.15939399600028992,
"learning_rate": 1.2070054045265746e-05,
"loss": 1.1420842409133911,
"step": 1014
},
{
"epoch": 1.8608058608058609,
"grad_norm": 0.12456727027893066,
"learning_rate": 1.2019631577067883e-05,
"loss": 1.129049301147461,
"step": 1016
},
{
"epoch": 1.8644688644688645,
"grad_norm": 0.17567472159862518,
"learning_rate": 1.1969279249861678e-05,
"loss": 1.206289529800415,
"step": 1018
},
{
"epoch": 1.8681318681318682,
"grad_norm": 0.03003503940999508,
"learning_rate": 1.1918997851922078e-05,
"loss": 0.9472045302391052,
"step": 1020
},
{
"epoch": 1.8717948717948718,
"grad_norm": 0.18334512412548065,
"learning_rate": 1.1868788170413608e-05,
"loss": 1.0975109338760376,
"step": 1022
},
{
"epoch": 1.8754578754578755,
"grad_norm": 0.4730507731437683,
"learning_rate": 1.1818650991378069e-05,
"loss": 0.527807354927063,
"step": 1024
},
{
"epoch": 1.879120879120879,
"grad_norm": 0.156352698802948,
"learning_rate": 1.1768587099722221e-05,
"loss": 1.138107180595398,
"step": 1026
},
{
"epoch": 1.8827838827838828,
"grad_norm": 0.16626542806625366,
"learning_rate": 1.171859727920549e-05,
"loss": 0.9024947881698608,
"step": 1028
},
{
"epoch": 1.8864468864468864,
"grad_norm": 0.3189626634120941,
"learning_rate": 1.1668682312427716e-05,
"loss": 1.142695426940918,
"step": 1030
},
{
"epoch": 1.89010989010989,
"grad_norm": 0.16862735152244568,
"learning_rate": 1.1618842980816885e-05,
"loss": 0.9241726398468018,
"step": 1032
},
{
"epoch": 1.8937728937728937,
"grad_norm": 0.15986524522304535,
"learning_rate": 1.1569080064616892e-05,
"loss": 0.7822670936584473,
"step": 1034
},
{
"epoch": 1.8974358974358974,
"grad_norm": 0.4596797823905945,
"learning_rate": 1.1519394342875344e-05,
"loss": 1.1735057830810547,
"step": 1036
},
{
"epoch": 1.901098901098901,
"grad_norm": 0.15587423741817474,
"learning_rate": 1.1469786593431362e-05,
"loss": 1.1288270950317383,
"step": 1038
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.2706407904624939,
"learning_rate": 1.1420257592903375e-05,
"loss": 1.1971887350082397,
"step": 1040
},
{
"epoch": 1.9084249084249083,
"grad_norm": 0.3452918231487274,
"learning_rate": 1.1370808116677003e-05,
"loss": 0.5932814478874207,
"step": 1042
},
{
"epoch": 1.912087912087912,
"grad_norm": 0.1644463688135147,
"learning_rate": 1.1321438938892891e-05,
"loss": 0.8462899923324585,
"step": 1044
},
{
"epoch": 1.9157509157509156,
"grad_norm": 0.2494816929101944,
"learning_rate": 1.127215083243459e-05,
"loss": 1.0951734781265259,
"step": 1046
},
{
"epoch": 1.9194139194139193,
"grad_norm": 0.2647112309932709,
"learning_rate": 1.1222944568916477e-05,
"loss": 0.42638513445854187,
"step": 1048
},
{
"epoch": 1.9230769230769231,
"grad_norm": 0.36372193694114685,
"learning_rate": 1.1173820918671653e-05,
"loss": 0.8431220054626465,
"step": 1050
},
{
"epoch": 1.9267399267399268,
"grad_norm": 0.16645146906375885,
"learning_rate": 1.1124780650739898e-05,
"loss": 0.8001301288604736,
"step": 1052
},
{
"epoch": 1.9304029304029304,
"grad_norm": 0.17511388659477234,
"learning_rate": 1.1075824532855632e-05,
"loss": 1.1067595481872559,
"step": 1054
},
{
"epoch": 1.934065934065934,
"grad_norm": 0.2150619924068451,
"learning_rate": 1.1026953331435875e-05,
"loss": 0.7283728122711182,
"step": 1056
},
{
"epoch": 1.9377289377289377,
"grad_norm": 0.2049059122800827,
"learning_rate": 1.0978167811568275e-05,
"loss": 0.9654535055160522,
"step": 1058
},
{
"epoch": 1.9413919413919414,
"grad_norm": 0.22650328278541565,
"learning_rate": 1.092946873699913e-05,
"loss": 1.053628921508789,
"step": 1060
},
{
"epoch": 1.945054945054945,
"grad_norm": 0.23162858188152313,
"learning_rate": 1.0880856870121389e-05,
"loss": 0.8154641389846802,
"step": 1062
},
{
"epoch": 1.9487179487179487,
"grad_norm": 0.12185206264257431,
"learning_rate": 1.0832332971962779e-05,
"loss": 0.7915042638778687,
"step": 1064
},
{
"epoch": 1.9523809523809523,
"grad_norm": 0.2992037236690521,
"learning_rate": 1.0783897802173859e-05,
"loss": 1.1435668468475342,
"step": 1066
},
{
"epoch": 1.9560439560439562,
"grad_norm": 0.18293872475624084,
"learning_rate": 1.07355521190161e-05,
"loss": 1.165887713432312,
"step": 1068
},
{
"epoch": 1.9597069597069599,
"grad_norm": 0.0767071396112442,
"learning_rate": 1.0687296679350072e-05,
"loss": 0.6596081852912903,
"step": 1070
},
{
"epoch": 1.9633699633699635,
"grad_norm": 0.1839928925037384,
"learning_rate": 1.063913223862357e-05,
"loss": 0.8264914155006409,
"step": 1072
},
{
"epoch": 1.9670329670329672,
"grad_norm": 0.1517079919576645,
"learning_rate": 1.0591059550859753e-05,
"loss": 1.1326576471328735,
"step": 1074
},
{
"epoch": 1.9706959706959708,
"grad_norm": 0.18605555593967438,
"learning_rate": 1.0543079368645398e-05,
"loss": 1.1013039350509644,
"step": 1076
},
{
"epoch": 1.9743589743589745,
"grad_norm": 0.2635408043861389,
"learning_rate": 1.0495192443119076e-05,
"loss": 0.9822874069213867,
"step": 1078
},
{
"epoch": 1.978021978021978,
"grad_norm": 0.29666173458099365,
"learning_rate": 1.044739952395942e-05,
"loss": 0.9371342658996582,
"step": 1080
},
{
"epoch": 1.9816849816849818,
"grad_norm": 0.20671826601028442,
"learning_rate": 1.039970135937337e-05,
"loss": 1.0989388227462769,
"step": 1082
},
{
"epoch": 1.9853479853479854,
"grad_norm": 0.08099620789289474,
"learning_rate": 1.0352098696084461e-05,
"loss": 0.6956806778907776,
"step": 1084
},
{
"epoch": 1.989010989010989,
"grad_norm": 0.5828799605369568,
"learning_rate": 1.0304592279321138e-05,
"loss": 0.9571045637130737,
"step": 1086
},
{
"epoch": 1.9926739926739927,
"grad_norm": 0.4506250321865082,
"learning_rate": 1.02571828528051e-05,
"loss": 0.9482959508895874,
"step": 1088
},
{
"epoch": 1.9963369963369964,
"grad_norm": 0.08697620779275894,
"learning_rate": 1.0209871158739632e-05,
"loss": 0.9112911820411682,
"step": 1090
},
{
"epoch": 2.0,
"grad_norm": 0.1944212168455124,
"learning_rate": 1.0162657937798014e-05,
"loss": 1.0127665996551514,
"step": 1092
},
{
"epoch": 2.0036630036630036,
"grad_norm": 0.16653534770011902,
"learning_rate": 1.0115543929111896e-05,
"loss": 1.0326980352401733,
"step": 1094
},
{
"epoch": 2.0073260073260073,
"grad_norm": 0.2532365024089813,
"learning_rate": 1.0068529870259744e-05,
"loss": 1.1123592853546143,
"step": 1096
},
{
"epoch": 2.010989010989011,
"grad_norm": 0.44356510043144226,
"learning_rate": 1.0021616497255306e-05,
"loss": 1.0411099195480347,
"step": 1098
},
{
"epoch": 2.0146520146520146,
"grad_norm": 0.2455623596906662,
"learning_rate": 9.97480454453607e-06,
"loss": 1.1233282089233398,
"step": 1100
},
{
"epoch": 2.0183150183150182,
"grad_norm": 0.41676464676856995,
"learning_rate": 9.928094744951743e-06,
"loss": 0.8063968420028687,
"step": 1102
},
{
"epoch": 2.021978021978022,
"grad_norm": 0.17188072204589844,
"learning_rate": 9.881487829752845e-06,
"loss": 1.1151639223098755,
"step": 1104
},
{
"epoch": 2.0256410256410255,
"grad_norm": 0.12332502007484436,
"learning_rate": 9.834984528579202e-06,
"loss": 0.7464842796325684,
"step": 1106
},
{
"epoch": 2.029304029304029,
"grad_norm": 0.23213370144367218,
"learning_rate": 9.788585569448547e-06,
"loss": 1.16049325466156,
"step": 1108
},
{
"epoch": 2.032967032967033,
"grad_norm": 0.5734947919845581,
"learning_rate": 9.742291678745116e-06,
"loss": 0.8476456999778748,
"step": 1110
},
{
"epoch": 2.0366300366300365,
"grad_norm": 0.06252683699131012,
"learning_rate": 9.696103581208279e-06,
"loss": 0.5513279438018799,
"step": 1112
},
{
"epoch": 2.04029304029304,
"grad_norm": 0.2728578448295593,
"learning_rate": 9.650021999921201e-06,
"loss": 0.81049644947052,
"step": 1114
},
{
"epoch": 2.043956043956044,
"grad_norm": 0.2099541276693344,
"learning_rate": 9.604047656299518e-06,
"loss": 1.1215167045593262,
"step": 1116
},
{
"epoch": 2.0476190476190474,
"grad_norm": 0.4395456910133362,
"learning_rate": 9.558181270080027e-06,
"loss": 1.0823380947113037,
"step": 1118
},
{
"epoch": 2.051282051282051,
"grad_norm": 0.4403935968875885,
"learning_rate": 9.512423559309438e-06,
"loss": 0.782497227191925,
"step": 1120
},
{
"epoch": 2.0549450549450547,
"grad_norm": 0.3430410921573639,
"learning_rate": 9.46677524033314e-06,
"loss": 1.246758222579956,
"step": 1122
},
{
"epoch": 2.0586080586080584,
"grad_norm": 0.14065080881118774,
"learning_rate": 9.421237027783945e-06,
"loss": 1.1071714162826538,
"step": 1124
},
{
"epoch": 2.062271062271062,
"grad_norm": 0.17606811225414276,
"learning_rate": 9.37580963457096e-06,
"loss": 0.722714364528656,
"step": 1126
},
{
"epoch": 2.065934065934066,
"grad_norm": 0.49320313334465027,
"learning_rate": 9.330493771868376e-06,
"loss": 1.0717109441757202,
"step": 1128
},
{
"epoch": 2.06959706959707,
"grad_norm": 0.401737242937088,
"learning_rate": 9.285290149104353e-06,
"loss": 1.072496771812439,
"step": 1130
},
{
"epoch": 2.0732600732600734,
"grad_norm": 0.2318621277809143,
"learning_rate": 9.240199473949919e-06,
"loss": 1.1079678535461426,
"step": 1132
},
{
"epoch": 2.076923076923077,
"grad_norm": 0.16058529913425446,
"learning_rate": 9.195222452307901e-06,
"loss": 1.0885059833526611,
"step": 1134
},
{
"epoch": 2.0805860805860807,
"grad_norm": 0.24526721239089966,
"learning_rate": 9.15035978830183e-06,
"loss": 0.6723949313163757,
"step": 1136
},
{
"epoch": 2.0842490842490844,
"grad_norm": 0.10136513411998749,
"learning_rate": 9.105612184264966e-06,
"loss": 1.0849757194519043,
"step": 1138
},
{
"epoch": 2.087912087912088,
"grad_norm": 0.14613744616508484,
"learning_rate": 9.060980340729273e-06,
"loss": 0.9755294919013977,
"step": 1140
},
{
"epoch": 2.0915750915750917,
"grad_norm": 0.5334711074829102,
"learning_rate": 9.01646495641448e-06,
"loss": 1.081385850906372,
"step": 1142
},
{
"epoch": 2.0952380952380953,
"grad_norm": 0.3792720437049866,
"learning_rate": 8.972066728217119e-06,
"loss": 0.3753919303417206,
"step": 1144
},
{
"epoch": 2.098901098901099,
"grad_norm": 0.4398306608200073,
"learning_rate": 8.927786351199602e-06,
"loss": 0.8418686389923096,
"step": 1146
},
{
"epoch": 2.1025641025641026,
"grad_norm": 0.5362254977226257,
"learning_rate": 8.883624518579383e-06,
"loss": 0.7773471474647522,
"step": 1148
},
{
"epoch": 2.1062271062271063,
"grad_norm": 0.22035492956638336,
"learning_rate": 8.839581921718077e-06,
"loss": 0.7661272287368774,
"step": 1150
},
{
"epoch": 2.10989010989011,
"grad_norm": 0.21903280913829803,
"learning_rate": 8.795659250110636e-06,
"loss": 0.9389075040817261,
"step": 1152
},
{
"epoch": 2.1135531135531136,
"grad_norm": 0.2613048255443573,
"learning_rate": 8.751857191374557e-06,
"loss": 0.7085542678833008,
"step": 1154
},
{
"epoch": 2.1172161172161172,
"grad_norm": 0.21798495948314667,
"learning_rate": 8.708176431239132e-06,
"loss": 1.0334385633468628,
"step": 1156
},
{
"epoch": 2.120879120879121,
"grad_norm": 0.277040958404541,
"learning_rate": 8.664617653534689e-06,
"loss": 0.9751821160316467,
"step": 1158
},
{
"epoch": 2.1245421245421245,
"grad_norm": 0.4517989754676819,
"learning_rate": 8.62118154018191e-06,
"loss": 0.8295323848724365,
"step": 1160
},
{
"epoch": 2.128205128205128,
"grad_norm": 0.7343789935112,
"learning_rate": 8.577868771181137e-06,
"loss": 0.7427678108215332,
"step": 1162
},
{
"epoch": 2.131868131868132,
"grad_norm": 1.01250159740448,
"learning_rate": 8.534680024601725e-06,
"loss": 0.9223976731300354,
"step": 1164
},
{
"epoch": 2.1355311355311355,
"grad_norm": 0.30640891194343567,
"learning_rate": 8.491615976571454e-06,
"loss": 0.3898874521255493,
"step": 1166
},
{
"epoch": 2.139194139194139,
"grad_norm": 0.23792658746242523,
"learning_rate": 8.448677301265912e-06,
"loss": 0.3980587124824524,
"step": 1168
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.19917914271354675,
"learning_rate": 8.405864670897965e-06,
"loss": 0.8230171203613281,
"step": 1170
},
{
"epoch": 2.1465201465201464,
"grad_norm": 1.2255111932754517,
"learning_rate": 8.363178755707208e-06,
"loss": 1.0885212421417236,
"step": 1172
},
{
"epoch": 2.15018315018315,
"grad_norm": 0.20657393336296082,
"learning_rate": 8.32062022394949e-06,
"loss": 1.2467355728149414,
"step": 1174
},
{
"epoch": 2.1538461538461537,
"grad_norm": 0.27998223900794983,
"learning_rate": 8.278189741886461e-06,
"loss": 0.7243333458900452,
"step": 1176
},
{
"epoch": 2.1575091575091574,
"grad_norm": 0.1207960993051529,
"learning_rate": 8.235887973775122e-06,
"loss": 1.1129575967788696,
"step": 1178
},
{
"epoch": 2.161172161172161,
"grad_norm": 0.44535985589027405,
"learning_rate": 8.193715581857427e-06,
"loss": 1.1027677059173584,
"step": 1180
},
{
"epoch": 2.1648351648351647,
"grad_norm": 0.19374164938926697,
"learning_rate": 8.151673226349922e-06,
"loss": 1.0765420198440552,
"step": 1182
},
{
"epoch": 2.1684981684981683,
"grad_norm": 0.16121609508991241,
"learning_rate": 8.109761565433432e-06,
"loss": 1.1161972284317017,
"step": 1184
},
{
"epoch": 2.172161172161172,
"grad_norm": 0.08308101445436478,
"learning_rate": 8.067981255242707e-06,
"loss": 0.25859737396240234,
"step": 1186
},
{
"epoch": 2.1758241758241756,
"grad_norm": 0.8552573919296265,
"learning_rate": 8.02633294985618e-06,
"loss": 1.162278175354004,
"step": 1188
},
{
"epoch": 2.1794871794871793,
"grad_norm": 4.6851019859313965,
"learning_rate": 7.984817301285743e-06,
"loss": 0.7232871055603027,
"step": 1190
},
{
"epoch": 2.183150183150183,
"grad_norm": 0.40211737155914307,
"learning_rate": 7.943434959466499e-06,
"loss": 0.9548918008804321,
"step": 1192
},
{
"epoch": 2.186813186813187,
"grad_norm": 0.16626113653182983,
"learning_rate": 7.902186572246633e-06,
"loss": 0.7093480825424194,
"step": 1194
},
{
"epoch": 2.1904761904761907,
"grad_norm": 0.3302949368953705,
"learning_rate": 7.861072785377226e-06,
"loss": 0.86142897605896,
"step": 1196
},
{
"epoch": 2.1941391941391943,
"grad_norm": 0.14928938448429108,
"learning_rate": 7.820094242502165e-06,
"loss": 1.089579701423645,
"step": 1198
},
{
"epoch": 2.197802197802198,
"grad_norm": 0.6205920577049255,
"learning_rate": 7.779251585148091e-06,
"loss": 0.6903566718101501,
"step": 1200
},
{
"epoch": 2.2014652014652016,
"grad_norm": 0.15354351699352264,
"learning_rate": 7.7385454527143e-06,
"loss": 1.1525441408157349,
"step": 1202
},
{
"epoch": 2.2051282051282053,
"grad_norm": 0.36918511986732483,
"learning_rate": 7.697976482462797e-06,
"loss": 1.1042122840881348,
"step": 1204
},
{
"epoch": 2.208791208791209,
"grad_norm": 0.25888246297836304,
"learning_rate": 7.657545309508264e-06,
"loss": 0.42669007182121277,
"step": 1206
},
{
"epoch": 2.2124542124542126,
"grad_norm": 0.18095026910305023,
"learning_rate": 7.617252566808145e-06,
"loss": 0.6994042992591858,
"step": 1208
},
{
"epoch": 2.2161172161172162,
"grad_norm": 0.08532464504241943,
"learning_rate": 7.577098885152746e-06,
"loss": 0.543409526348114,
"step": 1210
},
{
"epoch": 2.21978021978022,
"grad_norm": 0.5108111500740051,
"learning_rate": 7.537084893155339e-06,
"loss": 1.0356272459030151,
"step": 1212
},
{
"epoch": 2.2234432234432235,
"grad_norm": 0.4792783260345459,
"learning_rate": 7.497211217242321e-06,
"loss": 0.7714564204216003,
"step": 1214
},
{
"epoch": 2.227106227106227,
"grad_norm": 0.557415246963501,
"learning_rate": 7.457478481643422e-06,
"loss": 0.8565025925636292,
"step": 1216
},
{
"epoch": 2.230769230769231,
"grad_norm": 0.18768003582954407,
"learning_rate": 7.417887308381932e-06,
"loss": 1.0898362398147583,
"step": 1218
},
{
"epoch": 2.2344322344322345,
"grad_norm": 0.7332565188407898,
"learning_rate": 7.378438317264942e-06,
"loss": 1.0876530408859253,
"step": 1220
},
{
"epoch": 2.238095238095238,
"grad_norm": 0.23596039414405823,
"learning_rate": 7.339132125873669e-06,
"loss": 1.1004698276519775,
"step": 1222
},
{
"epoch": 2.241758241758242,
"grad_norm": 0.3819977045059204,
"learning_rate": 7.299969349553767e-06,
"loss": 0.43464016914367676,
"step": 1224
},
{
"epoch": 2.2454212454212454,
"grad_norm": 0.1879609376192093,
"learning_rate": 7.260950601405695e-06,
"loss": 1.056800365447998,
"step": 1226
},
{
"epoch": 2.249084249084249,
"grad_norm": 1.195755124092102,
"learning_rate": 7.222076492275143e-06,
"loss": 0.5487478971481323,
"step": 1228
},
{
"epoch": 2.2527472527472527,
"grad_norm": 0.08555728197097778,
"learning_rate": 7.183347630743432e-06,
"loss": 0.5845374464988708,
"step": 1230
},
{
"epoch": 2.2564102564102564,
"grad_norm": 0.22689585387706757,
"learning_rate": 7.1447646231180085e-06,
"loss": 0.5656768083572388,
"step": 1232
},
{
"epoch": 2.26007326007326,
"grad_norm": 0.3646693229675293,
"learning_rate": 7.10632807342296e-06,
"loss": 1.0442264080047607,
"step": 1234
},
{
"epoch": 2.2637362637362637,
"grad_norm": 16.00477409362793,
"learning_rate": 7.068038583389534e-06,
"loss": 0.7446240782737732,
"step": 1236
},
{
"epoch": 2.2673992673992673,
"grad_norm": 0.27392327785491943,
"learning_rate": 7.029896752446748e-06,
"loss": 0.8997309803962708,
"step": 1238
},
{
"epoch": 2.271062271062271,
"grad_norm": 0.1259632706642151,
"learning_rate": 6.991903177711974e-06,
"loss": 0.8252923488616943,
"step": 1240
},
{
"epoch": 2.2747252747252746,
"grad_norm": 0.3390623927116394,
"learning_rate": 6.9540584539816095e-06,
"loss": 0.8650439381599426,
"step": 1242
},
{
"epoch": 2.2783882783882783,
"grad_norm": 0.2814621329307556,
"learning_rate": 6.916363173721768e-06,
"loss": 1.0692728757858276,
"step": 1244
},
{
"epoch": 2.282051282051282,
"grad_norm": 0.11820454150438309,
"learning_rate": 6.878817927058999e-06,
"loss": 0.7066527009010315,
"step": 1246
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.4047481119632721,
"learning_rate": 6.841423301771039e-06,
"loss": 0.6795726418495178,
"step": 1248
},
{
"epoch": 2.2893772893772892,
"grad_norm": 0.12312712520360947,
"learning_rate": 6.804179883277623e-06,
"loss": 1.0733113288879395,
"step": 1250
},
{
"epoch": 2.293040293040293,
"grad_norm": 0.2027161866426468,
"learning_rate": 6.76708825463132e-06,
"loss": 0.7829720377922058,
"step": 1252
},
{
"epoch": 2.2967032967032965,
"grad_norm": 0.14922744035720825,
"learning_rate": 6.730148996508395e-06,
"loss": 1.1252154111862183,
"step": 1254
},
{
"epoch": 2.3003663003663,
"grad_norm": 0.3244604170322418,
"learning_rate": 6.693362687199734e-06,
"loss": 1.1468591690063477,
"step": 1256
},
{
"epoch": 2.304029304029304,
"grad_norm": 0.9295156598091125,
"learning_rate": 6.656729902601769e-06,
"loss": 0.7650408148765564,
"step": 1258
},
{
"epoch": 2.3076923076923075,
"grad_norm": 0.3396666646003723,
"learning_rate": 6.620251216207478e-06,
"loss": 1.1212382316589355,
"step": 1260
},
{
"epoch": 2.311355311355311,
"grad_norm": 2.412235975265503,
"learning_rate": 6.583927199097413e-06,
"loss": 0.9821506142616272,
"step": 1262
},
{
"epoch": 2.315018315018315,
"grad_norm": 0.1932942420244217,
"learning_rate": 6.547758419930738e-06,
"loss": 0.8289713859558105,
"step": 1264
},
{
"epoch": 2.3186813186813184,
"grad_norm": 0.17503376305103302,
"learning_rate": 6.51174544493634e-06,
"loss": 0.7343323230743408,
"step": 1266
},
{
"epoch": 2.3223443223443225,
"grad_norm": 0.7151310443878174,
"learning_rate": 6.47588883790397e-06,
"loss": 0.7910966873168945,
"step": 1268
},
{
"epoch": 2.326007326007326,
"grad_norm": 0.5414786338806152,
"learning_rate": 6.440189160175403e-06,
"loss": 1.1558384895324707,
"step": 1270
},
{
"epoch": 2.32967032967033,
"grad_norm": 0.17176415026187897,
"learning_rate": 6.404646970635663e-06,
"loss": 0.973163366317749,
"step": 1272
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.3297277092933655,
"learning_rate": 6.369262825704263e-06,
"loss": 0.5530449748039246,
"step": 1274
},
{
"epoch": 2.336996336996337,
"grad_norm": 0.5279161334037781,
"learning_rate": 6.334037279326493e-06,
"loss": 0.9808717370033264,
"step": 1276
},
{
"epoch": 2.340659340659341,
"grad_norm": 1.7091296911239624,
"learning_rate": 6.2989708829647665e-06,
"loss": 0.8029320240020752,
"step": 1278
},
{
"epoch": 2.3443223443223444,
"grad_norm": 0.31587427854537964,
"learning_rate": 6.264064185589969e-06,
"loss": 0.7187018394470215,
"step": 1280
},
{
"epoch": 2.347985347985348,
"grad_norm": 0.18362294137477875,
"learning_rate": 6.229317733672865e-06,
"loss": 0.600703239440918,
"step": 1282
},
{
"epoch": 2.3516483516483517,
"grad_norm": 0.388109028339386,
"learning_rate": 6.194732071175547e-06,
"loss": 1.0635820627212524,
"step": 1284
},
{
"epoch": 2.3553113553113554,
"grad_norm": 5.055185794830322,
"learning_rate": 6.160307739542927e-06,
"loss": 0.5505824685096741,
"step": 1286
},
{
"epoch": 2.358974358974359,
"grad_norm": 0.289461612701416,
"learning_rate": 6.126045277694242e-06,
"loss": 0.894909679889679,
"step": 1288
},
{
"epoch": 2.3626373626373627,
"grad_norm": 0.21914826333522797,
"learning_rate": 6.091945222014643e-06,
"loss": 0.8601652383804321,
"step": 1290
},
{
"epoch": 2.3663003663003663,
"grad_norm": 0.2595743238925934,
"learning_rate": 6.058008106346765e-06,
"loss": 0.7463572025299072,
"step": 1292
},
{
"epoch": 2.36996336996337,
"grad_norm": 0.7285544276237488,
"learning_rate": 6.0242344619823924e-06,
"loss": 0.8526186943054199,
"step": 1294
},
{
"epoch": 2.3736263736263736,
"grad_norm": 0.17746083438396454,
"learning_rate": 5.99062481765415e-06,
"loss": 1.172499656677246,
"step": 1296
},
{
"epoch": 2.3772893772893773,
"grad_norm": 0.48706692457199097,
"learning_rate": 5.95717969952719e-06,
"loss": 1.0589731931686401,
"step": 1298
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.3078210651874542,
"learning_rate": 5.9238996311909985e-06,
"loss": 1.1234736442565918,
"step": 1300
},
{
"epoch": 2.3846153846153846,
"grad_norm": 0.25923097133636475,
"learning_rate": 5.890785133651159e-06,
"loss": 0.8126481771469116,
"step": 1302
},
{
"epoch": 2.3882783882783882,
"grad_norm": 1.3072257041931152,
"learning_rate": 5.857836725321219e-06,
"loss": 0.4338674545288086,
"step": 1304
},
{
"epoch": 2.391941391941392,
"grad_norm": 0.14950256049633026,
"learning_rate": 5.825054922014571e-06,
"loss": 0.9943127036094666,
"step": 1306
},
{
"epoch": 2.3956043956043955,
"grad_norm": 0.19597715139389038,
"learning_rate": 5.792440236936386e-06,
"loss": 1.0862926244735718,
"step": 1308
},
{
"epoch": 2.399267399267399,
"grad_norm": 0.4389561116695404,
"learning_rate": 5.759993180675542e-06,
"loss": 0.4683815836906433,
"step": 1310
},
{
"epoch": 2.402930402930403,
"grad_norm": 0.25034990906715393,
"learning_rate": 5.727714261196677e-06,
"loss": 1.0892280340194702,
"step": 1312
},
{
"epoch": 2.4065934065934065,
"grad_norm": 0.1509070247411728,
"learning_rate": 5.695603983832217e-06,
"loss": 0.7876355648040771,
"step": 1314
},
{
"epoch": 2.41025641025641,
"grad_norm": 0.8369637131690979,
"learning_rate": 5.663662851274458e-06,
"loss": 1.0897516012191772,
"step": 1316
},
{
"epoch": 2.413919413919414,
"grad_norm": 0.6931006908416748,
"learning_rate": 5.631891363567699e-06,
"loss": 0.7256535291671753,
"step": 1318
},
{
"epoch": 2.4175824175824174,
"grad_norm": 0.12493342906236649,
"learning_rate": 5.600290018100429e-06,
"loss": 0.7264870405197144,
"step": 1320
},
{
"epoch": 2.421245421245421,
"grad_norm": 0.9422951340675354,
"learning_rate": 5.568859309597517e-06,
"loss": 0.5611618161201477,
"step": 1322
},
{
"epoch": 2.4249084249084247,
"grad_norm": 0.4177769124507904,
"learning_rate": 5.537599730112495e-06,
"loss": 0.7243083119392395,
"step": 1324
},
{
"epoch": 2.4285714285714284,
"grad_norm": 0.17681288719177246,
"learning_rate": 5.50651176901982e-06,
"loss": 0.7493731379508972,
"step": 1326
},
{
"epoch": 2.4322344322344325,
"grad_norm": 0.08245257288217545,
"learning_rate": 5.475595913007242e-06,
"loss": 0.6541057229042053,
"step": 1328
},
{
"epoch": 2.435897435897436,
"grad_norm": 0.2710842490196228,
"learning_rate": 5.4448526460681765e-06,
"loss": 0.7777208089828491,
"step": 1330
},
{
"epoch": 2.4395604395604398,
"grad_norm": 0.1899799406528473,
"learning_rate": 5.414282449494118e-06,
"loss": 1.0001888275146484,
"step": 1332
},
{
"epoch": 2.4432234432234434,
"grad_norm": 0.36911335587501526,
"learning_rate": 5.3838858018671185e-06,
"loss": 0.6608698964118958,
"step": 1334
},
{
"epoch": 2.446886446886447,
"grad_norm": 0.29634609818458557,
"learning_rate": 5.353663179052286e-06,
"loss": 0.611993134021759,
"step": 1336
},
{
"epoch": 2.4505494505494507,
"grad_norm": 1.4256441593170166,
"learning_rate": 5.323615054190335e-06,
"loss": 1.0301096439361572,
"step": 1338
},
{
"epoch": 2.4542124542124544,
"grad_norm": 0.24053452908992767,
"learning_rate": 5.293741897690192e-06,
"loss": 0.8246780633926392,
"step": 1340
},
{
"epoch": 2.457875457875458,
"grad_norm": 0.1095680221915245,
"learning_rate": 5.264044177221619e-06,
"loss": 0.5975589156150818,
"step": 1342
},
{
"epoch": 2.4615384615384617,
"grad_norm": 0.18885232508182526,
"learning_rate": 5.23452235770788e-06,
"loss": 0.7892658710479736,
"step": 1344
},
{
"epoch": 2.4652014652014653,
"grad_norm": 0.11767473816871643,
"learning_rate": 5.205176901318497e-06,
"loss": 1.0192914009094238,
"step": 1346
},
{
"epoch": 2.468864468864469,
"grad_norm": 0.22971481084823608,
"learning_rate": 5.176008267461988e-06,
"loss": 1.0761326551437378,
"step": 1348
},
{
"epoch": 2.4725274725274726,
"grad_norm": 0.20064282417297363,
"learning_rate": 5.14701691277868e-06,
"loss": 0.6804295778274536,
"step": 1350
},
{
"epoch": 2.4761904761904763,
"grad_norm": 0.19957488775253296,
"learning_rate": 5.118203291133559e-06,
"loss": 0.7721022367477417,
"step": 1352
},
{
"epoch": 2.47985347985348,
"grad_norm": 0.346648633480072,
"learning_rate": 5.0895678536091705e-06,
"loss": 0.9085915088653564,
"step": 1354
},
{
"epoch": 2.4835164835164836,
"grad_norm": 0.037051133811473846,
"learning_rate": 5.061111048498556e-06,
"loss": 0.8866533637046814,
"step": 1356
},
{
"epoch": 2.4871794871794872,
"grad_norm": 0.20938566327095032,
"learning_rate": 5.032833321298238e-06,
"loss": 1.0554593801498413,
"step": 1358
},
{
"epoch": 2.490842490842491,
"grad_norm": 0.5141227841377258,
"learning_rate": 5.004735114701233e-06,
"loss": 0.8432056307792664,
"step": 1360
},
{
"epoch": 2.4945054945054945,
"grad_norm": 0.12697762250900269,
"learning_rate": 4.97681686859013e-06,
"loss": 0.7530601620674133,
"step": 1362
},
{
"epoch": 2.498168498168498,
"grad_norm": 0.5744228959083557,
"learning_rate": 4.949079020030214e-06,
"loss": 0.7272409200668335,
"step": 1364
},
{
"epoch": 2.501831501831502,
"grad_norm": 0.12479441612958908,
"learning_rate": 4.921522003262595e-06,
"loss": 1.0741475820541382,
"step": 1366
},
{
"epoch": 2.5054945054945055,
"grad_norm": 0.19363726675510406,
"learning_rate": 4.89414624969745e-06,
"loss": 0.7075502276420593,
"step": 1368
},
{
"epoch": 2.509157509157509,
"grad_norm": 0.45009559392929077,
"learning_rate": 4.8669521879072295e-06,
"loss": 0.8860321640968323,
"step": 1370
},
{
"epoch": 2.5128205128205128,
"grad_norm": 0.09184969216585159,
"learning_rate": 4.839940243619968e-06,
"loss": 0.6010065078735352,
"step": 1372
},
{
"epoch": 2.5164835164835164,
"grad_norm": 0.19973506033420563,
"learning_rate": 4.813110839712629e-06,
"loss": 0.7367382049560547,
"step": 1374
},
{
"epoch": 2.52014652014652,
"grad_norm": 0.5963112711906433,
"learning_rate": 4.786464396204463e-06,
"loss": 0.9273372888565063,
"step": 1376
},
{
"epoch": 2.5238095238095237,
"grad_norm": 0.23852768540382385,
"learning_rate": 4.760001330250443e-06,
"loss": 0.742865800857544,
"step": 1378
},
{
"epoch": 2.5274725274725274,
"grad_norm": 0.1433335691690445,
"learning_rate": 4.733722056134734e-06,
"loss": 0.8435138463973999,
"step": 1380
},
{
"epoch": 2.531135531135531,
"grad_norm": 0.10045904666185379,
"learning_rate": 4.707626985264201e-06,
"loss": 0.8152870535850525,
"step": 1382
},
{
"epoch": 2.5347985347985347,
"grad_norm": 0.13199284672737122,
"learning_rate": 4.681716526161982e-06,
"loss": 0.7646356821060181,
"step": 1384
},
{
"epoch": 2.5384615384615383,
"grad_norm": 0.20522791147232056,
"learning_rate": 4.655991084461084e-06,
"loss": 1.0694559812545776,
"step": 1386
},
{
"epoch": 2.542124542124542,
"grad_norm": 0.46581119298934937,
"learning_rate": 4.630451062898016e-06,
"loss": 0.729383111000061,
"step": 1388
},
{
"epoch": 2.5457875457875456,
"grad_norm": 0.3454929292201996,
"learning_rate": 4.6050968613065214e-06,
"loss": 0.7659768462181091,
"step": 1390
},
{
"epoch": 2.5494505494505493,
"grad_norm": 0.671925961971283,
"learning_rate": 4.579928876611288e-06,
"loss": 0.9364892840385437,
"step": 1392
},
{
"epoch": 2.553113553113553,
"grad_norm": 0.1817079782485962,
"learning_rate": 4.554947502821745e-06,
"loss": 0.6875939965248108,
"step": 1394
},
{
"epoch": 2.5567765567765566,
"grad_norm": 0.16234847903251648,
"learning_rate": 4.53015313102589e-06,
"loss": 0.30868446826934814,
"step": 1396
},
{
"epoch": 2.5604395604395602,
"grad_norm": 0.18673402070999146,
"learning_rate": 4.505546149384179e-06,
"loss": 1.0998352766036987,
"step": 1398
},
{
"epoch": 2.564102564102564,
"grad_norm": 0.1808062493801117,
"learning_rate": 4.481126943123428e-06,
"loss": 1.0791916847229004,
"step": 1400
},
{
"epoch": 2.5677655677655675,
"grad_norm": 0.285334974527359,
"learning_rate": 4.45689589453081e-06,
"loss": 0.938387393951416,
"step": 1402
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.1669720709323883,
"learning_rate": 4.432853382947845e-06,
"loss": 1.074457049369812,
"step": 1404
},
{
"epoch": 2.575091575091575,
"grad_norm": 0.7900213599205017,
"learning_rate": 4.408999784764466e-06,
"loss": 0.44015753269195557,
"step": 1406
},
{
"epoch": 2.578754578754579,
"grad_norm": 0.21599474549293518,
"learning_rate": 4.3853354734131475e-06,
"loss": 1.097135066986084,
"step": 1408
},
{
"epoch": 2.5824175824175826,
"grad_norm": 0.18292640149593353,
"learning_rate": 4.361860819363036e-06,
"loss": 0.830746054649353,
"step": 1410
},
{
"epoch": 2.586080586080586,
"grad_norm": 0.6029004454612732,
"learning_rate": 4.338576190114154e-06,
"loss": 0.7889416813850403,
"step": 1412
},
{
"epoch": 2.58974358974359,
"grad_norm": 0.13510458171367645,
"learning_rate": 4.315481950191659e-06,
"loss": 0.9027295708656311,
"step": 1414
},
{
"epoch": 2.5934065934065935,
"grad_norm": 0.21142619848251343,
"learning_rate": 4.292578461140117e-06,
"loss": 1.0660418272018433,
"step": 1416
},
{
"epoch": 2.597069597069597,
"grad_norm": 0.9611092805862427,
"learning_rate": 4.269866081517867e-06,
"loss": 0.7184525728225708,
"step": 1418
},
{
"epoch": 2.600732600732601,
"grad_norm": 0.15182118117809296,
"learning_rate": 4.2473451668913935e-06,
"loss": 1.0582796335220337,
"step": 1420
},
{
"epoch": 2.6043956043956045,
"grad_norm": 0.3284706771373749,
"learning_rate": 4.225016069829747e-06,
"loss": 0.9986996650695801,
"step": 1422
},
{
"epoch": 2.608058608058608,
"grad_norm": 0.24331869184970856,
"learning_rate": 4.2028791398990525e-06,
"loss": 1.1596800088882446,
"step": 1424
},
{
"epoch": 2.6117216117216118,
"grad_norm": 0.31541645526885986,
"learning_rate": 4.180934723657021e-06,
"loss": 1.1926846504211426,
"step": 1426
},
{
"epoch": 2.6153846153846154,
"grad_norm": 0.3905728757381439,
"learning_rate": 4.159183164647525e-06,
"loss": 1.107182502746582,
"step": 1428
},
{
"epoch": 2.619047619047619,
"grad_norm": 0.14054612815380096,
"learning_rate": 4.137624803395217e-06,
"loss": 1.050660490989685,
"step": 1430
},
{
"epoch": 2.6227106227106227,
"grad_norm": 0.30420032143592834,
"learning_rate": 4.116259977400214e-06,
"loss": 0.8558696508407593,
"step": 1432
},
{
"epoch": 2.6263736263736264,
"grad_norm": 1.371067762374878,
"learning_rate": 4.0950890211327875e-06,
"loss": 0.901361882686615,
"step": 1434
},
{
"epoch": 2.63003663003663,
"grad_norm": 0.4628206789493561,
"learning_rate": 4.0741122660281595e-06,
"loss": 1.2507123947143555,
"step": 1436
},
{
"epoch": 2.6336996336996337,
"grad_norm": 0.5903933644294739,
"learning_rate": 4.053330040481287e-06,
"loss": 1.1097562313079834,
"step": 1438
},
{
"epoch": 2.6373626373626373,
"grad_norm": 0.29786449670791626,
"learning_rate": 4.032742669841728e-06,
"loss": 1.066929578781128,
"step": 1440
},
{
"epoch": 2.641025641025641,
"grad_norm": 0.13905611634254456,
"learning_rate": 4.012350476408563e-06,
"loss": 1.0995196104049683,
"step": 1442
},
{
"epoch": 2.6446886446886446,
"grad_norm": 0.21123115718364716,
"learning_rate": 3.992153779425325e-06,
"loss": 1.0221703052520752,
"step": 1444
},
{
"epoch": 2.6483516483516483,
"grad_norm": 0.27779990434646606,
"learning_rate": 3.972152895075025e-06,
"loss": 0.7702869772911072,
"step": 1446
},
{
"epoch": 2.652014652014652,
"grad_norm": 0.20709839463233948,
"learning_rate": 3.952348136475182e-06,
"loss": 0.777704656124115,
"step": 1448
},
{
"epoch": 2.6556776556776556,
"grad_norm": 0.14510610699653625,
"learning_rate": 3.932739813672935e-06,
"loss": 1.060199499130249,
"step": 1450
},
{
"epoch": 2.659340659340659,
"grad_norm": 0.10560119152069092,
"learning_rate": 3.913328233640182e-06,
"loss": 0.6270155906677246,
"step": 1452
},
{
"epoch": 2.663003663003663,
"grad_norm": 0.3765946924686432,
"learning_rate": 3.894113700268784e-06,
"loss": 0.6499032378196716,
"step": 1454
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.1713167279958725,
"learning_rate": 3.8750965143657906e-06,
"loss": 1.038994550704956,
"step": 1456
},
{
"epoch": 2.67032967032967,
"grad_norm": 0.22225315868854523,
"learning_rate": 3.8562769736487434e-06,
"loss": 0.7112734317779541,
"step": 1458
},
{
"epoch": 2.6739926739926743,
"grad_norm": 0.06286896765232086,
"learning_rate": 3.8376553727410175e-06,
"loss": 0.7135747075080872,
"step": 1460
},
{
"epoch": 2.677655677655678,
"grad_norm": 0.15267214179039001,
"learning_rate": 3.819232003167198e-06,
"loss": 0.7469848990440369,
"step": 1462
},
{
"epoch": 2.6813186813186816,
"grad_norm": 0.24047072231769562,
"learning_rate": 3.801007153348521e-06,
"loss": 1.0165785551071167,
"step": 1464
},
{
"epoch": 2.684981684981685,
"grad_norm": 0.2921578288078308,
"learning_rate": 3.7829811085983675e-06,
"loss": 0.8360713720321655,
"step": 1466
},
{
"epoch": 2.688644688644689,
"grad_norm": 0.13940708339214325,
"learning_rate": 3.765154151117778e-06,
"loss": 0.9943856596946716,
"step": 1468
},
{
"epoch": 2.6923076923076925,
"grad_norm": 0.4869571030139923,
"learning_rate": 3.747526559991056e-06,
"loss": 0.5810178518295288,
"step": 1470
},
{
"epoch": 2.695970695970696,
"grad_norm": 0.43914780020713806,
"learning_rate": 3.7300986111813788e-06,
"loss": 0.7607510685920715,
"step": 1472
},
{
"epoch": 2.6996336996337,
"grad_norm": 0.21713170409202576,
"learning_rate": 3.7128705775264885e-06,
"loss": 0.391747385263443,
"step": 1474
},
{
"epoch": 2.7032967032967035,
"grad_norm": 0.19103080034255981,
"learning_rate": 3.695842728734425e-06,
"loss": 1.1086090803146362,
"step": 1476
},
{
"epoch": 2.706959706959707,
"grad_norm": 0.28505468368530273,
"learning_rate": 3.6790153313792904e-06,
"loss": 0.6411003470420837,
"step": 1478
},
{
"epoch": 2.7106227106227108,
"grad_norm": 0.17257079482078552,
"learning_rate": 3.662388648897086e-06,
"loss": 1.060044765472412,
"step": 1480
},
{
"epoch": 2.7142857142857144,
"grad_norm": 0.2528294622898102,
"learning_rate": 3.6459629415815826e-06,
"loss": 1.06841242313385,
"step": 1482
},
{
"epoch": 2.717948717948718,
"grad_norm": 0.8728216886520386,
"learning_rate": 3.629738466580249e-06,
"loss": 0.9711808562278748,
"step": 1484
},
{
"epoch": 2.7216117216117217,
"grad_norm": 0.2920563817024231,
"learning_rate": 3.6137154778902252e-06,
"loss": 1.1136637926101685,
"step": 1486
},
{
"epoch": 2.7252747252747254,
"grad_norm": 0.2023547887802124,
"learning_rate": 3.5978942263543494e-06,
"loss": 1.108361840248108,
"step": 1488
},
{
"epoch": 2.728937728937729,
"grad_norm": 0.2274034470319748,
"learning_rate": 3.5822749596572212e-06,
"loss": 0.48676446080207825,
"step": 1490
},
{
"epoch": 2.7326007326007327,
"grad_norm": 0.29057979583740234,
"learning_rate": 3.5668579223213327e-06,
"loss": 1.0988428592681885,
"step": 1492
},
{
"epoch": 2.7362637362637363,
"grad_norm": 0.16937057673931122,
"learning_rate": 3.5516433557032396e-06,
"loss": 0.5996679663658142,
"step": 1494
},
{
"epoch": 2.73992673992674,
"grad_norm": 0.23391544818878174,
"learning_rate": 3.5366314979897804e-06,
"loss": 1.1664600372314453,
"step": 1496
},
{
"epoch": 2.7435897435897436,
"grad_norm": 0.1585794985294342,
"learning_rate": 3.5218225841943505e-06,
"loss": 1.05189049243927,
"step": 1498
},
{
"epoch": 2.7472527472527473,
"grad_norm": 0.26321935653686523,
"learning_rate": 3.5072168461532164e-06,
"loss": 0.7114003300666809,
"step": 1500
},
{
"epoch": 2.750915750915751,
"grad_norm": 0.13159599900245667,
"learning_rate": 3.492814512521892e-06,
"loss": 0.9970650672912598,
"step": 1502
},
{
"epoch": 2.7545787545787546,
"grad_norm": 0.13932837545871735,
"learning_rate": 3.4786158087715646e-06,
"loss": 1.0803966522216797,
"step": 1504
},
{
"epoch": 2.758241758241758,
"grad_norm": 0.24444995820522308,
"learning_rate": 3.4646209571855467e-06,
"loss": 1.0812441110610962,
"step": 1506
},
{
"epoch": 2.761904761904762,
"grad_norm": 0.12447657436132431,
"learning_rate": 3.450830176855816e-06,
"loss": 0.883111298084259,
"step": 1508
},
{
"epoch": 2.7655677655677655,
"grad_norm": 0.32130947709083557,
"learning_rate": 3.437243683679577e-06,
"loss": 0.9158514738082886,
"step": 1510
},
{
"epoch": 2.769230769230769,
"grad_norm": 0.3241344094276428,
"learning_rate": 3.4238616903558755e-06,
"loss": 0.8885561227798462,
"step": 1512
},
{
"epoch": 2.772893772893773,
"grad_norm": 0.21939632296562195,
"learning_rate": 3.4106844063822806e-06,
"loss": 1.0644609928131104,
"step": 1514
},
{
"epoch": 2.7765567765567765,
"grad_norm": 0.22788673639297485,
"learning_rate": 3.397712038051595e-06,
"loss": 0.9741593599319458,
"step": 1516
},
{
"epoch": 2.78021978021978,
"grad_norm": 0.1162387877702713,
"learning_rate": 3.3849447884486317e-06,
"loss": 0.9548935890197754,
"step": 1518
},
{
"epoch": 2.7838827838827838,
"grad_norm": 0.6515100002288818,
"learning_rate": 3.372382857447029e-06,
"loss": 0.4730675220489502,
"step": 1520
},
{
"epoch": 2.7875457875457874,
"grad_norm": 0.5070962905883789,
"learning_rate": 3.360026441706132e-06,
"loss": 0.734772264957428,
"step": 1522
},
{
"epoch": 2.791208791208791,
"grad_norm": 0.15056446194648743,
"learning_rate": 3.3478757346678978e-06,
"loss": 0.7798343896865845,
"step": 1524
},
{
"epoch": 2.7948717948717947,
"grad_norm": 0.13427495956420898,
"learning_rate": 3.335930926553878e-06,
"loss": 1.1091750860214233,
"step": 1526
},
{
"epoch": 2.7985347985347984,
"grad_norm": 0.14417144656181335,
"learning_rate": 3.324192204362245e-06,
"loss": 1.0608025789260864,
"step": 1528
},
{
"epoch": 2.802197802197802,
"grad_norm": 0.2243596315383911,
"learning_rate": 3.3126597518648514e-06,
"loss": 1.071797251701355,
"step": 1530
},
{
"epoch": 2.8058608058608057,
"grad_norm": 0.3019508719444275,
"learning_rate": 3.301333749604362e-06,
"loss": 0.9574898481369019,
"step": 1532
},
{
"epoch": 2.8095238095238093,
"grad_norm": 0.14282682538032532,
"learning_rate": 3.2902143748914256e-06,
"loss": 0.726614773273468,
"step": 1534
},
{
"epoch": 2.813186813186813,
"grad_norm": 0.18306607007980347,
"learning_rate": 3.279301801801897e-06,
"loss": 1.2403117418289185,
"step": 1536
},
{
"epoch": 2.8168498168498166,
"grad_norm": 0.17832379043102264,
"learning_rate": 3.2685962011741165e-06,
"loss": 0.8608546853065491,
"step": 1538
},
{
"epoch": 2.8205128205128203,
"grad_norm": 0.335725873708725,
"learning_rate": 3.2580977406062313e-06,
"loss": 0.7375699281692505,
"step": 1540
},
{
"epoch": 2.824175824175824,
"grad_norm": 0.1197366863489151,
"learning_rate": 3.24780658445357e-06,
"loss": 0.8306369781494141,
"step": 1542
},
{
"epoch": 2.8278388278388276,
"grad_norm": 0.12221439182758331,
"learning_rate": 3.237722893826076e-06,
"loss": 1.0438703298568726,
"step": 1544
},
{
"epoch": 2.8315018315018317,
"grad_norm": 0.21702435612678528,
"learning_rate": 3.2278468265857805e-06,
"loss": 1.0939574241638184,
"step": 1546
},
{
"epoch": 2.8351648351648353,
"grad_norm": 0.2695011496543884,
"learning_rate": 3.218178537344335e-06,
"loss": 1.1599433422088623,
"step": 1548
},
{
"epoch": 2.838827838827839,
"grad_norm": 0.3290030360221863,
"learning_rate": 3.208718177460581e-06,
"loss": 0.7901730537414551,
"step": 1550
},
{
"epoch": 2.8424908424908426,
"grad_norm": 0.05684468150138855,
"learning_rate": 3.199465895038196e-06,
"loss": 0.5308884978294373,
"step": 1552
},
{
"epoch": 2.8461538461538463,
"grad_norm": 0.12094342708587646,
"learning_rate": 3.19042183492336e-06,
"loss": 1.0966477394104004,
"step": 1554
},
{
"epoch": 2.84981684981685,
"grad_norm": 0.13761815428733826,
"learning_rate": 3.1815861387025012e-06,
"loss": 0.8628841638565063,
"step": 1556
},
{
"epoch": 2.8534798534798536,
"grad_norm": 0.3820330798625946,
"learning_rate": 3.1729589447000673e-06,
"loss": 1.0031236410140991,
"step": 1558
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.27110400795936584,
"learning_rate": 3.164540387976365e-06,
"loss": 0.533704400062561,
"step": 1560
},
{
"epoch": 2.860805860805861,
"grad_norm": 0.3188073933124542,
"learning_rate": 3.1563306003254506e-06,
"loss": 0.6449660658836365,
"step": 1562
},
{
"epoch": 2.8644688644688645,
"grad_norm": 0.18272897601127625,
"learning_rate": 3.1483297102730584e-06,
"loss": 0.8202900886535645,
"step": 1564
},
{
"epoch": 2.868131868131868,
"grad_norm": 0.8145133256912231,
"learning_rate": 3.1405378430745944e-06,
"loss": 0.616677463054657,
"step": 1566
},
{
"epoch": 2.871794871794872,
"grad_norm": 0.4853191077709198,
"learning_rate": 3.1329551207131714e-06,
"loss": 0.6839433908462524,
"step": 1568
},
{
"epoch": 2.8754578754578755,
"grad_norm": 0.11846169084310532,
"learning_rate": 3.1255816618977038e-06,
"loss": 0.6521806716918945,
"step": 1570
},
{
"epoch": 2.879120879120879,
"grad_norm": 0.12504829466342926,
"learning_rate": 3.1184175820610454e-06,
"loss": 0.6307588815689087,
"step": 1572
},
{
"epoch": 2.8827838827838828,
"grad_norm": 0.21441541612148285,
"learning_rate": 3.111462993358183e-06,
"loss": 0.9419472813606262,
"step": 1574
},
{
"epoch": 2.8864468864468864,
"grad_norm": 0.4661043882369995,
"learning_rate": 3.104718004664481e-06,
"loss": 0.8766722083091736,
"step": 1576
},
{
"epoch": 2.89010989010989,
"grad_norm": 0.7980870604515076,
"learning_rate": 3.09818272157398e-06,
"loss": 0.8545612692832947,
"step": 1578
},
{
"epoch": 2.8937728937728937,
"grad_norm": 0.26410335302352905,
"learning_rate": 3.0918572463977376e-06,
"loss": 1.1469751596450806,
"step": 1580
},
{
"epoch": 2.8974358974358974,
"grad_norm": 0.2552175521850586,
"learning_rate": 3.085741678162231e-06,
"loss": 0.7883166670799255,
"step": 1582
},
{
"epoch": 2.901098901098901,
"grad_norm": 0.0656161829829216,
"learning_rate": 3.079836112607805e-06,
"loss": 0.7394980788230896,
"step": 1584
},
{
"epoch": 2.9047619047619047,
"grad_norm": 0.14533191919326782,
"learning_rate": 3.074140642187176e-06,
"loss": 0.7442795038223267,
"step": 1586
},
{
"epoch": 2.9084249084249083,
"grad_norm": 0.1676967591047287,
"learning_rate": 3.068655356063979e-06,
"loss": 1.1076900959014893,
"step": 1588
},
{
"epoch": 2.912087912087912,
"grad_norm": 0.5147824883460999,
"learning_rate": 3.063380340111379e-06,
"loss": 1.143357753753662,
"step": 1590
},
{
"epoch": 2.9157509157509156,
"grad_norm": 0.15945342183113098,
"learning_rate": 3.0583156769107198e-06,
"loss": 0.42322206497192383,
"step": 1592
},
{
"epoch": 2.9194139194139193,
"grad_norm": 0.2826630771160126,
"learning_rate": 3.0534614457502347e-06,
"loss": 1.0997695922851562,
"step": 1594
},
{
"epoch": 2.9230769230769234,
"grad_norm": 0.22525803744792938,
"learning_rate": 3.0488177226238068e-06,
"loss": 0.7123748064041138,
"step": 1596
},
{
"epoch": 2.926739926739927,
"grad_norm": 0.23277594149112701,
"learning_rate": 3.0443845802297755e-06,
"loss": 1.1194868087768555,
"step": 1598
},
{
"epoch": 2.9304029304029307,
"grad_norm": 0.08772231638431549,
"learning_rate": 3.0401620879697976e-06,
"loss": 0.7338302731513977,
"step": 1600
},
{
"epoch": 2.9340659340659343,
"grad_norm": 0.232249915599823,
"learning_rate": 3.0361503119477703e-06,
"loss": 1.0972161293029785,
"step": 1602
},
{
"epoch": 2.937728937728938,
"grad_norm": 0.0685197114944458,
"learning_rate": 3.032349314968781e-06,
"loss": 0.7930707335472107,
"step": 1604
},
{
"epoch": 2.9413919413919416,
"grad_norm": 0.1667313277721405,
"learning_rate": 3.028759156538139e-06,
"loss": 1.0795692205429077,
"step": 1606
},
{
"epoch": 2.9450549450549453,
"grad_norm": 0.45531848073005676,
"learning_rate": 3.025379892860435e-06,
"loss": 0.6840215921401978,
"step": 1608
},
{
"epoch": 2.948717948717949,
"grad_norm": 0.3655454218387604,
"learning_rate": 3.022211576838662e-06,
"loss": 0.9497122168540955,
"step": 1610
},
{
"epoch": 2.9523809523809526,
"grad_norm": 0.4418063163757324,
"learning_rate": 3.0192542580733894e-06,
"loss": 0.808830738067627,
"step": 1612
},
{
"epoch": 2.956043956043956,
"grad_norm": 0.1944647580385208,
"learning_rate": 3.016507982861989e-06,
"loss": 0.6359755992889404,
"step": 1614
},
{
"epoch": 2.95970695970696,
"grad_norm": 0.4210836887359619,
"learning_rate": 3.013972794197901e-06,
"loss": 0.7775853276252747,
"step": 1616
},
{
"epoch": 2.9633699633699635,
"grad_norm": 0.34786224365234375,
"learning_rate": 3.0116487317699732e-06,
"loss": 1.322286605834961,
"step": 1618
},
{
"epoch": 2.967032967032967,
"grad_norm": 0.17325156927108765,
"learning_rate": 3.009535831961828e-06,
"loss": 1.19371497631073,
"step": 1620
},
{
"epoch": 2.970695970695971,
"grad_norm": 0.5465530753135681,
"learning_rate": 3.007634127851303e-06,
"loss": 0.8626972436904907,
"step": 1622
},
{
"epoch": 2.9743589743589745,
"grad_norm": 0.7124922871589661,
"learning_rate": 3.005943649209923e-06,
"loss": 0.7288764715194702,
"step": 1624
},
{
"epoch": 2.978021978021978,
"grad_norm": 0.1308281123638153,
"learning_rate": 3.0044644225024444e-06,
"loss": 0.8229957222938538,
"step": 1626
},
{
"epoch": 2.9816849816849818,
"grad_norm": 0.20323815941810608,
"learning_rate": 3.003196470886432e-06,
"loss": 0.9102584719657898,
"step": 1628
},
{
"epoch": 2.9853479853479854,
"grad_norm": 0.14625518023967743,
"learning_rate": 3.002139814211902e-06,
"loss": 1.3533666133880615,
"step": 1630
},
{
"epoch": 2.989010989010989,
"grad_norm": 0.22311708331108093,
"learning_rate": 3.0012944690210082e-06,
"loss": 0.9485982060432434,
"step": 1632
},
{
"epoch": 2.9926739926739927,
"grad_norm": 0.23025082051753998,
"learning_rate": 3.000660448547786e-06,
"loss": 0.72516268491745,
"step": 1634
},
{
"epoch": 2.9963369963369964,
"grad_norm": 0.1964196413755417,
"learning_rate": 3.0002377627179435e-06,
"loss": 1.2260894775390625,
"step": 1636
},
{
"epoch": 3.0,
"grad_norm": 0.15897659957408905,
"learning_rate": 3.0000264181487013e-06,
"loss": 1.0367738008499146,
"step": 1638
},
{
"epoch": 3.0,
"step": 1638,
"total_flos": 8.4482141520606e+18,
"train_loss": 0.9952802232333592,
"train_runtime": 79492.9069,
"train_samples_per_second": 0.495,
"train_steps_per_second": 0.021
}
],
"logging_steps": 2,
"max_steps": 1638,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 99999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.4482141520606e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}