27b-3-lora / trainer_state.json
furproxy's picture
Upload folder using huggingface_hub
86eb0a2 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1638,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003663003663003663,
"grad_norm": 2.7634639739990234,
"learning_rate": 6.000000000000001e-07,
"loss": 2.6851325035095215,
"step": 2
},
{
"epoch": 0.007326007326007326,
"grad_norm": 0.8769217133522034,
"learning_rate": 1.8e-06,
"loss": 1.6665289402008057,
"step": 4
},
{
"epoch": 0.01098901098901099,
"grad_norm": 0.7409242987632751,
"learning_rate": 3e-06,
"loss": 1.8780521154403687,
"step": 6
},
{
"epoch": 0.014652014652014652,
"grad_norm": 0.1563258320093155,
"learning_rate": 4.2000000000000004e-06,
"loss": 2.041105270385742,
"step": 8
},
{
"epoch": 0.018315018315018316,
"grad_norm": 0.15041226148605347,
"learning_rate": 5.4e-06,
"loss": 2.166102409362793,
"step": 10
},
{
"epoch": 0.02197802197802198,
"grad_norm": 0.7837436199188232,
"learning_rate": 6.6e-06,
"loss": 1.9360963106155396,
"step": 12
},
{
"epoch": 0.02564102564102564,
"grad_norm": 0.15504689514636993,
"learning_rate": 7.8e-06,
"loss": 1.7194116115570068,
"step": 14
},
{
"epoch": 0.029304029304029304,
"grad_norm": 0.1344185769557953,
"learning_rate": 9e-06,
"loss": 1.7012724876403809,
"step": 16
},
{
"epoch": 0.03296703296703297,
"grad_norm": 0.864067792892456,
"learning_rate": 1.02e-05,
"loss": 1.628023386001587,
"step": 18
},
{
"epoch": 0.03663003663003663,
"grad_norm": 0.12599794566631317,
"learning_rate": 1.1400000000000001e-05,
"loss": 1.6917732954025269,
"step": 20
},
{
"epoch": 0.040293040293040296,
"grad_norm": 0.38553279638290405,
"learning_rate": 1.26e-05,
"loss": 0.955093264579773,
"step": 22
},
{
"epoch": 0.04395604395604396,
"grad_norm": 0.17990106344223022,
"learning_rate": 1.3800000000000002e-05,
"loss": 1.1128543615341187,
"step": 24
},
{
"epoch": 0.047619047619047616,
"grad_norm": 0.19061581790447235,
"learning_rate": 1.5e-05,
"loss": 1.3577781915664673,
"step": 26
},
{
"epoch": 0.05128205128205128,
"grad_norm": 0.41450390219688416,
"learning_rate": 1.62e-05,
"loss": 1.2393029928207397,
"step": 28
},
{
"epoch": 0.054945054945054944,
"grad_norm": 0.2504269778728485,
"learning_rate": 1.74e-05,
"loss": 1.1353970766067505,
"step": 30
},
{
"epoch": 0.05860805860805861,
"grad_norm": 0.32704558968544006,
"learning_rate": 1.86e-05,
"loss": 1.0676343441009521,
"step": 32
},
{
"epoch": 0.06227106227106227,
"grad_norm": 0.04873079061508179,
"learning_rate": 1.98e-05,
"loss": 1.2141464948654175,
"step": 34
},
{
"epoch": 0.06593406593406594,
"grad_norm": 0.29445287585258484,
"learning_rate": 2.1e-05,
"loss": 1.5761451721191406,
"step": 36
},
{
"epoch": 0.0695970695970696,
"grad_norm": 3.7144603729248047,
"learning_rate": 2.22e-05,
"loss": 1.3491644859313965,
"step": 38
},
{
"epoch": 0.07326007326007326,
"grad_norm": 0.42794638872146606,
"learning_rate": 2.3400000000000003e-05,
"loss": 1.385630488395691,
"step": 40
},
{
"epoch": 0.07692307692307693,
"grad_norm": 0.11913251876831055,
"learning_rate": 2.4599999999999998e-05,
"loss": 1.0656204223632812,
"step": 42
},
{
"epoch": 0.08058608058608059,
"grad_norm": 0.098934605717659,
"learning_rate": 2.58e-05,
"loss": 1.4789392948150635,
"step": 44
},
{
"epoch": 0.08424908424908426,
"grad_norm": 0.14308126270771027,
"learning_rate": 2.7000000000000002e-05,
"loss": 1.5735292434692383,
"step": 46
},
{
"epoch": 0.08791208791208792,
"grad_norm": 0.1506340503692627,
"learning_rate": 2.8199999999999998e-05,
"loss": 1.0008467435836792,
"step": 48
},
{
"epoch": 0.09157509157509157,
"grad_norm": 0.22060516476631165,
"learning_rate": 2.94e-05,
"loss": 1.460166335105896,
"step": 50
},
{
"epoch": 0.09523809523809523,
"grad_norm": 0.09151721745729446,
"learning_rate": 2.99999735818513e-05,
"loss": 0.7308154702186584,
"step": 52
},
{
"epoch": 0.0989010989010989,
"grad_norm": 0.3875337839126587,
"learning_rate": 2.9999762237282056e-05,
"loss": 0.9233083724975586,
"step": 54
},
{
"epoch": 0.10256410256410256,
"grad_norm": 0.33670932054519653,
"learning_rate": 2.9999339551452214e-05,
"loss": 1.3086968660354614,
"step": 56
},
{
"epoch": 0.10622710622710622,
"grad_norm": 0.12714841961860657,
"learning_rate": 2.9998705530978993e-05,
"loss": 1.1492022275924683,
"step": 58
},
{
"epoch": 0.10989010989010989,
"grad_norm": 0.1399059295654297,
"learning_rate": 2.99978601857881e-05,
"loss": 1.354356050491333,
"step": 60
},
{
"epoch": 0.11355311355311355,
"grad_norm": 0.4168715178966522,
"learning_rate": 2.999680352911357e-05,
"loss": 1.3717955350875854,
"step": 62
},
{
"epoch": 0.11721611721611722,
"grad_norm": 0.3270918130874634,
"learning_rate": 2.9995535577497556e-05,
"loss": 1.3428699970245361,
"step": 64
},
{
"epoch": 0.12087912087912088,
"grad_norm": 0.23755627870559692,
"learning_rate": 2.999405635079008e-05,
"loss": 1.3064519166946411,
"step": 66
},
{
"epoch": 0.12454212454212454,
"grad_norm": 0.16909252107143402,
"learning_rate": 2.99923658721487e-05,
"loss": 1.2845274209976196,
"step": 68
},
{
"epoch": 0.1282051282051282,
"grad_norm": 0.6442620754241943,
"learning_rate": 2.9990464168038176e-05,
"loss": 1.2543851137161255,
"step": 70
},
{
"epoch": 0.13186813186813187,
"grad_norm": 0.1498740315437317,
"learning_rate": 2.998835126823003e-05,
"loss": 1.3962249755859375,
"step": 72
},
{
"epoch": 0.13553113553113552,
"grad_norm": 0.22128598392009735,
"learning_rate": 2.99860272058021e-05,
"loss": 1.5755624771118164,
"step": 74
},
{
"epoch": 0.1391941391941392,
"grad_norm": 0.06653512269258499,
"learning_rate": 2.998349201713801e-05,
"loss": 1.061017394065857,
"step": 76
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.10738393664360046,
"learning_rate": 2.998074574192661e-05,
"loss": 1.3995990753173828,
"step": 78
},
{
"epoch": 0.14652014652014653,
"grad_norm": 0.10514689981937408,
"learning_rate": 2.9977788423161336e-05,
"loss": 1.5613818168640137,
"step": 80
},
{
"epoch": 0.15018315018315018,
"grad_norm": 0.16226087510585785,
"learning_rate": 2.997462010713957e-05,
"loss": 1.17906653881073,
"step": 82
},
{
"epoch": 0.15384615384615385,
"grad_norm": 0.1813281923532486,
"learning_rate": 2.997124084346186e-05,
"loss": 1.3376390933990479,
"step": 84
},
{
"epoch": 0.1575091575091575,
"grad_norm": 0.1863105148077011,
"learning_rate": 2.9967650685031216e-05,
"loss": 1.4737788438796997,
"step": 86
},
{
"epoch": 0.16117216117216118,
"grad_norm": 0.11065241694450378,
"learning_rate": 2.9963849688052232e-05,
"loss": 0.7325466275215149,
"step": 88
},
{
"epoch": 0.16483516483516483,
"grad_norm": 0.19816920161247253,
"learning_rate": 2.9959837912030202e-05,
"loss": 0.8696061968803406,
"step": 90
},
{
"epoch": 0.1684981684981685,
"grad_norm": 0.16392071545124054,
"learning_rate": 2.9955615419770222e-05,
"loss": 1.2930067777633667,
"step": 92
},
{
"epoch": 0.17216117216117216,
"grad_norm": 0.15244193375110626,
"learning_rate": 2.9951182277376195e-05,
"loss": 1.319109320640564,
"step": 94
},
{
"epoch": 0.17582417582417584,
"grad_norm": 0.216253861784935,
"learning_rate": 2.9946538554249767e-05,
"loss": 1.212167739868164,
"step": 96
},
{
"epoch": 0.1794871794871795,
"grad_norm": 0.17411836981773376,
"learning_rate": 2.994168432308928e-05,
"loss": 1.3011664152145386,
"step": 98
},
{
"epoch": 0.18315018315018314,
"grad_norm": 0.18492431938648224,
"learning_rate": 2.9936619659888623e-05,
"loss": 1.1626869440078735,
"step": 100
},
{
"epoch": 0.18681318681318682,
"grad_norm": 0.17933642864227295,
"learning_rate": 2.993134464393602e-05,
"loss": 1.2483956813812256,
"step": 102
},
{
"epoch": 0.19047619047619047,
"grad_norm": 0.1060023307800293,
"learning_rate": 2.9925859357812825e-05,
"loss": 0.9081482887268066,
"step": 104
},
{
"epoch": 0.19413919413919414,
"grad_norm": 2.96535325050354,
"learning_rate": 2.9920163887392198e-05,
"loss": 1.0657414197921753,
"step": 106
},
{
"epoch": 0.1978021978021978,
"grad_norm": 0.259998619556427,
"learning_rate": 2.9914258321837772e-05,
"loss": 1.2962814569473267,
"step": 108
},
{
"epoch": 0.20146520146520147,
"grad_norm": 0.1942208707332611,
"learning_rate": 2.9908142753602263e-05,
"loss": 1.1466888189315796,
"step": 110
},
{
"epoch": 0.20512820512820512,
"grad_norm": 0.0988549217581749,
"learning_rate": 2.990181727842602e-05,
"loss": 1.20805823802948,
"step": 112
},
{
"epoch": 0.2087912087912088,
"grad_norm": 0.7758049964904785,
"learning_rate": 2.9895281995335517e-05,
"loss": 0.6120161414146423,
"step": 114
},
{
"epoch": 0.21245421245421245,
"grad_norm": 0.5128256678581238,
"learning_rate": 2.9888537006641817e-05,
"loss": 1.2869303226470947,
"step": 116
},
{
"epoch": 0.21611721611721613,
"grad_norm": 0.1632162183523178,
"learning_rate": 2.9881582417938958e-05,
"loss": 1.2748751640319824,
"step": 118
},
{
"epoch": 0.21978021978021978,
"grad_norm": 0.1547815352678299,
"learning_rate": 2.9874418338102297e-05,
"loss": 1.3451178073883057,
"step": 120
},
{
"epoch": 0.22344322344322345,
"grad_norm": 0.1399235874414444,
"learning_rate": 2.9867044879286828e-05,
"loss": 1.2734277248382568,
"step": 122
},
{
"epoch": 0.2271062271062271,
"grad_norm": 0.23412162065505981,
"learning_rate": 2.985946215692541e-05,
"loss": 1.008587121963501,
"step": 124
},
{
"epoch": 0.23076923076923078,
"grad_norm": 0.21800032258033752,
"learning_rate": 2.9851670289726944e-05,
"loss": 0.8805273771286011,
"step": 126
},
{
"epoch": 0.23443223443223443,
"grad_norm": 0.32528096437454224,
"learning_rate": 2.9843669399674548e-05,
"loss": 1.1765263080596924,
"step": 128
},
{
"epoch": 0.23809523809523808,
"grad_norm": 0.8285342454910278,
"learning_rate": 2.9835459612023636e-05,
"loss": 1.2803601026535034,
"step": 130
},
{
"epoch": 0.24175824175824176,
"grad_norm": 0.11129158735275269,
"learning_rate": 2.9827041055299935e-05,
"loss": 1.3686916828155518,
"step": 132
},
{
"epoch": 0.2454212454212454,
"grad_norm": 0.4619719684123993,
"learning_rate": 2.98184138612975e-05,
"loss": 1.2748310565948486,
"step": 134
},
{
"epoch": 0.2490842490842491,
"grad_norm": 0.1334097981452942,
"learning_rate": 2.9809578165076638e-05,
"loss": 1.2551707029342651,
"step": 136
},
{
"epoch": 0.25274725274725274,
"grad_norm": 0.4292519986629486,
"learning_rate": 2.9800534104961805e-05,
"loss": 1.0610690116882324,
"step": 138
},
{
"epoch": 0.2564102564102564,
"grad_norm": 0.1360931247472763,
"learning_rate": 2.979128182253942e-05,
"loss": 1.2578339576721191,
"step": 140
},
{
"epoch": 0.2600732600732601,
"grad_norm": 0.47023099660873413,
"learning_rate": 2.9781821462655665e-05,
"loss": 1.270743489265442,
"step": 142
},
{
"epoch": 0.26373626373626374,
"grad_norm": 0.16765442490577698,
"learning_rate": 2.977215317341422e-05,
"loss": 1.2367427349090576,
"step": 144
},
{
"epoch": 0.2673992673992674,
"grad_norm": 0.25935599207878113,
"learning_rate": 2.9762277106173925e-05,
"loss": 1.389586329460144,
"step": 146
},
{
"epoch": 0.27106227106227104,
"grad_norm": 0.15559542179107666,
"learning_rate": 2.975219341554643e-05,
"loss": 1.2958955764770508,
"step": 148
},
{
"epoch": 0.27472527472527475,
"grad_norm": 0.053712569177150726,
"learning_rate": 2.9741902259393773e-05,
"loss": 0.9116654396057129,
"step": 150
},
{
"epoch": 0.2783882783882784,
"grad_norm": 0.06762389093637466,
"learning_rate": 2.9731403798825883e-05,
"loss": 0.6223784685134888,
"step": 152
},
{
"epoch": 0.28205128205128205,
"grad_norm": 1.4543309211730957,
"learning_rate": 2.9720698198198106e-05,
"loss": 1.0832018852233887,
"step": 154
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.13763271272182465,
"learning_rate": 2.9709785625108577e-05,
"loss": 1.3260327577590942,
"step": 156
},
{
"epoch": 0.2893772893772894,
"grad_norm": 0.17763610184192657,
"learning_rate": 2.969866625039564e-05,
"loss": 1.2564287185668945,
"step": 158
},
{
"epoch": 0.29304029304029305,
"grad_norm": 0.24049849808216095,
"learning_rate": 2.968734024813515e-05,
"loss": 0.9224046468734741,
"step": 160
},
{
"epoch": 0.2967032967032967,
"grad_norm": 0.1957104206085205,
"learning_rate": 2.9675807795637753e-05,
"loss": 1.5114474296569824,
"step": 162
},
{
"epoch": 0.30036630036630035,
"grad_norm": 0.11220777034759521,
"learning_rate": 2.9664069073446123e-05,
"loss": 1.0336811542510986,
"step": 164
},
{
"epoch": 0.304029304029304,
"grad_norm": 0.24891024827957153,
"learning_rate": 2.9652124265332104e-05,
"loss": 1.3741110563278198,
"step": 166
},
{
"epoch": 0.3076923076923077,
"grad_norm": 0.16911421716213226,
"learning_rate": 2.9639973558293873e-05,
"loss": 1.1178027391433716,
"step": 168
},
{
"epoch": 0.31135531135531136,
"grad_norm": 0.14972515404224396,
"learning_rate": 2.9627617142552972e-05,
"loss": 1.240762710571289,
"step": 170
},
{
"epoch": 0.315018315018315,
"grad_norm": 0.2638348340988159,
"learning_rate": 2.9615055211551372e-05,
"loss": 1.0101988315582275,
"step": 172
},
{
"epoch": 0.31868131868131866,
"grad_norm": 0.2641901969909668,
"learning_rate": 2.9602287961948407e-05,
"loss": 0.8690211772918701,
"step": 174
},
{
"epoch": 0.32234432234432236,
"grad_norm": 0.1691320687532425,
"learning_rate": 2.958931559361772e-05,
"loss": 1.3790042400360107,
"step": 176
},
{
"epoch": 0.326007326007326,
"grad_norm": 0.16526873409748077,
"learning_rate": 2.9576138309644126e-05,
"loss": 0.9012804627418518,
"step": 178
},
{
"epoch": 0.32967032967032966,
"grad_norm": 0.25823915004730225,
"learning_rate": 2.9562756316320423e-05,
"loss": 1.4668437242507935,
"step": 180
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.18741410970687866,
"learning_rate": 2.9549169823144186e-05,
"loss": 0.9619400501251221,
"step": 182
},
{
"epoch": 0.336996336996337,
"grad_norm": 1.2425379753112793,
"learning_rate": 2.9535379042814454e-05,
"loss": 1.2266501188278198,
"step": 184
},
{
"epoch": 0.34065934065934067,
"grad_norm": 1.2727388143539429,
"learning_rate": 2.9521384191228436e-05,
"loss": 1.2628209590911865,
"step": 186
},
{
"epoch": 0.3443223443223443,
"grad_norm": 0.12109709531068802,
"learning_rate": 2.950718548747811e-05,
"loss": 1.163791537284851,
"step": 188
},
{
"epoch": 0.34798534798534797,
"grad_norm": 0.1681678593158722,
"learning_rate": 2.9492783153846787e-05,
"loss": 1.2521121501922607,
"step": 190
},
{
"epoch": 0.3516483516483517,
"grad_norm": 0.39801836013793945,
"learning_rate": 2.9478177415805647e-05,
"loss": 1.0975821018218994,
"step": 192
},
{
"epoch": 0.3553113553113553,
"grad_norm": 0.11527588963508606,
"learning_rate": 2.946336850201022e-05,
"loss": 1.2337632179260254,
"step": 194
},
{
"epoch": 0.358974358974359,
"grad_norm": 0.23134586215019226,
"learning_rate": 2.9448356644296764e-05,
"loss": 1.2389250993728638,
"step": 196
},
{
"epoch": 0.3626373626373626,
"grad_norm": 0.21775896847248077,
"learning_rate": 2.943314207767867e-05,
"loss": 0.8557232022285461,
"step": 198
},
{
"epoch": 0.3663003663003663,
"grad_norm": 0.26850470900535583,
"learning_rate": 2.9417725040342783e-05,
"loss": 0.9985939264297485,
"step": 200
},
{
"epoch": 0.36996336996337,
"grad_norm": 1.0465861558914185,
"learning_rate": 2.9402105773645648e-05,
"loss": 1.421555757522583,
"step": 202
},
{
"epoch": 0.37362637362637363,
"grad_norm": 0.33346226811408997,
"learning_rate": 2.9386284522109774e-05,
"loss": 1.2066435813903809,
"step": 204
},
{
"epoch": 0.3772893772893773,
"grad_norm": 0.3973228335380554,
"learning_rate": 2.937026153341975e-05,
"loss": 0.7680864334106445,
"step": 206
},
{
"epoch": 0.38095238095238093,
"grad_norm": 0.3039422929286957,
"learning_rate": 2.9354037058418424e-05,
"loss": 1.1236940622329712,
"step": 208
},
{
"epoch": 0.38461538461538464,
"grad_norm": 0.3086465895175934,
"learning_rate": 2.9337611351102914e-05,
"loss": 1.392964243888855,
"step": 210
},
{
"epoch": 0.3882783882783883,
"grad_norm": 0.1851595640182495,
"learning_rate": 2.932098466862071e-05,
"loss": 1.0556249618530273,
"step": 212
},
{
"epoch": 0.39194139194139194,
"grad_norm": 0.15683996677398682,
"learning_rate": 2.9304157271265576e-05,
"loss": 1.1175445318222046,
"step": 214
},
{
"epoch": 0.3956043956043956,
"grad_norm": 0.14839743077754974,
"learning_rate": 2.9287129422473514e-05,
"loss": 1.2113745212554932,
"step": 216
},
{
"epoch": 0.3992673992673993,
"grad_norm": 0.11618765443563461,
"learning_rate": 2.9269901388818625e-05,
"loss": 1.2436152696609497,
"step": 218
},
{
"epoch": 0.40293040293040294,
"grad_norm": 0.12922729551792145,
"learning_rate": 2.9252473440008948e-05,
"loss": 0.619842529296875,
"step": 220
},
{
"epoch": 0.4065934065934066,
"grad_norm": 0.16302533447742462,
"learning_rate": 2.923484584888222e-05,
"loss": 1.325548529624939,
"step": 222
},
{
"epoch": 0.41025641025641024,
"grad_norm": 0.146114781498909,
"learning_rate": 2.9217018891401635e-05,
"loss": 1.0726938247680664,
"step": 224
},
{
"epoch": 0.4139194139194139,
"grad_norm": 0.16791270673274994,
"learning_rate": 2.9198992846651482e-05,
"loss": 1.2183935642242432,
"step": 226
},
{
"epoch": 0.4175824175824176,
"grad_norm": 0.15313786268234253,
"learning_rate": 2.9180767996832804e-05,
"loss": 1.2867389917373657,
"step": 228
},
{
"epoch": 0.42124542124542125,
"grad_norm": 0.24053709208965302,
"learning_rate": 2.9162344627258984e-05,
"loss": 1.213542103767395,
"step": 230
},
{
"epoch": 0.4249084249084249,
"grad_norm": 0.17704996466636658,
"learning_rate": 2.9143723026351256e-05,
"loss": 0.977487325668335,
"step": 232
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.15275506675243378,
"learning_rate": 2.9124903485634212e-05,
"loss": 1.472753882408142,
"step": 234
},
{
"epoch": 0.43223443223443225,
"grad_norm": 0.31269755959510803,
"learning_rate": 2.9105886299731215e-05,
"loss": 1.0034068822860718,
"step": 236
},
{
"epoch": 0.4358974358974359,
"grad_norm": 1.7898775339126587,
"learning_rate": 2.9086671766359816e-05,
"loss": 1.0583497285842896,
"step": 238
},
{
"epoch": 0.43956043956043955,
"grad_norm": 0.27997422218322754,
"learning_rate": 2.9067260186327068e-05,
"loss": 0.8900536894798279,
"step": 240
},
{
"epoch": 0.4432234432234432,
"grad_norm": 0.23087511956691742,
"learning_rate": 2.904765186352482e-05,
"loss": 0.8671596646308899,
"step": 242
},
{
"epoch": 0.4468864468864469,
"grad_norm": 0.07247351109981537,
"learning_rate": 2.902784710492498e-05,
"loss": 0.5696659684181213,
"step": 244
},
{
"epoch": 0.45054945054945056,
"grad_norm": 0.2190362513065338,
"learning_rate": 2.9007846220574677e-05,
"loss": 1.2421321868896484,
"step": 246
},
{
"epoch": 0.4542124542124542,
"grad_norm": 0.5854616165161133,
"learning_rate": 2.8987649523591442e-05,
"loss": 0.8691569566726685,
"step": 248
},
{
"epoch": 0.45787545787545786,
"grad_norm": 0.5738214254379272,
"learning_rate": 2.8967257330158273e-05,
"loss": 1.2534294128417969,
"step": 250
},
{
"epoch": 0.46153846153846156,
"grad_norm": 3.5424513816833496,
"learning_rate": 2.8946669959518716e-05,
"loss": 1.1625200510025024,
"step": 252
},
{
"epoch": 0.4652014652014652,
"grad_norm": 0.591675341129303,
"learning_rate": 2.892588773397184e-05,
"loss": 0.9581058025360107,
"step": 254
},
{
"epoch": 0.46886446886446886,
"grad_norm": 0.19084429740905762,
"learning_rate": 2.8904910978867214e-05,
"loss": 1.242812156677246,
"step": 256
},
{
"epoch": 0.4725274725274725,
"grad_norm": 0.2806980311870575,
"learning_rate": 2.888374002259979e-05,
"loss": 1.2669005393981934,
"step": 258
},
{
"epoch": 0.47619047619047616,
"grad_norm": 0.09780281782150269,
"learning_rate": 2.8862375196604782e-05,
"loss": 0.725159227848053,
"step": 260
},
{
"epoch": 0.47985347985347987,
"grad_norm": 0.3521279990673065,
"learning_rate": 2.8840816835352475e-05,
"loss": 1.1425093412399292,
"step": 262
},
{
"epoch": 0.4835164835164835,
"grad_norm": 0.11237731575965881,
"learning_rate": 2.881906527634298e-05,
"loss": 1.216615080833435,
"step": 264
},
{
"epoch": 0.48717948717948717,
"grad_norm": 0.1556321531534195,
"learning_rate": 2.8797120860100952e-05,
"loss": 1.4829010963439941,
"step": 266
},
{
"epoch": 0.4908424908424908,
"grad_norm": 0.6579442024230957,
"learning_rate": 2.8774983930170256e-05,
"loss": 0.78812175989151,
"step": 268
},
{
"epoch": 0.4945054945054945,
"grad_norm": 0.2598232328891754,
"learning_rate": 2.875265483310861e-05,
"loss": 1.1891193389892578,
"step": 270
},
{
"epoch": 0.4981684981684982,
"grad_norm": 0.171274334192276,
"learning_rate": 2.873013391848213e-05,
"loss": 0.8797785043716431,
"step": 272
},
{
"epoch": 0.5018315018315018,
"grad_norm": 0.21141739189624786,
"learning_rate": 2.8707421538859884e-05,
"loss": 1.236473798751831,
"step": 274
},
{
"epoch": 0.5054945054945055,
"grad_norm": 0.3102591633796692,
"learning_rate": 2.8684518049808345e-05,
"loss": 0.8383572101593018,
"step": 276
},
{
"epoch": 0.5091575091575091,
"grad_norm": 0.11805420368909836,
"learning_rate": 2.8661423809885846e-05,
"loss": 0.8777554035186768,
"step": 278
},
{
"epoch": 0.5128205128205128,
"grad_norm": 0.2608731687068939,
"learning_rate": 2.8638139180636962e-05,
"loss": 1.2403507232666016,
"step": 280
},
{
"epoch": 0.5164835164835165,
"grad_norm": 0.19656170904636383,
"learning_rate": 2.861466452658685e-05,
"loss": 1.2285083532333374,
"step": 282
},
{
"epoch": 0.5201465201465202,
"grad_norm": 0.3849106729030609,
"learning_rate": 2.8591000215235535e-05,
"loss": 1.2334085702896118,
"step": 284
},
{
"epoch": 0.5238095238095238,
"grad_norm": 0.25484830141067505,
"learning_rate": 2.8567146617052157e-05,
"loss": 0.9802875518798828,
"step": 286
},
{
"epoch": 0.5274725274725275,
"grad_norm": 0.15390388667583466,
"learning_rate": 2.854310410546919e-05,
"loss": 1.2414391040802002,
"step": 288
},
{
"epoch": 0.5311355311355311,
"grad_norm": 0.1489962786436081,
"learning_rate": 2.851887305687657e-05,
"loss": 1.2051469087600708,
"step": 290
},
{
"epoch": 0.5347985347985348,
"grad_norm": 0.11044786125421524,
"learning_rate": 2.8494453850615823e-05,
"loss": 0.7286781668663025,
"step": 292
},
{
"epoch": 0.5384615384615384,
"grad_norm": 0.03477945178747177,
"learning_rate": 2.846984686897411e-05,
"loss": 0.939666748046875,
"step": 294
},
{
"epoch": 0.5421245421245421,
"grad_norm": 0.3079014718532562,
"learning_rate": 2.8445052497178255e-05,
"loss": 1.0221385955810547,
"step": 296
},
{
"epoch": 0.5457875457875457,
"grad_norm": 0.4201778173446655,
"learning_rate": 2.8420071123388712e-05,
"loss": 1.2870213985443115,
"step": 298
},
{
"epoch": 0.5494505494505495,
"grad_norm": 0.19912484288215637,
"learning_rate": 2.839490313869348e-05,
"loss": 1.206202507019043,
"step": 300
},
{
"epoch": 0.5531135531135531,
"grad_norm": 0.11873411387205124,
"learning_rate": 2.8369548937101984e-05,
"loss": 1.1803181171417236,
"step": 302
},
{
"epoch": 0.5567765567765568,
"grad_norm": 0.29429757595062256,
"learning_rate": 2.8344008915538916e-05,
"loss": 0.9696465730667114,
"step": 304
},
{
"epoch": 0.5604395604395604,
"grad_norm": 0.27017688751220703,
"learning_rate": 2.831828347383802e-05,
"loss": 0.9468304514884949,
"step": 306
},
{
"epoch": 0.5641025641025641,
"grad_norm": 0.2277199774980545,
"learning_rate": 2.82923730147358e-05,
"loss": 1.385387897491455,
"step": 308
},
{
"epoch": 0.5677655677655677,
"grad_norm": 0.16605223715305328,
"learning_rate": 2.826627794386527e-05,
"loss": 0.9745985865592957,
"step": 310
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.18600815534591675,
"learning_rate": 2.823999866974956e-05,
"loss": 1.205910563468933,
"step": 312
},
{
"epoch": 0.575091575091575,
"grad_norm": 0.14909575879573822,
"learning_rate": 2.821353560379554e-05,
"loss": 1.1275948286056519,
"step": 314
},
{
"epoch": 0.5787545787545788,
"grad_norm": 0.3817387819290161,
"learning_rate": 2.8186889160287368e-05,
"loss": 1.2773256301879883,
"step": 316
},
{
"epoch": 0.5824175824175825,
"grad_norm": 0.15909817814826965,
"learning_rate": 2.816005975638003e-05,
"loss": 1.119057297706604,
"step": 318
},
{
"epoch": 0.5860805860805861,
"grad_norm": 0.13221731781959534,
"learning_rate": 2.8133047812092776e-05,
"loss": 1.3285653591156006,
"step": 320
},
{
"epoch": 0.5897435897435898,
"grad_norm": 0.29347363114356995,
"learning_rate": 2.810585375030255e-05,
"loss": 1.51324462890625,
"step": 322
},
{
"epoch": 0.5934065934065934,
"grad_norm": 0.23748619854450226,
"learning_rate": 2.8078477996737404e-05,
"loss": 1.2570521831512451,
"step": 324
},
{
"epoch": 0.5970695970695971,
"grad_norm": 0.2422262281179428,
"learning_rate": 2.805092097996979e-05,
"loss": 1.3527086973190308,
"step": 326
},
{
"epoch": 0.6007326007326007,
"grad_norm": 0.16281095147132874,
"learning_rate": 2.8023183131409867e-05,
"loss": 1.2173492908477783,
"step": 328
},
{
"epoch": 0.6043956043956044,
"grad_norm": 0.23670464754104614,
"learning_rate": 2.799526488529877e-05,
"loss": 1.1010452508926392,
"step": 330
},
{
"epoch": 0.608058608058608,
"grad_norm": 0.30718305706977844,
"learning_rate": 2.7967166678701764e-05,
"loss": 1.174407958984375,
"step": 332
},
{
"epoch": 0.6117216117216118,
"grad_norm": 0.17562542855739594,
"learning_rate": 2.7938888951501446e-05,
"loss": 1.2717376947402954,
"step": 334
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.4294782280921936,
"learning_rate": 2.7910432146390835e-05,
"loss": 0.877622127532959,
"step": 336
},
{
"epoch": 0.6190476190476191,
"grad_norm": 0.2313699722290039,
"learning_rate": 2.7881796708866444e-05,
"loss": 1.2277888059616089,
"step": 338
},
{
"epoch": 0.6227106227106227,
"grad_norm": 0.19100335240364075,
"learning_rate": 2.7852983087221323e-05,
"loss": 1.2185378074645996,
"step": 340
},
{
"epoch": 0.6263736263736264,
"grad_norm": 0.4008959233760834,
"learning_rate": 2.782399173253801e-05,
"loss": 0.5671009421348572,
"step": 342
},
{
"epoch": 0.63003663003663,
"grad_norm": 0.122787244617939,
"learning_rate": 2.7794823098681503e-05,
"loss": 1.4235719442367554,
"step": 344
},
{
"epoch": 0.6336996336996337,
"grad_norm": 0.23276489973068237,
"learning_rate": 2.7765477642292122e-05,
"loss": 0.6784141659736633,
"step": 346
},
{
"epoch": 0.6373626373626373,
"grad_norm": 0.181949183344841,
"learning_rate": 2.7735955822778383e-05,
"loss": 1.1455185413360596,
"step": 348
},
{
"epoch": 0.6410256410256411,
"grad_norm": 0.23124440014362335,
"learning_rate": 2.7706258102309807e-05,
"loss": 0.8757343888282776,
"step": 350
},
{
"epoch": 0.6446886446886447,
"grad_norm": 0.3276009261608124,
"learning_rate": 2.7676384945809665e-05,
"loss": 0.9093174934387207,
"step": 352
},
{
"epoch": 0.6483516483516484,
"grad_norm": 0.2787076234817505,
"learning_rate": 2.7646336820947716e-05,
"loss": 0.931143045425415,
"step": 354
},
{
"epoch": 0.652014652014652,
"grad_norm": 0.3154600262641907,
"learning_rate": 2.7616114198132885e-05,
"loss": 1.1572201251983643,
"step": 356
},
{
"epoch": 0.6556776556776557,
"grad_norm": 0.461033433675766,
"learning_rate": 2.7585717550505885e-05,
"loss": 1.1429153680801392,
"step": 358
},
{
"epoch": 0.6593406593406593,
"grad_norm": 0.5056472420692444,
"learning_rate": 2.7555147353931828e-05,
"loss": 0.6815345883369446,
"step": 360
},
{
"epoch": 0.663003663003663,
"grad_norm": 2.4244563579559326,
"learning_rate": 2.752440408699276e-05,
"loss": 0.9283912181854248,
"step": 362
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.16936089098453522,
"learning_rate": 2.7493488230980183e-05,
"loss": 0.9282920360565186,
"step": 364
},
{
"epoch": 0.6703296703296703,
"grad_norm": 0.3102758228778839,
"learning_rate": 2.746240026988751e-05,
"loss": 1.1775506734848022,
"step": 366
},
{
"epoch": 0.673992673992674,
"grad_norm": 0.2653065323829651,
"learning_rate": 2.7431140690402486e-05,
"loss": 1.0435811281204224,
"step": 368
},
{
"epoch": 0.6776556776556777,
"grad_norm": 0.08228558301925659,
"learning_rate": 2.7399709981899575e-05,
"loss": 1.0839931964874268,
"step": 370
},
{
"epoch": 0.6813186813186813,
"grad_norm": 0.38366320729255676,
"learning_rate": 2.7368108636432305e-05,
"loss": 1.1829266548156738,
"step": 372
},
{
"epoch": 0.684981684981685,
"grad_norm": 0.28726011514663696,
"learning_rate": 2.7336337148725544e-05,
"loss": 1.1892226934432983,
"step": 374
},
{
"epoch": 0.6886446886446886,
"grad_norm": 0.12154829502105713,
"learning_rate": 2.7304396016167787e-05,
"loss": 0.8759651780128479,
"step": 376
},
{
"epoch": 0.6923076923076923,
"grad_norm": 0.10848227143287659,
"learning_rate": 2.7272285738803325e-05,
"loss": 0.8214247822761536,
"step": 378
},
{
"epoch": 0.6959706959706959,
"grad_norm": 0.6671015620231628,
"learning_rate": 2.7240006819324463e-05,
"loss": 1.035841941833496,
"step": 380
},
{
"epoch": 0.6996336996336996,
"grad_norm": 0.14265285432338715,
"learning_rate": 2.7207559763063615e-05,
"loss": 0.9698776602745056,
"step": 382
},
{
"epoch": 0.7032967032967034,
"grad_norm": 0.13904374837875366,
"learning_rate": 2.7174945077985425e-05,
"loss": 1.1749252080917358,
"step": 384
},
{
"epoch": 0.706959706959707,
"grad_norm": 0.2913973927497864,
"learning_rate": 2.7142163274678783e-05,
"loss": 1.0791226625442505,
"step": 386
},
{
"epoch": 0.7106227106227107,
"grad_norm": 0.10218534618616104,
"learning_rate": 2.7109214866348845e-05,
"loss": 0.8769177794456482,
"step": 388
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.23247778415679932,
"learning_rate": 2.7076100368809007e-05,
"loss": 1.0835739374160767,
"step": 390
},
{
"epoch": 0.717948717948718,
"grad_norm": 0.2376396507024765,
"learning_rate": 2.704282030047281e-05,
"loss": 1.1691678762435913,
"step": 392
},
{
"epoch": 0.7216117216117216,
"grad_norm": 0.22467094659805298,
"learning_rate": 2.7009375182345852e-05,
"loss": 1.1094324588775635,
"step": 394
},
{
"epoch": 0.7252747252747253,
"grad_norm": 0.13416820764541626,
"learning_rate": 2.697576553801761e-05,
"loss": 1.23366117477417,
"step": 396
},
{
"epoch": 0.7289377289377289,
"grad_norm": 0.2065097987651825,
"learning_rate": 2.6941991893653237e-05,
"loss": 0.914151132106781,
"step": 398
},
{
"epoch": 0.7326007326007326,
"grad_norm": 0.17413359880447388,
"learning_rate": 2.6908054777985364e-05,
"loss": 0.8300017714500427,
"step": 400
},
{
"epoch": 0.7362637362637363,
"grad_norm": 0.1717977672815323,
"learning_rate": 2.6873954722305758e-05,
"loss": 1.2391371726989746,
"step": 402
},
{
"epoch": 0.73992673992674,
"grad_norm": 0.16516517102718353,
"learning_rate": 2.6839692260457073e-05,
"loss": 0.8433033227920532,
"step": 404
},
{
"epoch": 0.7435897435897436,
"grad_norm": 0.24240587651729584,
"learning_rate": 2.6805267928824453e-05,
"loss": 1.0130888223648071,
"step": 406
},
{
"epoch": 0.7472527472527473,
"grad_norm": 0.31289470195770264,
"learning_rate": 2.6770682266327137e-05,
"loss": 1.0070133209228516,
"step": 408
},
{
"epoch": 0.7509157509157509,
"grad_norm": 0.23881283402442932,
"learning_rate": 2.6735935814410034e-05,
"loss": 0.8644686341285706,
"step": 410
},
{
"epoch": 0.7545787545787546,
"grad_norm": 0.16381560266017914,
"learning_rate": 2.6701029117035233e-05,
"loss": 1.1643633842468262,
"step": 412
},
{
"epoch": 0.7582417582417582,
"grad_norm": 0.7487297654151917,
"learning_rate": 2.666596272067351e-05,
"loss": 0.9935792684555054,
"step": 414
},
{
"epoch": 0.7619047619047619,
"grad_norm": 0.21959738433361053,
"learning_rate": 2.663073717429574e-05,
"loss": 1.2659343481063843,
"step": 416
},
{
"epoch": 0.7655677655677655,
"grad_norm": 0.1119983121752739,
"learning_rate": 2.6595353029364336e-05,
"loss": 0.6618472337722778,
"step": 418
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.14166665077209473,
"learning_rate": 2.6559810839824595e-05,
"loss": 1.228902816772461,
"step": 420
},
{
"epoch": 0.7728937728937729,
"grad_norm": 0.30395272374153137,
"learning_rate": 2.6524111162096034e-05,
"loss": 0.9871556758880615,
"step": 422
},
{
"epoch": 0.7765567765567766,
"grad_norm": 0.14193427562713623,
"learning_rate": 2.648825455506366e-05,
"loss": 0.48868873715400696,
"step": 424
},
{
"epoch": 0.7802197802197802,
"grad_norm": 0.23696619272232056,
"learning_rate": 2.6452241580069266e-05,
"loss": 0.7792347073554993,
"step": 426
},
{
"epoch": 0.7838827838827839,
"grad_norm": 0.25328806042671204,
"learning_rate": 2.6416072800902587e-05,
"loss": 1.059155821800232,
"step": 428
},
{
"epoch": 0.7875457875457875,
"grad_norm": 0.1666765809059143,
"learning_rate": 2.6379748783792524e-05,
"loss": 1.2015224695205688,
"step": 430
},
{
"epoch": 0.7912087912087912,
"grad_norm": 0.16949041187763214,
"learning_rate": 2.6343270097398235e-05,
"loss": 1.2865936756134033,
"step": 432
},
{
"epoch": 0.7948717948717948,
"grad_norm": 0.28642794489860535,
"learning_rate": 2.630663731280027e-05,
"loss": 0.8753965497016907,
"step": 434
},
{
"epoch": 0.7985347985347986,
"grad_norm": 0.27329057455062866,
"learning_rate": 2.626985100349161e-05,
"loss": 0.9554466009140015,
"step": 436
},
{
"epoch": 0.8021978021978022,
"grad_norm": 0.2615062892436981,
"learning_rate": 2.6232911745368683e-05,
"loss": 0.8810899257659912,
"step": 438
},
{
"epoch": 0.8058608058608059,
"grad_norm": 0.7468202114105225,
"learning_rate": 2.619582011672238e-05,
"loss": 1.293893814086914,
"step": 440
},
{
"epoch": 0.8095238095238095,
"grad_norm": 0.12691368162631989,
"learning_rate": 2.6158576698228962e-05,
"loss": 1.1932464838027954,
"step": 442
},
{
"epoch": 0.8131868131868132,
"grad_norm": 1.2029927968978882,
"learning_rate": 2.6121182072941003e-05,
"loss": 1.1963627338409424,
"step": 444
},
{
"epoch": 0.8168498168498168,
"grad_norm": 0.2814699113368988,
"learning_rate": 2.6083636826278228e-05,
"loss": 1.2165777683258057,
"step": 446
},
{
"epoch": 0.8205128205128205,
"grad_norm": 0.1440906971693039,
"learning_rate": 2.6045941546018393e-05,
"loss": 1.2216907739639282,
"step": 448
},
{
"epoch": 0.8241758241758241,
"grad_norm": 0.20276811718940735,
"learning_rate": 2.600809682228803e-05,
"loss": 1.296385645866394,
"step": 450
},
{
"epoch": 0.8278388278388278,
"grad_norm": 0.2349991798400879,
"learning_rate": 2.5970103247553255e-05,
"loss": 0.9932718873023987,
"step": 452
},
{
"epoch": 0.8315018315018315,
"grad_norm": 0.08576500415802002,
"learning_rate": 2.5931961416610467e-05,
"loss": 0.49712786078453064,
"step": 454
},
{
"epoch": 0.8351648351648352,
"grad_norm": 0.06757020205259323,
"learning_rate": 2.5893671926577045e-05,
"loss": 0.8930724263191223,
"step": 456
},
{
"epoch": 0.8388278388278388,
"grad_norm": 0.26352459192276,
"learning_rate": 2.5855235376881992e-05,
"loss": 0.8232430815696716,
"step": 458
},
{
"epoch": 0.8424908424908425,
"grad_norm": 0.3167935013771057,
"learning_rate": 2.5816652369256575e-05,
"loss": 1.0176485776901245,
"step": 460
},
{
"epoch": 0.8461538461538461,
"grad_norm": 0.917674720287323,
"learning_rate": 2.5777923507724863e-05,
"loss": 1.2205572128295898,
"step": 462
},
{
"epoch": 0.8498168498168498,
"grad_norm": 0.14571218192577362,
"learning_rate": 2.5739049398594304e-05,
"loss": 1.1725519895553589,
"step": 464
},
{
"epoch": 0.8534798534798534,
"grad_norm": 0.06749556213617325,
"learning_rate": 2.5700030650446236e-05,
"loss": 1.1946470737457275,
"step": 466
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.18941232562065125,
"learning_rate": 2.5660867874126333e-05,
"loss": 0.8800565004348755,
"step": 468
},
{
"epoch": 0.8608058608058609,
"grad_norm": 0.1686919927597046,
"learning_rate": 2.562156168273506e-05,
"loss": 0.9081763029098511,
"step": 470
},
{
"epoch": 0.8644688644688645,
"grad_norm": 0.21368175745010376,
"learning_rate": 2.558211269161807e-05,
"loss": 1.1638240814208984,
"step": 472
},
{
"epoch": 0.8681318681318682,
"grad_norm": 0.5323616862297058,
"learning_rate": 2.554252151835658e-05,
"loss": 0.8203825950622559,
"step": 474
},
{
"epoch": 0.8717948717948718,
"grad_norm": 0.1532222181558609,
"learning_rate": 2.550278878275768e-05,
"loss": 1.0964173078536987,
"step": 476
},
{
"epoch": 0.8754578754578755,
"grad_norm": 0.2797001302242279,
"learning_rate": 2.5462915106844662e-05,
"loss": 1.04283607006073,
"step": 478
},
{
"epoch": 0.8791208791208791,
"grad_norm": 0.374732106924057,
"learning_rate": 2.5422901114847252e-05,
"loss": 1.072584867477417,
"step": 480
},
{
"epoch": 0.8827838827838828,
"grad_norm": 0.34028613567352295,
"learning_rate": 2.5382747433191855e-05,
"loss": 1.1796945333480835,
"step": 482
},
{
"epoch": 0.8864468864468864,
"grad_norm": 0.2851976454257965,
"learning_rate": 2.5342454690491742e-05,
"loss": 1.2570006847381592,
"step": 484
},
{
"epoch": 0.8901098901098901,
"grad_norm": 0.40268465876579285,
"learning_rate": 2.5302023517537208e-05,
"loss": 0.8995251655578613,
"step": 486
},
{
"epoch": 0.8937728937728938,
"grad_norm": 0.3454226553440094,
"learning_rate": 2.52614545472857e-05,
"loss": 1.1451166868209839,
"step": 488
},
{
"epoch": 0.8974358974358975,
"grad_norm": 0.14757300913333893,
"learning_rate": 2.522074841485191e-05,
"loss": 1.1034044027328491,
"step": 490
},
{
"epoch": 0.9010989010989011,
"grad_norm": 0.21547852456569672,
"learning_rate": 2.517990575749784e-05,
"loss": 1.0412471294403076,
"step": 492
},
{
"epoch": 0.9047619047619048,
"grad_norm": 0.29664546251296997,
"learning_rate": 2.513892721462278e-05,
"loss": 0.5865399837493896,
"step": 494
},
{
"epoch": 0.9084249084249084,
"grad_norm": 0.2264026254415512,
"learning_rate": 2.5097813427753367e-05,
"loss": 1.2764793634414673,
"step": 496
},
{
"epoch": 0.9120879120879121,
"grad_norm": 0.2795766294002533,
"learning_rate": 2.5056565040533502e-05,
"loss": 1.1217232942581177,
"step": 498
},
{
"epoch": 0.9157509157509157,
"grad_norm": 0.1990990787744522,
"learning_rate": 2.5015182698714257e-05,
"loss": 0.8060373663902283,
"step": 500
},
{
"epoch": 0.9194139194139194,
"grad_norm": 0.1130012720823288,
"learning_rate": 2.4973667050143826e-05,
"loss": 0.8575466275215149,
"step": 502
},
{
"epoch": 0.9230769230769231,
"grad_norm": 0.37339073419570923,
"learning_rate": 2.4932018744757304e-05,
"loss": 0.9292844533920288,
"step": 504
},
{
"epoch": 0.9267399267399268,
"grad_norm": 0.1154537945985794,
"learning_rate": 2.4890238434566572e-05,
"loss": 0.8504922389984131,
"step": 506
},
{
"epoch": 0.9304029304029304,
"grad_norm": 0.19666744768619537,
"learning_rate": 2.4848326773650073e-05,
"loss": 1.1545928716659546,
"step": 508
},
{
"epoch": 0.9340659340659341,
"grad_norm": 0.46453702449798584,
"learning_rate": 2.4806284418142578e-05,
"loss": 1.2140713930130005,
"step": 510
},
{
"epoch": 0.9377289377289377,
"grad_norm": 0.15014062821865082,
"learning_rate": 2.4764112026224884e-05,
"loss": 1.165743112564087,
"step": 512
},
{
"epoch": 0.9413919413919414,
"grad_norm": 0.1959581971168518,
"learning_rate": 2.472181025811354e-05,
"loss": 0.9361699819564819,
"step": 514
},
{
"epoch": 0.945054945054945,
"grad_norm": 0.2921157777309418,
"learning_rate": 2.467937977605051e-05,
"loss": 1.1688098907470703,
"step": 516
},
{
"epoch": 0.9487179487179487,
"grad_norm": 0.28574448823928833,
"learning_rate": 2.4636821244292798e-05,
"loss": 1.204991340637207,
"step": 518
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.3698614239692688,
"learning_rate": 2.4594135329102042e-05,
"loss": 1.2093229293823242,
"step": 520
},
{
"epoch": 0.9560439560439561,
"grad_norm": 0.24117770791053772,
"learning_rate": 2.4551322698734087e-05,
"loss": 0.8721481561660767,
"step": 522
},
{
"epoch": 0.9597069597069597,
"grad_norm": 0.3299312889575958,
"learning_rate": 2.4508384023428545e-05,
"loss": 1.020495891571045,
"step": 524
},
{
"epoch": 0.9633699633699634,
"grad_norm": 0.2226363718509674,
"learning_rate": 2.446531997539828e-05,
"loss": 0.9500446915626526,
"step": 526
},
{
"epoch": 0.967032967032967,
"grad_norm": 0.19496284425258636,
"learning_rate": 2.4422131228818865e-05,
"loss": 0.6567864418029785,
"step": 528
},
{
"epoch": 0.9706959706959707,
"grad_norm": 0.15906959772109985,
"learning_rate": 2.437881845981809e-05,
"loss": 0.8984352946281433,
"step": 530
},
{
"epoch": 0.9743589743589743,
"grad_norm": 0.22973327338695526,
"learning_rate": 2.433538234646531e-05,
"loss": 1.345873236656189,
"step": 532
},
{
"epoch": 0.978021978021978,
"grad_norm": 6.082988739013672,
"learning_rate": 2.4291823568760872e-05,
"loss": 0.6008288860321045,
"step": 534
},
{
"epoch": 0.9816849816849816,
"grad_norm": 0.40974557399749756,
"learning_rate": 2.4248142808625442e-05,
"loss": 0.861249566078186,
"step": 536
},
{
"epoch": 0.9853479853479854,
"grad_norm": 0.16949814558029175,
"learning_rate": 2.420434074988937e-05,
"loss": 0.7901135087013245,
"step": 538
},
{
"epoch": 0.989010989010989,
"grad_norm": 0.35582488775253296,
"learning_rate": 2.4160418078281927e-05,
"loss": 1.119564414024353,
"step": 540
},
{
"epoch": 0.9926739926739927,
"grad_norm": 0.2957996726036072,
"learning_rate": 2.411637548142062e-05,
"loss": 1.1645922660827637,
"step": 542
},
{
"epoch": 0.9963369963369964,
"grad_norm": 0.38206928968429565,
"learning_rate": 2.4072213648800402e-05,
"loss": 1.1729217767715454,
"step": 544
},
{
"epoch": 1.0,
"grad_norm": 0.19045640528202057,
"learning_rate": 2.4027933271782885e-05,
"loss": 1.3550233840942383,
"step": 546
},
{
"epoch": 1.0036630036630036,
"grad_norm": 0.6254698038101196,
"learning_rate": 2.398353504358552e-05,
"loss": 0.9200565814971924,
"step": 548
},
{
"epoch": 1.0073260073260073,
"grad_norm": 0.17445723712444305,
"learning_rate": 2.3939019659270728e-05,
"loss": 1.1623973846435547,
"step": 550
},
{
"epoch": 1.010989010989011,
"grad_norm": 0.050915662199258804,
"learning_rate": 2.389438781573504e-05,
"loss": 0.652208149433136,
"step": 552
},
{
"epoch": 1.0146520146520146,
"grad_norm": 0.6708527207374573,
"learning_rate": 2.3849640211698174e-05,
"loss": 1.117956519126892,
"step": 554
},
{
"epoch": 1.0183150183150182,
"grad_norm": 0.44070523977279663,
"learning_rate": 2.3804777547692103e-05,
"loss": 1.011860966682434,
"step": 556
},
{
"epoch": 1.021978021978022,
"grad_norm": 0.25871241092681885,
"learning_rate": 2.3759800526050082e-05,
"loss": 1.2005692720413208,
"step": 558
},
{
"epoch": 1.0256410256410255,
"grad_norm": 0.2895589768886566,
"learning_rate": 2.371470985089565e-05,
"loss": 0.9117672443389893,
"step": 560
},
{
"epoch": 1.0293040293040292,
"grad_norm": 0.29746928811073303,
"learning_rate": 2.366950622813163e-05,
"loss": 1.151308536529541,
"step": 562
},
{
"epoch": 1.032967032967033,
"grad_norm": 0.24571825563907623,
"learning_rate": 2.362419036542904e-05,
"loss": 1.1341103315353394,
"step": 564
},
{
"epoch": 1.0366300366300367,
"grad_norm": 0.5598662495613098,
"learning_rate": 2.357876297221606e-05,
"loss": 1.0440534353256226,
"step": 566
},
{
"epoch": 1.0402930402930404,
"grad_norm": 0.20514336228370667,
"learning_rate": 2.3533224759666865e-05,
"loss": 1.0374512672424316,
"step": 568
},
{
"epoch": 1.043956043956044,
"grad_norm": 0.24357455968856812,
"learning_rate": 2.348757644069056e-05,
"loss": 1.0872145891189575,
"step": 570
},
{
"epoch": 1.0476190476190477,
"grad_norm": 0.25669795274734497,
"learning_rate": 2.3441818729919975e-05,
"loss": 1.2239400148391724,
"step": 572
},
{
"epoch": 1.0512820512820513,
"grad_norm": 0.40305405855178833,
"learning_rate": 2.3395952343700484e-05,
"loss": 1.0533016920089722,
"step": 574
},
{
"epoch": 1.054945054945055,
"grad_norm": 0.2591313421726227,
"learning_rate": 2.33499780000788e-05,
"loss": 1.2207998037338257,
"step": 576
},
{
"epoch": 1.0586080586080586,
"grad_norm": 0.174869567155838,
"learning_rate": 2.3303896418791725e-05,
"loss": 1.2067805528640747,
"step": 578
},
{
"epoch": 1.0622710622710623,
"grad_norm": 0.22043009102344513,
"learning_rate": 2.3257708321254892e-05,
"loss": 1.205613613128662,
"step": 580
},
{
"epoch": 1.065934065934066,
"grad_norm": 0.28748825192451477,
"learning_rate": 2.321141443055146e-05,
"loss": 0.9749211072921753,
"step": 582
},
{
"epoch": 1.0695970695970696,
"grad_norm": 0.5063738226890564,
"learning_rate": 2.3165015471420802e-05,
"loss": 1.1344975233078003,
"step": 584
},
{
"epoch": 1.0732600732600732,
"grad_norm": 0.12719090282917023,
"learning_rate": 2.3118512170247156e-05,
"loss": 1.0455325841903687,
"step": 586
},
{
"epoch": 1.0769230769230769,
"grad_norm": 0.2692602872848511,
"learning_rate": 2.3071905255048257e-05,
"loss": 0.8101100325584412,
"step": 588
},
{
"epoch": 1.0805860805860805,
"grad_norm": 0.2367333620786667,
"learning_rate": 2.3025195455463938e-05,
"loss": 1.189924716949463,
"step": 590
},
{
"epoch": 1.0842490842490842,
"grad_norm": 0.10912540555000305,
"learning_rate": 2.2978383502744693e-05,
"loss": 0.9998126029968262,
"step": 592
},
{
"epoch": 1.0879120879120878,
"grad_norm": 0.3905029594898224,
"learning_rate": 2.2931470129740257e-05,
"loss": 0.973837673664093,
"step": 594
},
{
"epoch": 1.0915750915750915,
"grad_norm": 0.14884768426418304,
"learning_rate": 2.2884456070888107e-05,
"loss": 1.0034931898117065,
"step": 596
},
{
"epoch": 1.0952380952380953,
"grad_norm": 0.3063752353191376,
"learning_rate": 2.2837342062201987e-05,
"loss": 1.072454571723938,
"step": 598
},
{
"epoch": 1.098901098901099,
"grad_norm": 0.20692849159240723,
"learning_rate": 2.279012884126037e-05,
"loss": 1.1523897647857666,
"step": 600
},
{
"epoch": 1.1025641025641026,
"grad_norm": 0.20062100887298584,
"learning_rate": 2.2742817147194904e-05,
"loss": 0.8056322336196899,
"step": 602
},
{
"epoch": 1.1062271062271063,
"grad_norm": 1.2330729961395264,
"learning_rate": 2.2695407720678863e-05,
"loss": 0.934237003326416,
"step": 604
},
{
"epoch": 1.10989010989011,
"grad_norm": 0.29200276732444763,
"learning_rate": 2.2647901303915543e-05,
"loss": 0.8741939663887024,
"step": 606
},
{
"epoch": 1.1135531135531136,
"grad_norm": 0.3068870007991791,
"learning_rate": 2.2600298640626633e-05,
"loss": 1.244341492652893,
"step": 608
},
{
"epoch": 1.1172161172161172,
"grad_norm": 0.1670544445514679,
"learning_rate": 2.2552600476040578e-05,
"loss": 0.7092036604881287,
"step": 610
},
{
"epoch": 1.120879120879121,
"grad_norm": 0.39533790946006775,
"learning_rate": 2.2504807556880924e-05,
"loss": 1.1511996984481812,
"step": 612
},
{
"epoch": 1.1245421245421245,
"grad_norm": 0.23137971758842468,
"learning_rate": 2.2456920631354604e-05,
"loss": 0.97856205701828,
"step": 614
},
{
"epoch": 1.1282051282051282,
"grad_norm": 0.9034252166748047,
"learning_rate": 2.2408940449140255e-05,
"loss": 1.1431798934936523,
"step": 616
},
{
"epoch": 1.1318681318681318,
"grad_norm": 0.2496388852596283,
"learning_rate": 2.2360867761376433e-05,
"loss": 0.3764049708843231,
"step": 618
},
{
"epoch": 1.1355311355311355,
"grad_norm": 0.3029516637325287,
"learning_rate": 2.231270332064993e-05,
"loss": 1.0727355480194092,
"step": 620
},
{
"epoch": 1.1391941391941391,
"grad_norm": 0.6300837397575378,
"learning_rate": 2.2264447880983903e-05,
"loss": 0.7348633408546448,
"step": 622
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.39987483620643616,
"learning_rate": 2.2216102197826152e-05,
"loss": 1.064742922782898,
"step": 624
},
{
"epoch": 1.1465201465201464,
"grad_norm": 0.12290269881486893,
"learning_rate": 2.216766702803722e-05,
"loss": 1.3470101356506348,
"step": 626
},
{
"epoch": 1.15018315018315,
"grad_norm": 1.7875564098358154,
"learning_rate": 2.2119143129878612e-05,
"loss": 0.43953192234039307,
"step": 628
},
{
"epoch": 1.1538461538461537,
"grad_norm": 0.067696712911129,
"learning_rate": 2.2070531263000877e-05,
"loss": 1.1008399724960327,
"step": 630
},
{
"epoch": 1.1575091575091574,
"grad_norm": 0.2383015751838684,
"learning_rate": 2.2021832188431726e-05,
"loss": 1.0739026069641113,
"step": 632
},
{
"epoch": 1.1611721611721613,
"grad_norm": 0.2584260404109955,
"learning_rate": 2.197304666856413e-05,
"loss": 0.8374975919723511,
"step": 634
},
{
"epoch": 1.164835164835165,
"grad_norm": 1.6519781351089478,
"learning_rate": 2.1924175467144374e-05,
"loss": 0.9540712833404541,
"step": 636
},
{
"epoch": 1.1684981684981686,
"grad_norm": 0.29954585433006287,
"learning_rate": 2.1875219349260103e-05,
"loss": 1.1742781400680542,
"step": 638
},
{
"epoch": 1.1721611721611722,
"grad_norm": 0.04083525016903877,
"learning_rate": 2.182617908132835e-05,
"loss": 0.7330670952796936,
"step": 640
},
{
"epoch": 1.1758241758241759,
"grad_norm": 0.2031724601984024,
"learning_rate": 2.1777055431083526e-05,
"loss": 1.1114752292633057,
"step": 642
},
{
"epoch": 1.1794871794871795,
"grad_norm": 0.24981261789798737,
"learning_rate": 2.1727849167565417e-05,
"loss": 1.0450292825698853,
"step": 644
},
{
"epoch": 1.1831501831501832,
"grad_norm": 0.18336619436740875,
"learning_rate": 2.1678561061107114e-05,
"loss": 0.7295739650726318,
"step": 646
},
{
"epoch": 1.1868131868131868,
"grad_norm": 0.2836684286594391,
"learning_rate": 2.1629191883322998e-05,
"loss": 0.8890778422355652,
"step": 648
},
{
"epoch": 1.1904761904761905,
"grad_norm": 0.18022595345973969,
"learning_rate": 2.1579742407096626e-05,
"loss": 1.12935209274292,
"step": 650
},
{
"epoch": 1.1941391941391941,
"grad_norm": 0.23749671876430511,
"learning_rate": 2.1530213406568637e-05,
"loss": 0.6536941528320312,
"step": 652
},
{
"epoch": 1.1978021978021978,
"grad_norm": 0.4507904350757599,
"learning_rate": 2.1480605657124656e-05,
"loss": 1.275600552558899,
"step": 654
},
{
"epoch": 1.2014652014652014,
"grad_norm": 0.12597453594207764,
"learning_rate": 2.1430919935383112e-05,
"loss": 0.8148038983345032,
"step": 656
},
{
"epoch": 1.205128205128205,
"grad_norm": 2.4110560417175293,
"learning_rate": 2.138115701918312e-05,
"loss": 0.7001177668571472,
"step": 658
},
{
"epoch": 1.2087912087912087,
"grad_norm": 0.2808850109577179,
"learning_rate": 2.1331317687572286e-05,
"loss": 1.1457589864730835,
"step": 660
},
{
"epoch": 1.2124542124542124,
"grad_norm": 0.31538572907447815,
"learning_rate": 2.1281402720794512e-05,
"loss": 1.0017123222351074,
"step": 662
},
{
"epoch": 1.2161172161172162,
"grad_norm": 0.894040048122406,
"learning_rate": 2.123141290027778e-05,
"loss": 0.8663807511329651,
"step": 664
},
{
"epoch": 1.2197802197802199,
"grad_norm": 0.1825011521577835,
"learning_rate": 2.1181349008621935e-05,
"loss": 0.8002346754074097,
"step": 666
},
{
"epoch": 1.2234432234432235,
"grad_norm": 5.175918102264404,
"learning_rate": 2.1131211829586398e-05,
"loss": 0.8699268698692322,
"step": 668
},
{
"epoch": 1.2271062271062272,
"grad_norm": 0.17078006267547607,
"learning_rate": 2.1081002148077926e-05,
"loss": 0.6468662619590759,
"step": 670
},
{
"epoch": 1.2307692307692308,
"grad_norm": 0.15255847573280334,
"learning_rate": 2.1030720750138324e-05,
"loss": 1.1679998636245728,
"step": 672
},
{
"epoch": 1.2344322344322345,
"grad_norm": 0.2974357306957245,
"learning_rate": 2.0980368422932118e-05,
"loss": 1.1423126459121704,
"step": 674
},
{
"epoch": 1.2380952380952381,
"grad_norm": 0.18821929395198822,
"learning_rate": 2.092994595473426e-05,
"loss": 1.1631795167922974,
"step": 676
},
{
"epoch": 1.2417582417582418,
"grad_norm": 0.1881231665611267,
"learning_rate": 2.0879454134917752e-05,
"loss": 0.7617767453193665,
"step": 678
},
{
"epoch": 1.2454212454212454,
"grad_norm": 0.31422385573387146,
"learning_rate": 2.0828893753941327e-05,
"loss": 0.8161511421203613,
"step": 680
},
{
"epoch": 1.249084249084249,
"grad_norm": 0.6012793183326721,
"learning_rate": 2.0778265603337043e-05,
"loss": 1.1746625900268555,
"step": 682
},
{
"epoch": 1.2527472527472527,
"grad_norm": 0.1525307446718216,
"learning_rate": 2.0727570475697917e-05,
"loss": 1.148007869720459,
"step": 684
},
{
"epoch": 1.2564102564102564,
"grad_norm": 0.15192103385925293,
"learning_rate": 2.0676809164665485e-05,
"loss": 0.8160861730575562,
"step": 686
},
{
"epoch": 1.26007326007326,
"grad_norm": 0.24711591005325317,
"learning_rate": 2.0625982464917414e-05,
"loss": 1.0958616733551025,
"step": 688
},
{
"epoch": 1.2637362637362637,
"grad_norm": 0.8192933201789856,
"learning_rate": 2.0575091172155033e-05,
"loss": 1.1153453588485718,
"step": 690
},
{
"epoch": 1.2673992673992673,
"grad_norm": 0.38153979182243347,
"learning_rate": 2.052413608309089e-05,
"loss": 1.1683244705200195,
"step": 692
},
{
"epoch": 1.271062271062271,
"grad_norm": 0.5269439816474915,
"learning_rate": 2.0473117995436273e-05,
"loss": 0.9698490500450134,
"step": 694
},
{
"epoch": 1.2747252747252746,
"grad_norm": 0.1796112358570099,
"learning_rate": 2.0422037707888737e-05,
"loss": 0.9768142700195312,
"step": 696
},
{
"epoch": 1.2783882783882783,
"grad_norm": 0.30962157249450684,
"learning_rate": 2.0370896020119568e-05,
"loss": 0.8801419734954834,
"step": 698
},
{
"epoch": 1.282051282051282,
"grad_norm": 0.17478680610656738,
"learning_rate": 2.0319693732761296e-05,
"loss": 0.6408591866493225,
"step": 700
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.30901262164115906,
"learning_rate": 2.026843164739515e-05,
"loss": 0.5093188881874084,
"step": 702
},
{
"epoch": 1.2893772893772895,
"grad_norm": 0.34778061509132385,
"learning_rate": 2.0217110566538502e-05,
"loss": 1.2507480382919312,
"step": 704
},
{
"epoch": 1.293040293040293,
"grad_norm": 0.10851099342107773,
"learning_rate": 2.016573129363231e-05,
"loss": 1.0021785497665405,
"step": 706
},
{
"epoch": 1.2967032967032968,
"grad_norm": 0.0709109827876091,
"learning_rate": 2.011429463302854e-05,
"loss": 0.9341714382171631,
"step": 708
},
{
"epoch": 1.3003663003663004,
"grad_norm": 0.3086980879306793,
"learning_rate": 2.0062801389977577e-05,
"loss": 0.5391806364059448,
"step": 710
},
{
"epoch": 1.304029304029304,
"grad_norm": 0.9953245520591736,
"learning_rate": 2.001125237061561e-05,
"loss": 0.8783678412437439,
"step": 712
},
{
"epoch": 1.3076923076923077,
"grad_norm": 0.22242064774036407,
"learning_rate": 1.9959648381952014e-05,
"loss": 1.1794803142547607,
"step": 714
},
{
"epoch": 1.3113553113553114,
"grad_norm": 0.28814682364463806,
"learning_rate": 1.9907990231856725e-05,
"loss": 1.1167832612991333,
"step": 716
},
{
"epoch": 1.315018315018315,
"grad_norm": 0.15364542603492737,
"learning_rate": 1.9856278729047588e-05,
"loss": 0.908664882183075,
"step": 718
},
{
"epoch": 1.3186813186813187,
"grad_norm": 0.21915094554424286,
"learning_rate": 1.980451468307768e-05,
"loss": 0.8994994163513184,
"step": 720
},
{
"epoch": 1.3223443223443223,
"grad_norm": 0.14681462943553925,
"learning_rate": 1.975269890432267e-05,
"loss": 0.8842648863792419,
"step": 722
},
{
"epoch": 1.326007326007326,
"grad_norm": 0.4719051420688629,
"learning_rate": 1.9700832203968095e-05,
"loss": 1.1994279623031616,
"step": 724
},
{
"epoch": 1.3296703296703296,
"grad_norm": 0.1713068187236786,
"learning_rate": 1.96489153939967e-05,
"loss": 1.0513155460357666,
"step": 726
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.16900034248828888,
"learning_rate": 1.9596949287175685e-05,
"loss": 1.1289994716644287,
"step": 728
},
{
"epoch": 1.3369963369963371,
"grad_norm": 0.11318284273147583,
"learning_rate": 1.9544934697044008e-05,
"loss": 0.7207897305488586,
"step": 730
},
{
"epoch": 1.3406593406593408,
"grad_norm": 1.0781842470169067,
"learning_rate": 1.9492872437899646e-05,
"loss": 0.8054978251457214,
"step": 732
},
{
"epoch": 1.3443223443223444,
"grad_norm": 0.09919891506433487,
"learning_rate": 1.9440763324786843e-05,
"loss": 0.5997318625450134,
"step": 734
},
{
"epoch": 1.347985347985348,
"grad_norm": 0.16401244699954987,
"learning_rate": 1.9388608173483347e-05,
"loss": 1.1036202907562256,
"step": 736
},
{
"epoch": 1.3516483516483517,
"grad_norm": 0.17233139276504517,
"learning_rate": 1.9336407800487642e-05,
"loss": 0.7368432879447937,
"step": 738
},
{
"epoch": 1.3553113553113554,
"grad_norm": 0.11431029438972473,
"learning_rate": 1.9284163023006173e-05,
"loss": 0.9985055923461914,
"step": 740
},
{
"epoch": 1.358974358974359,
"grad_norm": 0.21836988627910614,
"learning_rate": 1.923187465894053e-05,
"loss": 0.5708799362182617,
"step": 742
},
{
"epoch": 1.3626373626373627,
"grad_norm": 0.17751345038414001,
"learning_rate": 1.917954352687468e-05,
"loss": 1.2120763063430786,
"step": 744
},
{
"epoch": 1.3663003663003663,
"grad_norm": 0.1561298668384552,
"learning_rate": 1.9127170446062105e-05,
"loss": 1.0274112224578857,
"step": 746
},
{
"epoch": 1.36996336996337,
"grad_norm": 0.1297256052494049,
"learning_rate": 1.907475623641304e-05,
"loss": 1.051086664199829,
"step": 748
},
{
"epoch": 1.3736263736263736,
"grad_norm": 0.303082138299942,
"learning_rate": 1.9022301718481554e-05,
"loss": 0.7688701152801514,
"step": 750
},
{
"epoch": 1.3772893772893773,
"grad_norm": 0.09415057301521301,
"learning_rate": 1.8969807713452784e-05,
"loss": 1.0989794731140137,
"step": 752
},
{
"epoch": 1.380952380952381,
"grad_norm": 0.2263706773519516,
"learning_rate": 1.8917275043130034e-05,
"loss": 0.9529420137405396,
"step": 754
},
{
"epoch": 1.3846153846153846,
"grad_norm": 0.23786725103855133,
"learning_rate": 1.886470452992191e-05,
"loss": 1.2820546627044678,
"step": 756
},
{
"epoch": 1.3882783882783882,
"grad_norm": 0.1806584894657135,
"learning_rate": 1.8812096996829475e-05,
"loss": 1.1889806985855103,
"step": 758
},
{
"epoch": 1.3919413919413919,
"grad_norm": 0.1611943542957306,
"learning_rate": 1.875945326743333e-05,
"loss": 1.1311790943145752,
"step": 760
},
{
"epoch": 1.3956043956043955,
"grad_norm": 0.13743668794631958,
"learning_rate": 1.8706774165880748e-05,
"loss": 1.1275527477264404,
"step": 762
},
{
"epoch": 1.3992673992673992,
"grad_norm": 0.5155539512634277,
"learning_rate": 1.8654060516872734e-05,
"loss": 0.6817983984947205,
"step": 764
},
{
"epoch": 1.4029304029304028,
"grad_norm": 0.12801358103752136,
"learning_rate": 1.8601313145651178e-05,
"loss": 0.8715826869010925,
"step": 766
},
{
"epoch": 1.4065934065934065,
"grad_norm": 0.611792802810669,
"learning_rate": 1.8548532877985863e-05,
"loss": 0.8770813941955566,
"step": 768
},
{
"epoch": 1.4102564102564101,
"grad_norm": 0.47473448514938354,
"learning_rate": 1.8495720540161592e-05,
"loss": 1.1611006259918213,
"step": 770
},
{
"epoch": 1.4139194139194138,
"grad_norm": 0.13255630433559418,
"learning_rate": 1.8442876958965228e-05,
"loss": 1.1962770223617554,
"step": 772
},
{
"epoch": 1.4175824175824177,
"grad_norm": 0.27610790729522705,
"learning_rate": 1.8390002961672755e-05,
"loss": 1.0068659782409668,
"step": 774
},
{
"epoch": 1.4212454212454213,
"grad_norm": 0.1513095498085022,
"learning_rate": 1.8337099376036308e-05,
"loss": 1.1399455070495605,
"step": 776
},
{
"epoch": 1.424908424908425,
"grad_norm": 0.48067036271095276,
"learning_rate": 1.828416703027128e-05,
"loss": 0.8147709965705872,
"step": 778
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.10299618542194366,
"learning_rate": 1.8231206753043253e-05,
"loss": 1.3462679386138916,
"step": 780
},
{
"epoch": 1.4322344322344323,
"grad_norm": 0.3704274594783783,
"learning_rate": 1.8178219373455116e-05,
"loss": 1.2018518447875977,
"step": 782
},
{
"epoch": 1.435897435897436,
"grad_norm": 0.16568604111671448,
"learning_rate": 1.8125205721034043e-05,
"loss": 1.1414146423339844,
"step": 784
},
{
"epoch": 1.4395604395604396,
"grad_norm": 0.29782232642173767,
"learning_rate": 1.8072166625718512e-05,
"loss": 0.9365593791007996,
"step": 786
},
{
"epoch": 1.4432234432234432,
"grad_norm": 0.5219512581825256,
"learning_rate": 1.8019102917845315e-05,
"loss": 1.1156021356582642,
"step": 788
},
{
"epoch": 1.4468864468864469,
"grad_norm": 0.05873679369688034,
"learning_rate": 1.7966015428136552e-05,
"loss": 0.7134132385253906,
"step": 790
},
{
"epoch": 1.4505494505494505,
"grad_norm": 0.11460971087217331,
"learning_rate": 1.791290498768665e-05,
"loss": 0.9004511833190918,
"step": 792
},
{
"epoch": 1.4542124542124542,
"grad_norm": 0.1498289257287979,
"learning_rate": 1.785977242794931e-05,
"loss": 0.787198543548584,
"step": 794
},
{
"epoch": 1.4578754578754578,
"grad_norm": 0.15771807730197906,
"learning_rate": 1.7806618580724534e-05,
"loss": 1.132145881652832,
"step": 796
},
{
"epoch": 1.4615384615384617,
"grad_norm": 0.10715603083372116,
"learning_rate": 1.775344427814557e-05,
"loss": 0.7792448401451111,
"step": 798
},
{
"epoch": 1.4652014652014653,
"grad_norm": 0.10087893158197403,
"learning_rate": 1.770025035266591e-05,
"loss": 1.006882667541504,
"step": 800
},
{
"epoch": 1.468864468864469,
"grad_norm": 0.19758541882038116,
"learning_rate": 1.7647037637046236e-05,
"loss": 1.535607933998108,
"step": 802
},
{
"epoch": 1.4725274725274726,
"grad_norm": 0.15355999767780304,
"learning_rate": 1.7593806964341397e-05,
"loss": 1.0087249279022217,
"step": 804
},
{
"epoch": 1.4761904761904763,
"grad_norm": 0.47955119609832764,
"learning_rate": 1.7540559167887365e-05,
"loss": 1.1739630699157715,
"step": 806
},
{
"epoch": 1.47985347985348,
"grad_norm": 0.21914640069007874,
"learning_rate": 1.748729508128819e-05,
"loss": 1.1568723917007446,
"step": 808
},
{
"epoch": 1.4835164835164836,
"grad_norm": 0.2966259717941284,
"learning_rate": 1.7434015538402948e-05,
"loss": 0.9803312420845032,
"step": 810
},
{
"epoch": 1.4871794871794872,
"grad_norm": 0.19581644237041473,
"learning_rate": 1.7380721373332664e-05,
"loss": 0.8270664811134338,
"step": 812
},
{
"epoch": 1.4908424908424909,
"grad_norm": 0.14163576066493988,
"learning_rate": 1.7327413420407312e-05,
"loss": 1.1908292770385742,
"step": 814
},
{
"epoch": 1.4945054945054945,
"grad_norm": 0.3342287540435791,
"learning_rate": 1.7274092514172685e-05,
"loss": 1.1254945993423462,
"step": 816
},
{
"epoch": 1.4981684981684982,
"grad_norm": 0.05009487271308899,
"learning_rate": 1.7220759489377392e-05,
"loss": 0.38835394382476807,
"step": 818
},
{
"epoch": 1.5018315018315018,
"grad_norm": 0.20819129049777985,
"learning_rate": 1.716741518095973e-05,
"loss": 1.1369304656982422,
"step": 820
},
{
"epoch": 1.5054945054945055,
"grad_norm": 0.25079068541526794,
"learning_rate": 1.7114060424034668e-05,
"loss": 0.8918940424919128,
"step": 822
},
{
"epoch": 1.5091575091575091,
"grad_norm": 0.7835426926612854,
"learning_rate": 1.7060696053880728e-05,
"loss": 0.8167926073074341,
"step": 824
},
{
"epoch": 1.5128205128205128,
"grad_norm": 0.18384632468223572,
"learning_rate": 1.700732290592695e-05,
"loss": 1.0300673246383667,
"step": 826
},
{
"epoch": 1.5164835164835164,
"grad_norm": 1.3027671575546265,
"learning_rate": 1.6953941815739775e-05,
"loss": 0.9904571771621704,
"step": 828
},
{
"epoch": 1.52014652014652,
"grad_norm": 0.2394954115152359,
"learning_rate": 1.6900553619009987e-05,
"loss": 0.9918930530548096,
"step": 830
},
{
"epoch": 1.5238095238095237,
"grad_norm": 0.17653672397136688,
"learning_rate": 1.684715915153963e-05,
"loss": 1.2324399948120117,
"step": 832
},
{
"epoch": 1.5274725274725274,
"grad_norm": 0.17192932963371277,
"learning_rate": 1.6793759249228907e-05,
"loss": 0.7932698726654053,
"step": 834
},
{
"epoch": 1.531135531135531,
"grad_norm": 0.13914711773395538,
"learning_rate": 1.6740354748063115e-05,
"loss": 1.1407725811004639,
"step": 836
},
{
"epoch": 1.5347985347985347,
"grad_norm": 0.1912311464548111,
"learning_rate": 1.6686946484099533e-05,
"loss": 1.1839306354522705,
"step": 838
},
{
"epoch": 1.5384615384615383,
"grad_norm": 0.3413101136684418,
"learning_rate": 1.6633535293454363e-05,
"loss": 1.0283156633377075,
"step": 840
},
{
"epoch": 1.542124542124542,
"grad_norm": 0.15573465824127197,
"learning_rate": 1.6580122012289612e-05,
"loss": 0.8538442850112915,
"step": 842
},
{
"epoch": 1.5457875457875456,
"grad_norm": 0.1616344302892685,
"learning_rate": 1.6526707476800024e-05,
"loss": 1.0215482711791992,
"step": 844
},
{
"epoch": 1.5494505494505495,
"grad_norm": 0.11193003505468369,
"learning_rate": 1.6473292523199978e-05,
"loss": 1.1683791875839233,
"step": 846
},
{
"epoch": 1.5531135531135531,
"grad_norm": 0.15385620296001434,
"learning_rate": 1.6419877987710394e-05,
"loss": 0.5395596623420715,
"step": 848
},
{
"epoch": 1.5567765567765568,
"grad_norm": 0.2141391485929489,
"learning_rate": 1.636646470654564e-05,
"loss": 1.0174407958984375,
"step": 850
},
{
"epoch": 1.5604395604395604,
"grad_norm": 0.18008647859096527,
"learning_rate": 1.6313053515900473e-05,
"loss": 1.1618224382400513,
"step": 852
},
{
"epoch": 1.564102564102564,
"grad_norm": 0.39221835136413574,
"learning_rate": 1.625964525193689e-05,
"loss": 0.839201807975769,
"step": 854
},
{
"epoch": 1.5677655677655677,
"grad_norm": 0.21565252542495728,
"learning_rate": 1.6206240750771092e-05,
"loss": 0.7961281538009644,
"step": 856
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.23852740228176117,
"learning_rate": 1.6152840848460376e-05,
"loss": 0.92723548412323,
"step": 858
},
{
"epoch": 1.575091575091575,
"grad_norm": 0.5424776077270508,
"learning_rate": 1.6099446380990015e-05,
"loss": 0.7285541892051697,
"step": 860
},
{
"epoch": 1.578754578754579,
"grad_norm": 0.3289344012737274,
"learning_rate": 1.604605818426023e-05,
"loss": 1.1397308111190796,
"step": 862
},
{
"epoch": 1.5824175824175826,
"grad_norm": 0.21419380605220795,
"learning_rate": 1.5992677094073055e-05,
"loss": 0.8967806696891785,
"step": 864
},
{
"epoch": 1.5860805860805862,
"grad_norm": 0.27611565589904785,
"learning_rate": 1.5939303946119275e-05,
"loss": 1.1727690696716309,
"step": 866
},
{
"epoch": 1.5897435897435899,
"grad_norm": 0.18314819037914276,
"learning_rate": 1.588593957596534e-05,
"loss": 1.1750589609146118,
"step": 868
},
{
"epoch": 1.5934065934065935,
"grad_norm": 0.13304927945137024,
"learning_rate": 1.5832584819040275e-05,
"loss": 0.5855087041854858,
"step": 870
},
{
"epoch": 1.5970695970695972,
"grad_norm": 0.18358372151851654,
"learning_rate": 1.577924051062261e-05,
"loss": 1.1658121347427368,
"step": 872
},
{
"epoch": 1.6007326007326008,
"grad_norm": 0.09625950455665588,
"learning_rate": 1.5725907485827318e-05,
"loss": 1.0468436479568481,
"step": 874
},
{
"epoch": 1.6043956043956045,
"grad_norm": 0.265048086643219,
"learning_rate": 1.567258657959269e-05,
"loss": 0.7421779036521912,
"step": 876
},
{
"epoch": 1.6080586080586081,
"grad_norm": 0.34532174468040466,
"learning_rate": 1.5619278626667336e-05,
"loss": 0.9705301523208618,
"step": 878
},
{
"epoch": 1.6117216117216118,
"grad_norm": 0.0698627457022667,
"learning_rate": 1.556598446159706e-05,
"loss": 0.8958454728126526,
"step": 880
},
{
"epoch": 1.6153846153846154,
"grad_norm": 0.17785139381885529,
"learning_rate": 1.5512704918711812e-05,
"loss": 1.1171237230300903,
"step": 882
},
{
"epoch": 1.619047619047619,
"grad_norm": 0.8073061108589172,
"learning_rate": 1.5459440832112634e-05,
"loss": 0.9405111074447632,
"step": 884
},
{
"epoch": 1.6227106227106227,
"grad_norm": 0.20005500316619873,
"learning_rate": 1.5406193035658606e-05,
"loss": 1.1305389404296875,
"step": 886
},
{
"epoch": 1.6263736263736264,
"grad_norm": 0.12977421283721924,
"learning_rate": 1.535296236295377e-05,
"loss": 0.8348196148872375,
"step": 888
},
{
"epoch": 1.63003663003663,
"grad_norm": 0.10178139060735703,
"learning_rate": 1.5299749647334097e-05,
"loss": 1.1974626779556274,
"step": 890
},
{
"epoch": 1.6336996336996337,
"grad_norm": 0.4449411928653717,
"learning_rate": 1.5246555721854436e-05,
"loss": 0.9033501148223877,
"step": 892
},
{
"epoch": 1.6373626373626373,
"grad_norm": 0.19725538790225983,
"learning_rate": 1.519338141927547e-05,
"loss": 1.1756013631820679,
"step": 894
},
{
"epoch": 1.641025641025641,
"grad_norm": 0.19788263738155365,
"learning_rate": 1.5140227572050696e-05,
"loss": 0.7380312085151672,
"step": 896
},
{
"epoch": 1.6446886446886446,
"grad_norm": 0.17406921088695526,
"learning_rate": 1.5087095012313355e-05,
"loss": 1.1261850595474243,
"step": 898
},
{
"epoch": 1.6483516483516483,
"grad_norm": 0.061784036457538605,
"learning_rate": 1.5033984571863445e-05,
"loss": 0.8112171292304993,
"step": 900
},
{
"epoch": 1.652014652014652,
"grad_norm": 0.13861116766929626,
"learning_rate": 1.498089708215469e-05,
"loss": 0.862524151802063,
"step": 902
},
{
"epoch": 1.6556776556776556,
"grad_norm": 0.1592007875442505,
"learning_rate": 1.4927833374281493e-05,
"loss": 1.0081220865249634,
"step": 904
},
{
"epoch": 1.6593406593406592,
"grad_norm": 1.8398786783218384,
"learning_rate": 1.4874794278965956e-05,
"loss": 1.1238994598388672,
"step": 906
},
{
"epoch": 1.6630036630036629,
"grad_norm": 0.19751088321208954,
"learning_rate": 1.4821780626544885e-05,
"loss": 0.7444801926612854,
"step": 908
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.28103649616241455,
"learning_rate": 1.476879324695675e-05,
"loss": 1.1913678646087646,
"step": 910
},
{
"epoch": 1.6703296703296702,
"grad_norm": 0.09520045667886734,
"learning_rate": 1.4715832969728727e-05,
"loss": 1.207666277885437,
"step": 912
},
{
"epoch": 1.673992673992674,
"grad_norm": 0.12307148426771164,
"learning_rate": 1.4662900623963691e-05,
"loss": 1.0871981382369995,
"step": 914
},
{
"epoch": 1.6776556776556777,
"grad_norm": 0.21801486611366272,
"learning_rate": 1.4609997038327249e-05,
"loss": 1.1932222843170166,
"step": 916
},
{
"epoch": 1.6813186813186813,
"grad_norm": 0.23331131041049957,
"learning_rate": 1.4557123041034773e-05,
"loss": 0.8278278112411499,
"step": 918
},
{
"epoch": 1.684981684981685,
"grad_norm": 0.13510961830615997,
"learning_rate": 1.4504279459838412e-05,
"loss": 1.2047544717788696,
"step": 920
},
{
"epoch": 1.6886446886446886,
"grad_norm": 0.16401754319667816,
"learning_rate": 1.4451467122014144e-05,
"loss": 1.0300697088241577,
"step": 922
},
{
"epoch": 1.6923076923076923,
"grad_norm": 0.1318156123161316,
"learning_rate": 1.439868685434883e-05,
"loss": 0.8341712355613708,
"step": 924
},
{
"epoch": 1.695970695970696,
"grad_norm": 0.3042833209037781,
"learning_rate": 1.4345939483127269e-05,
"loss": 0.9632514715194702,
"step": 926
},
{
"epoch": 1.6996336996336996,
"grad_norm": 0.29487597942352295,
"learning_rate": 1.4293225834119256e-05,
"loss": 0.8002101182937622,
"step": 928
},
{
"epoch": 1.7032967032967035,
"grad_norm": 0.07691221684217453,
"learning_rate": 1.4240546732566674e-05,
"loss": 0.8642545938491821,
"step": 930
},
{
"epoch": 1.7069597069597071,
"grad_norm": 0.33097583055496216,
"learning_rate": 1.4187903003170524e-05,
"loss": 0.9122767448425293,
"step": 932
},
{
"epoch": 1.7106227106227108,
"grad_norm": 0.1507510542869568,
"learning_rate": 1.413529547007809e-05,
"loss": 1.3842699527740479,
"step": 934
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.3127226233482361,
"learning_rate": 1.4082724956869973e-05,
"loss": 1.179833173751831,
"step": 936
},
{
"epoch": 1.717948717948718,
"grad_norm": 0.3052959740161896,
"learning_rate": 1.4030192286547219e-05,
"loss": 0.4733130931854248,
"step": 938
},
{
"epoch": 1.7216117216117217,
"grad_norm": 0.35021454095840454,
"learning_rate": 1.3977698281518447e-05,
"loss": 0.5180521607398987,
"step": 940
},
{
"epoch": 1.7252747252747254,
"grad_norm": 0.22165612876415253,
"learning_rate": 1.3925243763586967e-05,
"loss": 1.1145201921463013,
"step": 942
},
{
"epoch": 1.728937728937729,
"grad_norm": 0.16052335500717163,
"learning_rate": 1.3872829553937894e-05,
"loss": 0.9176861643791199,
"step": 944
},
{
"epoch": 1.7326007326007327,
"grad_norm": 0.1676378697156906,
"learning_rate": 1.3820456473125325e-05,
"loss": 1.1314038038253784,
"step": 946
},
{
"epoch": 1.7362637362637363,
"grad_norm": 0.23916418850421906,
"learning_rate": 1.3768125341059474e-05,
"loss": 1.0898257493972778,
"step": 948
},
{
"epoch": 1.73992673992674,
"grad_norm": 0.2966461181640625,
"learning_rate": 1.3715836976993831e-05,
"loss": 0.9806386828422546,
"step": 950
},
{
"epoch": 1.7435897435897436,
"grad_norm": 0.3395932614803314,
"learning_rate": 1.3663592199512362e-05,
"loss": 0.9286150932312012,
"step": 952
},
{
"epoch": 1.7472527472527473,
"grad_norm": 0.1921040564775467,
"learning_rate": 1.3611391826516656e-05,
"loss": 1.1887242794036865,
"step": 954
},
{
"epoch": 1.750915750915751,
"grad_norm": 0.19361916184425354,
"learning_rate": 1.355923667521316e-05,
"loss": 0.7717471718788147,
"step": 956
},
{
"epoch": 1.7545787545787546,
"grad_norm": 0.12737134099006653,
"learning_rate": 1.3507127562100358e-05,
"loss": 1.1729557514190674,
"step": 958
},
{
"epoch": 1.7582417582417582,
"grad_norm": 0.1720629334449768,
"learning_rate": 1.3455065302955996e-05,
"loss": 0.5185899138450623,
"step": 960
},
{
"epoch": 1.7619047619047619,
"grad_norm": 0.13436830043792725,
"learning_rate": 1.340305071282432e-05,
"loss": 0.965956449508667,
"step": 962
},
{
"epoch": 1.7655677655677655,
"grad_norm": 0.14258961379528046,
"learning_rate": 1.3351084606003303e-05,
"loss": 1.0140048265457153,
"step": 964
},
{
"epoch": 1.7692307692307692,
"grad_norm": 0.6037375926971436,
"learning_rate": 1.3299167796031904e-05,
"loss": 1.1673392057418823,
"step": 966
},
{
"epoch": 1.7728937728937728,
"grad_norm": 0.33095860481262207,
"learning_rate": 1.3247301095677334e-05,
"loss": 0.5561227798461914,
"step": 968
},
{
"epoch": 1.7765567765567765,
"grad_norm": 0.6743564605712891,
"learning_rate": 1.3195485316922322e-05,
"loss": 0.8274823427200317,
"step": 970
},
{
"epoch": 1.7802197802197801,
"grad_norm": 0.30674779415130615,
"learning_rate": 1.3143721270952416e-05,
"loss": 0.8968408703804016,
"step": 972
},
{
"epoch": 1.7838827838827838,
"grad_norm": 0.6774747967720032,
"learning_rate": 1.3092009768143276e-05,
"loss": 1.1993706226348877,
"step": 974
},
{
"epoch": 1.7875457875457874,
"grad_norm": 0.5064061284065247,
"learning_rate": 1.3040351618047987e-05,
"loss": 1.1533069610595703,
"step": 976
},
{
"epoch": 1.791208791208791,
"grad_norm": 0.10748698562383652,
"learning_rate": 1.2988747629384393e-05,
"loss": 0.9061447978019714,
"step": 978
},
{
"epoch": 1.7948717948717947,
"grad_norm": 0.2948685884475708,
"learning_rate": 1.2937198610022422e-05,
"loss": 0.9336826801300049,
"step": 980
},
{
"epoch": 1.7985347985347986,
"grad_norm": 0.12122903764247894,
"learning_rate": 1.2885705366971466e-05,
"loss": 1.1304030418395996,
"step": 982
},
{
"epoch": 1.8021978021978022,
"grad_norm": 0.2671152949333191,
"learning_rate": 1.2834268706367693e-05,
"loss": 1.0765833854675293,
"step": 984
},
{
"epoch": 1.8058608058608059,
"grad_norm": 0.08703822642564774,
"learning_rate": 1.2782889433461504e-05,
"loss": 0.6333800554275513,
"step": 986
},
{
"epoch": 1.8095238095238095,
"grad_norm": 0.4972066283226013,
"learning_rate": 1.273156835260485e-05,
"loss": 0.8049939870834351,
"step": 988
},
{
"epoch": 1.8131868131868132,
"grad_norm": 0.2837335169315338,
"learning_rate": 1.2680306267238703e-05,
"loss": 0.7262604832649231,
"step": 990
},
{
"epoch": 1.8168498168498168,
"grad_norm": 0.20847444236278534,
"learning_rate": 1.2629103979880435e-05,
"loss": 0.7781581878662109,
"step": 992
},
{
"epoch": 1.8205128205128205,
"grad_norm": 0.10468706488609314,
"learning_rate": 1.2577962292111268e-05,
"loss": 0.8340839147567749,
"step": 994
},
{
"epoch": 1.8241758241758241,
"grad_norm": 0.7519865036010742,
"learning_rate": 1.2526882004563725e-05,
"loss": 1.1118800640106201,
"step": 996
},
{
"epoch": 1.8278388278388278,
"grad_norm": 0.10123409330844879,
"learning_rate": 1.2475863916909116e-05,
"loss": 0.839496374130249,
"step": 998
},
{
"epoch": 1.8315018315018317,
"grad_norm": 0.1531890332698822,
"learning_rate": 1.2424908827844971e-05,
"loss": 1.1070917844772339,
"step": 1000
},
{
"epoch": 1.8351648351648353,
"grad_norm": 0.17556923627853394,
"learning_rate": 1.2374017535082588e-05,
"loss": 1.1273016929626465,
"step": 1002
},
{
"epoch": 1.838827838827839,
"grad_norm": 0.15883979201316833,
"learning_rate": 1.232319083533452e-05,
"loss": 1.1315432786941528,
"step": 1004
},
{
"epoch": 1.8424908424908426,
"grad_norm": 0.12646114826202393,
"learning_rate": 1.2272429524302087e-05,
"loss": 1.1264725923538208,
"step": 1006
},
{
"epoch": 1.8461538461538463,
"grad_norm": 0.8804011940956116,
"learning_rate": 1.2221734396662956e-05,
"loss": 0.7873448133468628,
"step": 1008
},
{
"epoch": 1.84981684981685,
"grad_norm": 0.12892308831214905,
"learning_rate": 1.2171106246058676e-05,
"loss": 1.0917152166366577,
"step": 1010
},
{
"epoch": 1.8534798534798536,
"grad_norm": 0.1620880663394928,
"learning_rate": 1.212054586508225e-05,
"loss": 0.7507990598678589,
"step": 1012
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.117234967648983,
"learning_rate": 1.2070054045265746e-05,
"loss": 1.1457178592681885,
"step": 1014
},
{
"epoch": 1.8608058608058609,
"grad_norm": 0.20892217755317688,
"learning_rate": 1.2019631577067883e-05,
"loss": 1.133773922920227,
"step": 1016
},
{
"epoch": 1.8644688644688645,
"grad_norm": 0.21547968685626984,
"learning_rate": 1.1969279249861678e-05,
"loss": 1.2079287767410278,
"step": 1018
},
{
"epoch": 1.8681318681318682,
"grad_norm": 0.022327091544866562,
"learning_rate": 1.1918997851922078e-05,
"loss": 0.9504647254943848,
"step": 1020
},
{
"epoch": 1.8717948717948718,
"grad_norm": 0.3572510778903961,
"learning_rate": 1.1868788170413608e-05,
"loss": 1.1040507555007935,
"step": 1022
},
{
"epoch": 1.8754578754578755,
"grad_norm": 0.2154110074043274,
"learning_rate": 1.1818650991378069e-05,
"loss": 0.5356627702713013,
"step": 1024
},
{
"epoch": 1.879120879120879,
"grad_norm": 0.19389349222183228,
"learning_rate": 1.1768587099722221e-05,
"loss": 1.1394623517990112,
"step": 1026
},
{
"epoch": 1.8827838827838828,
"grad_norm": 0.19930551946163177,
"learning_rate": 1.171859727920549e-05,
"loss": 0.9046638011932373,
"step": 1028
},
{
"epoch": 1.8864468864468864,
"grad_norm": 0.13454696536064148,
"learning_rate": 1.1668682312427716e-05,
"loss": 1.1357349157333374,
"step": 1030
},
{
"epoch": 1.89010989010989,
"grad_norm": 0.13437552750110626,
"learning_rate": 1.1618842980816885e-05,
"loss": 0.9377343058586121,
"step": 1032
},
{
"epoch": 1.8937728937728937,
"grad_norm": 0.5006322860717773,
"learning_rate": 1.1569080064616892e-05,
"loss": 0.7831814289093018,
"step": 1034
},
{
"epoch": 1.8974358974358974,
"grad_norm": 0.2925697863101959,
"learning_rate": 1.1519394342875344e-05,
"loss": 1.1792892217636108,
"step": 1036
},
{
"epoch": 1.901098901098901,
"grad_norm": 0.21419307589530945,
"learning_rate": 1.1469786593431362e-05,
"loss": 1.1360909938812256,
"step": 1038
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.36561742424964905,
"learning_rate": 1.1420257592903375e-05,
"loss": 1.204813838005066,
"step": 1040
},
{
"epoch": 1.9084249084249083,
"grad_norm": 0.2760768532752991,
"learning_rate": 1.1370808116677003e-05,
"loss": 0.6075218319892883,
"step": 1042
},
{
"epoch": 1.912087912087912,
"grad_norm": 0.1529398262500763,
"learning_rate": 1.1321438938892891e-05,
"loss": 0.859750509262085,
"step": 1044
},
{
"epoch": 1.9157509157509156,
"grad_norm": 0.15058556199073792,
"learning_rate": 1.127215083243459e-05,
"loss": 1.101906418800354,
"step": 1046
},
{
"epoch": 1.9194139194139193,
"grad_norm": 0.19583989679813385,
"learning_rate": 1.1222944568916477e-05,
"loss": 0.4320715367794037,
"step": 1048
},
{
"epoch": 1.9230769230769231,
"grad_norm": 0.39312562346458435,
"learning_rate": 1.1173820918671653e-05,
"loss": 0.8532530069351196,
"step": 1050
},
{
"epoch": 1.9267399267399268,
"grad_norm": 0.3669014573097229,
"learning_rate": 1.1124780650739898e-05,
"loss": 0.7985857725143433,
"step": 1052
},
{
"epoch": 1.9304029304029304,
"grad_norm": 0.12371397763490677,
"learning_rate": 1.1075824532855632e-05,
"loss": 1.1105282306671143,
"step": 1054
},
{
"epoch": 1.934065934065934,
"grad_norm": 0.13472405076026917,
"learning_rate": 1.1026953331435875e-05,
"loss": 0.7349132895469666,
"step": 1056
},
{
"epoch": 1.9377289377289377,
"grad_norm": 0.6902068257331848,
"learning_rate": 1.0978167811568275e-05,
"loss": 0.970415472984314,
"step": 1058
},
{
"epoch": 1.9413919413919414,
"grad_norm": 0.2783970236778259,
"learning_rate": 1.092946873699913e-05,
"loss": 1.0616910457611084,
"step": 1060
},
{
"epoch": 1.945054945054945,
"grad_norm": 0.1329077184200287,
"learning_rate": 1.0880856870121389e-05,
"loss": 0.8149954080581665,
"step": 1062
},
{
"epoch": 1.9487179487179487,
"grad_norm": 0.3218192756175995,
"learning_rate": 1.0832332971962779e-05,
"loss": 0.7964991331100464,
"step": 1064
},
{
"epoch": 1.9523809523809523,
"grad_norm": 0.24318785965442657,
"learning_rate": 1.0783897802173859e-05,
"loss": 1.1447077989578247,
"step": 1066
},
{
"epoch": 1.9560439560439562,
"grad_norm": 0.46663448214530945,
"learning_rate": 1.07355521190161e-05,
"loss": 1.1718124151229858,
"step": 1068
},
{
"epoch": 1.9597069597069599,
"grad_norm": 0.08453179895877838,
"learning_rate": 1.0687296679350072e-05,
"loss": 0.6696170568466187,
"step": 1070
},
{
"epoch": 1.9633699633699635,
"grad_norm": 0.2535535991191864,
"learning_rate": 1.063913223862357e-05,
"loss": 0.827909529209137,
"step": 1072
},
{
"epoch": 1.9670329670329672,
"grad_norm": 0.2511337995529175,
"learning_rate": 1.0591059550859753e-05,
"loss": 1.1328091621398926,
"step": 1074
},
{
"epoch": 1.9706959706959708,
"grad_norm": 0.129400372505188,
"learning_rate": 1.0543079368645398e-05,
"loss": 1.1073366403579712,
"step": 1076
},
{
"epoch": 1.9743589743589745,
"grad_norm": 0.17139089107513428,
"learning_rate": 1.0495192443119076e-05,
"loss": 0.9826921820640564,
"step": 1078
},
{
"epoch": 1.978021978021978,
"grad_norm": 0.2520369589328766,
"learning_rate": 1.044739952395942e-05,
"loss": 1.006474256515503,
"step": 1080
},
{
"epoch": 1.9816849816849818,
"grad_norm": 0.13477469980716705,
"learning_rate": 1.039970135937337e-05,
"loss": 1.0919735431671143,
"step": 1082
},
{
"epoch": 1.9853479853479854,
"grad_norm": 0.06585554778575897,
"learning_rate": 1.0352098696084461e-05,
"loss": 0.7031868696212769,
"step": 1084
},
{
"epoch": 1.989010989010989,
"grad_norm": 0.34441766142845154,
"learning_rate": 1.0304592279321138e-05,
"loss": 0.9618685245513916,
"step": 1086
},
{
"epoch": 1.9926739926739927,
"grad_norm": 0.20067350566387177,
"learning_rate": 1.02571828528051e-05,
"loss": 0.958778440952301,
"step": 1088
},
{
"epoch": 1.9963369963369964,
"grad_norm": 0.07336684316396713,
"learning_rate": 1.0209871158739632e-05,
"loss": 0.9063729047775269,
"step": 1090
},
{
"epoch": 2.0,
"grad_norm": 0.12516078352928162,
"learning_rate": 1.0162657937798014e-05,
"loss": 1.0292725563049316,
"step": 1092
},
{
"epoch": 2.0036630036630036,
"grad_norm": 0.1312766820192337,
"learning_rate": 1.0115543929111896e-05,
"loss": 1.0530153512954712,
"step": 1094
},
{
"epoch": 2.0073260073260073,
"grad_norm": 0.17860770225524902,
"learning_rate": 1.0068529870259744e-05,
"loss": 1.1205320358276367,
"step": 1096
},
{
"epoch": 2.010989010989011,
"grad_norm": 0.11073648929595947,
"learning_rate": 1.0021616497255306e-05,
"loss": 1.0491172075271606,
"step": 1098
},
{
"epoch": 2.0146520146520146,
"grad_norm": 0.2365168035030365,
"learning_rate": 9.97480454453607e-06,
"loss": 1.135168194770813,
"step": 1100
},
{
"epoch": 2.0183150183150182,
"grad_norm": 0.16269534826278687,
"learning_rate": 9.928094744951743e-06,
"loss": 0.818796694278717,
"step": 1102
},
{
"epoch": 2.021978021978022,
"grad_norm": 0.16431863605976105,
"learning_rate": 9.881487829752845e-06,
"loss": 1.1236073970794678,
"step": 1104
},
{
"epoch": 2.0256410256410255,
"grad_norm": 0.2139783650636673,
"learning_rate": 9.834984528579202e-06,
"loss": 0.7558184266090393,
"step": 1106
},
{
"epoch": 2.029304029304029,
"grad_norm": 0.1066666916012764,
"learning_rate": 9.788585569448547e-06,
"loss": 1.1649444103240967,
"step": 1108
},
{
"epoch": 2.032967032967033,
"grad_norm": 0.2774350941181183,
"learning_rate": 9.742291678745116e-06,
"loss": 0.878158450126648,
"step": 1110
},
{
"epoch": 2.0366300366300365,
"grad_norm": 0.0811762735247612,
"learning_rate": 9.696103581208279e-06,
"loss": 0.5665119886398315,
"step": 1112
},
{
"epoch": 2.04029304029304,
"grad_norm": 0.3537541627883911,
"learning_rate": 9.650021999921201e-06,
"loss": 0.801210880279541,
"step": 1114
},
{
"epoch": 2.043956043956044,
"grad_norm": 0.7711409330368042,
"learning_rate": 9.604047656299518e-06,
"loss": 1.1278789043426514,
"step": 1116
},
{
"epoch": 2.0476190476190474,
"grad_norm": 0.17425225675106049,
"learning_rate": 9.558181270080027e-06,
"loss": 1.094169020652771,
"step": 1118
},
{
"epoch": 2.051282051282051,
"grad_norm": 0.28304409980773926,
"learning_rate": 9.512423559309438e-06,
"loss": 0.7918586730957031,
"step": 1120
},
{
"epoch": 2.0549450549450547,
"grad_norm": 0.49804916977882385,
"learning_rate": 9.46677524033314e-06,
"loss": 1.2529525756835938,
"step": 1122
},
{
"epoch": 2.0586080586080584,
"grad_norm": 0.237227663397789,
"learning_rate": 9.421237027783945e-06,
"loss": 1.119982361793518,
"step": 1124
},
{
"epoch": 2.062271062271062,
"grad_norm": 0.2809504568576813,
"learning_rate": 9.37580963457096e-06,
"loss": 0.7264096736907959,
"step": 1126
},
{
"epoch": 2.065934065934066,
"grad_norm": 0.13018397986888885,
"learning_rate": 9.330493771868376e-06,
"loss": 1.0815666913986206,
"step": 1128
},
{
"epoch": 2.06959706959707,
"grad_norm": 0.12473749369382858,
"learning_rate": 9.285290149104353e-06,
"loss": 1.0647907257080078,
"step": 1130
},
{
"epoch": 2.0732600732600734,
"grad_norm": 0.19751816987991333,
"learning_rate": 9.240199473949919e-06,
"loss": 1.107387661933899,
"step": 1132
},
{
"epoch": 2.076923076923077,
"grad_norm": 0.19950120151042938,
"learning_rate": 9.195222452307901e-06,
"loss": 1.0926631689071655,
"step": 1134
},
{
"epoch": 2.0805860805860807,
"grad_norm": 0.3071011006832123,
"learning_rate": 9.15035978830183e-06,
"loss": 0.7186596989631653,
"step": 1136
},
{
"epoch": 2.0842490842490844,
"grad_norm": 0.13422009348869324,
"learning_rate": 9.105612184264966e-06,
"loss": 1.0940665006637573,
"step": 1138
},
{
"epoch": 2.087912087912088,
"grad_norm": 0.303777813911438,
"learning_rate": 9.060980340729273e-06,
"loss": 0.9830268621444702,
"step": 1140
},
{
"epoch": 2.0915750915750917,
"grad_norm": 0.3303181231021881,
"learning_rate": 9.01646495641448e-06,
"loss": 1.0870164632797241,
"step": 1142
},
{
"epoch": 2.0952380952380953,
"grad_norm": 0.0919371172785759,
"learning_rate": 8.972066728217119e-06,
"loss": 0.35283589363098145,
"step": 1144
},
{
"epoch": 2.098901098901099,
"grad_norm": 0.6702982187271118,
"learning_rate": 8.927786351199602e-06,
"loss": 0.8584544658660889,
"step": 1146
},
{
"epoch": 2.1025641025641026,
"grad_norm": 0.4644732177257538,
"learning_rate": 8.883624518579383e-06,
"loss": 0.786492109298706,
"step": 1148
},
{
"epoch": 2.1062271062271063,
"grad_norm": 0.15775777399539948,
"learning_rate": 8.839581921718077e-06,
"loss": 0.7861689925193787,
"step": 1150
},
{
"epoch": 2.10989010989011,
"grad_norm": 0.213873028755188,
"learning_rate": 8.795659250110636e-06,
"loss": 0.9232085943222046,
"step": 1152
},
{
"epoch": 2.1135531135531136,
"grad_norm": 0.371718168258667,
"learning_rate": 8.751857191374557e-06,
"loss": 0.730678141117096,
"step": 1154
},
{
"epoch": 2.1172161172161172,
"grad_norm": 0.18902969360351562,
"learning_rate": 8.708176431239132e-06,
"loss": 1.0358822345733643,
"step": 1156
},
{
"epoch": 2.120879120879121,
"grad_norm": 0.25546038150787354,
"learning_rate": 8.664617653534689e-06,
"loss": 1.00810706615448,
"step": 1158
},
{
"epoch": 2.1245421245421245,
"grad_norm": 0.9564505815505981,
"learning_rate": 8.62118154018191e-06,
"loss": 0.8407284617424011,
"step": 1160
},
{
"epoch": 2.128205128205128,
"grad_norm": 0.38118690252304077,
"learning_rate": 8.577868771181137e-06,
"loss": 0.7272080779075623,
"step": 1162
},
{
"epoch": 2.131868131868132,
"grad_norm": 0.3005581796169281,
"learning_rate": 8.534680024601725e-06,
"loss": 0.8898174166679382,
"step": 1164
},
{
"epoch": 2.1355311355311355,
"grad_norm": 0.3586452305316925,
"learning_rate": 8.491615976571454e-06,
"loss": 0.40493980050086975,
"step": 1166
},
{
"epoch": 2.139194139194139,
"grad_norm": 1.5432233810424805,
"learning_rate": 8.448677301265912e-06,
"loss": 0.41544270515441895,
"step": 1168
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.14153523743152618,
"learning_rate": 8.405864670897965e-06,
"loss": 0.8618958592414856,
"step": 1170
},
{
"epoch": 2.1465201465201464,
"grad_norm": 1.5002825260162354,
"learning_rate": 8.363178755707208e-06,
"loss": 1.103924036026001,
"step": 1172
},
{
"epoch": 2.15018315018315,
"grad_norm": 0.6268448829650879,
"learning_rate": 8.32062022394949e-06,
"loss": 1.2559269666671753,
"step": 1174
},
{
"epoch": 2.1538461538461537,
"grad_norm": 0.43047645688056946,
"learning_rate": 8.278189741886461e-06,
"loss": 0.7496550679206848,
"step": 1176
},
{
"epoch": 2.1575091575091574,
"grad_norm": 0.1281701624393463,
"learning_rate": 8.235887973775122e-06,
"loss": 1.1159412860870361,
"step": 1178
},
{
"epoch": 2.161172161172161,
"grad_norm": 0.10378590226173401,
"learning_rate": 8.193715581857427e-06,
"loss": 1.11162269115448,
"step": 1180
},
{
"epoch": 2.1648351648351647,
"grad_norm": 0.3076005280017853,
"learning_rate": 8.151673226349922e-06,
"loss": 1.0918073654174805,
"step": 1182
},
{
"epoch": 2.1684981684981683,
"grad_norm": 0.13103008270263672,
"learning_rate": 8.109761565433432e-06,
"loss": 1.122482180595398,
"step": 1184
},
{
"epoch": 2.172161172161172,
"grad_norm": 0.12446437776088715,
"learning_rate": 8.067981255242707e-06,
"loss": 0.25327301025390625,
"step": 1186
},
{
"epoch": 2.1758241758241756,
"grad_norm": 0.25651493668556213,
"learning_rate": 8.02633294985618e-06,
"loss": 1.1719110012054443,
"step": 1188
},
{
"epoch": 2.1794871794871793,
"grad_norm": 1.0342334508895874,
"learning_rate": 7.984817301285743e-06,
"loss": 0.7195925116539001,
"step": 1190
},
{
"epoch": 2.183150183150183,
"grad_norm": 0.6622636318206787,
"learning_rate": 7.943434959466499e-06,
"loss": 0.9531126618385315,
"step": 1192
},
{
"epoch": 2.186813186813187,
"grad_norm": 0.2052282840013504,
"learning_rate": 7.902186572246633e-06,
"loss": 0.7320451736450195,
"step": 1194
},
{
"epoch": 2.1904761904761907,
"grad_norm": 0.12658308446407318,
"learning_rate": 7.861072785377226e-06,
"loss": 0.8746700882911682,
"step": 1196
},
{
"epoch": 2.1941391941391943,
"grad_norm": 0.15891726315021515,
"learning_rate": 7.820094242502165e-06,
"loss": 1.0944993495941162,
"step": 1198
},
{
"epoch": 2.197802197802198,
"grad_norm": 0.5890740156173706,
"learning_rate": 7.779251585148091e-06,
"loss": 0.6998624801635742,
"step": 1200
},
{
"epoch": 2.2014652014652016,
"grad_norm": 0.304503470659256,
"learning_rate": 7.7385454527143e-06,
"loss": 1.1593294143676758,
"step": 1202
},
{
"epoch": 2.2051282051282053,
"grad_norm": 0.49278563261032104,
"learning_rate": 7.697976482462797e-06,
"loss": 1.1104764938354492,
"step": 1204
},
{
"epoch": 2.208791208791209,
"grad_norm": 0.4754427671432495,
"learning_rate": 7.657545309508264e-06,
"loss": 0.4781201481819153,
"step": 1206
},
{
"epoch": 2.2124542124542126,
"grad_norm": 0.16634488105773926,
"learning_rate": 7.617252566808145e-06,
"loss": 0.718996524810791,
"step": 1208
},
{
"epoch": 2.2161172161172162,
"grad_norm": 0.17117178440093994,
"learning_rate": 7.577098885152746e-06,
"loss": 0.5503133535385132,
"step": 1210
},
{
"epoch": 2.21978021978022,
"grad_norm": 0.30203965306282043,
"learning_rate": 7.537084893155339e-06,
"loss": 1.0482512712478638,
"step": 1212
},
{
"epoch": 2.2234432234432235,
"grad_norm": 0.38951486349105835,
"learning_rate": 7.497211217242321e-06,
"loss": 0.7798130512237549,
"step": 1214
},
{
"epoch": 2.227106227106227,
"grad_norm": 0.24308180809020996,
"learning_rate": 7.457478481643422e-06,
"loss": 0.8609579801559448,
"step": 1216
},
{
"epoch": 2.230769230769231,
"grad_norm": 0.20325809717178345,
"learning_rate": 7.417887308381932e-06,
"loss": 1.0931227207183838,
"step": 1218
},
{
"epoch": 2.2344322344322345,
"grad_norm": 0.18645040690898895,
"learning_rate": 7.378438317264942e-06,
"loss": 1.0955512523651123,
"step": 1220
},
{
"epoch": 2.238095238095238,
"grad_norm": 0.1416483223438263,
"learning_rate": 7.339132125873669e-06,
"loss": 1.1086313724517822,
"step": 1222
},
{
"epoch": 2.241758241758242,
"grad_norm": 0.4882635176181793,
"learning_rate": 7.299969349553767e-06,
"loss": 0.4420263171195984,
"step": 1224
},
{
"epoch": 2.2454212454212454,
"grad_norm": 0.7788896560668945,
"learning_rate": 7.260950601405695e-06,
"loss": 1.061559796333313,
"step": 1226
},
{
"epoch": 2.249084249084249,
"grad_norm": 0.8590144515037537,
"learning_rate": 7.222076492275143e-06,
"loss": 0.5786591172218323,
"step": 1228
},
{
"epoch": 2.2527472527472527,
"grad_norm": 0.11729376763105392,
"learning_rate": 7.183347630743432e-06,
"loss": 0.57936692237854,
"step": 1230
},
{
"epoch": 2.2564102564102564,
"grad_norm": 0.1521664410829544,
"learning_rate": 7.1447646231180085e-06,
"loss": 0.5732910633087158,
"step": 1232
},
{
"epoch": 2.26007326007326,
"grad_norm": 0.6147300601005554,
"learning_rate": 7.10632807342296e-06,
"loss": 1.0902845859527588,
"step": 1234
},
{
"epoch": 2.2637362637362637,
"grad_norm": 0.18098106980323792,
"learning_rate": 7.068038583389534e-06,
"loss": 0.7539463639259338,
"step": 1236
},
{
"epoch": 2.2673992673992673,
"grad_norm": 0.12775729596614838,
"learning_rate": 7.029896752446748e-06,
"loss": 0.9212363362312317,
"step": 1238
},
{
"epoch": 2.271062271062271,
"grad_norm": 0.2789549231529236,
"learning_rate": 6.991903177711974e-06,
"loss": 0.8633685111999512,
"step": 1240
},
{
"epoch": 2.2747252747252746,
"grad_norm": 0.22567078471183777,
"learning_rate": 6.9540584539816095e-06,
"loss": 0.888152539730072,
"step": 1242
},
{
"epoch": 2.2783882783882783,
"grad_norm": 0.2940141558647156,
"learning_rate": 6.916363173721768e-06,
"loss": 1.08126699924469,
"step": 1244
},
{
"epoch": 2.282051282051282,
"grad_norm": 0.046570662409067154,
"learning_rate": 6.878817927058999e-06,
"loss": 0.705131471157074,
"step": 1246
},
{
"epoch": 2.2857142857142856,
"grad_norm": 1.4523853063583374,
"learning_rate": 6.841423301771039e-06,
"loss": 0.6978131532669067,
"step": 1248
},
{
"epoch": 2.2893772893772892,
"grad_norm": 0.2593541741371155,
"learning_rate": 6.804179883277623e-06,
"loss": 1.0748162269592285,
"step": 1250
},
{
"epoch": 2.293040293040293,
"grad_norm": 0.25335627794265747,
"learning_rate": 6.76708825463132e-06,
"loss": 0.8004967570304871,
"step": 1252
},
{
"epoch": 2.2967032967032965,
"grad_norm": 0.23331041634082794,
"learning_rate": 6.730148996508395e-06,
"loss": 1.135155200958252,
"step": 1254
},
{
"epoch": 2.3003663003663,
"grad_norm": 0.38009828329086304,
"learning_rate": 6.693362687199734e-06,
"loss": 1.1570165157318115,
"step": 1256
},
{
"epoch": 2.304029304029304,
"grad_norm": 0.26016008853912354,
"learning_rate": 6.656729902601769e-06,
"loss": 0.7557705640792847,
"step": 1258
},
{
"epoch": 2.3076923076923075,
"grad_norm": 0.32817086577415466,
"learning_rate": 6.620251216207478e-06,
"loss": 1.136408805847168,
"step": 1260
},
{
"epoch": 2.311355311355311,
"grad_norm": 0.23846906423568726,
"learning_rate": 6.583927199097413e-06,
"loss": 0.9904882311820984,
"step": 1262
},
{
"epoch": 2.315018315018315,
"grad_norm": 0.3852023482322693,
"learning_rate": 6.547758419930738e-06,
"loss": 0.842047393321991,
"step": 1264
},
{
"epoch": 2.3186813186813184,
"grad_norm": 0.3430316746234894,
"learning_rate": 6.51174544493634e-06,
"loss": 0.7513152360916138,
"step": 1266
},
{
"epoch": 2.3223443223443225,
"grad_norm": 0.3087650537490845,
"learning_rate": 6.47588883790397e-06,
"loss": 0.7970260977745056,
"step": 1268
},
{
"epoch": 2.326007326007326,
"grad_norm": 0.46505463123321533,
"learning_rate": 6.440189160175403e-06,
"loss": 1.1637327671051025,
"step": 1270
},
{
"epoch": 2.32967032967033,
"grad_norm": 0.23669065535068512,
"learning_rate": 6.404646970635663e-06,
"loss": 0.9795457720756531,
"step": 1272
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.3105694651603699,
"learning_rate": 6.369262825704263e-06,
"loss": 0.5742363929748535,
"step": 1274
},
{
"epoch": 2.336996336996337,
"grad_norm": 0.2248220443725586,
"learning_rate": 6.334037279326493e-06,
"loss": 0.9909239411354065,
"step": 1276
},
{
"epoch": 2.340659340659341,
"grad_norm": 0.3926773965358734,
"learning_rate": 6.2989708829647665e-06,
"loss": 0.8048092722892761,
"step": 1278
},
{
"epoch": 2.3443223443223444,
"grad_norm": 0.1858094483613968,
"learning_rate": 6.264064185589969e-06,
"loss": 0.7330389022827148,
"step": 1280
},
{
"epoch": 2.347985347985348,
"grad_norm": 0.210875004529953,
"learning_rate": 6.229317733672865e-06,
"loss": 0.5947535037994385,
"step": 1282
},
{
"epoch": 2.3516483516483517,
"grad_norm": 0.12374936044216156,
"learning_rate": 6.194732071175547e-06,
"loss": 1.070632815361023,
"step": 1284
},
{
"epoch": 2.3553113553113554,
"grad_norm": 0.3236323297023773,
"learning_rate": 6.160307739542927e-06,
"loss": 0.5611193180084229,
"step": 1286
},
{
"epoch": 2.358974358974359,
"grad_norm": 0.10615868121385574,
"learning_rate": 6.126045277694242e-06,
"loss": 0.9052793979644775,
"step": 1288
},
{
"epoch": 2.3626373626373627,
"grad_norm": 0.1812293529510498,
"learning_rate": 6.091945222014643e-06,
"loss": 0.8611981272697449,
"step": 1290
},
{
"epoch": 2.3663003663003663,
"grad_norm": 0.14232122898101807,
"learning_rate": 6.058008106346765e-06,
"loss": 0.7440568804740906,
"step": 1292
},
{
"epoch": 2.36996336996337,
"grad_norm": 0.13175328075885773,
"learning_rate": 6.0242344619823924e-06,
"loss": 0.8583801984786987,
"step": 1294
},
{
"epoch": 2.3736263736263736,
"grad_norm": 0.380293071269989,
"learning_rate": 5.99062481765415e-06,
"loss": 1.184190034866333,
"step": 1296
},
{
"epoch": 2.3772893772893773,
"grad_norm": 0.22037489712238312,
"learning_rate": 5.95717969952719e-06,
"loss": 1.06187903881073,
"step": 1298
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.14270919561386108,
"learning_rate": 5.9238996311909985e-06,
"loss": 1.1278164386749268,
"step": 1300
},
{
"epoch": 2.3846153846153846,
"grad_norm": 0.396483451128006,
"learning_rate": 5.890785133651159e-06,
"loss": 0.8158243894577026,
"step": 1302
},
{
"epoch": 2.3882783882783882,
"grad_norm": 0.3289090692996979,
"learning_rate": 5.857836725321219e-06,
"loss": 0.44378575682640076,
"step": 1304
},
{
"epoch": 2.391941391941392,
"grad_norm": 0.18279911577701569,
"learning_rate": 5.825054922014571e-06,
"loss": 0.8863163590431213,
"step": 1306
},
{
"epoch": 2.3956043956043955,
"grad_norm": 0.5970947742462158,
"learning_rate": 5.792440236936386e-06,
"loss": 1.0911579132080078,
"step": 1308
},
{
"epoch": 2.399267399267399,
"grad_norm": 0.45000842213630676,
"learning_rate": 5.759993180675542e-06,
"loss": 0.4878068268299103,
"step": 1310
},
{
"epoch": 2.402930402930403,
"grad_norm": 0.1446293145418167,
"learning_rate": 5.727714261196677e-06,
"loss": 1.095346212387085,
"step": 1312
},
{
"epoch": 2.4065934065934065,
"grad_norm": 0.11170390248298645,
"learning_rate": 5.695603983832217e-06,
"loss": 0.7981175780296326,
"step": 1314
},
{
"epoch": 2.41025641025641,
"grad_norm": 0.1577857881784439,
"learning_rate": 5.663662851274458e-06,
"loss": 1.0971083641052246,
"step": 1316
},
{
"epoch": 2.413919413919414,
"grad_norm": 0.6926708221435547,
"learning_rate": 5.631891363567699e-06,
"loss": 0.7854663729667664,
"step": 1318
},
{
"epoch": 2.4175824175824174,
"grad_norm": 0.503436267375946,
"learning_rate": 5.600290018100429e-06,
"loss": 0.7330096364021301,
"step": 1320
},
{
"epoch": 2.421245421245421,
"grad_norm": 0.20037294924259186,
"learning_rate": 5.568859309597517e-06,
"loss": 0.5682854652404785,
"step": 1322
},
{
"epoch": 2.4249084249084247,
"grad_norm": 0.5486008524894714,
"learning_rate": 5.537599730112495e-06,
"loss": 0.7392922639846802,
"step": 1324
},
{
"epoch": 2.4285714285714284,
"grad_norm": 0.1791073977947235,
"learning_rate": 5.50651176901982e-06,
"loss": 0.7554723024368286,
"step": 1326
},
{
"epoch": 2.4322344322344325,
"grad_norm": 0.06500615179538727,
"learning_rate": 5.475595913007242e-06,
"loss": 0.666039764881134,
"step": 1328
},
{
"epoch": 2.435897435897436,
"grad_norm": 0.40083473920822144,
"learning_rate": 5.4448526460681765e-06,
"loss": 0.7853435277938843,
"step": 1330
},
{
"epoch": 2.4395604395604398,
"grad_norm": 0.3601733446121216,
"learning_rate": 5.414282449494118e-06,
"loss": 1.0072463750839233,
"step": 1332
},
{
"epoch": 2.4432234432234434,
"grad_norm": 0.1372423619031906,
"learning_rate": 5.3838858018671185e-06,
"loss": 0.7029188275337219,
"step": 1334
},
{
"epoch": 2.446886446886447,
"grad_norm": 0.39225441217422485,
"learning_rate": 5.353663179052286e-06,
"loss": 0.6242628693580627,
"step": 1336
},
{
"epoch": 2.4505494505494507,
"grad_norm": 0.1579807996749878,
"learning_rate": 5.323615054190335e-06,
"loss": 0.9927806854248047,
"step": 1338
},
{
"epoch": 2.4542124542124544,
"grad_norm": 0.07751084119081497,
"learning_rate": 5.293741897690192e-06,
"loss": 0.8317915201187134,
"step": 1340
},
{
"epoch": 2.457875457875458,
"grad_norm": 0.07456672191619873,
"learning_rate": 5.264044177221619e-06,
"loss": 0.5970339775085449,
"step": 1342
},
{
"epoch": 2.4615384615384617,
"grad_norm": 0.12574979662895203,
"learning_rate": 5.23452235770788e-06,
"loss": 0.797691285610199,
"step": 1344
},
{
"epoch": 2.4652014652014653,
"grad_norm": 0.13968591392040253,
"learning_rate": 5.205176901318497e-06,
"loss": 1.031499981880188,
"step": 1346
},
{
"epoch": 2.468864468864469,
"grad_norm": 0.21238183975219727,
"learning_rate": 5.176008267461988e-06,
"loss": 1.0819623470306396,
"step": 1348
},
{
"epoch": 2.4725274725274726,
"grad_norm": 0.21279728412628174,
"learning_rate": 5.14701691277868e-06,
"loss": 0.6901581883430481,
"step": 1350
},
{
"epoch": 2.4761904761904763,
"grad_norm": 0.1678483486175537,
"learning_rate": 5.118203291133559e-06,
"loss": 0.7881539463996887,
"step": 1352
},
{
"epoch": 2.47985347985348,
"grad_norm": 0.4039932191371918,
"learning_rate": 5.0895678536091705e-06,
"loss": 0.8885727524757385,
"step": 1354
},
{
"epoch": 2.4835164835164836,
"grad_norm": 0.033704448491334915,
"learning_rate": 5.061111048498556e-06,
"loss": 0.8928987383842468,
"step": 1356
},
{
"epoch": 2.4871794871794872,
"grad_norm": 0.2174617052078247,
"learning_rate": 5.032833321298238e-06,
"loss": 1.059902548789978,
"step": 1358
},
{
"epoch": 2.490842490842491,
"grad_norm": 0.2906103730201721,
"learning_rate": 5.004735114701233e-06,
"loss": 0.8573927283287048,
"step": 1360
},
{
"epoch": 2.4945054945054945,
"grad_norm": 0.2030203640460968,
"learning_rate": 4.97681686859013e-06,
"loss": 0.7605724334716797,
"step": 1362
},
{
"epoch": 2.498168498168498,
"grad_norm": 0.22399546205997467,
"learning_rate": 4.949079020030214e-06,
"loss": 0.7423288226127625,
"step": 1364
},
{
"epoch": 2.501831501831502,
"grad_norm": 0.289281964302063,
"learning_rate": 4.921522003262595e-06,
"loss": 1.0794835090637207,
"step": 1366
},
{
"epoch": 2.5054945054945055,
"grad_norm": 0.3163336515426636,
"learning_rate": 4.89414624969745e-06,
"loss": 0.783159077167511,
"step": 1368
},
{
"epoch": 2.509157509157509,
"grad_norm": 0.3136351406574249,
"learning_rate": 4.8669521879072295e-06,
"loss": 0.8880327939987183,
"step": 1370
},
{
"epoch": 2.5128205128205128,
"grad_norm": 0.09697853773832321,
"learning_rate": 4.839940243619968e-06,
"loss": 0.6036884188652039,
"step": 1372
},
{
"epoch": 2.5164835164835164,
"grad_norm": 0.1854383796453476,
"learning_rate": 4.813110839712629e-06,
"loss": 0.7379795908927917,
"step": 1374
},
{
"epoch": 2.52014652014652,
"grad_norm": 0.2810053527355194,
"learning_rate": 4.786464396204463e-06,
"loss": 0.9333634972572327,
"step": 1376
},
{
"epoch": 2.5238095238095237,
"grad_norm": 0.132881760597229,
"learning_rate": 4.760001330250443e-06,
"loss": 0.7495957612991333,
"step": 1378
},
{
"epoch": 2.5274725274725274,
"grad_norm": 0.13360945880413055,
"learning_rate": 4.733722056134734e-06,
"loss": 0.8512004613876343,
"step": 1380
},
{
"epoch": 2.531135531135531,
"grad_norm": 0.026275115087628365,
"learning_rate": 4.707626985264201e-06,
"loss": 0.8265200853347778,
"step": 1382
},
{
"epoch": 2.5347985347985347,
"grad_norm": 0.12132708728313446,
"learning_rate": 4.681716526161982e-06,
"loss": 0.7633360624313354,
"step": 1384
},
{
"epoch": 2.5384615384615383,
"grad_norm": 0.14558154344558716,
"learning_rate": 4.655991084461084e-06,
"loss": 1.0717121362686157,
"step": 1386
},
{
"epoch": 2.542124542124542,
"grad_norm": 0.23924115300178528,
"learning_rate": 4.630451062898016e-06,
"loss": 0.7378057241439819,
"step": 1388
},
{
"epoch": 2.5457875457875456,
"grad_norm": 0.0945201888680458,
"learning_rate": 4.6050968613065214e-06,
"loss": 0.7798994183540344,
"step": 1390
},
{
"epoch": 2.5494505494505493,
"grad_norm": 0.20364250242710114,
"learning_rate": 4.579928876611288e-06,
"loss": 0.9487650394439697,
"step": 1392
},
{
"epoch": 2.553113553113553,
"grad_norm": 0.5912707448005676,
"learning_rate": 4.554947502821745e-06,
"loss": 0.7024298310279846,
"step": 1394
},
{
"epoch": 2.5567765567765566,
"grad_norm": 0.18214735388755798,
"learning_rate": 4.53015313102589e-06,
"loss": 0.3131049871444702,
"step": 1396
},
{
"epoch": 2.5604395604395602,
"grad_norm": 0.16529445350170135,
"learning_rate": 4.505546149384179e-06,
"loss": 1.0923385620117188,
"step": 1398
},
{
"epoch": 2.564102564102564,
"grad_norm": 0.2890293598175049,
"learning_rate": 4.481126943123428e-06,
"loss": 1.0871299505233765,
"step": 1400
},
{
"epoch": 2.5677655677655675,
"grad_norm": 0.4707176089286804,
"learning_rate": 4.45689589453081e-06,
"loss": 0.83514803647995,
"step": 1402
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.13972851634025574,
"learning_rate": 4.432853382947845e-06,
"loss": 1.0818039178848267,
"step": 1404
},
{
"epoch": 2.575091575091575,
"grad_norm": 0.43603020906448364,
"learning_rate": 4.408999784764466e-06,
"loss": 0.4603523313999176,
"step": 1406
},
{
"epoch": 2.578754578754579,
"grad_norm": 0.12546171247959137,
"learning_rate": 4.3853354734131475e-06,
"loss": 1.1147819757461548,
"step": 1408
},
{
"epoch": 2.5824175824175826,
"grad_norm": 0.16802863776683807,
"learning_rate": 4.361860819363036e-06,
"loss": 0.843324601650238,
"step": 1410
},
{
"epoch": 2.586080586080586,
"grad_norm": 0.2939806282520294,
"learning_rate": 4.338576190114154e-06,
"loss": 0.7986133694648743,
"step": 1412
},
{
"epoch": 2.58974358974359,
"grad_norm": 0.17033688724040985,
"learning_rate": 4.315481950191659e-06,
"loss": 0.9339474439620972,
"step": 1414
},
{
"epoch": 2.5934065934065935,
"grad_norm": 0.19038307666778564,
"learning_rate": 4.292578461140117e-06,
"loss": 1.0690734386444092,
"step": 1416
},
{
"epoch": 2.597069597069597,
"grad_norm": 3.438582181930542,
"learning_rate": 4.269866081517867e-06,
"loss": 0.7258854508399963,
"step": 1418
},
{
"epoch": 2.600732600732601,
"grad_norm": 0.20529590547084808,
"learning_rate": 4.2473451668913935e-06,
"loss": 1.0604960918426514,
"step": 1420
},
{
"epoch": 2.6043956043956045,
"grad_norm": 0.221652090549469,
"learning_rate": 4.225016069829747e-06,
"loss": 1.0046571493148804,
"step": 1422
},
{
"epoch": 2.608058608058608,
"grad_norm": 0.22944243252277374,
"learning_rate": 4.2028791398990525e-06,
"loss": 1.164031982421875,
"step": 1424
},
{
"epoch": 2.6117216117216118,
"grad_norm": 2.034560441970825,
"learning_rate": 4.180934723657021e-06,
"loss": 1.2159157991409302,
"step": 1426
},
{
"epoch": 2.6153846153846154,
"grad_norm": 0.38653919100761414,
"learning_rate": 4.159183164647525e-06,
"loss": 1.113503336906433,
"step": 1428
},
{
"epoch": 2.619047619047619,
"grad_norm": 0.127518430352211,
"learning_rate": 4.137624803395217e-06,
"loss": 1.056712031364441,
"step": 1430
},
{
"epoch": 2.6227106227106227,
"grad_norm": 0.6618165373802185,
"learning_rate": 4.116259977400214e-06,
"loss": 0.8523443937301636,
"step": 1432
},
{
"epoch": 2.6263736263736264,
"grad_norm": 0.751720666885376,
"learning_rate": 4.0950890211327875e-06,
"loss": 0.9039216637611389,
"step": 1434
},
{
"epoch": 2.63003663003663,
"grad_norm": 0.2348269522190094,
"learning_rate": 4.0741122660281595e-06,
"loss": 1.2673101425170898,
"step": 1436
},
{
"epoch": 2.6336996336996337,
"grad_norm": 0.1788758486509323,
"learning_rate": 4.053330040481287e-06,
"loss": 1.1147148609161377,
"step": 1438
},
{
"epoch": 2.6373626373626373,
"grad_norm": 0.1402553915977478,
"learning_rate": 4.032742669841728e-06,
"loss": 1.0825566053390503,
"step": 1440
},
{
"epoch": 2.641025641025641,
"grad_norm": 0.16031986474990845,
"learning_rate": 4.012350476408563e-06,
"loss": 1.0977410078048706,
"step": 1442
},
{
"epoch": 2.6446886446886446,
"grad_norm": 0.25420641899108887,
"learning_rate": 3.992153779425325e-06,
"loss": 1.0275911092758179,
"step": 1444
},
{
"epoch": 2.6483516483516483,
"grad_norm": 0.25857681035995483,
"learning_rate": 3.972152895075025e-06,
"loss": 0.7753064036369324,
"step": 1446
},
{
"epoch": 2.652014652014652,
"grad_norm": 0.17423996329307556,
"learning_rate": 3.952348136475182e-06,
"loss": 0.7941141128540039,
"step": 1448
},
{
"epoch": 2.6556776556776556,
"grad_norm": 0.22696810960769653,
"learning_rate": 3.932739813672935e-06,
"loss": 1.064581036567688,
"step": 1450
},
{
"epoch": 2.659340659340659,
"grad_norm": 0.22865885496139526,
"learning_rate": 3.913328233640182e-06,
"loss": 0.6472091674804688,
"step": 1452
},
{
"epoch": 2.663003663003663,
"grad_norm": 0.44280895590782166,
"learning_rate": 3.894113700268784e-06,
"loss": 0.6564828753471375,
"step": 1454
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.2046354115009308,
"learning_rate": 3.8750965143657906e-06,
"loss": 1.0501418113708496,
"step": 1456
},
{
"epoch": 2.67032967032967,
"grad_norm": 0.27620330452919006,
"learning_rate": 3.8562769736487434e-06,
"loss": 0.7150424718856812,
"step": 1458
},
{
"epoch": 2.6739926739926743,
"grad_norm": 0.37583354115486145,
"learning_rate": 3.8376553727410175e-06,
"loss": 0.7222166657447815,
"step": 1460
},
{
"epoch": 2.677655677655678,
"grad_norm": 0.1864767223596573,
"learning_rate": 3.819232003167198e-06,
"loss": 0.7472525835037231,
"step": 1462
},
{
"epoch": 2.6813186813186816,
"grad_norm": 0.3188279867172241,
"learning_rate": 3.801007153348521e-06,
"loss": 1.0297720432281494,
"step": 1464
},
{
"epoch": 2.684981684981685,
"grad_norm": 0.2448807954788208,
"learning_rate": 3.7829811085983675e-06,
"loss": 0.8464494943618774,
"step": 1466
},
{
"epoch": 2.688644688644689,
"grad_norm": 0.20494867861270905,
"learning_rate": 3.765154151117778e-06,
"loss": 1.0036866664886475,
"step": 1468
},
{
"epoch": 2.6923076923076925,
"grad_norm": 1.04972505569458,
"learning_rate": 3.747526559991056e-06,
"loss": 0.604132890701294,
"step": 1470
},
{
"epoch": 2.695970695970696,
"grad_norm": 0.4022798538208008,
"learning_rate": 3.7300986111813788e-06,
"loss": 0.755085289478302,
"step": 1472
},
{
"epoch": 2.6996336996337,
"grad_norm": 0.1984054297208786,
"learning_rate": 3.7128705775264885e-06,
"loss": 0.4051523506641388,
"step": 1474
},
{
"epoch": 2.7032967032967035,
"grad_norm": 0.12463561445474625,
"learning_rate": 3.695842728734425e-06,
"loss": 1.1173571348190308,
"step": 1476
},
{
"epoch": 2.706959706959707,
"grad_norm": 1.7665959596633911,
"learning_rate": 3.6790153313792904e-06,
"loss": 0.6412270069122314,
"step": 1478
},
{
"epoch": 2.7106227106227108,
"grad_norm": 0.19511784613132477,
"learning_rate": 3.662388648897086e-06,
"loss": 1.0653748512268066,
"step": 1480
},
{
"epoch": 2.7142857142857144,
"grad_norm": 0.11843400448560715,
"learning_rate": 3.6459629415815826e-06,
"loss": 1.07832670211792,
"step": 1482
},
{
"epoch": 2.717948717948718,
"grad_norm": 0.6528528928756714,
"learning_rate": 3.629738466580249e-06,
"loss": 0.9797776341438293,
"step": 1484
},
{
"epoch": 2.7216117216117217,
"grad_norm": 0.16327300667762756,
"learning_rate": 3.6137154778902252e-06,
"loss": 1.1180468797683716,
"step": 1486
},
{
"epoch": 2.7252747252747254,
"grad_norm": 0.12726306915283203,
"learning_rate": 3.5978942263543494e-06,
"loss": 1.113090991973877,
"step": 1488
},
{
"epoch": 2.728937728937729,
"grad_norm": 0.24696581065654755,
"learning_rate": 3.5822749596572212e-06,
"loss": 0.4961181581020355,
"step": 1490
},
{
"epoch": 2.7326007326007327,
"grad_norm": 0.10352645814418793,
"learning_rate": 3.5668579223213327e-06,
"loss": 1.101576328277588,
"step": 1492
},
{
"epoch": 2.7362637362637363,
"grad_norm": 0.22286243736743927,
"learning_rate": 3.5516433557032396e-06,
"loss": 0.6083530783653259,
"step": 1494
},
{
"epoch": 2.73992673992674,
"grad_norm": 0.2677902281284332,
"learning_rate": 3.5366314979897804e-06,
"loss": 1.1746376752853394,
"step": 1496
},
{
"epoch": 2.7435897435897436,
"grad_norm": 0.2446221262216568,
"learning_rate": 3.5218225841943505e-06,
"loss": 1.0550827980041504,
"step": 1498
},
{
"epoch": 2.7472527472527473,
"grad_norm": 0.266031950712204,
"learning_rate": 3.5072168461532164e-06,
"loss": 0.725365161895752,
"step": 1500
},
{
"epoch": 2.750915750915751,
"grad_norm": 0.12808158993721008,
"learning_rate": 3.492814512521892e-06,
"loss": 1.0045487880706787,
"step": 1502
},
{
"epoch": 2.7545787545787546,
"grad_norm": 0.15381655097007751,
"learning_rate": 3.4786158087715646e-06,
"loss": 1.087293028831482,
"step": 1504
},
{
"epoch": 2.758241758241758,
"grad_norm": 0.22132404148578644,
"learning_rate": 3.4646209571855467e-06,
"loss": 1.090523600578308,
"step": 1506
},
{
"epoch": 2.761904761904762,
"grad_norm": 0.08514848351478577,
"learning_rate": 3.450830176855816e-06,
"loss": 0.8890020847320557,
"step": 1508
},
{
"epoch": 2.7655677655677655,
"grad_norm": 0.18703573942184448,
"learning_rate": 3.437243683679577e-06,
"loss": 0.9329557418823242,
"step": 1510
},
{
"epoch": 2.769230769230769,
"grad_norm": 1.3092690706253052,
"learning_rate": 3.4238616903558755e-06,
"loss": 0.917612612247467,
"step": 1512
},
{
"epoch": 2.772893772893773,
"grad_norm": 0.25868988037109375,
"learning_rate": 3.4106844063822806e-06,
"loss": 1.070085048675537,
"step": 1514
},
{
"epoch": 2.7765567765567765,
"grad_norm": 0.17441320419311523,
"learning_rate": 3.397712038051595e-06,
"loss": 0.9860969185829163,
"step": 1516
},
{
"epoch": 2.78021978021978,
"grad_norm": 0.10650072246789932,
"learning_rate": 3.3849447884486317e-06,
"loss": 0.9501438736915588,
"step": 1518
},
{
"epoch": 2.7838827838827838,
"grad_norm": 0.4618084132671356,
"learning_rate": 3.372382857447029e-06,
"loss": 0.4886384904384613,
"step": 1520
},
{
"epoch": 2.7875457875457874,
"grad_norm": 0.15856708586215973,
"learning_rate": 3.360026441706132e-06,
"loss": 0.729036808013916,
"step": 1522
},
{
"epoch": 2.791208791208791,
"grad_norm": 0.16188757121562958,
"learning_rate": 3.3478757346678978e-06,
"loss": 0.8096993565559387,
"step": 1524
},
{
"epoch": 2.7948717948717947,
"grad_norm": 0.26798292994499207,
"learning_rate": 3.335930926553878e-06,
"loss": 1.1168920993804932,
"step": 1526
},
{
"epoch": 2.7985347985347984,
"grad_norm": 0.1299770325422287,
"learning_rate": 3.324192204362245e-06,
"loss": 1.0694761276245117,
"step": 1528
},
{
"epoch": 2.802197802197802,
"grad_norm": 0.11976618319749832,
"learning_rate": 3.3126597518648514e-06,
"loss": 1.0747312307357788,
"step": 1530
},
{
"epoch": 2.8058608058608057,
"grad_norm": 0.13405738770961761,
"learning_rate": 3.301333749604362e-06,
"loss": 0.9669806957244873,
"step": 1532
},
{
"epoch": 2.8095238095238093,
"grad_norm": 0.12106166779994965,
"learning_rate": 3.2902143748914256e-06,
"loss": 0.7313033938407898,
"step": 1534
},
{
"epoch": 2.813186813186813,
"grad_norm": 0.1960568130016327,
"learning_rate": 3.279301801801897e-06,
"loss": 1.2520811557769775,
"step": 1536
},
{
"epoch": 2.8168498168498166,
"grad_norm": 0.15917320549488068,
"learning_rate": 3.2685962011741165e-06,
"loss": 0.8736512064933777,
"step": 1538
},
{
"epoch": 2.8205128205128203,
"grad_norm": 0.2921707332134247,
"learning_rate": 3.2580977406062313e-06,
"loss": 0.760867714881897,
"step": 1540
},
{
"epoch": 2.824175824175824,
"grad_norm": 0.13915686309337616,
"learning_rate": 3.24780658445357e-06,
"loss": 0.8380671739578247,
"step": 1542
},
{
"epoch": 2.8278388278388276,
"grad_norm": 0.27057400345802307,
"learning_rate": 3.237722893826076e-06,
"loss": 1.0505430698394775,
"step": 1544
},
{
"epoch": 2.8315018315018317,
"grad_norm": 0.3602019250392914,
"learning_rate": 3.2278468265857805e-06,
"loss": 1.0977282524108887,
"step": 1546
},
{
"epoch": 2.8351648351648353,
"grad_norm": 0.1742822378873825,
"learning_rate": 3.218178537344335e-06,
"loss": 1.1638634204864502,
"step": 1548
},
{
"epoch": 2.838827838827839,
"grad_norm": 0.19395624101161957,
"learning_rate": 3.208718177460581e-06,
"loss": 0.7924173474311829,
"step": 1550
},
{
"epoch": 2.8424908424908426,
"grad_norm": 0.05924064666032791,
"learning_rate": 3.199465895038196e-06,
"loss": 0.5408016443252563,
"step": 1552
},
{
"epoch": 2.8461538461538463,
"grad_norm": 0.14001992344856262,
"learning_rate": 3.19042183492336e-06,
"loss": 1.102298378944397,
"step": 1554
},
{
"epoch": 2.84981684981685,
"grad_norm": 0.1830560564994812,
"learning_rate": 3.1815861387025012e-06,
"loss": 0.8673851490020752,
"step": 1556
},
{
"epoch": 2.8534798534798536,
"grad_norm": 0.13735051453113556,
"learning_rate": 3.1729589447000673e-06,
"loss": 1.0182592868804932,
"step": 1558
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.34191715717315674,
"learning_rate": 3.164540387976365e-06,
"loss": 0.5478770732879639,
"step": 1560
},
{
"epoch": 2.860805860805861,
"grad_norm": 1.376367449760437,
"learning_rate": 3.1563306003254506e-06,
"loss": 0.6626429557800293,
"step": 1562
},
{
"epoch": 2.8644688644688645,
"grad_norm": 0.15791043639183044,
"learning_rate": 3.1483297102730584e-06,
"loss": 0.8440994620323181,
"step": 1564
},
{
"epoch": 2.868131868131868,
"grad_norm": 0.8644531965255737,
"learning_rate": 3.1405378430745944e-06,
"loss": 0.6315191388130188,
"step": 1566
},
{
"epoch": 2.871794871794872,
"grad_norm": 0.3515836298465729,
"learning_rate": 3.1329551207131714e-06,
"loss": 0.6892625093460083,
"step": 1568
},
{
"epoch": 2.8754578754578755,
"grad_norm": 0.250385046005249,
"learning_rate": 3.1255816618977038e-06,
"loss": 0.6573564410209656,
"step": 1570
},
{
"epoch": 2.879120879120879,
"grad_norm": 2.9979300498962402,
"learning_rate": 3.1184175820610454e-06,
"loss": 0.6695932745933533,
"step": 1572
},
{
"epoch": 2.8827838827838828,
"grad_norm": 0.16444166004657745,
"learning_rate": 3.111462993358183e-06,
"loss": 0.97144615650177,
"step": 1574
},
{
"epoch": 2.8864468864468864,
"grad_norm": 0.4399212896823883,
"learning_rate": 3.104718004664481e-06,
"loss": 0.8942310214042664,
"step": 1576
},
{
"epoch": 2.89010989010989,
"grad_norm": 0.05127674713730812,
"learning_rate": 3.09818272157398e-06,
"loss": 0.8532702922821045,
"step": 1578
},
{
"epoch": 2.8937728937728937,
"grad_norm": 0.36568161845207214,
"learning_rate": 3.0918572463977376e-06,
"loss": 1.1513917446136475,
"step": 1580
},
{
"epoch": 2.8974358974358974,
"grad_norm": 2.713129997253418,
"learning_rate": 3.085741678162231e-06,
"loss": 0.8030990362167358,
"step": 1582
},
{
"epoch": 2.901098901098901,
"grad_norm": 0.10846489667892456,
"learning_rate": 3.079836112607805e-06,
"loss": 0.7485967874526978,
"step": 1584
},
{
"epoch": 2.9047619047619047,
"grad_norm": 0.1457647830247879,
"learning_rate": 3.074140642187176e-06,
"loss": 0.7479197382926941,
"step": 1586
},
{
"epoch": 2.9084249084249083,
"grad_norm": 0.17844462394714355,
"learning_rate": 3.068655356063979e-06,
"loss": 1.1137986183166504,
"step": 1588
},
{
"epoch": 2.912087912087912,
"grad_norm": 0.15415538847446442,
"learning_rate": 3.063380340111379e-06,
"loss": 1.1451196670532227,
"step": 1590
},
{
"epoch": 2.9157509157509156,
"grad_norm": 0.327592134475708,
"learning_rate": 3.0583156769107198e-06,
"loss": 0.4721798300743103,
"step": 1592
},
{
"epoch": 2.9194139194139193,
"grad_norm": 0.13491347432136536,
"learning_rate": 3.0534614457502347e-06,
"loss": 1.1134108304977417,
"step": 1594
},
{
"epoch": 2.9230769230769234,
"grad_norm": 0.12644971907138824,
"learning_rate": 3.0488177226238068e-06,
"loss": 0.7207664251327515,
"step": 1596
},
{
"epoch": 2.926739926739927,
"grad_norm": 1.161653995513916,
"learning_rate": 3.0443845802297755e-06,
"loss": 1.1275835037231445,
"step": 1598
},
{
"epoch": 2.9304029304029307,
"grad_norm": 0.11171621829271317,
"learning_rate": 3.0401620879697976e-06,
"loss": 0.7411856055259705,
"step": 1600
},
{
"epoch": 2.9340659340659343,
"grad_norm": 0.168545201420784,
"learning_rate": 3.0361503119477703e-06,
"loss": 1.0992079973220825,
"step": 1602
},
{
"epoch": 2.937728937728938,
"grad_norm": 0.07794881612062454,
"learning_rate": 3.032349314968781e-06,
"loss": 0.7998334169387817,
"step": 1604
},
{
"epoch": 2.9413919413919416,
"grad_norm": 0.17410752177238464,
"learning_rate": 3.028759156538139e-06,
"loss": 1.085739016532898,
"step": 1606
},
{
"epoch": 2.9450549450549453,
"grad_norm": 0.3942596912384033,
"learning_rate": 3.025379892860435e-06,
"loss": 0.6764653325080872,
"step": 1608
},
{
"epoch": 2.948717948717949,
"grad_norm": 0.2702687680721283,
"learning_rate": 3.022211576838662e-06,
"loss": 0.9535015821456909,
"step": 1610
},
{
"epoch": 2.9523809523809526,
"grad_norm": 0.38882049918174744,
"learning_rate": 3.0192542580733894e-06,
"loss": 0.8127344846725464,
"step": 1612
},
{
"epoch": 2.956043956043956,
"grad_norm": 0.29593485593795776,
"learning_rate": 3.016507982861989e-06,
"loss": 0.6491580605506897,
"step": 1614
},
{
"epoch": 2.95970695970696,
"grad_norm": 0.11674999445676804,
"learning_rate": 3.013972794197901e-06,
"loss": 0.7913042902946472,
"step": 1616
},
{
"epoch": 2.9633699633699635,
"grad_norm": 0.41557908058166504,
"learning_rate": 3.0116487317699732e-06,
"loss": 1.3324899673461914,
"step": 1618
},
{
"epoch": 2.967032967032967,
"grad_norm": 0.742667019367218,
"learning_rate": 3.009535831961828e-06,
"loss": 1.202343225479126,
"step": 1620
},
{
"epoch": 2.970695970695971,
"grad_norm": 0.27745646238327026,
"learning_rate": 3.007634127851303e-06,
"loss": 0.8820834159851074,
"step": 1622
},
{
"epoch": 2.9743589743589745,
"grad_norm": 0.4623531401157379,
"learning_rate": 3.005943649209923e-06,
"loss": 0.7531993389129639,
"step": 1624
},
{
"epoch": 2.978021978021978,
"grad_norm": 0.20727969706058502,
"learning_rate": 3.0044644225024444e-06,
"loss": 0.8301671147346497,
"step": 1626
},
{
"epoch": 2.9816849816849818,
"grad_norm": 0.3271661698818207,
"learning_rate": 3.003196470886432e-06,
"loss": 0.9212415814399719,
"step": 1628
},
{
"epoch": 2.9853479853479854,
"grad_norm": 0.2580219805240631,
"learning_rate": 3.002139814211902e-06,
"loss": 1.356811761856079,
"step": 1630
},
{
"epoch": 2.989010989010989,
"grad_norm": 0.34232744574546814,
"learning_rate": 3.0012944690210082e-06,
"loss": 0.9668951630592346,
"step": 1632
},
{
"epoch": 2.9926739926739927,
"grad_norm": 0.3988720774650574,
"learning_rate": 3.000660448547786e-06,
"loss": 0.7317683696746826,
"step": 1634
},
{
"epoch": 2.9963369963369964,
"grad_norm": 0.1947978436946869,
"learning_rate": 3.0002377627179435e-06,
"loss": 1.229650616645813,
"step": 1636
},
{
"epoch": 3.0,
"grad_norm": 0.17901748418807983,
"learning_rate": 3.0000264181487013e-06,
"loss": 1.0439913272857666,
"step": 1638
},
{
"epoch": 3.0,
"step": 1638,
"total_flos": 8.4482141520606e+18,
"train_loss": 1.0011659153579064,
"train_runtime": 59591.0314,
"train_samples_per_second": 0.66,
"train_steps_per_second": 0.027
}
],
"logging_steps": 2,
"max_steps": 1638,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 99999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.4482141520606e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}