27b-2-lora / trainer_state.json
furproxy's picture
Upload folder using huggingface_hub
5b97b6d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1638,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003663003663003663,
"grad_norm": 2.7655394077301025,
"learning_rate": 6.000000000000001e-07,
"loss": 2.6851325035095215,
"step": 2
},
{
"epoch": 0.007326007326007326,
"grad_norm": 0.29943645000457764,
"learning_rate": 1.8e-06,
"loss": 1.6655781269073486,
"step": 4
},
{
"epoch": 0.01098901098901099,
"grad_norm": 0.29468199610710144,
"learning_rate": 3e-06,
"loss": 1.8773365020751953,
"step": 6
},
{
"epoch": 0.014652014652014652,
"grad_norm": 0.1775636374950409,
"learning_rate": 4.2000000000000004e-06,
"loss": 2.041961669921875,
"step": 8
},
{
"epoch": 0.018315018315018316,
"grad_norm": 0.2983125150203705,
"learning_rate": 5.4e-06,
"loss": 2.1573827266693115,
"step": 10
},
{
"epoch": 0.02197802197802198,
"grad_norm": 0.854044497013092,
"learning_rate": 6.6e-06,
"loss": 1.9245799779891968,
"step": 12
},
{
"epoch": 0.02564102564102564,
"grad_norm": 0.15724875032901764,
"learning_rate": 7.8e-06,
"loss": 1.7219539880752563,
"step": 14
},
{
"epoch": 0.029304029304029304,
"grad_norm": 0.26879143714904785,
"learning_rate": 9e-06,
"loss": 1.6919527053833008,
"step": 16
},
{
"epoch": 0.03296703296703297,
"grad_norm": 0.4510348439216614,
"learning_rate": 1.02e-05,
"loss": 1.6220406293869019,
"step": 18
},
{
"epoch": 0.03663003663003663,
"grad_norm": 0.13924920558929443,
"learning_rate": 1.1400000000000001e-05,
"loss": 1.6960639953613281,
"step": 20
},
{
"epoch": 0.040293040293040296,
"grad_norm": 0.8865143656730652,
"learning_rate": 1.26e-05,
"loss": 0.9666078090667725,
"step": 22
},
{
"epoch": 0.04395604395604396,
"grad_norm": 0.27947765588760376,
"learning_rate": 1.3800000000000002e-05,
"loss": 1.128947138786316,
"step": 24
},
{
"epoch": 0.047619047619047616,
"grad_norm": 0.18685200810432434,
"learning_rate": 1.5e-05,
"loss": 1.3666882514953613,
"step": 26
},
{
"epoch": 0.05128205128205128,
"grad_norm": 0.47109824419021606,
"learning_rate": 1.62e-05,
"loss": 1.2677953243255615,
"step": 28
},
{
"epoch": 0.054945054945054944,
"grad_norm": 1.4102905988693237,
"learning_rate": 1.74e-05,
"loss": 1.138432264328003,
"step": 30
},
{
"epoch": 0.05860805860805861,
"grad_norm": 0.37619346380233765,
"learning_rate": 1.86e-05,
"loss": 1.0671923160552979,
"step": 32
},
{
"epoch": 0.06227106227106227,
"grad_norm": 0.08159811794757843,
"learning_rate": 1.98e-05,
"loss": 1.2145860195159912,
"step": 34
},
{
"epoch": 0.06593406593406594,
"grad_norm": 0.09897736459970474,
"learning_rate": 2.1e-05,
"loss": 1.579596996307373,
"step": 36
},
{
"epoch": 0.0695970695970696,
"grad_norm": 0.30364152789115906,
"learning_rate": 2.22e-05,
"loss": 1.349471926689148,
"step": 38
},
{
"epoch": 0.07326007326007326,
"grad_norm": 0.16812732815742493,
"learning_rate": 2.3400000000000003e-05,
"loss": 1.3879759311676025,
"step": 40
},
{
"epoch": 0.07692307692307693,
"grad_norm": 0.17663103342056274,
"learning_rate": 2.4599999999999998e-05,
"loss": 1.0692293643951416,
"step": 42
},
{
"epoch": 0.08058608058608059,
"grad_norm": 0.1400979459285736,
"learning_rate": 2.58e-05,
"loss": 1.48546302318573,
"step": 44
},
{
"epoch": 0.08424908424908426,
"grad_norm": 0.1608627736568451,
"learning_rate": 2.7000000000000002e-05,
"loss": 1.577519416809082,
"step": 46
},
{
"epoch": 0.08791208791208792,
"grad_norm": 0.42714622616767883,
"learning_rate": 2.8199999999999998e-05,
"loss": 1.0018680095672607,
"step": 48
},
{
"epoch": 0.09157509157509157,
"grad_norm": 0.1683175265789032,
"learning_rate": 2.94e-05,
"loss": 1.4638025760650635,
"step": 50
},
{
"epoch": 0.09523809523809523,
"grad_norm": 0.09218421578407288,
"learning_rate": 2.99999735818513e-05,
"loss": 0.722154974937439,
"step": 52
},
{
"epoch": 0.0989010989010989,
"grad_norm": 0.2785327732563019,
"learning_rate": 2.9999762237282056e-05,
"loss": 0.9314170479774475,
"step": 54
},
{
"epoch": 0.10256410256410256,
"grad_norm": 0.23271751403808594,
"learning_rate": 2.9999339551452214e-05,
"loss": 1.3097407817840576,
"step": 56
},
{
"epoch": 0.10622710622710622,
"grad_norm": 0.1494479924440384,
"learning_rate": 2.9998705530978993e-05,
"loss": 1.152754783630371,
"step": 58
},
{
"epoch": 0.10989010989010989,
"grad_norm": 0.20334792137145996,
"learning_rate": 2.99978601857881e-05,
"loss": 1.3560007810592651,
"step": 60
},
{
"epoch": 0.11355311355311355,
"grad_norm": 0.3645194470882416,
"learning_rate": 2.999680352911357e-05,
"loss": 1.3752349615097046,
"step": 62
},
{
"epoch": 0.11721611721611722,
"grad_norm": 0.3081910312175751,
"learning_rate": 2.9995535577497556e-05,
"loss": 1.3438981771469116,
"step": 64
},
{
"epoch": 0.12087912087912088,
"grad_norm": 0.13718588650226593,
"learning_rate": 2.999405635079008e-05,
"loss": 1.3078930377960205,
"step": 66
},
{
"epoch": 0.12454212454212454,
"grad_norm": 0.17203469574451447,
"learning_rate": 2.99923658721487e-05,
"loss": 1.2862566709518433,
"step": 68
},
{
"epoch": 0.1282051282051282,
"grad_norm": 0.48204678297042847,
"learning_rate": 2.9990464168038176e-05,
"loss": 1.1795488595962524,
"step": 70
},
{
"epoch": 0.13186813186813187,
"grad_norm": 0.21979455649852753,
"learning_rate": 2.998835126823003e-05,
"loss": 1.395269751548767,
"step": 72
},
{
"epoch": 0.13553113553113552,
"grad_norm": 0.2504670023918152,
"learning_rate": 2.99860272058021e-05,
"loss": 1.584033727645874,
"step": 74
},
{
"epoch": 0.1391941391941392,
"grad_norm": 0.0789148136973381,
"learning_rate": 2.998349201713801e-05,
"loss": 1.0636365413665771,
"step": 76
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.33568286895751953,
"learning_rate": 2.998074574192661e-05,
"loss": 1.4008864164352417,
"step": 78
},
{
"epoch": 0.14652014652014653,
"grad_norm": 0.23792332410812378,
"learning_rate": 2.9977788423161336e-05,
"loss": 1.56437349319458,
"step": 80
},
{
"epoch": 0.15018315018315018,
"grad_norm": 0.21572615206241608,
"learning_rate": 2.997462010713957e-05,
"loss": 1.1760307550430298,
"step": 82
},
{
"epoch": 0.15384615384615385,
"grad_norm": 0.37639090418815613,
"learning_rate": 2.997124084346186e-05,
"loss": 1.3391790390014648,
"step": 84
},
{
"epoch": 0.1575091575091575,
"grad_norm": 0.39284688234329224,
"learning_rate": 2.9967650685031216e-05,
"loss": 1.472882866859436,
"step": 86
},
{
"epoch": 0.16117216117216118,
"grad_norm": 0.15103961527347565,
"learning_rate": 2.9963849688052232e-05,
"loss": 0.7084518074989319,
"step": 88
},
{
"epoch": 0.16483516483516483,
"grad_norm": 0.08433467894792557,
"learning_rate": 2.9959837912030202e-05,
"loss": 0.848395586013794,
"step": 90
},
{
"epoch": 0.1684981684981685,
"grad_norm": 0.18389679491519928,
"learning_rate": 2.9955615419770222e-05,
"loss": 1.2933943271636963,
"step": 92
},
{
"epoch": 0.17216117216117216,
"grad_norm": 0.4208066761493683,
"learning_rate": 2.9951182277376195e-05,
"loss": 1.3197017908096313,
"step": 94
},
{
"epoch": 0.17582417582417584,
"grad_norm": 0.23863960802555084,
"learning_rate": 2.9946538554249767e-05,
"loss": 1.2148442268371582,
"step": 96
},
{
"epoch": 0.1794871794871795,
"grad_norm": 0.11656401306390762,
"learning_rate": 2.994168432308928e-05,
"loss": 1.3039908409118652,
"step": 98
},
{
"epoch": 0.18315018315018314,
"grad_norm": 0.6390186548233032,
"learning_rate": 2.9936619659888623e-05,
"loss": 1.159237027168274,
"step": 100
},
{
"epoch": 0.18681318681318682,
"grad_norm": 0.18607312440872192,
"learning_rate": 2.993134464393602e-05,
"loss": 1.2290619611740112,
"step": 102
},
{
"epoch": 0.19047619047619047,
"grad_norm": 0.16850972175598145,
"learning_rate": 2.9925859357812825e-05,
"loss": 0.8924808502197266,
"step": 104
},
{
"epoch": 0.19413919413919414,
"grad_norm": 1.5484095811843872,
"learning_rate": 2.9920163887392198e-05,
"loss": 1.0766998529434204,
"step": 106
},
{
"epoch": 0.1978021978021978,
"grad_norm": 0.1801968663930893,
"learning_rate": 2.9914258321837772e-05,
"loss": 1.2965888977050781,
"step": 108
},
{
"epoch": 0.20146520146520147,
"grad_norm": 0.22028259932994843,
"learning_rate": 2.9908142753602263e-05,
"loss": 1.1484687328338623,
"step": 110
},
{
"epoch": 0.20512820512820512,
"grad_norm": 0.15182016789913177,
"learning_rate": 2.990181727842602e-05,
"loss": 1.2071518898010254,
"step": 112
},
{
"epoch": 0.2087912087912088,
"grad_norm": 0.3974195122718811,
"learning_rate": 2.9895281995335517e-05,
"loss": 0.6265676617622375,
"step": 114
},
{
"epoch": 0.21245421245421245,
"grad_norm": 0.15311695635318756,
"learning_rate": 2.9888537006641817e-05,
"loss": 1.2827767133712769,
"step": 116
},
{
"epoch": 0.21611721611721613,
"grad_norm": 0.403340607881546,
"learning_rate": 2.9881582417938958e-05,
"loss": 1.2744747400283813,
"step": 118
},
{
"epoch": 0.21978021978021978,
"grad_norm": 0.47385725378990173,
"learning_rate": 2.9874418338102297e-05,
"loss": 1.3409874439239502,
"step": 120
},
{
"epoch": 0.22344322344322345,
"grad_norm": 0.21080061793327332,
"learning_rate": 2.9867044879286828e-05,
"loss": 1.2713534832000732,
"step": 122
},
{
"epoch": 0.2271062271062271,
"grad_norm": 1.2038962841033936,
"learning_rate": 2.985946215692541e-05,
"loss": 1.0160202980041504,
"step": 124
},
{
"epoch": 0.23076923076923078,
"grad_norm": 0.43111974000930786,
"learning_rate": 2.9851670289726944e-05,
"loss": 0.8933266401290894,
"step": 126
},
{
"epoch": 0.23443223443223443,
"grad_norm": 0.22090385854244232,
"learning_rate": 2.9843669399674548e-05,
"loss": 1.180678129196167,
"step": 128
},
{
"epoch": 0.23809523809523808,
"grad_norm": 0.19944743812084198,
"learning_rate": 2.9835459612023636e-05,
"loss": 1.2787879705429077,
"step": 130
},
{
"epoch": 0.24175824175824176,
"grad_norm": 0.18606817722320557,
"learning_rate": 2.9827041055299935e-05,
"loss": 1.376538872718811,
"step": 132
},
{
"epoch": 0.2454212454212454,
"grad_norm": 0.21287062764167786,
"learning_rate": 2.98184138612975e-05,
"loss": 1.2710717916488647,
"step": 134
},
{
"epoch": 0.2490842490842491,
"grad_norm": 0.20542077720165253,
"learning_rate": 2.9809578165076638e-05,
"loss": 1.2541818618774414,
"step": 136
},
{
"epoch": 0.25274725274725274,
"grad_norm": 0.13655985891819,
"learning_rate": 2.9800534104961805e-05,
"loss": 1.0699478387832642,
"step": 138
},
{
"epoch": 0.2564102564102564,
"grad_norm": 0.13522057235240936,
"learning_rate": 2.979128182253942e-05,
"loss": 1.255359411239624,
"step": 140
},
{
"epoch": 0.2600732600732601,
"grad_norm": 0.21614201366901398,
"learning_rate": 2.9781821462655665e-05,
"loss": 1.2742397785186768,
"step": 142
},
{
"epoch": 0.26373626373626374,
"grad_norm": 0.28022998571395874,
"learning_rate": 2.977215317341422e-05,
"loss": 1.23661208152771,
"step": 144
},
{
"epoch": 0.2673992673992674,
"grad_norm": 0.5679643154144287,
"learning_rate": 2.9762277106173925e-05,
"loss": 1.3913161754608154,
"step": 146
},
{
"epoch": 0.27106227106227104,
"grad_norm": 0.14959579706192017,
"learning_rate": 2.975219341554643e-05,
"loss": 1.2973092794418335,
"step": 148
},
{
"epoch": 0.27472527472527475,
"grad_norm": 0.07510862499475479,
"learning_rate": 2.9741902259393773e-05,
"loss": 0.9124496579170227,
"step": 150
},
{
"epoch": 0.2783882783882784,
"grad_norm": 0.10233580321073532,
"learning_rate": 2.9731403798825883e-05,
"loss": 0.6311591863632202,
"step": 152
},
{
"epoch": 0.28205128205128205,
"grad_norm": 0.16836495697498322,
"learning_rate": 2.9720698198198106e-05,
"loss": 1.091691017150879,
"step": 154
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.09479176998138428,
"learning_rate": 2.9709785625108577e-05,
"loss": 1.3299628496170044,
"step": 156
},
{
"epoch": 0.2893772893772894,
"grad_norm": 0.1901499629020691,
"learning_rate": 2.969866625039564e-05,
"loss": 1.2565171718597412,
"step": 158
},
{
"epoch": 0.29304029304029305,
"grad_norm": 0.19252723455429077,
"learning_rate": 2.968734024813515e-05,
"loss": 0.9349527359008789,
"step": 160
},
{
"epoch": 0.2967032967032967,
"grad_norm": 0.23732879757881165,
"learning_rate": 2.9675807795637753e-05,
"loss": 1.5130983591079712,
"step": 162
},
{
"epoch": 0.30036630036630035,
"grad_norm": 0.15965285897254944,
"learning_rate": 2.9664069073446123e-05,
"loss": 1.047977328300476,
"step": 164
},
{
"epoch": 0.304029304029304,
"grad_norm": 0.575114905834198,
"learning_rate": 2.9652124265332104e-05,
"loss": 1.3748732805252075,
"step": 166
},
{
"epoch": 0.3076923076923077,
"grad_norm": 0.18872004747390747,
"learning_rate": 2.9639973558293873e-05,
"loss": 1.1198259592056274,
"step": 168
},
{
"epoch": 0.31135531135531136,
"grad_norm": 0.2314036637544632,
"learning_rate": 2.9627617142552972e-05,
"loss": 1.2412378787994385,
"step": 170
},
{
"epoch": 0.315018315018315,
"grad_norm": 0.3105119466781616,
"learning_rate": 2.9615055211551372e-05,
"loss": 0.996675431728363,
"step": 172
},
{
"epoch": 0.31868131868131866,
"grad_norm": 0.3508174419403076,
"learning_rate": 2.9602287961948407e-05,
"loss": 0.8448920249938965,
"step": 174
},
{
"epoch": 0.32234432234432236,
"grad_norm": 0.10528334975242615,
"learning_rate": 2.958931559361772e-05,
"loss": 1.3807752132415771,
"step": 176
},
{
"epoch": 0.326007326007326,
"grad_norm": 0.19879353046417236,
"learning_rate": 2.9576138309644126e-05,
"loss": 0.9085540175437927,
"step": 178
},
{
"epoch": 0.32967032967032966,
"grad_norm": 0.15222589671611786,
"learning_rate": 2.9562756316320423e-05,
"loss": 1.4698972702026367,
"step": 180
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.24432772397994995,
"learning_rate": 2.9549169823144186e-05,
"loss": 0.9636735916137695,
"step": 182
},
{
"epoch": 0.336996336996337,
"grad_norm": 0.16814936697483063,
"learning_rate": 2.9535379042814454e-05,
"loss": 1.2255438566207886,
"step": 184
},
{
"epoch": 0.34065934065934067,
"grad_norm": 0.1778419017791748,
"learning_rate": 2.9521384191228436e-05,
"loss": 1.2754427194595337,
"step": 186
},
{
"epoch": 0.3443223443223443,
"grad_norm": 0.1005903109908104,
"learning_rate": 2.950718548747811e-05,
"loss": 1.159492015838623,
"step": 188
},
{
"epoch": 0.34798534798534797,
"grad_norm": 0.10859628021717072,
"learning_rate": 2.9492783153846787e-05,
"loss": 1.2530215978622437,
"step": 190
},
{
"epoch": 0.3516483516483517,
"grad_norm": 0.396244078874588,
"learning_rate": 2.9478177415805647e-05,
"loss": 1.0890475511550903,
"step": 192
},
{
"epoch": 0.3553113553113553,
"grad_norm": 0.24021735787391663,
"learning_rate": 2.946336850201022e-05,
"loss": 1.2338961362838745,
"step": 194
},
{
"epoch": 0.358974358974359,
"grad_norm": 0.20047220587730408,
"learning_rate": 2.9448356644296764e-05,
"loss": 1.2417196035385132,
"step": 196
},
{
"epoch": 0.3626373626373626,
"grad_norm": 0.21200719475746155,
"learning_rate": 2.943314207767867e-05,
"loss": 0.8491206169128418,
"step": 198
},
{
"epoch": 0.3663003663003663,
"grad_norm": 0.18689094483852386,
"learning_rate": 2.9417725040342783e-05,
"loss": 0.9986346364021301,
"step": 200
},
{
"epoch": 0.36996336996337,
"grad_norm": 0.27257877588272095,
"learning_rate": 2.9402105773645648e-05,
"loss": 1.4155678749084473,
"step": 202
},
{
"epoch": 0.37362637362637363,
"grad_norm": 0.8289733529090881,
"learning_rate": 2.9386284522109774e-05,
"loss": 1.199107050895691,
"step": 204
},
{
"epoch": 0.3772893772893773,
"grad_norm": 0.7309209108352661,
"learning_rate": 2.937026153341975e-05,
"loss": 0.7536513805389404,
"step": 206
},
{
"epoch": 0.38095238095238093,
"grad_norm": 0.4993458390235901,
"learning_rate": 2.9354037058418424e-05,
"loss": 1.122713565826416,
"step": 208
},
{
"epoch": 0.38461538461538464,
"grad_norm": 0.3532741367816925,
"learning_rate": 2.9337611351102914e-05,
"loss": 1.396581768989563,
"step": 210
},
{
"epoch": 0.3882783882783883,
"grad_norm": 0.17742833495140076,
"learning_rate": 2.932098466862071e-05,
"loss": 1.057890772819519,
"step": 212
},
{
"epoch": 0.39194139194139194,
"grad_norm": 0.2426135092973709,
"learning_rate": 2.9304157271265576e-05,
"loss": 1.121835708618164,
"step": 214
},
{
"epoch": 0.3956043956043956,
"grad_norm": 0.29707637429237366,
"learning_rate": 2.9287129422473514e-05,
"loss": 1.210677981376648,
"step": 216
},
{
"epoch": 0.3992673992673993,
"grad_norm": 0.13365934789180756,
"learning_rate": 2.9269901388818625e-05,
"loss": 1.242161750793457,
"step": 218
},
{
"epoch": 0.40293040293040294,
"grad_norm": 0.08649564534425735,
"learning_rate": 2.9252473440008948e-05,
"loss": 0.6110650897026062,
"step": 220
},
{
"epoch": 0.4065934065934066,
"grad_norm": 0.2575564384460449,
"learning_rate": 2.923484584888222e-05,
"loss": 1.3188272714614868,
"step": 222
},
{
"epoch": 0.41025641025641024,
"grad_norm": 0.25342005491256714,
"learning_rate": 2.9217018891401635e-05,
"loss": 1.067653775215149,
"step": 224
},
{
"epoch": 0.4139194139194139,
"grad_norm": 0.9904859066009521,
"learning_rate": 2.9198992846651482e-05,
"loss": 1.2169532775878906,
"step": 226
},
{
"epoch": 0.4175824175824176,
"grad_norm": 0.1713169813156128,
"learning_rate": 2.9180767996832804e-05,
"loss": 1.2927213907241821,
"step": 228
},
{
"epoch": 0.42124542124542125,
"grad_norm": 0.3995862305164337,
"learning_rate": 2.9162344627258984e-05,
"loss": 1.2316654920578003,
"step": 230
},
{
"epoch": 0.4249084249084249,
"grad_norm": 3.5245652198791504,
"learning_rate": 2.9143723026351256e-05,
"loss": 0.9831936955451965,
"step": 232
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.29103225469589233,
"learning_rate": 2.9124903485634212e-05,
"loss": 1.475206971168518,
"step": 234
},
{
"epoch": 0.43223443223443225,
"grad_norm": 0.4642655551433563,
"learning_rate": 2.9105886299731215e-05,
"loss": 1.0043811798095703,
"step": 236
},
{
"epoch": 0.4358974358974359,
"grad_norm": 0.1674029529094696,
"learning_rate": 2.9086671766359816e-05,
"loss": 1.0577434301376343,
"step": 238
},
{
"epoch": 0.43956043956043955,
"grad_norm": 0.937583863735199,
"learning_rate": 2.9067260186327068e-05,
"loss": 0.9001191854476929,
"step": 240
},
{
"epoch": 0.4432234432234432,
"grad_norm": 0.2830636203289032,
"learning_rate": 2.904765186352482e-05,
"loss": 0.8691871762275696,
"step": 242
},
{
"epoch": 0.4468864468864469,
"grad_norm": 0.13799089193344116,
"learning_rate": 2.902784710492498e-05,
"loss": 0.5735795497894287,
"step": 244
},
{
"epoch": 0.45054945054945056,
"grad_norm": 0.14723800122737885,
"learning_rate": 2.9007846220574677e-05,
"loss": 1.2381662130355835,
"step": 246
},
{
"epoch": 0.4542124542124542,
"grad_norm": 0.2460140883922577,
"learning_rate": 2.8987649523591442e-05,
"loss": 0.8658979535102844,
"step": 248
},
{
"epoch": 0.45787545787545786,
"grad_norm": 0.4360606372356415,
"learning_rate": 2.8967257330158273e-05,
"loss": 1.2675610780715942,
"step": 250
},
{
"epoch": 0.46153846153846156,
"grad_norm": 0.14133431017398834,
"learning_rate": 2.8946669959518716e-05,
"loss": 1.1595462560653687,
"step": 252
},
{
"epoch": 0.4652014652014652,
"grad_norm": 0.1358579695224762,
"learning_rate": 2.892588773397184e-05,
"loss": 0.9582386016845703,
"step": 254
},
{
"epoch": 0.46886446886446886,
"grad_norm": 0.1341731995344162,
"learning_rate": 2.8904910978867214e-05,
"loss": 1.2427244186401367,
"step": 256
},
{
"epoch": 0.4725274725274725,
"grad_norm": 0.20216862857341766,
"learning_rate": 2.888374002259979e-05,
"loss": 1.2652021646499634,
"step": 258
},
{
"epoch": 0.47619047619047616,
"grad_norm": 0.13634032011032104,
"learning_rate": 2.8862375196604782e-05,
"loss": 0.7191201448440552,
"step": 260
},
{
"epoch": 0.47985347985347987,
"grad_norm": 0.33932217955589294,
"learning_rate": 2.8840816835352475e-05,
"loss": 1.1376659870147705,
"step": 262
},
{
"epoch": 0.4835164835164835,
"grad_norm": 0.14357520639896393,
"learning_rate": 2.881906527634298e-05,
"loss": 1.2125585079193115,
"step": 264
},
{
"epoch": 0.48717948717948717,
"grad_norm": 0.22996890544891357,
"learning_rate": 2.8797120860100952e-05,
"loss": 1.4826048612594604,
"step": 266
},
{
"epoch": 0.4908424908424908,
"grad_norm": 0.3534063994884491,
"learning_rate": 2.8774983930170256e-05,
"loss": 0.7847967147827148,
"step": 268
},
{
"epoch": 0.4945054945054945,
"grad_norm": 0.37955111265182495,
"learning_rate": 2.875265483310861e-05,
"loss": 1.18699312210083,
"step": 270
},
{
"epoch": 0.4981684981684982,
"grad_norm": 0.15797623991966248,
"learning_rate": 2.873013391848213e-05,
"loss": 0.8804799914360046,
"step": 272
},
{
"epoch": 0.5018315018315018,
"grad_norm": 0.3480098843574524,
"learning_rate": 2.8707421538859884e-05,
"loss": 1.2322217226028442,
"step": 274
},
{
"epoch": 0.5054945054945055,
"grad_norm": 0.5580818057060242,
"learning_rate": 2.8684518049808345e-05,
"loss": 0.7790613174438477,
"step": 276
},
{
"epoch": 0.5091575091575091,
"grad_norm": 0.14733240008354187,
"learning_rate": 2.8661423809885846e-05,
"loss": 0.8785544633865356,
"step": 278
},
{
"epoch": 0.5128205128205128,
"grad_norm": 0.22084079682826996,
"learning_rate": 2.8638139180636962e-05,
"loss": 1.236435055732727,
"step": 280
},
{
"epoch": 0.5164835164835165,
"grad_norm": 0.2052122801542282,
"learning_rate": 2.861466452658685e-05,
"loss": 1.2310553789138794,
"step": 282
},
{
"epoch": 0.5201465201465202,
"grad_norm": 0.15730488300323486,
"learning_rate": 2.8591000215235535e-05,
"loss": 1.2288477420806885,
"step": 284
},
{
"epoch": 0.5238095238095238,
"grad_norm": 0.33526456356048584,
"learning_rate": 2.8567146617052157e-05,
"loss": 0.9904751777648926,
"step": 286
},
{
"epoch": 0.5274725274725275,
"grad_norm": 0.3099616467952728,
"learning_rate": 2.854310410546919e-05,
"loss": 1.2387449741363525,
"step": 288
},
{
"epoch": 0.5311355311355311,
"grad_norm": 0.4743753671646118,
"learning_rate": 2.851887305687657e-05,
"loss": 1.2020177841186523,
"step": 290
},
{
"epoch": 0.5347985347985348,
"grad_norm": 0.2047477811574936,
"learning_rate": 2.8494453850615823e-05,
"loss": 0.7334977984428406,
"step": 292
},
{
"epoch": 0.5384615384615384,
"grad_norm": 0.034840475767850876,
"learning_rate": 2.846984686897411e-05,
"loss": 0.9410668611526489,
"step": 294
},
{
"epoch": 0.5421245421245421,
"grad_norm": 0.3429921269416809,
"learning_rate": 2.8445052497178255e-05,
"loss": 1.0193301439285278,
"step": 296
},
{
"epoch": 0.5457875457875457,
"grad_norm": 0.4340989291667938,
"learning_rate": 2.8420071123388712e-05,
"loss": 1.2840567827224731,
"step": 298
},
{
"epoch": 0.5494505494505495,
"grad_norm": 0.22159327566623688,
"learning_rate": 2.839490313869348e-05,
"loss": 1.1924721002578735,
"step": 300
},
{
"epoch": 0.5531135531135531,
"grad_norm": 0.3795735239982605,
"learning_rate": 2.8369548937101984e-05,
"loss": 1.1779296398162842,
"step": 302
},
{
"epoch": 0.5567765567765568,
"grad_norm": 0.1328314244747162,
"learning_rate": 2.8344008915538916e-05,
"loss": 0.9684816002845764,
"step": 304
},
{
"epoch": 0.5604395604395604,
"grad_norm": 0.28786700963974,
"learning_rate": 2.831828347383802e-05,
"loss": 0.9479919075965881,
"step": 306
},
{
"epoch": 0.5641025641025641,
"grad_norm": 0.38591575622558594,
"learning_rate": 2.82923730147358e-05,
"loss": 1.379252552986145,
"step": 308
},
{
"epoch": 0.5677655677655677,
"grad_norm": 0.2665002644062042,
"learning_rate": 2.826627794386527e-05,
"loss": 0.9848244190216064,
"step": 310
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.16822513937950134,
"learning_rate": 2.823999866974956e-05,
"loss": 1.2088336944580078,
"step": 312
},
{
"epoch": 0.575091575091575,
"grad_norm": 0.14254474639892578,
"learning_rate": 2.821353560379554e-05,
"loss": 1.125260591506958,
"step": 314
},
{
"epoch": 0.5787545787545788,
"grad_norm": 0.1947673112154007,
"learning_rate": 2.8186889160287368e-05,
"loss": 1.2775671482086182,
"step": 316
},
{
"epoch": 0.5824175824175825,
"grad_norm": 0.8883699774742126,
"learning_rate": 2.816005975638003e-05,
"loss": 1.1112195253372192,
"step": 318
},
{
"epoch": 0.5860805860805861,
"grad_norm": 0.1947968453168869,
"learning_rate": 2.8133047812092776e-05,
"loss": 1.3266563415527344,
"step": 320
},
{
"epoch": 0.5897435897435898,
"grad_norm": 0.16167616844177246,
"learning_rate": 2.810585375030255e-05,
"loss": 1.5130597352981567,
"step": 322
},
{
"epoch": 0.5934065934065934,
"grad_norm": 0.32240772247314453,
"learning_rate": 2.8078477996737404e-05,
"loss": 1.25220787525177,
"step": 324
},
{
"epoch": 0.5970695970695971,
"grad_norm": 0.22599875926971436,
"learning_rate": 2.805092097996979e-05,
"loss": 1.349832534790039,
"step": 326
},
{
"epoch": 0.6007326007326007,
"grad_norm": 0.1706995815038681,
"learning_rate": 2.8023183131409867e-05,
"loss": 1.2129117250442505,
"step": 328
},
{
"epoch": 0.6043956043956044,
"grad_norm": 0.4682578146457672,
"learning_rate": 2.799526488529877e-05,
"loss": 1.1015658378601074,
"step": 330
},
{
"epoch": 0.608058608058608,
"grad_norm": 0.1629960834980011,
"learning_rate": 2.7967166678701764e-05,
"loss": 1.1752092838287354,
"step": 332
},
{
"epoch": 0.6117216117216118,
"grad_norm": 0.5704866647720337,
"learning_rate": 2.7938888951501446e-05,
"loss": 1.2739248275756836,
"step": 334
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.2894584536552429,
"learning_rate": 2.7910432146390835e-05,
"loss": 0.8835808634757996,
"step": 336
},
{
"epoch": 0.6190476190476191,
"grad_norm": 0.15816010534763336,
"learning_rate": 2.7881796708866444e-05,
"loss": 1.2259109020233154,
"step": 338
},
{
"epoch": 0.6227106227106227,
"grad_norm": 0.18366442620754242,
"learning_rate": 2.7852983087221323e-05,
"loss": 1.2217463254928589,
"step": 340
},
{
"epoch": 0.6263736263736264,
"grad_norm": 0.12548674643039703,
"learning_rate": 2.782399173253801e-05,
"loss": 0.5570770502090454,
"step": 342
},
{
"epoch": 0.63003663003663,
"grad_norm": 0.1253451108932495,
"learning_rate": 2.7794823098681503e-05,
"loss": 1.4292207956314087,
"step": 344
},
{
"epoch": 0.6336996336996337,
"grad_norm": 0.2767960727214813,
"learning_rate": 2.7765477642292122e-05,
"loss": 0.6793244481086731,
"step": 346
},
{
"epoch": 0.6373626373626373,
"grad_norm": 0.25969550013542175,
"learning_rate": 2.7735955822778383e-05,
"loss": 1.1459492444992065,
"step": 348
},
{
"epoch": 0.6410256410256411,
"grad_norm": 0.3016161620616913,
"learning_rate": 2.7706258102309807e-05,
"loss": 0.8780456185340881,
"step": 350
},
{
"epoch": 0.6446886446886447,
"grad_norm": 0.1947915107011795,
"learning_rate": 2.7676384945809665e-05,
"loss": 0.9107787013053894,
"step": 352
},
{
"epoch": 0.6483516483516484,
"grad_norm": 0.3486472964286804,
"learning_rate": 2.7646336820947716e-05,
"loss": 0.9405763149261475,
"step": 354
},
{
"epoch": 0.652014652014652,
"grad_norm": 0.2618328332901001,
"learning_rate": 2.7616114198132885e-05,
"loss": 1.154807448387146,
"step": 356
},
{
"epoch": 0.6556776556776557,
"grad_norm": 0.24301418662071228,
"learning_rate": 2.7585717550505885e-05,
"loss": 1.141382098197937,
"step": 358
},
{
"epoch": 0.6593406593406593,
"grad_norm": 0.20328634977340698,
"learning_rate": 2.7555147353931828e-05,
"loss": 0.6797105669975281,
"step": 360
},
{
"epoch": 0.663003663003663,
"grad_norm": 0.1462642401456833,
"learning_rate": 2.752440408699276e-05,
"loss": 0.9351564049720764,
"step": 362
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.20813682675361633,
"learning_rate": 2.7493488230980183e-05,
"loss": 0.9224430322647095,
"step": 364
},
{
"epoch": 0.6703296703296703,
"grad_norm": 0.9584963917732239,
"learning_rate": 2.746240026988751e-05,
"loss": 1.178276538848877,
"step": 366
},
{
"epoch": 0.673992673992674,
"grad_norm": 0.3994496464729309,
"learning_rate": 2.7431140690402486e-05,
"loss": 1.039903998374939,
"step": 368
},
{
"epoch": 0.6776556776556777,
"grad_norm": 0.14436861872673035,
"learning_rate": 2.7399709981899575e-05,
"loss": 1.0789308547973633,
"step": 370
},
{
"epoch": 0.6813186813186813,
"grad_norm": 0.38669517636299133,
"learning_rate": 2.7368108636432305e-05,
"loss": 1.1867707967758179,
"step": 372
},
{
"epoch": 0.684981684981685,
"grad_norm": 0.23368126153945923,
"learning_rate": 2.7336337148725544e-05,
"loss": 1.1822454929351807,
"step": 374
},
{
"epoch": 0.6886446886446886,
"grad_norm": 0.15929855406284332,
"learning_rate": 2.7304396016167787e-05,
"loss": 0.8562048077583313,
"step": 376
},
{
"epoch": 0.6923076923076923,
"grad_norm": 0.09261632710695267,
"learning_rate": 2.7272285738803325e-05,
"loss": 0.8207629919052124,
"step": 378
},
{
"epoch": 0.6959706959706959,
"grad_norm": 1.0426931381225586,
"learning_rate": 2.7240006819324463e-05,
"loss": 1.0330201387405396,
"step": 380
},
{
"epoch": 0.6996336996336996,
"grad_norm": 0.11508945375680923,
"learning_rate": 2.7207559763063615e-05,
"loss": 0.9683942794799805,
"step": 382
},
{
"epoch": 0.7032967032967034,
"grad_norm": 0.26285308599472046,
"learning_rate": 2.7174945077985425e-05,
"loss": 1.1675138473510742,
"step": 384
},
{
"epoch": 0.706959706959707,
"grad_norm": 0.4783492982387543,
"learning_rate": 2.7142163274678783e-05,
"loss": 1.0812029838562012,
"step": 386
},
{
"epoch": 0.7106227106227107,
"grad_norm": 0.17882274091243744,
"learning_rate": 2.7109214866348845e-05,
"loss": 0.8760687708854675,
"step": 388
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.13550622761249542,
"learning_rate": 2.7076100368809007e-05,
"loss": 1.0749393701553345,
"step": 390
},
{
"epoch": 0.717948717948718,
"grad_norm": 0.12767675518989563,
"learning_rate": 2.704282030047281e-05,
"loss": 1.1613750457763672,
"step": 392
},
{
"epoch": 0.7216117216117216,
"grad_norm": 0.25305429100990295,
"learning_rate": 2.7009375182345852e-05,
"loss": 1.1066409349441528,
"step": 394
},
{
"epoch": 0.7252747252747253,
"grad_norm": 0.16726504266262054,
"learning_rate": 2.697576553801761e-05,
"loss": 1.2291399240493774,
"step": 396
},
{
"epoch": 0.7289377289377289,
"grad_norm": 0.09550374001264572,
"learning_rate": 2.6941991893653237e-05,
"loss": 0.9111291170120239,
"step": 398
},
{
"epoch": 0.7326007326007326,
"grad_norm": 0.1426897495985031,
"learning_rate": 2.6908054777985364e-05,
"loss": 0.8321019411087036,
"step": 400
},
{
"epoch": 0.7362637362637363,
"grad_norm": 0.7372925877571106,
"learning_rate": 2.6873954722305758e-05,
"loss": 1.2364851236343384,
"step": 402
},
{
"epoch": 0.73992673992674,
"grad_norm": 0.13425855338573456,
"learning_rate": 2.6839692260457073e-05,
"loss": 0.8418540358543396,
"step": 404
},
{
"epoch": 0.7435897435897436,
"grad_norm": 62.03416061401367,
"learning_rate": 2.6805267928824453e-05,
"loss": 1.0157253742218018,
"step": 406
},
{
"epoch": 0.7472527472527473,
"grad_norm": 0.2104729413986206,
"learning_rate": 2.6770682266327137e-05,
"loss": 1.0042197704315186,
"step": 408
},
{
"epoch": 0.7509157509157509,
"grad_norm": 0.32168519496917725,
"learning_rate": 2.6735935814410034e-05,
"loss": 0.8710044622421265,
"step": 410
},
{
"epoch": 0.7545787545787546,
"grad_norm": 0.11317263543605804,
"learning_rate": 2.6701029117035233e-05,
"loss": 1.1607297658920288,
"step": 412
},
{
"epoch": 0.7582417582417582,
"grad_norm": 0.25706130266189575,
"learning_rate": 2.666596272067351e-05,
"loss": 1.0102430582046509,
"step": 414
},
{
"epoch": 0.7619047619047619,
"grad_norm": 0.16405972838401794,
"learning_rate": 2.663073717429574e-05,
"loss": 1.264566421508789,
"step": 416
},
{
"epoch": 0.7655677655677655,
"grad_norm": 0.06632276624441147,
"learning_rate": 2.6595353029364336e-05,
"loss": 0.6621611714363098,
"step": 418
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.13398383557796478,
"learning_rate": 2.6559810839824595e-05,
"loss": 1.2317739725112915,
"step": 420
},
{
"epoch": 0.7728937728937729,
"grad_norm": 0.13647086918354034,
"learning_rate": 2.6524111162096034e-05,
"loss": 0.9846137166023254,
"step": 422
},
{
"epoch": 0.7765567765567766,
"grad_norm": 0.2981526553630829,
"learning_rate": 2.648825455506366e-05,
"loss": 0.48747333884239197,
"step": 424
},
{
"epoch": 0.7802197802197802,
"grad_norm": 0.12210040539503098,
"learning_rate": 2.6452241580069266e-05,
"loss": 0.7788556218147278,
"step": 426
},
{
"epoch": 0.7838827838827839,
"grad_norm": 0.8211970329284668,
"learning_rate": 2.6416072800902587e-05,
"loss": 1.0571116209030151,
"step": 428
},
{
"epoch": 0.7875457875457875,
"grad_norm": 0.4774661064147949,
"learning_rate": 2.6379748783792524e-05,
"loss": 1.2003669738769531,
"step": 430
},
{
"epoch": 0.7912087912087912,
"grad_norm": 0.2680320739746094,
"learning_rate": 2.6343270097398235e-05,
"loss": 1.2852318286895752,
"step": 432
},
{
"epoch": 0.7948717948717948,
"grad_norm": 0.15633486211299896,
"learning_rate": 2.630663731280027e-05,
"loss": 0.8747589588165283,
"step": 434
},
{
"epoch": 0.7985347985347986,
"grad_norm": 0.20981334149837494,
"learning_rate": 2.626985100349161e-05,
"loss": 0.9550784230232239,
"step": 436
},
{
"epoch": 0.8021978021978022,
"grad_norm": 0.14196103811264038,
"learning_rate": 2.6232911745368683e-05,
"loss": 0.8694673776626587,
"step": 438
},
{
"epoch": 0.8058608058608059,
"grad_norm": 0.7047719955444336,
"learning_rate": 2.619582011672238e-05,
"loss": 1.2975598573684692,
"step": 440
},
{
"epoch": 0.8095238095238095,
"grad_norm": 0.22660371661186218,
"learning_rate": 2.6158576698228962e-05,
"loss": 1.193320393562317,
"step": 442
},
{
"epoch": 0.8131868131868132,
"grad_norm": 0.13018931448459625,
"learning_rate": 2.6121182072941003e-05,
"loss": 1.1928589344024658,
"step": 444
},
{
"epoch": 0.8168498168498168,
"grad_norm": 0.2646295428276062,
"learning_rate": 2.6083636826278228e-05,
"loss": 1.21590256690979,
"step": 446
},
{
"epoch": 0.8205128205128205,
"grad_norm": 0.16783171892166138,
"learning_rate": 2.6045941546018393e-05,
"loss": 1.2151340246200562,
"step": 448
},
{
"epoch": 0.8241758241758241,
"grad_norm": 0.13590548932552338,
"learning_rate": 2.600809682228803e-05,
"loss": 1.3085159063339233,
"step": 450
},
{
"epoch": 0.8278388278388278,
"grad_norm": 0.29630717635154724,
"learning_rate": 2.5970103247553255e-05,
"loss": 0.9952484369277954,
"step": 452
},
{
"epoch": 0.8315018315018315,
"grad_norm": 0.054378245025873184,
"learning_rate": 2.5931961416610467e-05,
"loss": 0.47884345054626465,
"step": 454
},
{
"epoch": 0.8351648351648352,
"grad_norm": 0.0672004446387291,
"learning_rate": 2.5893671926577045e-05,
"loss": 0.8887318968772888,
"step": 456
},
{
"epoch": 0.8388278388278388,
"grad_norm": 0.24996346235275269,
"learning_rate": 2.5855235376881992e-05,
"loss": 0.8204334378242493,
"step": 458
},
{
"epoch": 0.8424908424908425,
"grad_norm": 0.4183832108974457,
"learning_rate": 2.5816652369256575e-05,
"loss": 1.0224486589431763,
"step": 460
},
{
"epoch": 0.8461538461538461,
"grad_norm": 0.1493893414735794,
"learning_rate": 2.5777923507724863e-05,
"loss": 1.218255639076233,
"step": 462
},
{
"epoch": 0.8498168498168498,
"grad_norm": 1.3485445976257324,
"learning_rate": 2.5739049398594304e-05,
"loss": 1.1752053499221802,
"step": 464
},
{
"epoch": 0.8534798534798534,
"grad_norm": 0.04876955226063728,
"learning_rate": 2.5700030650446236e-05,
"loss": 1.1931688785552979,
"step": 466
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.26011160016059875,
"learning_rate": 2.5660867874126333e-05,
"loss": 0.8781902194023132,
"step": 468
},
{
"epoch": 0.8608058608058609,
"grad_norm": 0.24683745205402374,
"learning_rate": 2.562156168273506e-05,
"loss": 0.9107936024665833,
"step": 470
},
{
"epoch": 0.8644688644688645,
"grad_norm": 0.163967564702034,
"learning_rate": 2.558211269161807e-05,
"loss": 1.161299705505371,
"step": 472
},
{
"epoch": 0.8681318681318682,
"grad_norm": 0.21867094933986664,
"learning_rate": 2.554252151835658e-05,
"loss": 0.8376265168190002,
"step": 474
},
{
"epoch": 0.8717948717948718,
"grad_norm": 0.15649911761283875,
"learning_rate": 2.550278878275768e-05,
"loss": 1.0913820266723633,
"step": 476
},
{
"epoch": 0.8754578754578755,
"grad_norm": 0.1824655383825302,
"learning_rate": 2.5462915106844662e-05,
"loss": 1.0505826473236084,
"step": 478
},
{
"epoch": 0.8791208791208791,
"grad_norm": 0.3090521991252899,
"learning_rate": 2.5422901114847252e-05,
"loss": 1.0692349672317505,
"step": 480
},
{
"epoch": 0.8827838827838828,
"grad_norm": 0.20167720317840576,
"learning_rate": 2.5382747433191855e-05,
"loss": 1.1805630922317505,
"step": 482
},
{
"epoch": 0.8864468864468864,
"grad_norm": 0.1725214272737503,
"learning_rate": 2.5342454690491742e-05,
"loss": 1.2538508176803589,
"step": 484
},
{
"epoch": 0.8901098901098901,
"grad_norm": 0.1624617725610733,
"learning_rate": 2.5302023517537208e-05,
"loss": 0.8995814919471741,
"step": 486
},
{
"epoch": 0.8937728937728938,
"grad_norm": 0.43343842029571533,
"learning_rate": 2.52614545472857e-05,
"loss": 1.1371649503707886,
"step": 488
},
{
"epoch": 0.8974358974358975,
"grad_norm": 0.20284152030944824,
"learning_rate": 2.522074841485191e-05,
"loss": 1.0982645750045776,
"step": 490
},
{
"epoch": 0.9010989010989011,
"grad_norm": 0.09868486225605011,
"learning_rate": 2.517990575749784e-05,
"loss": 1.034292221069336,
"step": 492
},
{
"epoch": 0.9047619047619048,
"grad_norm": 0.3678258955478668,
"learning_rate": 2.513892721462278e-05,
"loss": 0.5749409198760986,
"step": 494
},
{
"epoch": 0.9084249084249084,
"grad_norm": 0.20719310641288757,
"learning_rate": 2.5097813427753367e-05,
"loss": 1.26762056350708,
"step": 496
},
{
"epoch": 0.9120879120879121,
"grad_norm": 0.15135224163532257,
"learning_rate": 2.5056565040533502e-05,
"loss": 1.1197978258132935,
"step": 498
},
{
"epoch": 0.9157509157509157,
"grad_norm": 0.2651313543319702,
"learning_rate": 2.5015182698714257e-05,
"loss": 0.8029969930648804,
"step": 500
},
{
"epoch": 0.9194139194139194,
"grad_norm": 0.04700407758355141,
"learning_rate": 2.4973667050143826e-05,
"loss": 0.8531551957130432,
"step": 502
},
{
"epoch": 0.9230769230769231,
"grad_norm": 0.13656482100486755,
"learning_rate": 2.4932018744757304e-05,
"loss": 0.9319955110549927,
"step": 504
},
{
"epoch": 0.9267399267399268,
"grad_norm": 0.09392324835062027,
"learning_rate": 2.4890238434566572e-05,
"loss": 0.8403662443161011,
"step": 506
},
{
"epoch": 0.9304029304029304,
"grad_norm": 0.15018832683563232,
"learning_rate": 2.4848326773650073e-05,
"loss": 1.1501878499984741,
"step": 508
},
{
"epoch": 0.9340659340659341,
"grad_norm": 0.23537546396255493,
"learning_rate": 2.4806284418142578e-05,
"loss": 1.2112456560134888,
"step": 510
},
{
"epoch": 0.9377289377289377,
"grad_norm": 2.0061421394348145,
"learning_rate": 2.4764112026224884e-05,
"loss": 1.1638637781143188,
"step": 512
},
{
"epoch": 0.9413919413919414,
"grad_norm": 0.26257815957069397,
"learning_rate": 2.472181025811354e-05,
"loss": 0.9370872974395752,
"step": 514
},
{
"epoch": 0.945054945054945,
"grad_norm": 0.14425083994865417,
"learning_rate": 2.467937977605051e-05,
"loss": 1.1661062240600586,
"step": 516
},
{
"epoch": 0.9487179487179487,
"grad_norm": 0.15449078381061554,
"learning_rate": 2.4636821244292798e-05,
"loss": 1.205389380455017,
"step": 518
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.2787289619445801,
"learning_rate": 2.4594135329102042e-05,
"loss": 1.2056492567062378,
"step": 520
},
{
"epoch": 0.9560439560439561,
"grad_norm": 0.13692162930965424,
"learning_rate": 2.4551322698734087e-05,
"loss": 0.8705477118492126,
"step": 522
},
{
"epoch": 0.9597069597069597,
"grad_norm": 0.2541006803512573,
"learning_rate": 2.4508384023428545e-05,
"loss": 1.018216609954834,
"step": 524
},
{
"epoch": 0.9633699633699634,
"grad_norm": 0.30003371834754944,
"learning_rate": 2.446531997539828e-05,
"loss": 0.9477030634880066,
"step": 526
},
{
"epoch": 0.967032967032967,
"grad_norm": 0.5497117042541504,
"learning_rate": 2.4422131228818865e-05,
"loss": 0.652143120765686,
"step": 528
},
{
"epoch": 0.9706959706959707,
"grad_norm": 0.16208703815937042,
"learning_rate": 2.437881845981809e-05,
"loss": 0.8955711126327515,
"step": 530
},
{
"epoch": 0.9743589743589743,
"grad_norm": 0.33463242650032043,
"learning_rate": 2.433538234646531e-05,
"loss": 1.3425277471542358,
"step": 532
},
{
"epoch": 0.978021978021978,
"grad_norm": 0.16141419112682343,
"learning_rate": 2.4291823568760872e-05,
"loss": 0.6009050011634827,
"step": 534
},
{
"epoch": 0.9816849816849816,
"grad_norm": 0.5491900444030762,
"learning_rate": 2.4248142808625442e-05,
"loss": 0.8425756692886353,
"step": 536
},
{
"epoch": 0.9853479853479854,
"grad_norm": 0.12517108023166656,
"learning_rate": 2.420434074988937e-05,
"loss": 0.7846530079841614,
"step": 538
},
{
"epoch": 0.989010989010989,
"grad_norm": 0.4031621217727661,
"learning_rate": 2.4160418078281927e-05,
"loss": 1.1070702075958252,
"step": 540
},
{
"epoch": 0.9926739926739927,
"grad_norm": 0.12632204592227936,
"learning_rate": 2.411637548142062e-05,
"loss": 1.1606487035751343,
"step": 542
},
{
"epoch": 0.9963369963369964,
"grad_norm": 0.5152090191841125,
"learning_rate": 2.4072213648800402e-05,
"loss": 1.1755306720733643,
"step": 544
},
{
"epoch": 1.0,
"grad_norm": 0.14655056595802307,
"learning_rate": 2.4027933271782885e-05,
"loss": 1.3486008644104004,
"step": 546
},
{
"epoch": 1.0036630036630036,
"grad_norm": 0.2913415729999542,
"learning_rate": 2.398353504358552e-05,
"loss": 0.9161700010299683,
"step": 548
},
{
"epoch": 1.0073260073260073,
"grad_norm": 0.2311517894268036,
"learning_rate": 2.3939019659270728e-05,
"loss": 1.1729316711425781,
"step": 550
},
{
"epoch": 1.010989010989011,
"grad_norm": 0.0538957379758358,
"learning_rate": 2.389438781573504e-05,
"loss": 0.6469966769218445,
"step": 552
},
{
"epoch": 1.0146520146520146,
"grad_norm": 0.3061436116695404,
"learning_rate": 2.3849640211698174e-05,
"loss": 1.1134721040725708,
"step": 554
},
{
"epoch": 1.0183150183150182,
"grad_norm": 0.19010700285434723,
"learning_rate": 2.3804777547692103e-05,
"loss": 1.018505573272705,
"step": 556
},
{
"epoch": 1.021978021978022,
"grad_norm": 0.12313753366470337,
"learning_rate": 2.3759800526050082e-05,
"loss": 1.200607180595398,
"step": 558
},
{
"epoch": 1.0256410256410255,
"grad_norm": 0.18256710469722748,
"learning_rate": 2.371470985089565e-05,
"loss": 0.9006626009941101,
"step": 560
},
{
"epoch": 1.0293040293040292,
"grad_norm": 0.32325589656829834,
"learning_rate": 2.366950622813163e-05,
"loss": 1.1467283964157104,
"step": 562
},
{
"epoch": 1.032967032967033,
"grad_norm": 2.3231663703918457,
"learning_rate": 2.362419036542904e-05,
"loss": 1.1306356191635132,
"step": 564
},
{
"epoch": 1.0366300366300367,
"grad_norm": 0.1579679250717163,
"learning_rate": 2.357876297221606e-05,
"loss": 1.0079030990600586,
"step": 566
},
{
"epoch": 1.0402930402930404,
"grad_norm": 0.2728642225265503,
"learning_rate": 2.3533224759666865e-05,
"loss": 1.0216898918151855,
"step": 568
},
{
"epoch": 1.043956043956044,
"grad_norm": 1.051032543182373,
"learning_rate": 2.348757644069056e-05,
"loss": 1.0827183723449707,
"step": 570
},
{
"epoch": 1.0476190476190477,
"grad_norm": 0.40817469358444214,
"learning_rate": 2.3441818729919975e-05,
"loss": 1.2242071628570557,
"step": 572
},
{
"epoch": 1.0512820512820513,
"grad_norm": 0.709222137928009,
"learning_rate": 2.3395952343700484e-05,
"loss": 1.0530122518539429,
"step": 574
},
{
"epoch": 1.054945054945055,
"grad_norm": 0.2929840087890625,
"learning_rate": 2.33499780000788e-05,
"loss": 1.2369706630706787,
"step": 576
},
{
"epoch": 1.0586080586080586,
"grad_norm": 0.2514389753341675,
"learning_rate": 2.3303896418791725e-05,
"loss": 1.2025117874145508,
"step": 578
},
{
"epoch": 1.0622710622710623,
"grad_norm": 0.25741446018218994,
"learning_rate": 2.3257708321254892e-05,
"loss": 1.2033288478851318,
"step": 580
},
{
"epoch": 1.065934065934066,
"grad_norm": 0.30797845125198364,
"learning_rate": 2.321141443055146e-05,
"loss": 0.9666109085083008,
"step": 582
},
{
"epoch": 1.0695970695970696,
"grad_norm": 0.27686113119125366,
"learning_rate": 2.3165015471420802e-05,
"loss": 1.1291571855545044,
"step": 584
},
{
"epoch": 1.0732600732600732,
"grad_norm": 0.13662219047546387,
"learning_rate": 2.3118512170247156e-05,
"loss": 1.0490210056304932,
"step": 586
},
{
"epoch": 1.0769230769230769,
"grad_norm": 0.24129001796245575,
"learning_rate": 2.3071905255048257e-05,
"loss": 0.7990568280220032,
"step": 588
},
{
"epoch": 1.0805860805860805,
"grad_norm": 0.3677581548690796,
"learning_rate": 2.3025195455463938e-05,
"loss": 1.1841416358947754,
"step": 590
},
{
"epoch": 1.0842490842490842,
"grad_norm": 0.11656766384840012,
"learning_rate": 2.2978383502744693e-05,
"loss": 0.9949502348899841,
"step": 592
},
{
"epoch": 1.0879120879120878,
"grad_norm": 0.29528045654296875,
"learning_rate": 2.2931470129740257e-05,
"loss": 0.955126166343689,
"step": 594
},
{
"epoch": 1.0915750915750915,
"grad_norm": 0.16909858584403992,
"learning_rate": 2.2884456070888107e-05,
"loss": 0.986553966999054,
"step": 596
},
{
"epoch": 1.0952380952380953,
"grad_norm": 0.5373761653900146,
"learning_rate": 2.2837342062201987e-05,
"loss": 1.0663678646087646,
"step": 598
},
{
"epoch": 1.098901098901099,
"grad_norm": 0.2534824013710022,
"learning_rate": 2.279012884126037e-05,
"loss": 1.1465845108032227,
"step": 600
},
{
"epoch": 1.1025641025641026,
"grad_norm": 0.4134259521961212,
"learning_rate": 2.2742817147194904e-05,
"loss": 0.7965458035469055,
"step": 602
},
{
"epoch": 1.1062271062271063,
"grad_norm": 0.5439176559448242,
"learning_rate": 2.2695407720678863e-05,
"loss": 0.9291653633117676,
"step": 604
},
{
"epoch": 1.10989010989011,
"grad_norm": 0.33265918493270874,
"learning_rate": 2.2647901303915543e-05,
"loss": 0.8729199171066284,
"step": 606
},
{
"epoch": 1.1135531135531136,
"grad_norm": 0.26687994599342346,
"learning_rate": 2.2600298640626633e-05,
"loss": 1.2342718839645386,
"step": 608
},
{
"epoch": 1.1172161172161172,
"grad_norm": 0.21249845623970032,
"learning_rate": 2.2552600476040578e-05,
"loss": 0.7072524428367615,
"step": 610
},
{
"epoch": 1.120879120879121,
"grad_norm": 0.42950597405433655,
"learning_rate": 2.2504807556880924e-05,
"loss": 1.1363072395324707,
"step": 612
},
{
"epoch": 1.1245421245421245,
"grad_norm": 0.35692963004112244,
"learning_rate": 2.2456920631354604e-05,
"loss": 0.9730544090270996,
"step": 614
},
{
"epoch": 1.1282051282051282,
"grad_norm": 0.26220595836639404,
"learning_rate": 2.2408940449140255e-05,
"loss": 1.13991117477417,
"step": 616
},
{
"epoch": 1.1318681318681318,
"grad_norm": 0.2685360312461853,
"learning_rate": 2.2360867761376433e-05,
"loss": 0.38837555050849915,
"step": 618
},
{
"epoch": 1.1355311355311355,
"grad_norm": 0.19976449012756348,
"learning_rate": 2.231270332064993e-05,
"loss": 1.0665472745895386,
"step": 620
},
{
"epoch": 1.1391941391941391,
"grad_norm": 0.29795894026756287,
"learning_rate": 2.2264447880983903e-05,
"loss": 0.7275888919830322,
"step": 622
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.34990039467811584,
"learning_rate": 2.2216102197826152e-05,
"loss": 1.0672345161437988,
"step": 624
},
{
"epoch": 1.1465201465201464,
"grad_norm": 0.18159109354019165,
"learning_rate": 2.216766702803722e-05,
"loss": 1.3399895429611206,
"step": 626
},
{
"epoch": 1.15018315018315,
"grad_norm": 8.862257957458496,
"learning_rate": 2.2119143129878612e-05,
"loss": 0.4365134835243225,
"step": 628
},
{
"epoch": 1.1538461538461537,
"grad_norm": 0.04682470113039017,
"learning_rate": 2.2070531263000877e-05,
"loss": 1.099286437034607,
"step": 630
},
{
"epoch": 1.1575091575091574,
"grad_norm": 0.5580438375473022,
"learning_rate": 2.2021832188431726e-05,
"loss": 1.0629825592041016,
"step": 632
},
{
"epoch": 1.1611721611721613,
"grad_norm": 0.39520084857940674,
"learning_rate": 2.197304666856413e-05,
"loss": 0.841308057308197,
"step": 634
},
{
"epoch": 1.164835164835165,
"grad_norm": 0.37604862451553345,
"learning_rate": 2.1924175467144374e-05,
"loss": 0.9573929309844971,
"step": 636
},
{
"epoch": 1.1684981684981686,
"grad_norm": 0.16199947893619537,
"learning_rate": 2.1875219349260103e-05,
"loss": 1.1685539484024048,
"step": 638
},
{
"epoch": 1.1721611721611722,
"grad_norm": 0.03986337035894394,
"learning_rate": 2.182617908132835e-05,
"loss": 0.7316250801086426,
"step": 640
},
{
"epoch": 1.1758241758241759,
"grad_norm": 0.28604838252067566,
"learning_rate": 2.1777055431083526e-05,
"loss": 1.103910207748413,
"step": 642
},
{
"epoch": 1.1794871794871795,
"grad_norm": 0.33542269468307495,
"learning_rate": 2.1727849167565417e-05,
"loss": 1.0138633251190186,
"step": 644
},
{
"epoch": 1.1831501831501832,
"grad_norm": 0.15346784889698029,
"learning_rate": 2.1678561061107114e-05,
"loss": 0.7353882193565369,
"step": 646
},
{
"epoch": 1.1868131868131868,
"grad_norm": 0.3873114287853241,
"learning_rate": 2.1629191883322998e-05,
"loss": 0.870601236820221,
"step": 648
},
{
"epoch": 1.1904761904761905,
"grad_norm": 0.2770773768424988,
"learning_rate": 2.1579742407096626e-05,
"loss": 1.1267426013946533,
"step": 650
},
{
"epoch": 1.1941391941391941,
"grad_norm": 0.20924782752990723,
"learning_rate": 2.1530213406568637e-05,
"loss": 0.6501792669296265,
"step": 652
},
{
"epoch": 1.1978021978021978,
"grad_norm": 0.2145625650882721,
"learning_rate": 2.1480605657124656e-05,
"loss": 1.2713698148727417,
"step": 654
},
{
"epoch": 1.2014652014652014,
"grad_norm": 0.3093368113040924,
"learning_rate": 2.1430919935383112e-05,
"loss": 0.8059902787208557,
"step": 656
},
{
"epoch": 1.205128205128205,
"grad_norm": 2.7126386165618896,
"learning_rate": 2.138115701918312e-05,
"loss": 0.6809201836585999,
"step": 658
},
{
"epoch": 1.2087912087912087,
"grad_norm": 0.18192383646965027,
"learning_rate": 2.1331317687572286e-05,
"loss": 1.1400713920593262,
"step": 660
},
{
"epoch": 1.2124542124542124,
"grad_norm": 0.5643167495727539,
"learning_rate": 2.1281402720794512e-05,
"loss": 1.0006842613220215,
"step": 662
},
{
"epoch": 1.2161172161172162,
"grad_norm": 0.48151227831840515,
"learning_rate": 2.123141290027778e-05,
"loss": 0.867255687713623,
"step": 664
},
{
"epoch": 1.2197802197802199,
"grad_norm": 0.23649705946445465,
"learning_rate": 2.1181349008621935e-05,
"loss": 0.782567024230957,
"step": 666
},
{
"epoch": 1.2234432234432235,
"grad_norm": 0.3819615840911865,
"learning_rate": 2.1131211829586398e-05,
"loss": 0.8727216720581055,
"step": 668
},
{
"epoch": 1.2271062271062272,
"grad_norm": 0.4201929271221161,
"learning_rate": 2.1081002148077926e-05,
"loss": 0.6579726934432983,
"step": 670
},
{
"epoch": 1.2307692307692308,
"grad_norm": 0.17308755218982697,
"learning_rate": 2.1030720750138324e-05,
"loss": 1.1650503873825073,
"step": 672
},
{
"epoch": 1.2344322344322345,
"grad_norm": 0.3225247263908386,
"learning_rate": 2.0980368422932118e-05,
"loss": 1.137325406074524,
"step": 674
},
{
"epoch": 1.2380952380952381,
"grad_norm": 0.1973855048418045,
"learning_rate": 2.092994595473426e-05,
"loss": 1.1627634763717651,
"step": 676
},
{
"epoch": 1.2417582417582418,
"grad_norm": 0.2576034367084503,
"learning_rate": 2.0879454134917752e-05,
"loss": 0.758223831653595,
"step": 678
},
{
"epoch": 1.2454212454212454,
"grad_norm": 0.1488749384880066,
"learning_rate": 2.0828893753941327e-05,
"loss": 0.8128418326377869,
"step": 680
},
{
"epoch": 1.249084249084249,
"grad_norm": 0.39639613032341003,
"learning_rate": 2.0778265603337043e-05,
"loss": 1.1716361045837402,
"step": 682
},
{
"epoch": 1.2527472527472527,
"grad_norm": 0.25259119272232056,
"learning_rate": 2.0727570475697917e-05,
"loss": 1.1419763565063477,
"step": 684
},
{
"epoch": 1.2564102564102564,
"grad_norm": 0.2757309675216675,
"learning_rate": 2.0676809164665485e-05,
"loss": 0.8066729307174683,
"step": 686
},
{
"epoch": 1.26007326007326,
"grad_norm": 0.7084298133850098,
"learning_rate": 2.0625982464917414e-05,
"loss": 1.0903362035751343,
"step": 688
},
{
"epoch": 1.2637362637362637,
"grad_norm": 2.3106207847595215,
"learning_rate": 2.0575091172155033e-05,
"loss": 1.116640567779541,
"step": 690
},
{
"epoch": 1.2673992673992673,
"grad_norm": 0.34988725185394287,
"learning_rate": 2.052413608309089e-05,
"loss": 1.1625053882598877,
"step": 692
},
{
"epoch": 1.271062271062271,
"grad_norm": 0.6054718494415283,
"learning_rate": 2.0473117995436273e-05,
"loss": 0.9630265831947327,
"step": 694
},
{
"epoch": 1.2747252747252746,
"grad_norm": 0.3232510983943939,
"learning_rate": 2.0422037707888737e-05,
"loss": 0.9731068015098572,
"step": 696
},
{
"epoch": 1.2783882783882783,
"grad_norm": 0.21576230227947235,
"learning_rate": 2.0370896020119568e-05,
"loss": 0.8881978988647461,
"step": 698
},
{
"epoch": 1.282051282051282,
"grad_norm": 0.12663686275482178,
"learning_rate": 2.0319693732761296e-05,
"loss": 0.6315019726753235,
"step": 700
},
{
"epoch": 1.2857142857142856,
"grad_norm": 0.41833654046058655,
"learning_rate": 2.026843164739515e-05,
"loss": 0.5021913051605225,
"step": 702
},
{
"epoch": 1.2893772893772895,
"grad_norm": 0.28392916917800903,
"learning_rate": 2.0217110566538502e-05,
"loss": 1.2423911094665527,
"step": 704
},
{
"epoch": 1.293040293040293,
"grad_norm": 0.44487547874450684,
"learning_rate": 2.016573129363231e-05,
"loss": 0.9610970616340637,
"step": 706
},
{
"epoch": 1.2967032967032968,
"grad_norm": 0.5216399431228638,
"learning_rate": 2.011429463302854e-05,
"loss": 0.9326560497283936,
"step": 708
},
{
"epoch": 1.3003663003663004,
"grad_norm": 0.19748345017433167,
"learning_rate": 2.0062801389977577e-05,
"loss": 0.5408502221107483,
"step": 710
},
{
"epoch": 1.304029304029304,
"grad_norm": 1.4596139192581177,
"learning_rate": 2.001125237061561e-05,
"loss": 0.8761268854141235,
"step": 712
},
{
"epoch": 1.3076923076923077,
"grad_norm": 0.3239227533340454,
"learning_rate": 1.9959648381952014e-05,
"loss": 1.1772205829620361,
"step": 714
},
{
"epoch": 1.3113553113553114,
"grad_norm": 0.3733477294445038,
"learning_rate": 1.9907990231856725e-05,
"loss": 1.1099432706832886,
"step": 716
},
{
"epoch": 1.315018315018315,
"grad_norm": 0.39496785402297974,
"learning_rate": 1.9856278729047588e-05,
"loss": 0.9054940342903137,
"step": 718
},
{
"epoch": 1.3186813186813187,
"grad_norm": 0.31072095036506653,
"learning_rate": 1.980451468307768e-05,
"loss": 0.9181997776031494,
"step": 720
},
{
"epoch": 1.3223443223443223,
"grad_norm": 0.17306113243103027,
"learning_rate": 1.975269890432267e-05,
"loss": 0.8793990015983582,
"step": 722
},
{
"epoch": 1.326007326007326,
"grad_norm": 0.13014285266399384,
"learning_rate": 1.9700832203968095e-05,
"loss": 1.186950922012329,
"step": 724
},
{
"epoch": 1.3296703296703296,
"grad_norm": 0.22023595869541168,
"learning_rate": 1.96489153939967e-05,
"loss": 1.0376808643341064,
"step": 726
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.28590211272239685,
"learning_rate": 1.9596949287175685e-05,
"loss": 1.1279020309448242,
"step": 728
},
{
"epoch": 1.3369963369963371,
"grad_norm": 0.1974659264087677,
"learning_rate": 1.9544934697044008e-05,
"loss": 0.716360867023468,
"step": 730
},
{
"epoch": 1.3406593406593408,
"grad_norm": 0.222087100148201,
"learning_rate": 1.9492872437899646e-05,
"loss": 0.8136202692985535,
"step": 732
},
{
"epoch": 1.3443223443223444,
"grad_norm": 0.042768679559230804,
"learning_rate": 1.9440763324786843e-05,
"loss": 0.6029387712478638,
"step": 734
},
{
"epoch": 1.347985347985348,
"grad_norm": 1.7473887205123901,
"learning_rate": 1.9388608173483347e-05,
"loss": 1.1386733055114746,
"step": 736
},
{
"epoch": 1.3516483516483517,
"grad_norm": 0.14771875739097595,
"learning_rate": 1.9336407800487642e-05,
"loss": 0.7350283861160278,
"step": 738
},
{
"epoch": 1.3553113553113554,
"grad_norm": 0.1457987129688263,
"learning_rate": 1.9284163023006173e-05,
"loss": 0.9950637221336365,
"step": 740
},
{
"epoch": 1.358974358974359,
"grad_norm": 0.36954575777053833,
"learning_rate": 1.923187465894053e-05,
"loss": 0.5731111168861389,
"step": 742
},
{
"epoch": 1.3626373626373627,
"grad_norm": 0.35019728541374207,
"learning_rate": 1.917954352687468e-05,
"loss": 1.214383602142334,
"step": 744
},
{
"epoch": 1.3663003663003663,
"grad_norm": 0.16928352415561676,
"learning_rate": 1.9127170446062105e-05,
"loss": 1.0261609554290771,
"step": 746
},
{
"epoch": 1.36996336996337,
"grad_norm": 0.4486094117164612,
"learning_rate": 1.907475623641304e-05,
"loss": 1.0434916019439697,
"step": 748
},
{
"epoch": 1.3736263736263736,
"grad_norm": 0.16689936816692352,
"learning_rate": 1.9022301718481554e-05,
"loss": 0.7598743438720703,
"step": 750
},
{
"epoch": 1.3772893772893773,
"grad_norm": 0.14010295271873474,
"learning_rate": 1.8969807713452784e-05,
"loss": 1.101077914237976,
"step": 752
},
{
"epoch": 1.380952380952381,
"grad_norm": 0.08368280529975891,
"learning_rate": 1.8917275043130034e-05,
"loss": 0.9529477953910828,
"step": 754
},
{
"epoch": 1.3846153846153846,
"grad_norm": 0.14239844679832458,
"learning_rate": 1.886470452992191e-05,
"loss": 1.2759732007980347,
"step": 756
},
{
"epoch": 1.3882783882783882,
"grad_norm": 0.20159541070461273,
"learning_rate": 1.8812096996829475e-05,
"loss": 1.1850250959396362,
"step": 758
},
{
"epoch": 1.3919413919413919,
"grad_norm": 0.1525518298149109,
"learning_rate": 1.875945326743333e-05,
"loss": 1.1283670663833618,
"step": 760
},
{
"epoch": 1.3956043956043955,
"grad_norm": 0.14675025641918182,
"learning_rate": 1.8706774165880748e-05,
"loss": 1.1265307664871216,
"step": 762
},
{
"epoch": 1.3992673992673992,
"grad_norm": 0.2952445149421692,
"learning_rate": 1.8654060516872734e-05,
"loss": 0.6828045845031738,
"step": 764
},
{
"epoch": 1.4029304029304028,
"grad_norm": 0.3701895773410797,
"learning_rate": 1.8601313145651178e-05,
"loss": 0.8671898245811462,
"step": 766
},
{
"epoch": 1.4065934065934065,
"grad_norm": 3.4587416648864746,
"learning_rate": 1.8548532877985863e-05,
"loss": 0.8688183426856995,
"step": 768
},
{
"epoch": 1.4102564102564101,
"grad_norm": 0.24293172359466553,
"learning_rate": 1.8495720540161592e-05,
"loss": 1.1538002490997314,
"step": 770
},
{
"epoch": 1.4139194139194138,
"grad_norm": 0.10538533329963684,
"learning_rate": 1.8442876958965228e-05,
"loss": 1.1949889659881592,
"step": 772
},
{
"epoch": 1.4175824175824177,
"grad_norm": 0.26751771569252014,
"learning_rate": 1.8390002961672755e-05,
"loss": 1.0057183504104614,
"step": 774
},
{
"epoch": 1.4212454212454213,
"grad_norm": 0.34948304295539856,
"learning_rate": 1.8337099376036308e-05,
"loss": 1.1333030462265015,
"step": 776
},
{
"epoch": 1.424908424908425,
"grad_norm": 0.16524076461791992,
"learning_rate": 1.828416703027128e-05,
"loss": 0.8215429186820984,
"step": 778
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.14560294151306152,
"learning_rate": 1.8231206753043253e-05,
"loss": 1.3414307832717896,
"step": 780
},
{
"epoch": 1.4322344322344323,
"grad_norm": 0.259555459022522,
"learning_rate": 1.8178219373455116e-05,
"loss": 1.2006430625915527,
"step": 782
},
{
"epoch": 1.435897435897436,
"grad_norm": 0.18782170116901398,
"learning_rate": 1.8125205721034043e-05,
"loss": 1.1388580799102783,
"step": 784
},
{
"epoch": 1.4395604395604396,
"grad_norm": 0.3346932530403137,
"learning_rate": 1.8072166625718512e-05,
"loss": 0.9242116808891296,
"step": 786
},
{
"epoch": 1.4432234432234432,
"grad_norm": 0.5575185418128967,
"learning_rate": 1.8019102917845315e-05,
"loss": 1.1115480661392212,
"step": 788
},
{
"epoch": 1.4468864468864469,
"grad_norm": 0.04982315003871918,
"learning_rate": 1.7966015428136552e-05,
"loss": 0.703987717628479,
"step": 790
},
{
"epoch": 1.4505494505494505,
"grad_norm": 0.19348366558551788,
"learning_rate": 1.791290498768665e-05,
"loss": 0.8977080583572388,
"step": 792
},
{
"epoch": 1.4542124542124542,
"grad_norm": 0.1676187515258789,
"learning_rate": 1.785977242794931e-05,
"loss": 0.7916489839553833,
"step": 794
},
{
"epoch": 1.4578754578754578,
"grad_norm": 0.17639444768428802,
"learning_rate": 1.7806618580724534e-05,
"loss": 1.1303378343582153,
"step": 796
},
{
"epoch": 1.4615384615384617,
"grad_norm": 0.21372033655643463,
"learning_rate": 1.775344427814557e-05,
"loss": 0.7749331593513489,
"step": 798
},
{
"epoch": 1.4652014652014653,
"grad_norm": 0.4240255057811737,
"learning_rate": 1.770025035266591e-05,
"loss": 1.0045387744903564,
"step": 800
},
{
"epoch": 1.468864468864469,
"grad_norm": 0.32724934816360474,
"learning_rate": 1.7647037637046236e-05,
"loss": 1.5421123504638672,
"step": 802
},
{
"epoch": 1.4725274725274726,
"grad_norm": 0.8365166187286377,
"learning_rate": 1.7593806964341397e-05,
"loss": 1.0063722133636475,
"step": 804
},
{
"epoch": 1.4761904761904763,
"grad_norm": 0.16228164732456207,
"learning_rate": 1.7540559167887365e-05,
"loss": 1.1679903268814087,
"step": 806
},
{
"epoch": 1.47985347985348,
"grad_norm": 0.1366707980632782,
"learning_rate": 1.748729508128819e-05,
"loss": 1.1511813402175903,
"step": 808
},
{
"epoch": 1.4835164835164836,
"grad_norm": 0.27702444791793823,
"learning_rate": 1.7434015538402948e-05,
"loss": 0.9803687930107117,
"step": 810
},
{
"epoch": 1.4871794871794872,
"grad_norm": 0.18855758011341095,
"learning_rate": 1.7380721373332664e-05,
"loss": 0.8224502801895142,
"step": 812
},
{
"epoch": 1.4908424908424909,
"grad_norm": 1.411730170249939,
"learning_rate": 1.7327413420407312e-05,
"loss": 1.188186526298523,
"step": 814
},
{
"epoch": 1.4945054945054945,
"grad_norm": 0.23762649297714233,
"learning_rate": 1.7274092514172685e-05,
"loss": 1.1305307149887085,
"step": 816
},
{
"epoch": 1.4981684981684982,
"grad_norm": 0.09449716657400131,
"learning_rate": 1.7220759489377392e-05,
"loss": 0.3843282461166382,
"step": 818
},
{
"epoch": 1.5018315018315018,
"grad_norm": 0.18217319250106812,
"learning_rate": 1.716741518095973e-05,
"loss": 1.1302298307418823,
"step": 820
},
{
"epoch": 1.5054945054945055,
"grad_norm": 0.14097942411899567,
"learning_rate": 1.7114060424034668e-05,
"loss": 0.8780094981193542,
"step": 822
},
{
"epoch": 1.5091575091575091,
"grad_norm": 0.22164665162563324,
"learning_rate": 1.7060696053880728e-05,
"loss": 0.8111138343811035,
"step": 824
},
{
"epoch": 1.5128205128205128,
"grad_norm": 0.2753220200538635,
"learning_rate": 1.700732290592695e-05,
"loss": 1.022735834121704,
"step": 826
},
{
"epoch": 1.5164835164835164,
"grad_norm": 0.24719539284706116,
"learning_rate": 1.6953941815739775e-05,
"loss": 0.986643373966217,
"step": 828
},
{
"epoch": 1.52014652014652,
"grad_norm": 0.4292539358139038,
"learning_rate": 1.6900553619009987e-05,
"loss": 0.9866463541984558,
"step": 830
},
{
"epoch": 1.5238095238095237,
"grad_norm": 0.1976500153541565,
"learning_rate": 1.684715915153963e-05,
"loss": 1.2311562299728394,
"step": 832
},
{
"epoch": 1.5274725274725274,
"grad_norm": 0.29693394899368286,
"learning_rate": 1.6793759249228907e-05,
"loss": 0.795253574848175,
"step": 834
},
{
"epoch": 1.531135531135531,
"grad_norm": 0.22329799830913544,
"learning_rate": 1.6740354748063115e-05,
"loss": 1.134211540222168,
"step": 836
},
{
"epoch": 1.5347985347985347,
"grad_norm": 0.3141157925128937,
"learning_rate": 1.6686946484099533e-05,
"loss": 1.1773242950439453,
"step": 838
},
{
"epoch": 1.5384615384615383,
"grad_norm": 0.39752843976020813,
"learning_rate": 1.6633535293454363e-05,
"loss": 1.0289901494979858,
"step": 840
},
{
"epoch": 1.542124542124542,
"grad_norm": 0.2214544713497162,
"learning_rate": 1.6580122012289612e-05,
"loss": 0.8504143953323364,
"step": 842
},
{
"epoch": 1.5457875457875456,
"grad_norm": 0.17584171891212463,
"learning_rate": 1.6526707476800024e-05,
"loss": 1.0214329957962036,
"step": 844
},
{
"epoch": 1.5494505494505495,
"grad_norm": 0.28855398297309875,
"learning_rate": 1.6473292523199978e-05,
"loss": 1.1653364896774292,
"step": 846
},
{
"epoch": 1.5531135531135531,
"grad_norm": 0.24624770879745483,
"learning_rate": 1.6419877987710394e-05,
"loss": 0.5219634771347046,
"step": 848
},
{
"epoch": 1.5567765567765568,
"grad_norm": 0.20057709515094757,
"learning_rate": 1.636646470654564e-05,
"loss": 1.0128839015960693,
"step": 850
},
{
"epoch": 1.5604395604395604,
"grad_norm": 0.6557381749153137,
"learning_rate": 1.6313053515900473e-05,
"loss": 1.156237006187439,
"step": 852
},
{
"epoch": 1.564102564102564,
"grad_norm": 1.0166418552398682,
"learning_rate": 1.625964525193689e-05,
"loss": 0.8430522680282593,
"step": 854
},
{
"epoch": 1.5677655677655677,
"grad_norm": 0.13341838121414185,
"learning_rate": 1.6206240750771092e-05,
"loss": 0.7991554737091064,
"step": 856
},
{
"epoch": 1.5714285714285714,
"grad_norm": 1.104068398475647,
"learning_rate": 1.6152840848460376e-05,
"loss": 0.9209327101707458,
"step": 858
},
{
"epoch": 1.575091575091575,
"grad_norm": 0.2087830752134323,
"learning_rate": 1.6099446380990015e-05,
"loss": 0.7249851226806641,
"step": 860
},
{
"epoch": 1.578754578754579,
"grad_norm": 0.19045013189315796,
"learning_rate": 1.604605818426023e-05,
"loss": 1.1386749744415283,
"step": 862
},
{
"epoch": 1.5824175824175826,
"grad_norm": 0.6532570719718933,
"learning_rate": 1.5992677094073055e-05,
"loss": 0.8903250694274902,
"step": 864
},
{
"epoch": 1.5860805860805862,
"grad_norm": 0.3644380569458008,
"learning_rate": 1.5939303946119275e-05,
"loss": 1.1600793600082397,
"step": 866
},
{
"epoch": 1.5897435897435899,
"grad_norm": 0.16693386435508728,
"learning_rate": 1.588593957596534e-05,
"loss": 1.1642889976501465,
"step": 868
},
{
"epoch": 1.5934065934065935,
"grad_norm": 0.6526052355766296,
"learning_rate": 1.5832584819040275e-05,
"loss": 0.5823288559913635,
"step": 870
},
{
"epoch": 1.5970695970695972,
"grad_norm": 0.4178096055984497,
"learning_rate": 1.577924051062261e-05,
"loss": 1.164493203163147,
"step": 872
},
{
"epoch": 1.6007326007326008,
"grad_norm": 0.1560155600309372,
"learning_rate": 1.5725907485827318e-05,
"loss": 1.042616367340088,
"step": 874
},
{
"epoch": 1.6043956043956045,
"grad_norm": 0.3744211792945862,
"learning_rate": 1.567258657959269e-05,
"loss": 0.7366330623626709,
"step": 876
},
{
"epoch": 1.6080586080586081,
"grad_norm": 0.4771806299686432,
"learning_rate": 1.5619278626667336e-05,
"loss": 0.9707854986190796,
"step": 878
},
{
"epoch": 1.6117216117216118,
"grad_norm": 0.09679575264453888,
"learning_rate": 1.556598446159706e-05,
"loss": 0.8865756392478943,
"step": 880
},
{
"epoch": 1.6153846153846154,
"grad_norm": 0.1846759170293808,
"learning_rate": 1.5512704918711812e-05,
"loss": 1.1168344020843506,
"step": 882
},
{
"epoch": 1.619047619047619,
"grad_norm": 0.5885987877845764,
"learning_rate": 1.5459440832112634e-05,
"loss": 0.9253458976745605,
"step": 884
},
{
"epoch": 1.6227106227106227,
"grad_norm": 0.3282431662082672,
"learning_rate": 1.5406193035658606e-05,
"loss": 1.1273139715194702,
"step": 886
},
{
"epoch": 1.6263736263736264,
"grad_norm": 0.14543206989765167,
"learning_rate": 1.535296236295377e-05,
"loss": 0.8357165455818176,
"step": 888
},
{
"epoch": 1.63003663003663,
"grad_norm": 0.08729889988899231,
"learning_rate": 1.5299749647334097e-05,
"loss": 1.1917040348052979,
"step": 890
},
{
"epoch": 1.6336996336996337,
"grad_norm": 0.2723523676395416,
"learning_rate": 1.5246555721854436e-05,
"loss": 0.9028322100639343,
"step": 892
},
{
"epoch": 1.6373626373626373,
"grad_norm": 0.23759818077087402,
"learning_rate": 1.519338141927547e-05,
"loss": 1.1759182214736938,
"step": 894
},
{
"epoch": 1.641025641025641,
"grad_norm": 0.3160013258457184,
"learning_rate": 1.5140227572050696e-05,
"loss": 0.7313938140869141,
"step": 896
},
{
"epoch": 1.6446886446886446,
"grad_norm": 0.37363237142562866,
"learning_rate": 1.5087095012313355e-05,
"loss": 1.1223758459091187,
"step": 898
},
{
"epoch": 1.6483516483516483,
"grad_norm": 0.762113094329834,
"learning_rate": 1.5033984571863445e-05,
"loss": 0.8107267022132874,
"step": 900
},
{
"epoch": 1.652014652014652,
"grad_norm": 0.3437276780605316,
"learning_rate": 1.498089708215469e-05,
"loss": 0.8657619953155518,
"step": 902
},
{
"epoch": 1.6556776556776556,
"grad_norm": 0.352426677942276,
"learning_rate": 1.4927833374281493e-05,
"loss": 1.004289150238037,
"step": 904
},
{
"epoch": 1.6593406593406592,
"grad_norm": 0.22609834372997284,
"learning_rate": 1.4874794278965956e-05,
"loss": 1.1188032627105713,
"step": 906
},
{
"epoch": 1.6630036630036629,
"grad_norm": 0.2916370928287506,
"learning_rate": 1.4821780626544885e-05,
"loss": 0.7417563796043396,
"step": 908
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.23103374242782593,
"learning_rate": 1.476879324695675e-05,
"loss": 1.1878173351287842,
"step": 910
},
{
"epoch": 1.6703296703296702,
"grad_norm": 0.2126466929912567,
"learning_rate": 1.4715832969728727e-05,
"loss": 1.2045222520828247,
"step": 912
},
{
"epoch": 1.673992673992674,
"grad_norm": 0.2584460973739624,
"learning_rate": 1.4662900623963691e-05,
"loss": 1.084285855293274,
"step": 914
},
{
"epoch": 1.6776556776556777,
"grad_norm": 0.6230321526527405,
"learning_rate": 1.4609997038327249e-05,
"loss": 1.1955914497375488,
"step": 916
},
{
"epoch": 1.6813186813186813,
"grad_norm": 0.3801193833351135,
"learning_rate": 1.4557123041034773e-05,
"loss": 0.8334836363792419,
"step": 918
},
{
"epoch": 1.684981684981685,
"grad_norm": 0.12849542498588562,
"learning_rate": 1.4504279459838412e-05,
"loss": 1.2025550603866577,
"step": 920
},
{
"epoch": 1.6886446886446886,
"grad_norm": 0.18971043825149536,
"learning_rate": 1.4451467122014144e-05,
"loss": 1.0005592107772827,
"step": 922
},
{
"epoch": 1.6923076923076923,
"grad_norm": 0.20084154605865479,
"learning_rate": 1.439868685434883e-05,
"loss": 0.8383995294570923,
"step": 924
},
{
"epoch": 1.695970695970696,
"grad_norm": 0.41390281915664673,
"learning_rate": 1.4345939483127269e-05,
"loss": 0.9622109532356262,
"step": 926
},
{
"epoch": 1.6996336996336996,
"grad_norm": 0.20219890773296356,
"learning_rate": 1.4293225834119256e-05,
"loss": 0.7931950688362122,
"step": 928
},
{
"epoch": 1.7032967032967035,
"grad_norm": 0.0588349848985672,
"learning_rate": 1.4240546732566674e-05,
"loss": 0.863927960395813,
"step": 930
},
{
"epoch": 1.7069597069597071,
"grad_norm": 0.5111618638038635,
"learning_rate": 1.4187903003170524e-05,
"loss": 0.9076425433158875,
"step": 932
},
{
"epoch": 1.7106227106227108,
"grad_norm": 0.36183348298072815,
"learning_rate": 1.413529547007809e-05,
"loss": 1.3784637451171875,
"step": 934
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.17682304978370667,
"learning_rate": 1.4082724956869973e-05,
"loss": 1.1786631345748901,
"step": 936
},
{
"epoch": 1.717948717948718,
"grad_norm": 2.0445046424865723,
"learning_rate": 1.4030192286547219e-05,
"loss": 0.46664801239967346,
"step": 938
},
{
"epoch": 1.7216117216117217,
"grad_norm": 0.08176091313362122,
"learning_rate": 1.3977698281518447e-05,
"loss": 0.510556697845459,
"step": 940
},
{
"epoch": 1.7252747252747254,
"grad_norm": 0.26817288994789124,
"learning_rate": 1.3925243763586967e-05,
"loss": 1.123125433921814,
"step": 942
},
{
"epoch": 1.728937728937729,
"grad_norm": 0.44335806369781494,
"learning_rate": 1.3872829553937894e-05,
"loss": 0.9174548983573914,
"step": 944
},
{
"epoch": 1.7326007326007327,
"grad_norm": 0.14424346387386322,
"learning_rate": 1.3820456473125325e-05,
"loss": 1.1279373168945312,
"step": 946
},
{
"epoch": 1.7362637362637363,
"grad_norm": 0.14489899575710297,
"learning_rate": 1.3768125341059474e-05,
"loss": 1.0826152563095093,
"step": 948
},
{
"epoch": 1.73992673992674,
"grad_norm": 0.5113826990127563,
"learning_rate": 1.3715836976993831e-05,
"loss": 0.9802412390708923,
"step": 950
},
{
"epoch": 1.7435897435897436,
"grad_norm": 0.221163809299469,
"learning_rate": 1.3663592199512362e-05,
"loss": 0.9261195659637451,
"step": 952
},
{
"epoch": 1.7472527472527473,
"grad_norm": 0.2066127210855484,
"learning_rate": 1.3611391826516656e-05,
"loss": 1.2245968580245972,
"step": 954
},
{
"epoch": 1.750915750915751,
"grad_norm": 0.4393288791179657,
"learning_rate": 1.355923667521316e-05,
"loss": 0.7634697556495667,
"step": 956
},
{
"epoch": 1.7545787545787546,
"grad_norm": 0.3519604504108429,
"learning_rate": 1.3507127562100358e-05,
"loss": 1.1724518537521362,
"step": 958
},
{
"epoch": 1.7582417582417582,
"grad_norm": 0.1020163968205452,
"learning_rate": 1.3455065302955996e-05,
"loss": 0.5178546905517578,
"step": 960
},
{
"epoch": 1.7619047619047619,
"grad_norm": 0.3099201023578644,
"learning_rate": 1.340305071282432e-05,
"loss": 0.9680802822113037,
"step": 962
},
{
"epoch": 1.7655677655677655,
"grad_norm": 0.4689222574234009,
"learning_rate": 1.3351084606003303e-05,
"loss": 1.0113738775253296,
"step": 964
},
{
"epoch": 1.7692307692307692,
"grad_norm": 0.14611108601093292,
"learning_rate": 1.3299167796031904e-05,
"loss": 1.1686060428619385,
"step": 966
},
{
"epoch": 1.7728937728937728,
"grad_norm": 0.6452361345291138,
"learning_rate": 1.3247301095677334e-05,
"loss": 0.5665026307106018,
"step": 968
},
{
"epoch": 1.7765567765567765,
"grad_norm": 0.24231980741024017,
"learning_rate": 1.3195485316922322e-05,
"loss": 0.8176436424255371,
"step": 970
},
{
"epoch": 1.7802197802197801,
"grad_norm": 1.8586475849151611,
"learning_rate": 1.3143721270952416e-05,
"loss": 0.8834027051925659,
"step": 972
},
{
"epoch": 1.7838827838827838,
"grad_norm": 0.4368327558040619,
"learning_rate": 1.3092009768143276e-05,
"loss": 1.2015571594238281,
"step": 974
},
{
"epoch": 1.7875457875457874,
"grad_norm": 0.16700586676597595,
"learning_rate": 1.3040351618047987e-05,
"loss": 1.17520272731781,
"step": 976
},
{
"epoch": 1.791208791208791,
"grad_norm": 0.16994211077690125,
"learning_rate": 1.2988747629384393e-05,
"loss": 0.9097808599472046,
"step": 978
},
{
"epoch": 1.7948717948717947,
"grad_norm": 0.252722829580307,
"learning_rate": 1.2937198610022422e-05,
"loss": 0.9264700412750244,
"step": 980
},
{
"epoch": 1.7985347985347986,
"grad_norm": 0.2016926258802414,
"learning_rate": 1.2885705366971466e-05,
"loss": 1.1281893253326416,
"step": 982
},
{
"epoch": 1.8021978021978022,
"grad_norm": 0.37757280468940735,
"learning_rate": 1.2834268706367693e-05,
"loss": 1.085845708847046,
"step": 984
},
{
"epoch": 1.8058608058608059,
"grad_norm": 0.14141914248466492,
"learning_rate": 1.2782889433461504e-05,
"loss": 0.625054657459259,
"step": 986
},
{
"epoch": 1.8095238095238095,
"grad_norm": 0.32942265272140503,
"learning_rate": 1.273156835260485e-05,
"loss": 0.793144702911377,
"step": 988
},
{
"epoch": 1.8131868131868132,
"grad_norm": 0.23757028579711914,
"learning_rate": 1.2680306267238703e-05,
"loss": 0.7292351126670837,
"step": 990
},
{
"epoch": 1.8168498168498168,
"grad_norm": 0.19823825359344482,
"learning_rate": 1.2629103979880435e-05,
"loss": 0.7737856507301331,
"step": 992
},
{
"epoch": 1.8205128205128205,
"grad_norm": 0.20640012621879578,
"learning_rate": 1.2577962292111268e-05,
"loss": 0.8391002416610718,
"step": 994
},
{
"epoch": 1.8241758241758241,
"grad_norm": 0.28856727480888367,
"learning_rate": 1.2526882004563725e-05,
"loss": 1.1040489673614502,
"step": 996
},
{
"epoch": 1.8278388278388278,
"grad_norm": 0.27883633971214294,
"learning_rate": 1.2475863916909116e-05,
"loss": 0.8360787034034729,
"step": 998
},
{
"epoch": 1.8315018315018317,
"grad_norm": 0.23196260631084442,
"learning_rate": 1.2424908827844971e-05,
"loss": 1.102561354637146,
"step": 1000
},
{
"epoch": 1.8351648351648353,
"grad_norm": 0.37346774339675903,
"learning_rate": 1.2374017535082588e-05,
"loss": 1.1243289709091187,
"step": 1002
},
{
"epoch": 1.838827838827839,
"grad_norm": 0.30218714475631714,
"learning_rate": 1.232319083533452e-05,
"loss": 1.1285227537155151,
"step": 1004
},
{
"epoch": 1.8424908424908426,
"grad_norm": 0.6622296571731567,
"learning_rate": 1.2272429524302087e-05,
"loss": 1.1261472702026367,
"step": 1006
},
{
"epoch": 1.8461538461538463,
"grad_norm": 0.4616243243217468,
"learning_rate": 1.2221734396662956e-05,
"loss": 0.7861797213554382,
"step": 1008
},
{
"epoch": 1.84981684981685,
"grad_norm": 0.19894073903560638,
"learning_rate": 1.2171106246058676e-05,
"loss": 1.0877313613891602,
"step": 1010
},
{
"epoch": 1.8534798534798536,
"grad_norm": 0.17219941318035126,
"learning_rate": 1.212054586508225e-05,
"loss": 0.7592611312866211,
"step": 1012
},
{
"epoch": 1.8571428571428572,
"grad_norm": 0.1579883098602295,
"learning_rate": 1.2070054045265746e-05,
"loss": 1.1483254432678223,
"step": 1014
},
{
"epoch": 1.8608058608058609,
"grad_norm": 0.3374389410018921,
"learning_rate": 1.2019631577067883e-05,
"loss": 1.1327685117721558,
"step": 1016
},
{
"epoch": 1.8644688644688645,
"grad_norm": 0.5364305973052979,
"learning_rate": 1.1969279249861678e-05,
"loss": 1.2064580917358398,
"step": 1018
},
{
"epoch": 1.8681318681318682,
"grad_norm": 0.09599751979112625,
"learning_rate": 1.1918997851922078e-05,
"loss": 0.9491077661514282,
"step": 1020
},
{
"epoch": 1.8717948717948718,
"grad_norm": 0.942574679851532,
"learning_rate": 1.1868788170413608e-05,
"loss": 1.0983389616012573,
"step": 1022
},
{
"epoch": 1.8754578754578755,
"grad_norm": 0.16687260568141937,
"learning_rate": 1.1818650991378069e-05,
"loss": 0.5345883965492249,
"step": 1024
},
{
"epoch": 1.879120879120879,
"grad_norm": 0.19313207268714905,
"learning_rate": 1.1768587099722221e-05,
"loss": 1.136318325996399,
"step": 1026
},
{
"epoch": 1.8827838827838828,
"grad_norm": 0.5639015436172485,
"learning_rate": 1.171859727920549e-05,
"loss": 0.9058306813240051,
"step": 1028
},
{
"epoch": 1.8864468864468864,
"grad_norm": 0.1919068694114685,
"learning_rate": 1.1668682312427716e-05,
"loss": 1.1377428770065308,
"step": 1030
},
{
"epoch": 1.89010989010989,
"grad_norm": 0.23700670897960663,
"learning_rate": 1.1618842980816885e-05,
"loss": 0.9264558553695679,
"step": 1032
},
{
"epoch": 1.8937728937728937,
"grad_norm": 1.17103111743927,
"learning_rate": 1.1569080064616892e-05,
"loss": 0.7801966071128845,
"step": 1034
},
{
"epoch": 1.8974358974358974,
"grad_norm": 0.3914653956890106,
"learning_rate": 1.1519394342875344e-05,
"loss": 1.174466609954834,
"step": 1036
},
{
"epoch": 1.901098901098901,
"grad_norm": 0.26649200916290283,
"learning_rate": 1.1469786593431362e-05,
"loss": 1.132654070854187,
"step": 1038
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.4480797052383423,
"learning_rate": 1.1420257592903375e-05,
"loss": 1.1989985704421997,
"step": 1040
},
{
"epoch": 1.9084249084249083,
"grad_norm": 0.13727733492851257,
"learning_rate": 1.1370808116677003e-05,
"loss": 0.5950027108192444,
"step": 1042
},
{
"epoch": 1.912087912087912,
"grad_norm": 0.21032774448394775,
"learning_rate": 1.1321438938892891e-05,
"loss": 0.8601200580596924,
"step": 1044
},
{
"epoch": 1.9157509157509156,
"grad_norm": 0.5848696231842041,
"learning_rate": 1.127215083243459e-05,
"loss": 1.1005771160125732,
"step": 1046
},
{
"epoch": 1.9194139194139193,
"grad_norm": 0.2439524233341217,
"learning_rate": 1.1222944568916477e-05,
"loss": 0.42851558327674866,
"step": 1048
},
{
"epoch": 1.9230769230769231,
"grad_norm": 0.25956040620803833,
"learning_rate": 1.1173820918671653e-05,
"loss": 0.8421081900596619,
"step": 1050
},
{
"epoch": 1.9267399267399268,
"grad_norm": 0.24298180639743805,
"learning_rate": 1.1124780650739898e-05,
"loss": 0.796335756778717,
"step": 1052
},
{
"epoch": 1.9304029304029304,
"grad_norm": 0.38977521657943726,
"learning_rate": 1.1075824532855632e-05,
"loss": 1.1081124544143677,
"step": 1054
},
{
"epoch": 1.934065934065934,
"grad_norm": 0.12988051772117615,
"learning_rate": 1.1026953331435875e-05,
"loss": 0.7289028167724609,
"step": 1056
},
{
"epoch": 1.9377289377289377,
"grad_norm": 0.21617981791496277,
"learning_rate": 1.0978167811568275e-05,
"loss": 0.9675345420837402,
"step": 1058
},
{
"epoch": 1.9413919413919414,
"grad_norm": 0.3005717098712921,
"learning_rate": 1.092946873699913e-05,
"loss": 1.0561689138412476,
"step": 1060
},
{
"epoch": 1.945054945054945,
"grad_norm": 0.21354320645332336,
"learning_rate": 1.0880856870121389e-05,
"loss": 0.815872848033905,
"step": 1062
},
{
"epoch": 1.9487179487179487,
"grad_norm": 0.1887470930814743,
"learning_rate": 1.0832332971962779e-05,
"loss": 0.7894753217697144,
"step": 1064
},
{
"epoch": 1.9523809523809523,
"grad_norm": 0.16195048391819,
"learning_rate": 1.0783897802173859e-05,
"loss": 1.143406867980957,
"step": 1066
},
{
"epoch": 1.9560439560439562,
"grad_norm": 0.36799949407577515,
"learning_rate": 1.07355521190161e-05,
"loss": 1.1662700176239014,
"step": 1068
},
{
"epoch": 1.9597069597069599,
"grad_norm": 0.14215613901615143,
"learning_rate": 1.0687296679350072e-05,
"loss": 0.6720587611198425,
"step": 1070
},
{
"epoch": 1.9633699633699635,
"grad_norm": 0.18162143230438232,
"learning_rate": 1.063913223862357e-05,
"loss": 0.8290569186210632,
"step": 1072
},
{
"epoch": 1.9670329670329672,
"grad_norm": 0.357164204120636,
"learning_rate": 1.0591059550859753e-05,
"loss": 1.134069561958313,
"step": 1074
},
{
"epoch": 1.9706959706959708,
"grad_norm": 0.17122821509838104,
"learning_rate": 1.0543079368645398e-05,
"loss": 1.1059997081756592,
"step": 1076
},
{
"epoch": 1.9743589743589745,
"grad_norm": 1.0652984380722046,
"learning_rate": 1.0495192443119076e-05,
"loss": 0.9779770374298096,
"step": 1078
},
{
"epoch": 1.978021978021978,
"grad_norm": 0.4729015529155731,
"learning_rate": 1.044739952395942e-05,
"loss": 0.9445675611495972,
"step": 1080
},
{
"epoch": 1.9816849816849818,
"grad_norm": 0.20131130516529083,
"learning_rate": 1.039970135937337e-05,
"loss": 1.0889768600463867,
"step": 1082
},
{
"epoch": 1.9853479853479854,
"grad_norm": 0.062233198434114456,
"learning_rate": 1.0352098696084461e-05,
"loss": 0.7019329071044922,
"step": 1084
},
{
"epoch": 1.989010989010989,
"grad_norm": 0.4449816048145294,
"learning_rate": 1.0304592279321138e-05,
"loss": 0.9577308893203735,
"step": 1086
},
{
"epoch": 1.9926739926739927,
"grad_norm": 0.5346907377243042,
"learning_rate": 1.02571828528051e-05,
"loss": 0.9598353505134583,
"step": 1088
},
{
"epoch": 1.9963369963369964,
"grad_norm": 0.12335490435361862,
"learning_rate": 1.0209871158739632e-05,
"loss": 0.9065490961074829,
"step": 1090
},
{
"epoch": 2.0,
"grad_norm": 0.15246722102165222,
"learning_rate": 1.0162657937798014e-05,
"loss": 1.0153100490570068,
"step": 1092
},
{
"epoch": 2.0036630036630036,
"grad_norm": 0.2916777729988098,
"learning_rate": 1.0115543929111896e-05,
"loss": 1.041072964668274,
"step": 1094
},
{
"epoch": 2.0073260073260073,
"grad_norm": 0.32960745692253113,
"learning_rate": 1.0068529870259744e-05,
"loss": 1.115196704864502,
"step": 1096
},
{
"epoch": 2.010989010989011,
"grad_norm": 0.2304605394601822,
"learning_rate": 1.0021616497255306e-05,
"loss": 1.0467466115951538,
"step": 1098
},
{
"epoch": 2.0146520146520146,
"grad_norm": 0.23327237367630005,
"learning_rate": 9.97480454453607e-06,
"loss": 1.1273348331451416,
"step": 1100
},
{
"epoch": 2.0183150183150182,
"grad_norm": 0.22104838490486145,
"learning_rate": 9.928094744951743e-06,
"loss": 0.807178795337677,
"step": 1102
},
{
"epoch": 2.021978021978022,
"grad_norm": 0.20996502041816711,
"learning_rate": 9.881487829752845e-06,
"loss": 1.1244595050811768,
"step": 1104
},
{
"epoch": 2.0256410256410255,
"grad_norm": 0.29240646958351135,
"learning_rate": 9.834984528579202e-06,
"loss": 0.7429551482200623,
"step": 1106
},
{
"epoch": 2.029304029304029,
"grad_norm": 0.417041540145874,
"learning_rate": 9.788585569448547e-06,
"loss": 1.1591891050338745,
"step": 1108
},
{
"epoch": 2.032967032967033,
"grad_norm": 0.2673436105251312,
"learning_rate": 9.742291678745116e-06,
"loss": 0.8807531595230103,
"step": 1110
},
{
"epoch": 2.0366300366300365,
"grad_norm": 0.056937478482723236,
"learning_rate": 9.696103581208279e-06,
"loss": 0.5669000744819641,
"step": 1112
},
{
"epoch": 2.04029304029304,
"grad_norm": 0.3005562126636505,
"learning_rate": 9.650021999921201e-06,
"loss": 0.8045889139175415,
"step": 1114
},
{
"epoch": 2.043956043956044,
"grad_norm": 0.21965253353118896,
"learning_rate": 9.604047656299518e-06,
"loss": 1.1283351182937622,
"step": 1116
},
{
"epoch": 2.0476190476190474,
"grad_norm": 0.413673460483551,
"learning_rate": 9.558181270080027e-06,
"loss": 1.0900503396987915,
"step": 1118
},
{
"epoch": 2.051282051282051,
"grad_norm": 0.32383593916893005,
"learning_rate": 9.512423559309438e-06,
"loss": 0.7838891744613647,
"step": 1120
},
{
"epoch": 2.0549450549450547,
"grad_norm": 0.4343249797821045,
"learning_rate": 9.46677524033314e-06,
"loss": 1.2635877132415771,
"step": 1122
},
{
"epoch": 2.0586080586080584,
"grad_norm": 0.21069136261940002,
"learning_rate": 9.421237027783945e-06,
"loss": 1.1110318899154663,
"step": 1124
},
{
"epoch": 2.062271062271062,
"grad_norm": 0.33282044529914856,
"learning_rate": 9.37580963457096e-06,
"loss": 0.7298644185066223,
"step": 1126
},
{
"epoch": 2.065934065934066,
"grad_norm": 1.3254681825637817,
"learning_rate": 9.330493771868376e-06,
"loss": 1.0816922187805176,
"step": 1128
},
{
"epoch": 2.06959706959707,
"grad_norm": 0.15853942930698395,
"learning_rate": 9.285290149104353e-06,
"loss": 1.062219500541687,
"step": 1130
},
{
"epoch": 2.0732600732600734,
"grad_norm": 0.5063254833221436,
"learning_rate": 9.240199473949919e-06,
"loss": 1.1059317588806152,
"step": 1132
},
{
"epoch": 2.076923076923077,
"grad_norm": 0.21920864284038544,
"learning_rate": 9.195222452307901e-06,
"loss": 1.0895856618881226,
"step": 1134
},
{
"epoch": 2.0805860805860807,
"grad_norm": 0.484958291053772,
"learning_rate": 9.15035978830183e-06,
"loss": 0.6875643730163574,
"step": 1136
},
{
"epoch": 2.0842490842490844,
"grad_norm": 0.14739057421684265,
"learning_rate": 9.105612184264966e-06,
"loss": 1.0934994220733643,
"step": 1138
},
{
"epoch": 2.087912087912088,
"grad_norm": 0.2776440978050232,
"learning_rate": 9.060980340729273e-06,
"loss": 0.9783607125282288,
"step": 1140
},
{
"epoch": 2.0915750915750917,
"grad_norm": 0.4692443609237671,
"learning_rate": 9.01646495641448e-06,
"loss": 1.0817747116088867,
"step": 1142
},
{
"epoch": 2.0952380952380953,
"grad_norm": 0.08335136622190475,
"learning_rate": 8.972066728217119e-06,
"loss": 0.3554433584213257,
"step": 1144
},
{
"epoch": 2.098901098901099,
"grad_norm": 0.4141383767127991,
"learning_rate": 8.927786351199602e-06,
"loss": 0.8502892255783081,
"step": 1146
},
{
"epoch": 2.1025641025641026,
"grad_norm": 0.1972590684890747,
"learning_rate": 8.883624518579383e-06,
"loss": 0.7862613797187805,
"step": 1148
},
{
"epoch": 2.1062271062271063,
"grad_norm": 0.34781157970428467,
"learning_rate": 8.839581921718077e-06,
"loss": 0.7815465927124023,
"step": 1150
},
{
"epoch": 2.10989010989011,
"grad_norm": 0.47747403383255005,
"learning_rate": 8.795659250110636e-06,
"loss": 0.9284940958023071,
"step": 1152
},
{
"epoch": 2.1135531135531136,
"grad_norm": 0.23778925836086273,
"learning_rate": 8.751857191374557e-06,
"loss": 0.7169199585914612,
"step": 1154
},
{
"epoch": 2.1172161172161172,
"grad_norm": 0.4363749921321869,
"learning_rate": 8.708176431239132e-06,
"loss": 1.0282258987426758,
"step": 1156
},
{
"epoch": 2.120879120879121,
"grad_norm": 0.4536285102367401,
"learning_rate": 8.664617653534689e-06,
"loss": 0.9804978966712952,
"step": 1158
},
{
"epoch": 2.1245421245421245,
"grad_norm": 0.6671960353851318,
"learning_rate": 8.62118154018191e-06,
"loss": 0.8351024985313416,
"step": 1160
},
{
"epoch": 2.128205128205128,
"grad_norm": 0.23498846590518951,
"learning_rate": 8.577868771181137e-06,
"loss": 0.7204782366752625,
"step": 1162
},
{
"epoch": 2.131868131868132,
"grad_norm": 0.3603092432022095,
"learning_rate": 8.534680024601725e-06,
"loss": 0.8736187815666199,
"step": 1164
},
{
"epoch": 2.1355311355311355,
"grad_norm": 0.7029445171356201,
"learning_rate": 8.491615976571454e-06,
"loss": 0.3972383737564087,
"step": 1166
},
{
"epoch": 2.139194139194139,
"grad_norm": 0.4033673405647278,
"learning_rate": 8.448677301265912e-06,
"loss": 0.39612606167793274,
"step": 1168
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.13584394752979279,
"learning_rate": 8.405864670897965e-06,
"loss": 0.8458325862884521,
"step": 1170
},
{
"epoch": 2.1465201465201464,
"grad_norm": 0.4635600745677948,
"learning_rate": 8.363178755707208e-06,
"loss": 1.0956076383590698,
"step": 1172
},
{
"epoch": 2.15018315018315,
"grad_norm": 0.3557581305503845,
"learning_rate": 8.32062022394949e-06,
"loss": 1.2519314289093018,
"step": 1174
},
{
"epoch": 2.1538461538461537,
"grad_norm": 0.2930179238319397,
"learning_rate": 8.278189741886461e-06,
"loss": 0.7233254909515381,
"step": 1176
},
{
"epoch": 2.1575091575091574,
"grad_norm": 0.2605000138282776,
"learning_rate": 8.235887973775122e-06,
"loss": 1.1106122732162476,
"step": 1178
},
{
"epoch": 2.161172161172161,
"grad_norm": 0.10101241618394852,
"learning_rate": 8.193715581857427e-06,
"loss": 1.106199860572815,
"step": 1180
},
{
"epoch": 2.1648351648351647,
"grad_norm": 0.43605750799179077,
"learning_rate": 8.151673226349922e-06,
"loss": 1.0831317901611328,
"step": 1182
},
{
"epoch": 2.1684981684981683,
"grad_norm": 0.14158222079277039,
"learning_rate": 8.109761565433432e-06,
"loss": 1.121016025543213,
"step": 1184
},
{
"epoch": 2.172161172161172,
"grad_norm": 0.0938902422785759,
"learning_rate": 8.067981255242707e-06,
"loss": 0.2544163167476654,
"step": 1186
},
{
"epoch": 2.1758241758241756,
"grad_norm": 0.45011454820632935,
"learning_rate": 8.02633294985618e-06,
"loss": 1.1836673021316528,
"step": 1188
},
{
"epoch": 2.1794871794871793,
"grad_norm": 0.37969857454299927,
"learning_rate": 7.984817301285743e-06,
"loss": 0.7146179676055908,
"step": 1190
},
{
"epoch": 2.183150183150183,
"grad_norm": 0.6176649332046509,
"learning_rate": 7.943434959466499e-06,
"loss": 0.9474976062774658,
"step": 1192
},
{
"epoch": 2.186813186813187,
"grad_norm": 0.1341545134782791,
"learning_rate": 7.902186572246633e-06,
"loss": 0.7110726237297058,
"step": 1194
},
{
"epoch": 2.1904761904761907,
"grad_norm": 0.3326786160469055,
"learning_rate": 7.861072785377226e-06,
"loss": 0.8745055794715881,
"step": 1196
},
{
"epoch": 2.1941391941391943,
"grad_norm": 1.0451160669326782,
"learning_rate": 7.820094242502165e-06,
"loss": 1.0933572053909302,
"step": 1198
},
{
"epoch": 2.197802197802198,
"grad_norm": 0.3398984372615814,
"learning_rate": 7.779251585148091e-06,
"loss": 0.701580822467804,
"step": 1200
},
{
"epoch": 2.2014652014652016,
"grad_norm": 0.18037645518779755,
"learning_rate": 7.7385454527143e-06,
"loss": 1.1557986736297607,
"step": 1202
},
{
"epoch": 2.2051282051282053,
"grad_norm": 0.3244955539703369,
"learning_rate": 7.697976482462797e-06,
"loss": 1.1087169647216797,
"step": 1204
},
{
"epoch": 2.208791208791209,
"grad_norm": 1.0840704441070557,
"learning_rate": 7.657545309508264e-06,
"loss": 0.4594711661338806,
"step": 1206
},
{
"epoch": 2.2124542124542126,
"grad_norm": 0.49626442790031433,
"learning_rate": 7.617252566808145e-06,
"loss": 0.7104368209838867,
"step": 1208
},
{
"epoch": 2.2161172161172162,
"grad_norm": 0.10250002890825272,
"learning_rate": 7.577098885152746e-06,
"loss": 0.5505958199501038,
"step": 1210
},
{
"epoch": 2.21978021978022,
"grad_norm": 0.2756267488002777,
"learning_rate": 7.537084893155339e-06,
"loss": 1.0437278747558594,
"step": 1212
},
{
"epoch": 2.2234432234432235,
"grad_norm": 0.5076342821121216,
"learning_rate": 7.497211217242321e-06,
"loss": 0.7847756147384644,
"step": 1214
},
{
"epoch": 2.227106227106227,
"grad_norm": 0.3894617557525635,
"learning_rate": 7.457478481643422e-06,
"loss": 0.8570993542671204,
"step": 1216
},
{
"epoch": 2.230769230769231,
"grad_norm": 3.8103349208831787,
"learning_rate": 7.417887308381932e-06,
"loss": 1.0898782014846802,
"step": 1218
},
{
"epoch": 2.2344322344322345,
"grad_norm": 0.24857106804847717,
"learning_rate": 7.378438317264942e-06,
"loss": 1.0935941934585571,
"step": 1220
},
{
"epoch": 2.238095238095238,
"grad_norm": 0.25874483585357666,
"learning_rate": 7.339132125873669e-06,
"loss": 1.1081104278564453,
"step": 1222
},
{
"epoch": 2.241758241758242,
"grad_norm": 0.7777099609375,
"learning_rate": 7.299969349553767e-06,
"loss": 0.45728471875190735,
"step": 1224
},
{
"epoch": 2.2454212454212454,
"grad_norm": 0.2610487937927246,
"learning_rate": 7.260950601405695e-06,
"loss": 1.0573861598968506,
"step": 1226
},
{
"epoch": 2.249084249084249,
"grad_norm": 0.8370478749275208,
"learning_rate": 7.222076492275143e-06,
"loss": 0.5640978217124939,
"step": 1228
},
{
"epoch": 2.2527472527472527,
"grad_norm": 0.20843127369880676,
"learning_rate": 7.183347630743432e-06,
"loss": 0.5819165706634521,
"step": 1230
},
{
"epoch": 2.2564102564102564,
"grad_norm": 1.6624743938446045,
"learning_rate": 7.1447646231180085e-06,
"loss": 0.5743231177330017,
"step": 1232
},
{
"epoch": 2.26007326007326,
"grad_norm": 0.3201636075973511,
"learning_rate": 7.10632807342296e-06,
"loss": 1.057806372642517,
"step": 1234
},
{
"epoch": 2.2637362637362637,
"grad_norm": 0.33637064695358276,
"learning_rate": 7.068038583389534e-06,
"loss": 0.7458894848823547,
"step": 1236
},
{
"epoch": 2.2673992673992673,
"grad_norm": 0.18435066938400269,
"learning_rate": 7.029896752446748e-06,
"loss": 0.9134790897369385,
"step": 1238
},
{
"epoch": 2.271062271062271,
"grad_norm": 0.286512166261673,
"learning_rate": 6.991903177711974e-06,
"loss": 0.821914553642273,
"step": 1240
},
{
"epoch": 2.2747252747252746,
"grad_norm": 0.5748969912528992,
"learning_rate": 6.9540584539816095e-06,
"loss": 0.8821867108345032,
"step": 1242
},
{
"epoch": 2.2783882783882783,
"grad_norm": 0.39332640171051025,
"learning_rate": 6.916363173721768e-06,
"loss": 1.0724459886550903,
"step": 1244
},
{
"epoch": 2.282051282051282,
"grad_norm": 0.09601716697216034,
"learning_rate": 6.878817927058999e-06,
"loss": 0.7068093419075012,
"step": 1246
},
{
"epoch": 2.2857142857142856,
"grad_norm": 1.2170416116714478,
"learning_rate": 6.841423301771039e-06,
"loss": 0.6818535327911377,
"step": 1248
},
{
"epoch": 2.2893772893772892,
"grad_norm": 0.274168998003006,
"learning_rate": 6.804179883277623e-06,
"loss": 1.077278733253479,
"step": 1250
},
{
"epoch": 2.293040293040293,
"grad_norm": 1.0322331190109253,
"learning_rate": 6.76708825463132e-06,
"loss": 0.7963767051696777,
"step": 1252
},
{
"epoch": 2.2967032967032965,
"grad_norm": 0.42168205976486206,
"learning_rate": 6.730148996508395e-06,
"loss": 1.1314303874969482,
"step": 1254
},
{
"epoch": 2.3003663003663,
"grad_norm": 0.21344096958637238,
"learning_rate": 6.693362687199734e-06,
"loss": 1.1485631465911865,
"step": 1256
},
{
"epoch": 2.304029304029304,
"grad_norm": 0.24906200170516968,
"learning_rate": 6.656729902601769e-06,
"loss": 0.7444168925285339,
"step": 1258
},
{
"epoch": 2.3076923076923075,
"grad_norm": 0.527003824710846,
"learning_rate": 6.620251216207478e-06,
"loss": 1.1242973804473877,
"step": 1260
},
{
"epoch": 2.311355311355311,
"grad_norm": 0.39932265877723694,
"learning_rate": 6.583927199097413e-06,
"loss": 0.985255241394043,
"step": 1262
},
{
"epoch": 2.315018315018315,
"grad_norm": 0.6875927448272705,
"learning_rate": 6.547758419930738e-06,
"loss": 0.8462859392166138,
"step": 1264
},
{
"epoch": 2.3186813186813184,
"grad_norm": 0.31612420082092285,
"learning_rate": 6.51174544493634e-06,
"loss": 0.7504773139953613,
"step": 1266
},
{
"epoch": 2.3223443223443225,
"grad_norm": 0.9443672895431519,
"learning_rate": 6.47588883790397e-06,
"loss": 0.7943575978279114,
"step": 1268
},
{
"epoch": 2.326007326007326,
"grad_norm": 0.861373782157898,
"learning_rate": 6.440189160175403e-06,
"loss": 1.1603039503097534,
"step": 1270
},
{
"epoch": 2.32967032967033,
"grad_norm": 0.2723812758922577,
"learning_rate": 6.404646970635663e-06,
"loss": 0.9737915396690369,
"step": 1272
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.35865145921707153,
"learning_rate": 6.369262825704263e-06,
"loss": 0.55995112657547,
"step": 1274
},
{
"epoch": 2.336996336996337,
"grad_norm": 0.2957666218280792,
"learning_rate": 6.334037279326493e-06,
"loss": 0.9831885099411011,
"step": 1276
},
{
"epoch": 2.340659340659341,
"grad_norm": 0.28850671648979187,
"learning_rate": 6.2989708829647665e-06,
"loss": 0.8018981218338013,
"step": 1278
},
{
"epoch": 2.3443223443223444,
"grad_norm": 0.2929391860961914,
"learning_rate": 6.264064185589969e-06,
"loss": 0.743938148021698,
"step": 1280
},
{
"epoch": 2.347985347985348,
"grad_norm": 0.20474638044834137,
"learning_rate": 6.229317733672865e-06,
"loss": 0.5940007567405701,
"step": 1282
},
{
"epoch": 2.3516483516483517,
"grad_norm": 0.28941309452056885,
"learning_rate": 6.194732071175547e-06,
"loss": 1.0675817728042603,
"step": 1284
},
{
"epoch": 2.3553113553113554,
"grad_norm": 0.44983819127082825,
"learning_rate": 6.160307739542927e-06,
"loss": 0.5571872591972351,
"step": 1286
},
{
"epoch": 2.358974358974359,
"grad_norm": 0.6326022148132324,
"learning_rate": 6.126045277694242e-06,
"loss": 0.9014766216278076,
"step": 1288
},
{
"epoch": 2.3626373626373627,
"grad_norm": 0.19782356917858124,
"learning_rate": 6.091945222014643e-06,
"loss": 0.8522058725357056,
"step": 1290
},
{
"epoch": 2.3663003663003663,
"grad_norm": 0.2529771625995636,
"learning_rate": 6.058008106346765e-06,
"loss": 0.7481414675712585,
"step": 1292
},
{
"epoch": 2.36996336996337,
"grad_norm": 1.297630786895752,
"learning_rate": 6.0242344619823924e-06,
"loss": 0.8610998392105103,
"step": 1294
},
{
"epoch": 2.3736263736263736,
"grad_norm": 0.19470833241939545,
"learning_rate": 5.99062481765415e-06,
"loss": 1.1782889366149902,
"step": 1296
},
{
"epoch": 2.3772893772893773,
"grad_norm": 0.2811782658100128,
"learning_rate": 5.95717969952719e-06,
"loss": 1.062893271446228,
"step": 1298
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.18573309481143951,
"learning_rate": 5.9238996311909985e-06,
"loss": 1.1228328943252563,
"step": 1300
},
{
"epoch": 2.3846153846153846,
"grad_norm": 0.37206050753593445,
"learning_rate": 5.890785133651159e-06,
"loss": 0.8302013278007507,
"step": 1302
},
{
"epoch": 2.3882783882783882,
"grad_norm": 1.416977882385254,
"learning_rate": 5.857836725321219e-06,
"loss": 0.44114768505096436,
"step": 1304
},
{
"epoch": 2.391941391941392,
"grad_norm": 0.25646287202835083,
"learning_rate": 5.825054922014571e-06,
"loss": 0.8713302612304688,
"step": 1306
},
{
"epoch": 2.3956043956043955,
"grad_norm": 0.19512596726417542,
"learning_rate": 5.792440236936386e-06,
"loss": 1.0893810987472534,
"step": 1308
},
{
"epoch": 2.399267399267399,
"grad_norm": 0.27859073877334595,
"learning_rate": 5.759993180675542e-06,
"loss": 0.4952473044395447,
"step": 1310
},
{
"epoch": 2.402930402930403,
"grad_norm": 0.2890464663505554,
"learning_rate": 5.727714261196677e-06,
"loss": 1.092551589012146,
"step": 1312
},
{
"epoch": 2.4065934065934065,
"grad_norm": 0.13183972239494324,
"learning_rate": 5.695603983832217e-06,
"loss": 0.7963238954544067,
"step": 1314
},
{
"epoch": 2.41025641025641,
"grad_norm": 0.251984566450119,
"learning_rate": 5.663662851274458e-06,
"loss": 1.0940154790878296,
"step": 1316
},
{
"epoch": 2.413919413919414,
"grad_norm": 0.3628130555152893,
"learning_rate": 5.631891363567699e-06,
"loss": 0.733137309551239,
"step": 1318
},
{
"epoch": 2.4175824175824174,
"grad_norm": 0.3337002694606781,
"learning_rate": 5.600290018100429e-06,
"loss": 0.7282842397689819,
"step": 1320
},
{
"epoch": 2.421245421245421,
"grad_norm": 0.37955641746520996,
"learning_rate": 5.568859309597517e-06,
"loss": 0.607200562953949,
"step": 1322
},
{
"epoch": 2.4249084249084247,
"grad_norm": 0.22879095375537872,
"learning_rate": 5.537599730112495e-06,
"loss": 0.7371411323547363,
"step": 1324
},
{
"epoch": 2.4285714285714284,
"grad_norm": 0.6866037845611572,
"learning_rate": 5.50651176901982e-06,
"loss": 0.7481037974357605,
"step": 1326
},
{
"epoch": 2.4322344322344325,
"grad_norm": 0.15306240320205688,
"learning_rate": 5.475595913007242e-06,
"loss": 0.6662076115608215,
"step": 1328
},
{
"epoch": 2.435897435897436,
"grad_norm": 0.4582768678665161,
"learning_rate": 5.4448526460681765e-06,
"loss": 0.7923579216003418,
"step": 1330
},
{
"epoch": 2.4395604395604398,
"grad_norm": 0.16219736635684967,
"learning_rate": 5.414282449494118e-06,
"loss": 1.004856824874878,
"step": 1332
},
{
"epoch": 2.4432234432234434,
"grad_norm": 0.9368967413902283,
"learning_rate": 5.3838858018671185e-06,
"loss": 0.6690689921379089,
"step": 1334
},
{
"epoch": 2.446886446886447,
"grad_norm": 0.3386232256889343,
"learning_rate": 5.353663179052286e-06,
"loss": 0.61957848072052,
"step": 1336
},
{
"epoch": 2.4505494505494507,
"grad_norm": 0.4595009982585907,
"learning_rate": 5.323615054190335e-06,
"loss": 0.9979118704795837,
"step": 1338
},
{
"epoch": 2.4542124542124544,
"grad_norm": 0.5019035935401917,
"learning_rate": 5.293741897690192e-06,
"loss": 0.8308780789375305,
"step": 1340
},
{
"epoch": 2.457875457875458,
"grad_norm": 0.07066061347723007,
"learning_rate": 5.264044177221619e-06,
"loss": 0.5998324155807495,
"step": 1342
},
{
"epoch": 2.4615384615384617,
"grad_norm": 0.14323291182518005,
"learning_rate": 5.23452235770788e-06,
"loss": 0.7848570346832275,
"step": 1344
},
{
"epoch": 2.4652014652014653,
"grad_norm": 0.6829573512077332,
"learning_rate": 5.205176901318497e-06,
"loss": 1.0242949724197388,
"step": 1346
},
{
"epoch": 2.468864468864469,
"grad_norm": 0.2986952066421509,
"learning_rate": 5.176008267461988e-06,
"loss": 1.0757365226745605,
"step": 1348
},
{
"epoch": 2.4725274725274726,
"grad_norm": 0.30357444286346436,
"learning_rate": 5.14701691277868e-06,
"loss": 0.6865537166595459,
"step": 1350
},
{
"epoch": 2.4761904761904763,
"grad_norm": 0.3342417776584625,
"learning_rate": 5.118203291133559e-06,
"loss": 0.7819088101387024,
"step": 1352
},
{
"epoch": 2.47985347985348,
"grad_norm": 0.4895050525665283,
"learning_rate": 5.0895678536091705e-06,
"loss": 0.8793179392814636,
"step": 1354
},
{
"epoch": 2.4835164835164836,
"grad_norm": 0.0322754792869091,
"learning_rate": 5.061111048498556e-06,
"loss": 0.8886638283729553,
"step": 1356
},
{
"epoch": 2.4871794871794872,
"grad_norm": 0.23428204655647278,
"learning_rate": 5.032833321298238e-06,
"loss": 1.0602209568023682,
"step": 1358
},
{
"epoch": 2.490842490842491,
"grad_norm": 0.4217919111251831,
"learning_rate": 5.004735114701233e-06,
"loss": 0.8512158989906311,
"step": 1360
},
{
"epoch": 2.4945054945054945,
"grad_norm": 0.3208416998386383,
"learning_rate": 4.97681686859013e-06,
"loss": 0.7614161372184753,
"step": 1362
},
{
"epoch": 2.498168498168498,
"grad_norm": 0.46275365352630615,
"learning_rate": 4.949079020030214e-06,
"loss": 0.7351796627044678,
"step": 1364
},
{
"epoch": 2.501831501831502,
"grad_norm": 0.2943776249885559,
"learning_rate": 4.921522003262595e-06,
"loss": 1.0767401456832886,
"step": 1366
},
{
"epoch": 2.5054945054945055,
"grad_norm": 0.13059748709201813,
"learning_rate": 4.89414624969745e-06,
"loss": 0.7166144847869873,
"step": 1368
},
{
"epoch": 2.509157509157509,
"grad_norm": 1.816151738166809,
"learning_rate": 4.8669521879072295e-06,
"loss": 0.8858543634414673,
"step": 1370
},
{
"epoch": 2.5128205128205128,
"grad_norm": 0.145990252494812,
"learning_rate": 4.839940243619968e-06,
"loss": 0.6029247641563416,
"step": 1372
},
{
"epoch": 2.5164835164835164,
"grad_norm": 0.281157523393631,
"learning_rate": 4.813110839712629e-06,
"loss": 0.7404302954673767,
"step": 1374
},
{
"epoch": 2.52014652014652,
"grad_norm": 0.7384238243103027,
"learning_rate": 4.786464396204463e-06,
"loss": 0.9365022778511047,
"step": 1376
},
{
"epoch": 2.5238095238095237,
"grad_norm": 0.2541159391403198,
"learning_rate": 4.760001330250443e-06,
"loss": 0.7452998161315918,
"step": 1378
},
{
"epoch": 2.5274725274725274,
"grad_norm": 0.16486378014087677,
"learning_rate": 4.733722056134734e-06,
"loss": 0.8475244641304016,
"step": 1380
},
{
"epoch": 2.531135531135531,
"grad_norm": 0.03009054809808731,
"learning_rate": 4.707626985264201e-06,
"loss": 0.8245116472244263,
"step": 1382
},
{
"epoch": 2.5347985347985347,
"grad_norm": 0.15181203186511993,
"learning_rate": 4.681716526161982e-06,
"loss": 0.7709764838218689,
"step": 1384
},
{
"epoch": 2.5384615384615383,
"grad_norm": 0.8943299651145935,
"learning_rate": 4.655991084461084e-06,
"loss": 1.0719456672668457,
"step": 1386
},
{
"epoch": 2.542124542124542,
"grad_norm": 0.26511991024017334,
"learning_rate": 4.630451062898016e-06,
"loss": 0.7442341446876526,
"step": 1388
},
{
"epoch": 2.5457875457875456,
"grad_norm": 0.1325429081916809,
"learning_rate": 4.6050968613065214e-06,
"loss": 0.7767658233642578,
"step": 1390
},
{
"epoch": 2.5494505494505493,
"grad_norm": 0.29752233624458313,
"learning_rate": 4.579928876611288e-06,
"loss": 0.9441577196121216,
"step": 1392
},
{
"epoch": 2.553113553113553,
"grad_norm": 0.20435328781604767,
"learning_rate": 4.554947502821745e-06,
"loss": 0.6972905993461609,
"step": 1394
},
{
"epoch": 2.5567765567765566,
"grad_norm": 0.10555291175842285,
"learning_rate": 4.53015313102589e-06,
"loss": 0.30237066745758057,
"step": 1396
},
{
"epoch": 2.5604395604395602,
"grad_norm": 0.12275908142328262,
"learning_rate": 4.505546149384179e-06,
"loss": 1.0943961143493652,
"step": 1398
},
{
"epoch": 2.564102564102564,
"grad_norm": 0.357999324798584,
"learning_rate": 4.481126943123428e-06,
"loss": 1.0810667276382446,
"step": 1400
},
{
"epoch": 2.5677655677655675,
"grad_norm": 0.21030539274215698,
"learning_rate": 4.45689589453081e-06,
"loss": 0.8171728849411011,
"step": 1402
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.17423567175865173,
"learning_rate": 4.432853382947845e-06,
"loss": 1.0781317949295044,
"step": 1404
},
{
"epoch": 2.575091575091575,
"grad_norm": 0.625450611114502,
"learning_rate": 4.408999784764466e-06,
"loss": 0.4379269778728485,
"step": 1406
},
{
"epoch": 2.578754578754579,
"grad_norm": 0.6425191164016724,
"learning_rate": 4.3853354734131475e-06,
"loss": 1.1319248676300049,
"step": 1408
},
{
"epoch": 2.5824175824175826,
"grad_norm": 0.7310582995414734,
"learning_rate": 4.361860819363036e-06,
"loss": 0.8296176195144653,
"step": 1410
},
{
"epoch": 2.586080586080586,
"grad_norm": 0.8113889694213867,
"learning_rate": 4.338576190114154e-06,
"loss": 0.860105574131012,
"step": 1412
},
{
"epoch": 2.58974358974359,
"grad_norm": 0.1353234499692917,
"learning_rate": 4.315481950191659e-06,
"loss": 0.9085178375244141,
"step": 1414
},
{
"epoch": 2.5934065934065935,
"grad_norm": 0.1562958061695099,
"learning_rate": 4.292578461140117e-06,
"loss": 1.063266396522522,
"step": 1416
},
{
"epoch": 2.597069597069597,
"grad_norm": 0.6112256646156311,
"learning_rate": 4.269866081517867e-06,
"loss": 0.7252788543701172,
"step": 1418
},
{
"epoch": 2.600732600732601,
"grad_norm": 0.3065638244152069,
"learning_rate": 4.2473451668913935e-06,
"loss": 1.058609127998352,
"step": 1420
},
{
"epoch": 2.6043956043956045,
"grad_norm": 0.7296745181083679,
"learning_rate": 4.225016069829747e-06,
"loss": 1.003778100013733,
"step": 1422
},
{
"epoch": 2.608058608058608,
"grad_norm": 1.7596337795257568,
"learning_rate": 4.2028791398990525e-06,
"loss": 1.1591205596923828,
"step": 1424
},
{
"epoch": 2.6117216117216118,
"grad_norm": 0.2541678249835968,
"learning_rate": 4.180934723657021e-06,
"loss": 1.1994425058364868,
"step": 1426
},
{
"epoch": 2.6153846153846154,
"grad_norm": 0.25000277161598206,
"learning_rate": 4.159183164647525e-06,
"loss": 1.1111985445022583,
"step": 1428
},
{
"epoch": 2.619047619047619,
"grad_norm": 0.1384405642747879,
"learning_rate": 4.137624803395217e-06,
"loss": 1.056754469871521,
"step": 1430
},
{
"epoch": 2.6227106227106227,
"grad_norm": 0.3041793406009674,
"learning_rate": 4.116259977400214e-06,
"loss": 0.8524636626243591,
"step": 1432
},
{
"epoch": 2.6263736263736264,
"grad_norm": 0.45003196597099304,
"learning_rate": 4.0950890211327875e-06,
"loss": 0.9021925926208496,
"step": 1434
},
{
"epoch": 2.63003663003663,
"grad_norm": 0.4313253164291382,
"learning_rate": 4.0741122660281595e-06,
"loss": 1.2607853412628174,
"step": 1436
},
{
"epoch": 2.6336996336996337,
"grad_norm": 0.2014421969652176,
"learning_rate": 4.053330040481287e-06,
"loss": 1.1134793758392334,
"step": 1438
},
{
"epoch": 2.6373626373626373,
"grad_norm": 0.20670831203460693,
"learning_rate": 4.032742669841728e-06,
"loss": 1.0734494924545288,
"step": 1440
},
{
"epoch": 2.641025641025641,
"grad_norm": 0.1786481887102127,
"learning_rate": 4.012350476408563e-06,
"loss": 1.0962891578674316,
"step": 1442
},
{
"epoch": 2.6446886446886446,
"grad_norm": 0.567785918712616,
"learning_rate": 3.992153779425325e-06,
"loss": 1.0348354578018188,
"step": 1444
},
{
"epoch": 2.6483516483516483,
"grad_norm": 0.29523712396621704,
"learning_rate": 3.972152895075025e-06,
"loss": 0.7652750611305237,
"step": 1446
},
{
"epoch": 2.652014652014652,
"grad_norm": 0.1414322853088379,
"learning_rate": 3.952348136475182e-06,
"loss": 0.7914263010025024,
"step": 1448
},
{
"epoch": 2.6556776556776556,
"grad_norm": 0.16183124482631683,
"learning_rate": 3.932739813672935e-06,
"loss": 1.0636370182037354,
"step": 1450
},
{
"epoch": 2.659340659340659,
"grad_norm": 0.4736715257167816,
"learning_rate": 3.913328233640182e-06,
"loss": 0.6482869982719421,
"step": 1452
},
{
"epoch": 2.663003663003663,
"grad_norm": 0.06749854981899261,
"learning_rate": 3.894113700268784e-06,
"loss": 0.6567748785018921,
"step": 1454
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.5604910850524902,
"learning_rate": 3.8750965143657906e-06,
"loss": 1.042414903640747,
"step": 1456
},
{
"epoch": 2.67032967032967,
"grad_norm": 0.18970206379890442,
"learning_rate": 3.8562769736487434e-06,
"loss": 0.713631808757782,
"step": 1458
},
{
"epoch": 2.6739926739926743,
"grad_norm": 0.07728546112775803,
"learning_rate": 3.8376553727410175e-06,
"loss": 0.7130610942840576,
"step": 1460
},
{
"epoch": 2.677655677655678,
"grad_norm": 0.17561237514019012,
"learning_rate": 3.819232003167198e-06,
"loss": 0.7644565105438232,
"step": 1462
},
{
"epoch": 2.6813186813186816,
"grad_norm": 0.8455438017845154,
"learning_rate": 3.801007153348521e-06,
"loss": 1.0220839977264404,
"step": 1464
},
{
"epoch": 2.684981684981685,
"grad_norm": 0.18888050317764282,
"learning_rate": 3.7829811085983675e-06,
"loss": 0.8391492366790771,
"step": 1466
},
{
"epoch": 2.688644688644689,
"grad_norm": 0.4664005637168884,
"learning_rate": 3.765154151117778e-06,
"loss": 1.0013421773910522,
"step": 1468
},
{
"epoch": 2.6923076923076925,
"grad_norm": 0.5776689648628235,
"learning_rate": 3.747526559991056e-06,
"loss": 0.6194254159927368,
"step": 1470
},
{
"epoch": 2.695970695970696,
"grad_norm": 0.7255598306655884,
"learning_rate": 3.7300986111813788e-06,
"loss": 0.734879732131958,
"step": 1472
},
{
"epoch": 2.6996336996337,
"grad_norm": 0.31850919127464294,
"learning_rate": 3.7128705775264885e-06,
"loss": 0.40417546033859253,
"step": 1474
},
{
"epoch": 2.7032967032967035,
"grad_norm": 0.14459845423698425,
"learning_rate": 3.695842728734425e-06,
"loss": 1.110984206199646,
"step": 1476
},
{
"epoch": 2.706959706959707,
"grad_norm": 0.1938379853963852,
"learning_rate": 3.6790153313792904e-06,
"loss": 0.6085017919540405,
"step": 1478
},
{
"epoch": 2.7106227106227108,
"grad_norm": 0.21937760710716248,
"learning_rate": 3.662388648897086e-06,
"loss": 1.06271493434906,
"step": 1480
},
{
"epoch": 2.7142857142857144,
"grad_norm": 0.2004575878381729,
"learning_rate": 3.6459629415815826e-06,
"loss": 1.077580451965332,
"step": 1482
},
{
"epoch": 2.717948717948718,
"grad_norm": 0.23940826952457428,
"learning_rate": 3.629738466580249e-06,
"loss": 0.9757429361343384,
"step": 1484
},
{
"epoch": 2.7216117216117217,
"grad_norm": 0.1522216647863388,
"learning_rate": 3.6137154778902252e-06,
"loss": 1.1125359535217285,
"step": 1486
},
{
"epoch": 2.7252747252747254,
"grad_norm": 0.3499241769313812,
"learning_rate": 3.5978942263543494e-06,
"loss": 1.1122651100158691,
"step": 1488
},
{
"epoch": 2.728937728937729,
"grad_norm": 0.3598516285419464,
"learning_rate": 3.5822749596572212e-06,
"loss": 0.47823137044906616,
"step": 1490
},
{
"epoch": 2.7326007326007327,
"grad_norm": 0.16963931918144226,
"learning_rate": 3.5668579223213327e-06,
"loss": 1.1062474250793457,
"step": 1492
},
{
"epoch": 2.7362637362637363,
"grad_norm": 0.21394677460193634,
"learning_rate": 3.5516433557032396e-06,
"loss": 0.6056263446807861,
"step": 1494
},
{
"epoch": 2.73992673992674,
"grad_norm": 0.15244966745376587,
"learning_rate": 3.5366314979897804e-06,
"loss": 1.1712305545806885,
"step": 1496
},
{
"epoch": 2.7435897435897436,
"grad_norm": 0.15322832763195038,
"learning_rate": 3.5218225841943505e-06,
"loss": 1.059677243232727,
"step": 1498
},
{
"epoch": 2.7472527472527473,
"grad_norm": 0.3913209140300751,
"learning_rate": 3.5072168461532164e-06,
"loss": 0.7366938591003418,
"step": 1500
},
{
"epoch": 2.750915750915751,
"grad_norm": 0.3031376898288727,
"learning_rate": 3.492814512521892e-06,
"loss": 1.0051300525665283,
"step": 1502
},
{
"epoch": 2.7545787545787546,
"grad_norm": 0.1896335482597351,
"learning_rate": 3.4786158087715646e-06,
"loss": 1.0847052335739136,
"step": 1504
},
{
"epoch": 2.758241758241758,
"grad_norm": 0.15088102221488953,
"learning_rate": 3.4646209571855467e-06,
"loss": 1.0809240341186523,
"step": 1506
},
{
"epoch": 2.761904761904762,
"grad_norm": 0.0716128796339035,
"learning_rate": 3.450830176855816e-06,
"loss": 0.8830230832099915,
"step": 1508
},
{
"epoch": 2.7655677655677655,
"grad_norm": 0.18978838622570038,
"learning_rate": 3.437243683679577e-06,
"loss": 0.9485403895378113,
"step": 1510
},
{
"epoch": 2.769230769230769,
"grad_norm": 0.30790194869041443,
"learning_rate": 3.4238616903558755e-06,
"loss": 0.8989760279655457,
"step": 1512
},
{
"epoch": 2.772893772893773,
"grad_norm": 0.24927957355976105,
"learning_rate": 3.4106844063822806e-06,
"loss": 1.0658760070800781,
"step": 1514
},
{
"epoch": 2.7765567765567765,
"grad_norm": 0.325456440448761,
"learning_rate": 3.397712038051595e-06,
"loss": 0.978020429611206,
"step": 1516
},
{
"epoch": 2.78021978021978,
"grad_norm": 0.062030892819166183,
"learning_rate": 3.3849447884486317e-06,
"loss": 0.9507291913032532,
"step": 1518
},
{
"epoch": 2.7838827838827838,
"grad_norm": 0.306851327419281,
"learning_rate": 3.372382857447029e-06,
"loss": 0.4814833700656891,
"step": 1520
},
{
"epoch": 2.7875457875457874,
"grad_norm": 0.6837298274040222,
"learning_rate": 3.360026441706132e-06,
"loss": 0.7256458401679993,
"step": 1522
},
{
"epoch": 2.791208791208791,
"grad_norm": 0.22658671438694,
"learning_rate": 3.3478757346678978e-06,
"loss": 0.8262401223182678,
"step": 1524
},
{
"epoch": 2.7948717948717947,
"grad_norm": 0.21471837162971497,
"learning_rate": 3.335930926553878e-06,
"loss": 1.1161385774612427,
"step": 1526
},
{
"epoch": 2.7985347985347984,
"grad_norm": 0.1410653293132782,
"learning_rate": 3.324192204362245e-06,
"loss": 1.067908525466919,
"step": 1528
},
{
"epoch": 2.802197802197802,
"grad_norm": 0.22485174238681793,
"learning_rate": 3.3126597518648514e-06,
"loss": 1.078850507736206,
"step": 1530
},
{
"epoch": 2.8058608058608057,
"grad_norm": 0.2020944207906723,
"learning_rate": 3.301333749604362e-06,
"loss": 0.9673135876655579,
"step": 1532
},
{
"epoch": 2.8095238095238093,
"grad_norm": 0.1964569091796875,
"learning_rate": 3.2902143748914256e-06,
"loss": 0.7351154088973999,
"step": 1534
},
{
"epoch": 2.813186813186813,
"grad_norm": 0.21178947389125824,
"learning_rate": 3.279301801801897e-06,
"loss": 1.2468332052230835,
"step": 1536
},
{
"epoch": 2.8168498168498166,
"grad_norm": 0.19619597494602203,
"learning_rate": 3.2685962011741165e-06,
"loss": 0.8753875494003296,
"step": 1538
},
{
"epoch": 2.8205128205128203,
"grad_norm": 0.22640056908130646,
"learning_rate": 3.2580977406062313e-06,
"loss": 0.7495487928390503,
"step": 1540
},
{
"epoch": 2.824175824175824,
"grad_norm": 0.14980071783065796,
"learning_rate": 3.24780658445357e-06,
"loss": 0.8365137577056885,
"step": 1542
},
{
"epoch": 2.8278388278388276,
"grad_norm": 0.17696566879749298,
"learning_rate": 3.237722893826076e-06,
"loss": 1.0462545156478882,
"step": 1544
},
{
"epoch": 2.8315018315018317,
"grad_norm": 0.3583213984966278,
"learning_rate": 3.2278468265857805e-06,
"loss": 1.0920370817184448,
"step": 1546
},
{
"epoch": 2.8351648351648353,
"grad_norm": 0.1547580361366272,
"learning_rate": 3.218178537344335e-06,
"loss": 1.1620938777923584,
"step": 1548
},
{
"epoch": 2.838827838827839,
"grad_norm": 0.35260820388793945,
"learning_rate": 3.208718177460581e-06,
"loss": 0.7892690896987915,
"step": 1550
},
{
"epoch": 2.8424908424908426,
"grad_norm": 0.04815944656729698,
"learning_rate": 3.199465895038196e-06,
"loss": 0.5339184403419495,
"step": 1552
},
{
"epoch": 2.8461538461538463,
"grad_norm": 0.15083415806293488,
"learning_rate": 3.19042183492336e-06,
"loss": 1.0953764915466309,
"step": 1554
},
{
"epoch": 2.84981684981685,
"grad_norm": 0.18838690221309662,
"learning_rate": 3.1815861387025012e-06,
"loss": 0.869490921497345,
"step": 1556
},
{
"epoch": 2.8534798534798536,
"grad_norm": 0.23804758489131927,
"learning_rate": 3.1729589447000673e-06,
"loss": 1.0569744110107422,
"step": 1558
},
{
"epoch": 2.857142857142857,
"grad_norm": 1.9223437309265137,
"learning_rate": 3.164540387976365e-06,
"loss": 0.5447185039520264,
"step": 1560
},
{
"epoch": 2.860805860805861,
"grad_norm": 0.691882848739624,
"learning_rate": 3.1563306003254506e-06,
"loss": 0.657587468624115,
"step": 1562
},
{
"epoch": 2.8644688644688645,
"grad_norm": 0.15072926878929138,
"learning_rate": 3.1483297102730584e-06,
"loss": 0.824471116065979,
"step": 1564
},
{
"epoch": 2.868131868131868,
"grad_norm": 0.7301844358444214,
"learning_rate": 3.1405378430745944e-06,
"loss": 0.6243996620178223,
"step": 1566
},
{
"epoch": 2.871794871794872,
"grad_norm": 0.3597879707813263,
"learning_rate": 3.1329551207131714e-06,
"loss": 0.694602370262146,
"step": 1568
},
{
"epoch": 2.8754578754578755,
"grad_norm": 0.0546979196369648,
"learning_rate": 3.1255816618977038e-06,
"loss": 0.6581955552101135,
"step": 1570
},
{
"epoch": 2.879120879120879,
"grad_norm": 0.16773147881031036,
"learning_rate": 3.1184175820610454e-06,
"loss": 0.6336574554443359,
"step": 1572
},
{
"epoch": 2.8827838827838828,
"grad_norm": 0.23260916769504547,
"learning_rate": 3.111462993358183e-06,
"loss": 0.9589300155639648,
"step": 1574
},
{
"epoch": 2.8864468864468864,
"grad_norm": 0.33641043305397034,
"learning_rate": 3.104718004664481e-06,
"loss": 0.9026850461959839,
"step": 1576
},
{
"epoch": 2.89010989010989,
"grad_norm": 0.04960886761546135,
"learning_rate": 3.09818272157398e-06,
"loss": 0.8551188111305237,
"step": 1578
},
{
"epoch": 2.8937728937728937,
"grad_norm": 0.48587101697921753,
"learning_rate": 3.0918572463977376e-06,
"loss": 1.1478712558746338,
"step": 1580
},
{
"epoch": 2.8974358974358974,
"grad_norm": 1.4616154432296753,
"learning_rate": 3.085741678162231e-06,
"loss": 0.7970556616783142,
"step": 1582
},
{
"epoch": 2.901098901098901,
"grad_norm": 0.0801958218216896,
"learning_rate": 3.079836112607805e-06,
"loss": 0.7464568614959717,
"step": 1584
},
{
"epoch": 2.9047619047619047,
"grad_norm": 0.29297682642936707,
"learning_rate": 3.074140642187176e-06,
"loss": 0.7447859644889832,
"step": 1586
},
{
"epoch": 2.9084249084249083,
"grad_norm": 0.36564430594444275,
"learning_rate": 3.068655356063979e-06,
"loss": 1.1103063821792603,
"step": 1588
},
{
"epoch": 2.912087912087912,
"grad_norm": 0.18257354199886322,
"learning_rate": 3.063380340111379e-06,
"loss": 1.1408166885375977,
"step": 1590
},
{
"epoch": 2.9157509157509156,
"grad_norm": 0.29381296038627625,
"learning_rate": 3.0583156769107198e-06,
"loss": 0.43143635988235474,
"step": 1592
},
{
"epoch": 2.9194139194139193,
"grad_norm": 0.2944744825363159,
"learning_rate": 3.0534614457502347e-06,
"loss": 1.1063368320465088,
"step": 1594
},
{
"epoch": 2.9230769230769234,
"grad_norm": 0.22950078547000885,
"learning_rate": 3.0488177226238068e-06,
"loss": 0.7216494083404541,
"step": 1596
},
{
"epoch": 2.926739926739927,
"grad_norm": 0.26734691858291626,
"learning_rate": 3.0443845802297755e-06,
"loss": 1.123450517654419,
"step": 1598
},
{
"epoch": 2.9304029304029307,
"grad_norm": 0.05690891668200493,
"learning_rate": 3.0401620879697976e-06,
"loss": 0.7358987331390381,
"step": 1600
},
{
"epoch": 2.9340659340659343,
"grad_norm": 0.1592796891927719,
"learning_rate": 3.0361503119477703e-06,
"loss": 1.1032930612564087,
"step": 1602
},
{
"epoch": 2.937728937728938,
"grad_norm": 0.13142678141593933,
"learning_rate": 3.032349314968781e-06,
"loss": 0.8035831451416016,
"step": 1604
},
{
"epoch": 2.9413919413919416,
"grad_norm": 0.2165064513683319,
"learning_rate": 3.028759156538139e-06,
"loss": 1.0834027528762817,
"step": 1606
},
{
"epoch": 2.9450549450549453,
"grad_norm": 0.1683638095855713,
"learning_rate": 3.025379892860435e-06,
"loss": 0.673204779624939,
"step": 1608
},
{
"epoch": 2.948717948717949,
"grad_norm": 0.42520633339881897,
"learning_rate": 3.022211576838662e-06,
"loss": 0.990286111831665,
"step": 1610
},
{
"epoch": 2.9523809523809526,
"grad_norm": 0.7551265954971313,
"learning_rate": 3.0192542580733894e-06,
"loss": 0.8319298624992371,
"step": 1612
},
{
"epoch": 2.956043956043956,
"grad_norm": 0.39639556407928467,
"learning_rate": 3.016507982861989e-06,
"loss": 0.6866034269332886,
"step": 1614
},
{
"epoch": 2.95970695970696,
"grad_norm": 0.1401127725839615,
"learning_rate": 3.013972794197901e-06,
"loss": 0.7790732383728027,
"step": 1616
},
{
"epoch": 2.9633699633699635,
"grad_norm": 0.3247089385986328,
"learning_rate": 3.0116487317699732e-06,
"loss": 1.3222743272781372,
"step": 1618
},
{
"epoch": 2.967032967032967,
"grad_norm": 0.26521044969558716,
"learning_rate": 3.009535831961828e-06,
"loss": 1.1993693113327026,
"step": 1620
},
{
"epoch": 2.970695970695971,
"grad_norm": 0.519102931022644,
"learning_rate": 3.007634127851303e-06,
"loss": 0.8764250874519348,
"step": 1622
},
{
"epoch": 2.9743589743589745,
"grad_norm": 0.7569882273674011,
"learning_rate": 3.005943649209923e-06,
"loss": 0.7481356859207153,
"step": 1624
},
{
"epoch": 2.978021978021978,
"grad_norm": 0.14655716717243195,
"learning_rate": 3.0044644225024444e-06,
"loss": 0.8299301266670227,
"step": 1626
},
{
"epoch": 2.9816849816849818,
"grad_norm": 0.1977003514766693,
"learning_rate": 3.003196470886432e-06,
"loss": 0.9096018671989441,
"step": 1628
},
{
"epoch": 2.9853479853479854,
"grad_norm": 0.18637628853321075,
"learning_rate": 3.002139814211902e-06,
"loss": 1.3610368967056274,
"step": 1630
},
{
"epoch": 2.989010989010989,
"grad_norm": 0.4177876114845276,
"learning_rate": 3.0012944690210082e-06,
"loss": 0.9605894088745117,
"step": 1632
},
{
"epoch": 2.9926739926739927,
"grad_norm": 0.2067408710718155,
"learning_rate": 3.000660448547786e-06,
"loss": 0.7927770614624023,
"step": 1634
},
{
"epoch": 2.9963369963369964,
"grad_norm": 0.1492747664451599,
"learning_rate": 3.0002377627179435e-06,
"loss": 1.2240785360336304,
"step": 1636
},
{
"epoch": 3.0,
"grad_norm": 0.29741978645324707,
"learning_rate": 3.0000264181487013e-06,
"loss": 1.0408343076705933,
"step": 1638
},
{
"epoch": 3.0,
"step": 1638,
"total_flos": 8.4482141520606e+18,
"train_loss": 0.9984596982616499,
"train_runtime": 58109.1712,
"train_samples_per_second": 0.677,
"train_steps_per_second": 0.028
}
],
"logging_steps": 2,
"max_steps": 1638,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 99999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.4482141520606e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}