test_model / trainer_state.json
liyang619's picture
Upload folder using huggingface_hub
45caf2e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0710377267927425,
"eval_steps": 500,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003839877123932034,
"grad_norm": 9.855803343729887,
"learning_rate": 1.9984639016897083e-05,
"loss": 0.9511,
"step": 10
},
{
"epoch": 0.007679754247864068,
"grad_norm": 7.900613749182144,
"learning_rate": 1.9969278033794163e-05,
"loss": 0.1502,
"step": 20
},
{
"epoch": 0.011519631371796103,
"grad_norm": 0.22927913857697063,
"learning_rate": 1.9953917050691244e-05,
"loss": 0.0234,
"step": 30
},
{
"epoch": 0.015359508495728137,
"grad_norm": 0.08089452288889179,
"learning_rate": 1.993855606758833e-05,
"loss": 0.0068,
"step": 40
},
{
"epoch": 0.01919938561966017,
"grad_norm": 0.1111623671137423,
"learning_rate": 1.992319508448541e-05,
"loss": 0.0042,
"step": 50
},
{
"epoch": 0.023039262743592206,
"grad_norm": 0.0743738067650186,
"learning_rate": 1.990783410138249e-05,
"loss": 0.0027,
"step": 60
},
{
"epoch": 0.02687913986752424,
"grad_norm": 0.08445026035259467,
"learning_rate": 1.989247311827957e-05,
"loss": 0.0023,
"step": 70
},
{
"epoch": 0.030719016991456273,
"grad_norm": 0.032381204224608405,
"learning_rate": 1.9877112135176652e-05,
"loss": 0.0019,
"step": 80
},
{
"epoch": 0.03455889411538831,
"grad_norm": 0.034738835887687565,
"learning_rate": 1.9861751152073733e-05,
"loss": 0.0015,
"step": 90
},
{
"epoch": 0.03839877123932034,
"grad_norm": 0.03125979639961295,
"learning_rate": 1.9846390168970814e-05,
"loss": 0.0012,
"step": 100
},
{
"epoch": 0.042238648363252376,
"grad_norm": 0.027922819061575684,
"learning_rate": 1.98310291858679e-05,
"loss": 0.001,
"step": 110
},
{
"epoch": 0.04607852548718441,
"grad_norm": 0.027107384881600128,
"learning_rate": 1.981566820276498e-05,
"loss": 0.0009,
"step": 120
},
{
"epoch": 0.04991840261111644,
"grad_norm": 0.030852661769757015,
"learning_rate": 1.980030721966206e-05,
"loss": 0.0008,
"step": 130
},
{
"epoch": 0.05375827973504848,
"grad_norm": 0.017018191812887797,
"learning_rate": 1.978494623655914e-05,
"loss": 0.0007,
"step": 140
},
{
"epoch": 0.05759815685898051,
"grad_norm": 0.02039007906173804,
"learning_rate": 1.9769585253456222e-05,
"loss": 0.0007,
"step": 150
},
{
"epoch": 0.061438033982912546,
"grad_norm": 0.01751117077975313,
"learning_rate": 1.9754224270353303e-05,
"loss": 0.0007,
"step": 160
},
{
"epoch": 0.06527791110684458,
"grad_norm": 0.025527484156853922,
"learning_rate": 1.9738863287250384e-05,
"loss": 0.0007,
"step": 170
},
{
"epoch": 0.06911778823077662,
"grad_norm": 0.010356304510994301,
"learning_rate": 1.9723502304147465e-05,
"loss": 0.0006,
"step": 180
},
{
"epoch": 0.07295766535470866,
"grad_norm": 0.019221562602418918,
"learning_rate": 1.970814132104455e-05,
"loss": 0.0006,
"step": 190
},
{
"epoch": 0.07679754247864068,
"grad_norm": 0.019408746838376397,
"learning_rate": 1.969278033794163e-05,
"loss": 0.0006,
"step": 200
},
{
"epoch": 0.08063741960257272,
"grad_norm": 0.012272661989691892,
"learning_rate": 1.967741935483871e-05,
"loss": 0.0006,
"step": 210
},
{
"epoch": 0.08447729672650475,
"grad_norm": 0.01575335759164804,
"learning_rate": 1.9662058371735792e-05,
"loss": 0.0006,
"step": 220
},
{
"epoch": 0.08831717385043679,
"grad_norm": 0.02337266868962172,
"learning_rate": 1.9646697388632873e-05,
"loss": 0.0006,
"step": 230
},
{
"epoch": 0.09215705097436883,
"grad_norm": 0.02044371550228021,
"learning_rate": 1.9631336405529954e-05,
"loss": 0.0005,
"step": 240
},
{
"epoch": 0.09599692809830085,
"grad_norm": 0.013592727470100728,
"learning_rate": 1.9615975422427035e-05,
"loss": 0.0005,
"step": 250
},
{
"epoch": 0.09983680522223289,
"grad_norm": 0.01722960420346088,
"learning_rate": 1.960061443932412e-05,
"loss": 0.0006,
"step": 260
},
{
"epoch": 0.10367668234616492,
"grad_norm": 0.010831279896438627,
"learning_rate": 1.95852534562212e-05,
"loss": 0.0005,
"step": 270
},
{
"epoch": 0.10751655947009696,
"grad_norm": 0.04396421107808547,
"learning_rate": 1.956989247311828e-05,
"loss": 0.0007,
"step": 280
},
{
"epoch": 0.111356436594029,
"grad_norm": 0.009538347218684752,
"learning_rate": 1.9554531490015362e-05,
"loss": 0.0007,
"step": 290
},
{
"epoch": 0.11519631371796102,
"grad_norm": 0.019732833272054093,
"learning_rate": 1.9539170506912443e-05,
"loss": 0.0006,
"step": 300
},
{
"epoch": 0.11903619084189306,
"grad_norm": 0.01102864765215555,
"learning_rate": 1.9523809523809524e-05,
"loss": 0.0006,
"step": 310
},
{
"epoch": 0.12287606796582509,
"grad_norm": 0.022789866556699984,
"learning_rate": 1.9508448540706605e-05,
"loss": 0.0008,
"step": 320
},
{
"epoch": 0.12671594508975711,
"grad_norm": 0.026179745030787457,
"learning_rate": 1.949308755760369e-05,
"loss": 0.0007,
"step": 330
},
{
"epoch": 0.13055582221368917,
"grad_norm": 0.006770076864326156,
"learning_rate": 1.947772657450077e-05,
"loss": 0.0006,
"step": 340
},
{
"epoch": 0.1343956993376212,
"grad_norm": 0.012631828755799612,
"learning_rate": 1.946236559139785e-05,
"loss": 0.0006,
"step": 350
},
{
"epoch": 0.13823557646155324,
"grad_norm": 0.012963546321523804,
"learning_rate": 1.9447004608294932e-05,
"loss": 0.0005,
"step": 360
},
{
"epoch": 0.14207545358548526,
"grad_norm": 0.024135972419695974,
"learning_rate": 1.9431643625192013e-05,
"loss": 0.0007,
"step": 370
},
{
"epoch": 0.1459153307094173,
"grad_norm": 0.023687976253774837,
"learning_rate": 1.9416282642089094e-05,
"loss": 0.0007,
"step": 380
},
{
"epoch": 0.14975520783334934,
"grad_norm": 0.027951604107350918,
"learning_rate": 1.9400921658986175e-05,
"loss": 0.0007,
"step": 390
},
{
"epoch": 0.15359508495728136,
"grad_norm": 0.010865274419369686,
"learning_rate": 1.9385560675883256e-05,
"loss": 0.0006,
"step": 400
},
{
"epoch": 0.1574349620812134,
"grad_norm": 0.010649058165805126,
"learning_rate": 1.937019969278034e-05,
"loss": 0.0006,
"step": 410
},
{
"epoch": 0.16127483920514543,
"grad_norm": 0.017247417697752888,
"learning_rate": 1.935483870967742e-05,
"loss": 0.0005,
"step": 420
},
{
"epoch": 0.16511471632907748,
"grad_norm": 0.009598794250166713,
"learning_rate": 1.9339477726574502e-05,
"loss": 0.0005,
"step": 430
},
{
"epoch": 0.1689545934530095,
"grad_norm": 0.013190891135776709,
"learning_rate": 1.9324116743471583e-05,
"loss": 0.0005,
"step": 440
},
{
"epoch": 0.17279447057694153,
"grad_norm": 0.015198390723045437,
"learning_rate": 1.9308755760368664e-05,
"loss": 0.0005,
"step": 450
},
{
"epoch": 0.17663434770087358,
"grad_norm": 0.00888530246811667,
"learning_rate": 1.9293394777265745e-05,
"loss": 0.0005,
"step": 460
},
{
"epoch": 0.1804742248248056,
"grad_norm": 0.0072193681049761505,
"learning_rate": 1.9278033794162825e-05,
"loss": 0.0005,
"step": 470
},
{
"epoch": 0.18431410194873765,
"grad_norm": 0.015423575939404187,
"learning_rate": 1.926267281105991e-05,
"loss": 0.0004,
"step": 480
},
{
"epoch": 0.18815397907266967,
"grad_norm": 0.005888900378170728,
"learning_rate": 1.924731182795699e-05,
"loss": 0.0004,
"step": 490
},
{
"epoch": 0.1919938561966017,
"grad_norm": 0.009370771214962732,
"learning_rate": 1.923195084485407e-05,
"loss": 0.0005,
"step": 500
},
{
"epoch": 0.19583373332053375,
"grad_norm": 0.008880476477762786,
"learning_rate": 1.9216589861751153e-05,
"loss": 0.0004,
"step": 510
},
{
"epoch": 0.19967361044446577,
"grad_norm": 0.014710261381765207,
"learning_rate": 1.9201228878648233e-05,
"loss": 0.0004,
"step": 520
},
{
"epoch": 0.20351348756839782,
"grad_norm": 0.010564538973197759,
"learning_rate": 1.9185867895545314e-05,
"loss": 0.0005,
"step": 530
},
{
"epoch": 0.20735336469232984,
"grad_norm": 0.011022024037436684,
"learning_rate": 1.91705069124424e-05,
"loss": 0.0005,
"step": 540
},
{
"epoch": 0.21119324181626187,
"grad_norm": 0.010224510974710398,
"learning_rate": 1.915514592933948e-05,
"loss": 0.0005,
"step": 550
},
{
"epoch": 0.21503311894019392,
"grad_norm": 0.018187207031321574,
"learning_rate": 1.913978494623656e-05,
"loss": 0.0004,
"step": 560
},
{
"epoch": 0.21887299606412594,
"grad_norm": 0.01408592901908285,
"learning_rate": 1.912442396313364e-05,
"loss": 0.0004,
"step": 570
},
{
"epoch": 0.222712873188058,
"grad_norm": 0.00579354434664508,
"learning_rate": 1.9109062980030722e-05,
"loss": 0.0004,
"step": 580
},
{
"epoch": 0.22655275031199001,
"grad_norm": 0.007179585527455935,
"learning_rate": 1.9093701996927803e-05,
"loss": 0.0004,
"step": 590
},
{
"epoch": 0.23039262743592204,
"grad_norm": 0.01087115590194149,
"learning_rate": 1.9078341013824884e-05,
"loss": 0.0004,
"step": 600
},
{
"epoch": 0.2342325045598541,
"grad_norm": 0.013488801384563282,
"learning_rate": 1.906298003072197e-05,
"loss": 0.0004,
"step": 610
},
{
"epoch": 0.2380723816837861,
"grad_norm": 0.014270453003895688,
"learning_rate": 1.904761904761905e-05,
"loss": 0.0004,
"step": 620
},
{
"epoch": 0.24191225880771816,
"grad_norm": 0.005608293770659859,
"learning_rate": 1.903225806451613e-05,
"loss": 0.0004,
"step": 630
},
{
"epoch": 0.24575213593165018,
"grad_norm": 0.01942922314775013,
"learning_rate": 1.901689708141321e-05,
"loss": 0.0004,
"step": 640
},
{
"epoch": 0.24959201305558223,
"grad_norm": 0.008807942106723612,
"learning_rate": 1.9001536098310292e-05,
"loss": 0.0004,
"step": 650
},
{
"epoch": 0.25343189017951423,
"grad_norm": 0.009642123808480296,
"learning_rate": 1.8986175115207373e-05,
"loss": 0.0004,
"step": 660
},
{
"epoch": 0.2572717673034463,
"grad_norm": 0.010839972501351821,
"learning_rate": 1.8970814132104458e-05,
"loss": 0.0004,
"step": 670
},
{
"epoch": 0.26111164442737833,
"grad_norm": 0.007016830642797472,
"learning_rate": 1.895545314900154e-05,
"loss": 0.0004,
"step": 680
},
{
"epoch": 0.26495152155131035,
"grad_norm": 0.01036596596633793,
"learning_rate": 1.894009216589862e-05,
"loss": 0.0004,
"step": 690
},
{
"epoch": 0.2687913986752424,
"grad_norm": 0.00699112176439698,
"learning_rate": 1.89247311827957e-05,
"loss": 0.0004,
"step": 700
},
{
"epoch": 0.2726312757991744,
"grad_norm": 0.023143846781726766,
"learning_rate": 1.890937019969278e-05,
"loss": 0.0004,
"step": 710
},
{
"epoch": 0.2764711529231065,
"grad_norm": 0.014371836855906761,
"learning_rate": 1.8894009216589862e-05,
"loss": 0.0004,
"step": 720
},
{
"epoch": 0.2803110300470385,
"grad_norm": 0.013134481657118259,
"learning_rate": 1.8878648233486943e-05,
"loss": 0.0004,
"step": 730
},
{
"epoch": 0.2841509071709705,
"grad_norm": 0.00936736410572265,
"learning_rate": 1.8863287250384027e-05,
"loss": 0.0004,
"step": 740
},
{
"epoch": 0.28799078429490255,
"grad_norm": 0.0177202255192513,
"learning_rate": 1.8847926267281108e-05,
"loss": 0.0004,
"step": 750
},
{
"epoch": 0.2918306614188346,
"grad_norm": 0.006114501253180761,
"learning_rate": 1.883256528417819e-05,
"loss": 0.0004,
"step": 760
},
{
"epoch": 0.29567053854276665,
"grad_norm": 0.008265452601599499,
"learning_rate": 1.881720430107527e-05,
"loss": 0.0004,
"step": 770
},
{
"epoch": 0.29951041566669867,
"grad_norm": 0.014907543135145678,
"learning_rate": 1.880184331797235e-05,
"loss": 0.0004,
"step": 780
},
{
"epoch": 0.3033502927906307,
"grad_norm": 0.014797049223610434,
"learning_rate": 1.8786482334869432e-05,
"loss": 0.0004,
"step": 790
},
{
"epoch": 0.3071901699145627,
"grad_norm": 0.004652330628741432,
"learning_rate": 1.8771121351766516e-05,
"loss": 0.0003,
"step": 800
},
{
"epoch": 0.3110300470384948,
"grad_norm": 0.005493451170013414,
"learning_rate": 1.8755760368663597e-05,
"loss": 0.0004,
"step": 810
},
{
"epoch": 0.3148699241624268,
"grad_norm": 0.007045732848967435,
"learning_rate": 1.8740399385560678e-05,
"loss": 0.0003,
"step": 820
},
{
"epoch": 0.31870980128635884,
"grad_norm": 0.018179892197985704,
"learning_rate": 1.872503840245776e-05,
"loss": 0.0004,
"step": 830
},
{
"epoch": 0.32254967841029086,
"grad_norm": 0.005668747866614938,
"learning_rate": 1.870967741935484e-05,
"loss": 0.0003,
"step": 840
},
{
"epoch": 0.3263895555342229,
"grad_norm": 0.005624631016307953,
"learning_rate": 1.869431643625192e-05,
"loss": 0.0003,
"step": 850
},
{
"epoch": 0.33022943265815496,
"grad_norm": 0.012701139148209117,
"learning_rate": 1.8678955453149005e-05,
"loss": 0.0004,
"step": 860
},
{
"epoch": 0.334069309782087,
"grad_norm": 0.020568594933285278,
"learning_rate": 1.8663594470046086e-05,
"loss": 0.0004,
"step": 870
},
{
"epoch": 0.337909186906019,
"grad_norm": 0.016752438047633097,
"learning_rate": 1.8648233486943167e-05,
"loss": 0.0003,
"step": 880
},
{
"epoch": 0.34174906402995103,
"grad_norm": 0.022040592531457844,
"learning_rate": 1.8632872503840248e-05,
"loss": 0.0003,
"step": 890
},
{
"epoch": 0.34558894115388306,
"grad_norm": 0.005501761305071796,
"learning_rate": 1.861751152073733e-05,
"loss": 0.0003,
"step": 900
},
{
"epoch": 0.34942881827781513,
"grad_norm": 0.00913565126513445,
"learning_rate": 1.860215053763441e-05,
"loss": 0.0003,
"step": 910
},
{
"epoch": 0.35326869540174716,
"grad_norm": 0.00643352891432492,
"learning_rate": 1.858678955453149e-05,
"loss": 0.0004,
"step": 920
},
{
"epoch": 0.3571085725256792,
"grad_norm": 0.005932277310972733,
"learning_rate": 1.8571428571428575e-05,
"loss": 0.0003,
"step": 930
},
{
"epoch": 0.3609484496496112,
"grad_norm": 0.00854566416057147,
"learning_rate": 1.8556067588325656e-05,
"loss": 0.0004,
"step": 940
},
{
"epoch": 0.3647883267735432,
"grad_norm": 0.004198303186077754,
"learning_rate": 1.8540706605222737e-05,
"loss": 0.0003,
"step": 950
},
{
"epoch": 0.3686282038974753,
"grad_norm": 0.006013969871660999,
"learning_rate": 1.8525345622119818e-05,
"loss": 0.0003,
"step": 960
},
{
"epoch": 0.3724680810214073,
"grad_norm": 0.00821940173101188,
"learning_rate": 1.85099846390169e-05,
"loss": 0.0003,
"step": 970
},
{
"epoch": 0.37630795814533935,
"grad_norm": 0.01706199510535657,
"learning_rate": 1.849462365591398e-05,
"loss": 0.0003,
"step": 980
},
{
"epoch": 0.3801478352692714,
"grad_norm": 0.012195179945468527,
"learning_rate": 1.8479262672811064e-05,
"loss": 0.0003,
"step": 990
},
{
"epoch": 0.3839877123932034,
"grad_norm": 0.00807499700581235,
"learning_rate": 1.8463901689708145e-05,
"loss": 0.0003,
"step": 1000
},
{
"epoch": 0.3878275895171355,
"grad_norm": 0.010945346397012758,
"learning_rate": 1.8448540706605226e-05,
"loss": 0.0003,
"step": 1010
},
{
"epoch": 0.3916674666410675,
"grad_norm": 0.00880396626822876,
"learning_rate": 1.8433179723502307e-05,
"loss": 0.0003,
"step": 1020
},
{
"epoch": 0.3955073437649995,
"grad_norm": 0.014399672052520717,
"learning_rate": 1.8417818740399388e-05,
"loss": 0.0003,
"step": 1030
},
{
"epoch": 0.39934722088893154,
"grad_norm": 0.006831959770996685,
"learning_rate": 1.840245775729647e-05,
"loss": 0.0003,
"step": 1040
},
{
"epoch": 0.40318709801286357,
"grad_norm": 18.516333985308375,
"learning_rate": 1.838709677419355e-05,
"loss": 0.0478,
"step": 1050
},
{
"epoch": 0.40702697513679564,
"grad_norm": 0.6477183946866085,
"learning_rate": 1.837173579109063e-05,
"loss": 0.1816,
"step": 1060
},
{
"epoch": 0.41086685226072767,
"grad_norm": 2.6861506025108475,
"learning_rate": 1.8356374807987715e-05,
"loss": 0.0536,
"step": 1070
},
{
"epoch": 0.4147067293846597,
"grad_norm": 0.4416236285189527,
"learning_rate": 1.8341013824884796e-05,
"loss": 0.0249,
"step": 1080
},
{
"epoch": 0.4185466065085917,
"grad_norm": 0.9786805141459802,
"learning_rate": 1.8325652841781877e-05,
"loss": 0.0101,
"step": 1090
},
{
"epoch": 0.42238648363252373,
"grad_norm": 1.627598109506058,
"learning_rate": 1.8310291858678958e-05,
"loss": 0.0108,
"step": 1100
},
{
"epoch": 0.4262263607564558,
"grad_norm": 2.2472146744387635,
"learning_rate": 1.829493087557604e-05,
"loss": 0.0101,
"step": 1110
},
{
"epoch": 0.43006623788038784,
"grad_norm": 0.5695485826329719,
"learning_rate": 1.827956989247312e-05,
"loss": 0.0043,
"step": 1120
},
{
"epoch": 0.43390611500431986,
"grad_norm": 1.530827959431516,
"learning_rate": 1.82642089093702e-05,
"loss": 0.013,
"step": 1130
},
{
"epoch": 0.4377459921282519,
"grad_norm": 0.029509683112095193,
"learning_rate": 1.8248847926267285e-05,
"loss": 0.0113,
"step": 1140
},
{
"epoch": 0.4415858692521839,
"grad_norm": 0.01781992132649757,
"learning_rate": 1.8233486943164366e-05,
"loss": 0.0007,
"step": 1150
},
{
"epoch": 0.445425746376116,
"grad_norm": 0.010151888479514436,
"learning_rate": 1.8218125960061447e-05,
"loss": 0.0005,
"step": 1160
},
{
"epoch": 0.449265623500048,
"grad_norm": 0.014420471837514583,
"learning_rate": 1.8202764976958527e-05,
"loss": 0.0005,
"step": 1170
},
{
"epoch": 0.45310550062398003,
"grad_norm": 0.010072124184727966,
"learning_rate": 1.818740399385561e-05,
"loss": 0.0004,
"step": 1180
},
{
"epoch": 0.45694537774791205,
"grad_norm": 0.007661769308843087,
"learning_rate": 1.817204301075269e-05,
"loss": 0.0004,
"step": 1190
},
{
"epoch": 0.4607852548718441,
"grad_norm": 0.010038812848366137,
"learning_rate": 1.815668202764977e-05,
"loss": 0.0004,
"step": 1200
},
{
"epoch": 0.46462513199577615,
"grad_norm": 0.011258850363850582,
"learning_rate": 1.8141321044546855e-05,
"loss": 0.0004,
"step": 1210
},
{
"epoch": 0.4684650091197082,
"grad_norm": 0.016055405689836853,
"learning_rate": 1.8125960061443936e-05,
"loss": 0.0004,
"step": 1220
},
{
"epoch": 0.4723048862436402,
"grad_norm": 0.007967416713376401,
"learning_rate": 1.8110599078341016e-05,
"loss": 0.0004,
"step": 1230
},
{
"epoch": 0.4761447633675722,
"grad_norm": 0.008025613224993348,
"learning_rate": 1.8095238095238097e-05,
"loss": 0.0003,
"step": 1240
},
{
"epoch": 0.47998464049150424,
"grad_norm": 0.004966250706848606,
"learning_rate": 1.8079877112135178e-05,
"loss": 0.0004,
"step": 1250
},
{
"epoch": 0.4838245176154363,
"grad_norm": 0.007419454029578721,
"learning_rate": 1.806451612903226e-05,
"loss": 0.0004,
"step": 1260
},
{
"epoch": 0.48766439473936835,
"grad_norm": 0.007372896744459003,
"learning_rate": 1.804915514592934e-05,
"loss": 0.0004,
"step": 1270
},
{
"epoch": 0.49150427186330037,
"grad_norm": 0.007260032706837447,
"learning_rate": 1.803379416282642e-05,
"loss": 0.0004,
"step": 1280
},
{
"epoch": 0.4953441489872324,
"grad_norm": 0.006797112689312068,
"learning_rate": 1.8018433179723505e-05,
"loss": 0.0003,
"step": 1290
},
{
"epoch": 0.49918402611116447,
"grad_norm": 0.006596862668254978,
"learning_rate": 1.8003072196620586e-05,
"loss": 0.0003,
"step": 1300
},
{
"epoch": 0.5030239032350965,
"grad_norm": 0.007442569811616661,
"learning_rate": 1.7987711213517667e-05,
"loss": 0.0004,
"step": 1310
},
{
"epoch": 0.5068637803590285,
"grad_norm": 0.011659097052332864,
"learning_rate": 1.7972350230414748e-05,
"loss": 0.0003,
"step": 1320
},
{
"epoch": 0.5107036574829605,
"grad_norm": 0.003189461384393768,
"learning_rate": 1.795698924731183e-05,
"loss": 0.0003,
"step": 1330
},
{
"epoch": 0.5145435346068926,
"grad_norm": 0.006218121023821658,
"learning_rate": 1.794162826420891e-05,
"loss": 0.0003,
"step": 1340
},
{
"epoch": 0.5183834117308246,
"grad_norm": 0.004661385155418944,
"learning_rate": 1.792626728110599e-05,
"loss": 0.0004,
"step": 1350
},
{
"epoch": 0.5222232888547567,
"grad_norm": 0.007451036130599556,
"learning_rate": 1.7910906298003075e-05,
"loss": 0.0003,
"step": 1360
},
{
"epoch": 0.5260631659786886,
"grad_norm": 0.005739057587058598,
"learning_rate": 1.7895545314900156e-05,
"loss": 0.0003,
"step": 1370
},
{
"epoch": 0.5299030431026207,
"grad_norm": 0.008293201974170215,
"learning_rate": 1.7880184331797237e-05,
"loss": 0.0004,
"step": 1380
},
{
"epoch": 0.5337429202265528,
"grad_norm": 0.011616342167072335,
"learning_rate": 1.7864823348694318e-05,
"loss": 0.0003,
"step": 1390
},
{
"epoch": 0.5375827973504848,
"grad_norm": 0.011567680895725766,
"learning_rate": 1.78494623655914e-05,
"loss": 0.0004,
"step": 1400
},
{
"epoch": 0.5414226744744168,
"grad_norm": 0.0057168290679564795,
"learning_rate": 1.783410138248848e-05,
"loss": 0.0003,
"step": 1410
},
{
"epoch": 0.5452625515983488,
"grad_norm": 0.010165783676708838,
"learning_rate": 1.781874039938556e-05,
"loss": 0.0003,
"step": 1420
},
{
"epoch": 0.5491024287222809,
"grad_norm": 0.0045309573507459015,
"learning_rate": 1.7803379416282645e-05,
"loss": 0.0003,
"step": 1430
},
{
"epoch": 0.552942305846213,
"grad_norm": 0.00811076581038844,
"learning_rate": 1.7788018433179726e-05,
"loss": 0.0003,
"step": 1440
},
{
"epoch": 0.5567821829701449,
"grad_norm": 0.009995480779616097,
"learning_rate": 1.7772657450076807e-05,
"loss": 0.0003,
"step": 1450
},
{
"epoch": 0.560622060094077,
"grad_norm": 0.006925240596184182,
"learning_rate": 1.7757296466973888e-05,
"loss": 0.0003,
"step": 1460
},
{
"epoch": 0.564461937218009,
"grad_norm": 0.013412407169843198,
"learning_rate": 1.774193548387097e-05,
"loss": 0.0003,
"step": 1470
},
{
"epoch": 0.568301814341941,
"grad_norm": 0.012787736722349891,
"learning_rate": 1.772657450076805e-05,
"loss": 0.0003,
"step": 1480
},
{
"epoch": 0.5721416914658731,
"grad_norm": 0.007058357804663414,
"learning_rate": 1.771121351766513e-05,
"loss": 0.0003,
"step": 1490
},
{
"epoch": 0.5759815685898051,
"grad_norm": 0.007736272349706681,
"learning_rate": 1.7695852534562215e-05,
"loss": 0.0003,
"step": 1500
},
{
"epoch": 0.5798214457137372,
"grad_norm": 0.0033192017596056908,
"learning_rate": 1.7680491551459296e-05,
"loss": 0.0003,
"step": 1510
},
{
"epoch": 0.5836613228376692,
"grad_norm": 0.014268997426681756,
"learning_rate": 1.7665130568356377e-05,
"loss": 0.0003,
"step": 1520
},
{
"epoch": 0.5875011999616012,
"grad_norm": 0.007258373149860229,
"learning_rate": 1.7649769585253458e-05,
"loss": 0.0003,
"step": 1530
},
{
"epoch": 0.5913410770855333,
"grad_norm": 0.005355993128279297,
"learning_rate": 1.763440860215054e-05,
"loss": 0.0003,
"step": 1540
},
{
"epoch": 0.5951809542094653,
"grad_norm": 0.006539831015011762,
"learning_rate": 1.761904761904762e-05,
"loss": 0.0003,
"step": 1550
},
{
"epoch": 0.5990208313333973,
"grad_norm": 0.0029413603917756745,
"learning_rate": 1.76036866359447e-05,
"loss": 0.0003,
"step": 1560
},
{
"epoch": 0.6028607084573294,
"grad_norm": 0.005311044247403118,
"learning_rate": 1.758832565284178e-05,
"loss": 0.0003,
"step": 1570
},
{
"epoch": 0.6067005855812614,
"grad_norm": 0.008810927875552908,
"learning_rate": 1.7572964669738866e-05,
"loss": 0.0003,
"step": 1580
},
{
"epoch": 0.6105404627051935,
"grad_norm": 0.019081216576469953,
"learning_rate": 1.7557603686635947e-05,
"loss": 0.0003,
"step": 1590
},
{
"epoch": 0.6143803398291254,
"grad_norm": 0.018890578715268194,
"learning_rate": 1.7542242703533028e-05,
"loss": 0.0003,
"step": 1600
},
{
"epoch": 0.6182202169530575,
"grad_norm": 0.006788409840537928,
"learning_rate": 1.752688172043011e-05,
"loss": 0.0003,
"step": 1610
},
{
"epoch": 0.6220600940769896,
"grad_norm": 0.007667765007600492,
"learning_rate": 1.751152073732719e-05,
"loss": 0.0003,
"step": 1620
},
{
"epoch": 0.6258999712009216,
"grad_norm": 0.0042895580282391686,
"learning_rate": 1.749615975422427e-05,
"loss": 0.0003,
"step": 1630
},
{
"epoch": 0.6297398483248536,
"grad_norm": 0.0046351980587696125,
"learning_rate": 1.748079877112135e-05,
"loss": 0.0003,
"step": 1640
},
{
"epoch": 0.6335797254487856,
"grad_norm": 0.0033174467847263173,
"learning_rate": 1.7465437788018436e-05,
"loss": 0.0003,
"step": 1650
},
{
"epoch": 0.6374196025727177,
"grad_norm": 0.005803214350891364,
"learning_rate": 1.7450076804915517e-05,
"loss": 0.0003,
"step": 1660
},
{
"epoch": 0.6412594796966498,
"grad_norm": 0.010355179051111019,
"learning_rate": 1.7434715821812597e-05,
"loss": 0.0003,
"step": 1670
},
{
"epoch": 0.6450993568205817,
"grad_norm": 0.00695229076668098,
"learning_rate": 1.741935483870968e-05,
"loss": 0.0003,
"step": 1680
},
{
"epoch": 0.6489392339445138,
"grad_norm": 0.0026433167192979326,
"learning_rate": 1.740399385560676e-05,
"loss": 0.0003,
"step": 1690
},
{
"epoch": 0.6527791110684458,
"grad_norm": 0.004001528867502428,
"learning_rate": 1.738863287250384e-05,
"loss": 0.0003,
"step": 1700
},
{
"epoch": 0.6566189881923779,
"grad_norm": 0.005032030468208995,
"learning_rate": 1.737327188940092e-05,
"loss": 0.0003,
"step": 1710
},
{
"epoch": 0.6604588653163099,
"grad_norm": 0.012342312681179527,
"learning_rate": 1.7357910906298005e-05,
"loss": 0.0003,
"step": 1720
},
{
"epoch": 0.6642987424402419,
"grad_norm": 0.006477437534892976,
"learning_rate": 1.7342549923195086e-05,
"loss": 0.0003,
"step": 1730
},
{
"epoch": 0.668138619564174,
"grad_norm": 0.003880319040550072,
"learning_rate": 1.7327188940092167e-05,
"loss": 0.0003,
"step": 1740
},
{
"epoch": 0.6719784966881059,
"grad_norm": 0.007668035468060641,
"learning_rate": 1.7311827956989248e-05,
"loss": 0.0003,
"step": 1750
},
{
"epoch": 0.675818373812038,
"grad_norm": 0.011038361878502203,
"learning_rate": 1.729646697388633e-05,
"loss": 0.0003,
"step": 1760
},
{
"epoch": 0.6796582509359701,
"grad_norm": 0.004974769727903427,
"learning_rate": 1.728110599078341e-05,
"loss": 0.0003,
"step": 1770
},
{
"epoch": 0.6834981280599021,
"grad_norm": 0.004325284236550939,
"learning_rate": 1.726574500768049e-05,
"loss": 0.0003,
"step": 1780
},
{
"epoch": 0.6873380051838341,
"grad_norm": 0.0042080867657549705,
"learning_rate": 1.7250384024577572e-05,
"loss": 0.0003,
"step": 1790
},
{
"epoch": 0.6911778823077661,
"grad_norm": 0.0039328487679914535,
"learning_rate": 1.7235023041474656e-05,
"loss": 0.0003,
"step": 1800
},
{
"epoch": 0.6950177594316982,
"grad_norm": 0.004207050570838415,
"learning_rate": 1.7219662058371737e-05,
"loss": 0.0003,
"step": 1810
},
{
"epoch": 0.6988576365556303,
"grad_norm": 0.0036413526215648487,
"learning_rate": 1.7204301075268818e-05,
"loss": 0.0003,
"step": 1820
},
{
"epoch": 0.7026975136795622,
"grad_norm": 0.004178601636330483,
"learning_rate": 1.71889400921659e-05,
"loss": 0.0003,
"step": 1830
},
{
"epoch": 0.7065373908034943,
"grad_norm": 0.00973331298779335,
"learning_rate": 1.717357910906298e-05,
"loss": 0.0003,
"step": 1840
},
{
"epoch": 0.7103772679274263,
"grad_norm": 0.004286254481889245,
"learning_rate": 1.715821812596006e-05,
"loss": 0.0003,
"step": 1850
},
{
"epoch": 0.7142171450513584,
"grad_norm": 0.005281447209048475,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.0003,
"step": 1860
},
{
"epoch": 0.7180570221752904,
"grad_norm": 0.011520628884629904,
"learning_rate": 1.7127496159754226e-05,
"loss": 0.0003,
"step": 1870
},
{
"epoch": 0.7218968992992224,
"grad_norm": 0.015096661910423118,
"learning_rate": 1.7112135176651307e-05,
"loss": 0.0003,
"step": 1880
},
{
"epoch": 0.7257367764231545,
"grad_norm": 0.004269384954031992,
"learning_rate": 1.7096774193548388e-05,
"loss": 0.0003,
"step": 1890
},
{
"epoch": 0.7295766535470865,
"grad_norm": 0.007284302523487442,
"learning_rate": 1.708141321044547e-05,
"loss": 0.0003,
"step": 1900
},
{
"epoch": 0.7334165306710185,
"grad_norm": 0.013816212104358527,
"learning_rate": 1.706605222734255e-05,
"loss": 0.0003,
"step": 1910
},
{
"epoch": 0.7372564077949506,
"grad_norm": 0.006389002588565134,
"learning_rate": 1.705069124423963e-05,
"loss": 0.0003,
"step": 1920
},
{
"epoch": 0.7410962849188826,
"grad_norm": 0.009485308445395068,
"learning_rate": 1.7035330261136712e-05,
"loss": 0.0003,
"step": 1930
},
{
"epoch": 0.7449361620428147,
"grad_norm": 0.004423329749614452,
"learning_rate": 1.7019969278033796e-05,
"loss": 0.0003,
"step": 1940
},
{
"epoch": 0.7487760391667466,
"grad_norm": 0.007527583045286338,
"learning_rate": 1.7004608294930877e-05,
"loss": 0.0003,
"step": 1950
},
{
"epoch": 0.7526159162906787,
"grad_norm": 0.014586231809369528,
"learning_rate": 1.6989247311827958e-05,
"loss": 0.0003,
"step": 1960
},
{
"epoch": 0.7564557934146108,
"grad_norm": 0.006651075913511302,
"learning_rate": 1.697388632872504e-05,
"loss": 0.0003,
"step": 1970
},
{
"epoch": 0.7602956705385427,
"grad_norm": 0.009325021217663211,
"learning_rate": 1.695852534562212e-05,
"loss": 0.0003,
"step": 1980
},
{
"epoch": 0.7641355476624748,
"grad_norm": 0.00448309467347562,
"learning_rate": 1.69431643625192e-05,
"loss": 0.0003,
"step": 1990
},
{
"epoch": 0.7679754247864068,
"grad_norm": 0.00862440090599278,
"learning_rate": 1.6927803379416285e-05,
"loss": 0.0003,
"step": 2000
},
{
"epoch": 0.7718153019103389,
"grad_norm": 0.003564225910536377,
"learning_rate": 1.6912442396313366e-05,
"loss": 0.0003,
"step": 2010
},
{
"epoch": 0.775655179034271,
"grad_norm": 0.009884322290648858,
"learning_rate": 1.6897081413210447e-05,
"loss": 0.0003,
"step": 2020
},
{
"epoch": 0.7794950561582029,
"grad_norm": 0.0052686365175910795,
"learning_rate": 1.6881720430107528e-05,
"loss": 0.0003,
"step": 2030
},
{
"epoch": 0.783334933282135,
"grad_norm": 0.006169173971857438,
"learning_rate": 1.686635944700461e-05,
"loss": 0.0003,
"step": 2040
},
{
"epoch": 0.787174810406067,
"grad_norm": 0.006194757569521478,
"learning_rate": 1.685099846390169e-05,
"loss": 0.0003,
"step": 2050
},
{
"epoch": 0.791014687529999,
"grad_norm": 0.00415409003374665,
"learning_rate": 1.683563748079877e-05,
"loss": 0.0003,
"step": 2060
},
{
"epoch": 0.7948545646539311,
"grad_norm": 0.01310714973576285,
"learning_rate": 1.6820276497695855e-05,
"loss": 0.0003,
"step": 2070
},
{
"epoch": 0.7986944417778631,
"grad_norm": 0.0046992213585696965,
"learning_rate": 1.6804915514592936e-05,
"loss": 0.0003,
"step": 2080
},
{
"epoch": 0.8025343189017952,
"grad_norm": 0.006291272173141966,
"learning_rate": 1.6789554531490017e-05,
"loss": 0.0003,
"step": 2090
},
{
"epoch": 0.8063741960257271,
"grad_norm": 0.007546577147358044,
"learning_rate": 1.6774193548387098e-05,
"loss": 0.0003,
"step": 2100
},
{
"epoch": 0.8102140731496592,
"grad_norm": 0.005443900744121676,
"learning_rate": 1.675883256528418e-05,
"loss": 0.0003,
"step": 2110
},
{
"epoch": 0.8140539502735913,
"grad_norm": 0.0019010839135315712,
"learning_rate": 1.674347158218126e-05,
"loss": 0.0003,
"step": 2120
},
{
"epoch": 0.8178938273975233,
"grad_norm": 0.009951343848549921,
"learning_rate": 1.6728110599078344e-05,
"loss": 0.0003,
"step": 2130
},
{
"epoch": 0.8217337045214553,
"grad_norm": 0.003896513453508667,
"learning_rate": 1.6712749615975425e-05,
"loss": 0.0003,
"step": 2140
},
{
"epoch": 0.8255735816453873,
"grad_norm": 0.014173651446143855,
"learning_rate": 1.6697388632872506e-05,
"loss": 0.0003,
"step": 2150
},
{
"epoch": 0.8294134587693194,
"grad_norm": 0.003677873891818207,
"learning_rate": 1.6682027649769587e-05,
"loss": 0.0003,
"step": 2160
},
{
"epoch": 0.8332533358932515,
"grad_norm": 0.007358001701120111,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0003,
"step": 2170
},
{
"epoch": 0.8370932130171834,
"grad_norm": 0.014586891892807584,
"learning_rate": 1.665130568356375e-05,
"loss": 0.0003,
"step": 2180
},
{
"epoch": 0.8409330901411155,
"grad_norm": 0.011685080400631487,
"learning_rate": 1.663594470046083e-05,
"loss": 0.0003,
"step": 2190
},
{
"epoch": 0.8447729672650475,
"grad_norm": 0.006355858658307203,
"learning_rate": 1.6620583717357914e-05,
"loss": 0.0003,
"step": 2200
},
{
"epoch": 0.8486128443889795,
"grad_norm": 0.004711497431600735,
"learning_rate": 1.6605222734254995e-05,
"loss": 0.0003,
"step": 2210
},
{
"epoch": 0.8524527215129116,
"grad_norm": 0.00975542945138729,
"learning_rate": 1.6589861751152075e-05,
"loss": 0.0003,
"step": 2220
},
{
"epoch": 0.8562925986368436,
"grad_norm": 0.009147508665138223,
"learning_rate": 1.6574500768049156e-05,
"loss": 0.0003,
"step": 2230
},
{
"epoch": 0.8601324757607757,
"grad_norm": 0.004608721023888835,
"learning_rate": 1.6559139784946237e-05,
"loss": 0.0003,
"step": 2240
},
{
"epoch": 0.8639723528847076,
"grad_norm": 0.007377051739331657,
"learning_rate": 1.6543778801843318e-05,
"loss": 0.0003,
"step": 2250
},
{
"epoch": 0.8678122300086397,
"grad_norm": 0.012946767220354212,
"learning_rate": 1.6528417818740403e-05,
"loss": 0.0003,
"step": 2260
},
{
"epoch": 0.8716521071325718,
"grad_norm": 0.014416613075616386,
"learning_rate": 1.6513056835637483e-05,
"loss": 0.0003,
"step": 2270
},
{
"epoch": 0.8754919842565038,
"grad_norm": 0.006287700917447439,
"learning_rate": 1.6497695852534564e-05,
"loss": 0.0003,
"step": 2280
},
{
"epoch": 0.8793318613804358,
"grad_norm": 0.0137632558355742,
"learning_rate": 1.6482334869431645e-05,
"loss": 0.0003,
"step": 2290
},
{
"epoch": 0.8831717385043678,
"grad_norm": 0.004387114997812537,
"learning_rate": 1.6466973886328726e-05,
"loss": 0.0003,
"step": 2300
},
{
"epoch": 0.8870116156282999,
"grad_norm": 0.006112696865963161,
"learning_rate": 1.6451612903225807e-05,
"loss": 0.0003,
"step": 2310
},
{
"epoch": 0.890851492752232,
"grad_norm": 0.00335985624587701,
"learning_rate": 1.643625192012289e-05,
"loss": 0.0003,
"step": 2320
},
{
"epoch": 0.8946913698761639,
"grad_norm": 0.012311531291747248,
"learning_rate": 1.6420890937019972e-05,
"loss": 0.0003,
"step": 2330
},
{
"epoch": 0.898531247000096,
"grad_norm": 0.0056121939234220625,
"learning_rate": 1.6405529953917053e-05,
"loss": 0.0003,
"step": 2340
},
{
"epoch": 0.902371124124028,
"grad_norm": 0.008461725308188539,
"learning_rate": 1.6390168970814134e-05,
"loss": 0.0003,
"step": 2350
},
{
"epoch": 0.9062110012479601,
"grad_norm": 0.007674455481245043,
"learning_rate": 1.6374807987711215e-05,
"loss": 0.0003,
"step": 2360
},
{
"epoch": 0.9100508783718921,
"grad_norm": 0.015158413045124617,
"learning_rate": 1.6359447004608296e-05,
"loss": 0.0003,
"step": 2370
},
{
"epoch": 0.9138907554958241,
"grad_norm": 0.012286030614540876,
"learning_rate": 1.6344086021505377e-05,
"loss": 0.0003,
"step": 2380
},
{
"epoch": 0.9177306326197562,
"grad_norm": 0.0030696065567895983,
"learning_rate": 1.632872503840246e-05,
"loss": 0.0003,
"step": 2390
},
{
"epoch": 0.9215705097436881,
"grad_norm": 0.004565605709323647,
"learning_rate": 1.6313364055299542e-05,
"loss": 0.0003,
"step": 2400
},
{
"epoch": 0.9254103868676202,
"grad_norm": 0.006838334269203355,
"learning_rate": 1.6298003072196623e-05,
"loss": 0.0003,
"step": 2410
},
{
"epoch": 0.9292502639915523,
"grad_norm": 0.00990773655397776,
"learning_rate": 1.6282642089093704e-05,
"loss": 0.0003,
"step": 2420
},
{
"epoch": 0.9330901411154843,
"grad_norm": 0.013734696643942659,
"learning_rate": 1.6267281105990785e-05,
"loss": 0.0003,
"step": 2430
},
{
"epoch": 0.9369300182394164,
"grad_norm": 0.004412383577588846,
"learning_rate": 1.6251920122887866e-05,
"loss": 0.0003,
"step": 2440
},
{
"epoch": 0.9407698953633483,
"grad_norm": 0.00711020501768463,
"learning_rate": 1.6236559139784947e-05,
"loss": 0.0003,
"step": 2450
},
{
"epoch": 0.9446097724872804,
"grad_norm": 0.004814816584197371,
"learning_rate": 1.622119815668203e-05,
"loss": 0.0003,
"step": 2460
},
{
"epoch": 0.9484496496112125,
"grad_norm": 0.007067507479770793,
"learning_rate": 1.6205837173579112e-05,
"loss": 0.0003,
"step": 2470
},
{
"epoch": 0.9522895267351444,
"grad_norm": 0.0019971454498982043,
"learning_rate": 1.6190476190476193e-05,
"loss": 0.0003,
"step": 2480
},
{
"epoch": 0.9561294038590765,
"grad_norm": 0.003950118900464544,
"learning_rate": 1.6175115207373274e-05,
"loss": 0.0003,
"step": 2490
},
{
"epoch": 0.9599692809830085,
"grad_norm": 0.007758837909723049,
"learning_rate": 1.6159754224270355e-05,
"loss": 0.0003,
"step": 2500
},
{
"epoch": 0.9638091581069406,
"grad_norm": 0.0033188489546766026,
"learning_rate": 1.6144393241167436e-05,
"loss": 0.0003,
"step": 2510
},
{
"epoch": 0.9676490352308726,
"grad_norm": 0.005559097491387871,
"learning_rate": 1.6129032258064517e-05,
"loss": 0.0003,
"step": 2520
},
{
"epoch": 0.9714889123548046,
"grad_norm": 0.0038490165506594267,
"learning_rate": 1.61136712749616e-05,
"loss": 0.0003,
"step": 2530
},
{
"epoch": 0.9753287894787367,
"grad_norm": 0.00511457830944101,
"learning_rate": 1.6098310291858682e-05,
"loss": 0.0003,
"step": 2540
},
{
"epoch": 0.9791686666026688,
"grad_norm": 0.0030355393802215156,
"learning_rate": 1.6082949308755763e-05,
"loss": 0.0003,
"step": 2550
},
{
"epoch": 0.9830085437266007,
"grad_norm": 0.004994307766841962,
"learning_rate": 1.6067588325652844e-05,
"loss": 0.0003,
"step": 2560
},
{
"epoch": 0.9868484208505328,
"grad_norm": 0.00586917516793509,
"learning_rate": 1.6052227342549925e-05,
"loss": 0.0003,
"step": 2570
},
{
"epoch": 0.9906882979744648,
"grad_norm": 0.003295655448503593,
"learning_rate": 1.6036866359447006e-05,
"loss": 0.0003,
"step": 2580
},
{
"epoch": 0.9945281750983969,
"grad_norm": 0.0017248350640545309,
"learning_rate": 1.6021505376344087e-05,
"loss": 0.0003,
"step": 2590
},
{
"epoch": 0.9983680522223289,
"grad_norm": 0.008360356694852496,
"learning_rate": 1.600614439324117e-05,
"loss": 0.0003,
"step": 2600
},
{
"epoch": 1.001919938561966,
"grad_norm": 0.00941746675646077,
"learning_rate": 1.5990783410138252e-05,
"loss": 0.0003,
"step": 2610
},
{
"epoch": 1.005759815685898,
"grad_norm": 0.007001645607480019,
"learning_rate": 1.5975422427035333e-05,
"loss": 0.0003,
"step": 2620
},
{
"epoch": 1.00959969280983,
"grad_norm": 0.00872263934893287,
"learning_rate": 1.5960061443932414e-05,
"loss": 0.0003,
"step": 2630
},
{
"epoch": 1.0134395699337622,
"grad_norm": 0.006998252185455207,
"learning_rate": 1.5944700460829495e-05,
"loss": 0.0003,
"step": 2640
},
{
"epoch": 1.0172794470576942,
"grad_norm": 0.008293218599741276,
"learning_rate": 1.5929339477726576e-05,
"loss": 0.0003,
"step": 2650
},
{
"epoch": 1.021119324181626,
"grad_norm": 0.006889505177967215,
"learning_rate": 1.5913978494623657e-05,
"loss": 0.0003,
"step": 2660
},
{
"epoch": 1.0249592013055582,
"grad_norm": 0.010069071017105902,
"learning_rate": 1.589861751152074e-05,
"loss": 0.0003,
"step": 2670
},
{
"epoch": 1.0287990784294903,
"grad_norm": 0.024634152994068834,
"learning_rate": 1.5883256528417822e-05,
"loss": 0.0003,
"step": 2680
},
{
"epoch": 1.0326389555534223,
"grad_norm": 0.010662609432049419,
"learning_rate": 1.5867895545314903e-05,
"loss": 0.0003,
"step": 2690
},
{
"epoch": 1.0364788326773544,
"grad_norm": 0.013283658088640498,
"learning_rate": 1.5852534562211984e-05,
"loss": 0.0003,
"step": 2700
},
{
"epoch": 1.0403187098012863,
"grad_norm": 0.004408580576716252,
"learning_rate": 1.5837173579109065e-05,
"loss": 0.0003,
"step": 2710
},
{
"epoch": 1.0441585869252183,
"grad_norm": 0.005069354965439019,
"learning_rate": 1.5821812596006145e-05,
"loss": 0.0003,
"step": 2720
},
{
"epoch": 1.0479984640491504,
"grad_norm": 0.005316965623453909,
"learning_rate": 1.5806451612903226e-05,
"loss": 0.0003,
"step": 2730
},
{
"epoch": 1.0518383411730825,
"grad_norm": 0.004614759009371165,
"learning_rate": 1.5791090629800307e-05,
"loss": 0.0003,
"step": 2740
},
{
"epoch": 1.0556782182970146,
"grad_norm": 0.006520159764607735,
"learning_rate": 1.577572964669739e-05,
"loss": 0.0003,
"step": 2750
},
{
"epoch": 1.0595180954209464,
"grad_norm": 0.007212891832854262,
"learning_rate": 1.5760368663594473e-05,
"loss": 0.0003,
"step": 2760
},
{
"epoch": 1.0633579725448785,
"grad_norm": 0.007329478767553912,
"learning_rate": 1.5745007680491553e-05,
"loss": 0.0003,
"step": 2770
},
{
"epoch": 1.0671978496688106,
"grad_norm": 0.012902138519829355,
"learning_rate": 1.5729646697388634e-05,
"loss": 0.0003,
"step": 2780
},
{
"epoch": 1.0710377267927427,
"grad_norm": 0.008115156348023293,
"learning_rate": 1.5714285714285715e-05,
"loss": 0.0003,
"step": 2790
},
{
"epoch": 1.0748776039166748,
"grad_norm": 0.0031222148539581814,
"learning_rate": 1.5698924731182796e-05,
"loss": 0.0003,
"step": 2800
},
{
"epoch": 1.0787174810406066,
"grad_norm": 0.00358892329366379,
"learning_rate": 1.5683563748079877e-05,
"loss": 0.0003,
"step": 2810
},
{
"epoch": 1.0825573581645387,
"grad_norm": 0.003731764641158579,
"learning_rate": 1.566820276497696e-05,
"loss": 0.0003,
"step": 2820
},
{
"epoch": 1.0863972352884708,
"grad_norm": 0.009651710392543447,
"learning_rate": 1.5652841781874042e-05,
"loss": 0.0003,
"step": 2830
},
{
"epoch": 1.0902371124124028,
"grad_norm": 0.007272437262278606,
"learning_rate": 1.5637480798771123e-05,
"loss": 0.0003,
"step": 2840
},
{
"epoch": 1.094076989536335,
"grad_norm": 0.0038138336241257302,
"learning_rate": 1.5622119815668204e-05,
"loss": 0.0003,
"step": 2850
},
{
"epoch": 1.0979168666602668,
"grad_norm": 0.007645435686600415,
"learning_rate": 1.5606758832565285e-05,
"loss": 0.0003,
"step": 2860
},
{
"epoch": 1.1017567437841989,
"grad_norm": 0.004228910730827151,
"learning_rate": 1.5591397849462366e-05,
"loss": 0.0003,
"step": 2870
},
{
"epoch": 1.105596620908131,
"grad_norm": 0.007428720623939754,
"learning_rate": 1.5576036866359447e-05,
"loss": 0.0003,
"step": 2880
},
{
"epoch": 1.109436498032063,
"grad_norm": 0.0109342599815278,
"learning_rate": 1.556067588325653e-05,
"loss": 0.0003,
"step": 2890
},
{
"epoch": 1.113276375155995,
"grad_norm": 0.004880866544074755,
"learning_rate": 1.5545314900153612e-05,
"loss": 0.0003,
"step": 2900
},
{
"epoch": 1.117116252279927,
"grad_norm": 0.013738617802960406,
"learning_rate": 1.5529953917050693e-05,
"loss": 0.0003,
"step": 2910
},
{
"epoch": 1.120956129403859,
"grad_norm": 0.00498835427587417,
"learning_rate": 1.5514592933947774e-05,
"loss": 0.0003,
"step": 2920
},
{
"epoch": 1.124796006527791,
"grad_norm": 0.0030319982820064474,
"learning_rate": 1.5499231950844855e-05,
"loss": 0.0003,
"step": 2930
},
{
"epoch": 1.1286358836517232,
"grad_norm": 0.01067113935847921,
"learning_rate": 1.5483870967741936e-05,
"loss": 0.0003,
"step": 2940
},
{
"epoch": 1.1324757607756553,
"grad_norm": 0.003620520899235841,
"learning_rate": 1.5468509984639017e-05,
"loss": 0.0003,
"step": 2950
},
{
"epoch": 1.1363156378995871,
"grad_norm": 0.004795888699506633,
"learning_rate": 1.5453149001536098e-05,
"loss": 0.0003,
"step": 2960
},
{
"epoch": 1.1401555150235192,
"grad_norm": 0.006942279632235508,
"learning_rate": 1.5437788018433182e-05,
"loss": 0.0003,
"step": 2970
},
{
"epoch": 1.1439953921474513,
"grad_norm": 0.002130066736956189,
"learning_rate": 1.5422427035330263e-05,
"loss": 0.0003,
"step": 2980
},
{
"epoch": 1.1478352692713834,
"grad_norm": 0.005306111233629307,
"learning_rate": 1.5407066052227344e-05,
"loss": 0.0003,
"step": 2990
},
{
"epoch": 1.1516751463953154,
"grad_norm": 0.007715185339433146,
"learning_rate": 1.5391705069124425e-05,
"loss": 0.0003,
"step": 3000
},
{
"epoch": 1.1555150235192473,
"grad_norm": 0.005655458788113601,
"learning_rate": 1.5376344086021506e-05,
"loss": 0.0003,
"step": 3010
},
{
"epoch": 1.1593549006431794,
"grad_norm": 0.008468884041379838,
"learning_rate": 1.5360983102918587e-05,
"loss": 0.0003,
"step": 3020
},
{
"epoch": 1.1631947777671114,
"grad_norm": 0.015885667831357166,
"learning_rate": 1.5345622119815668e-05,
"loss": 0.0003,
"step": 3030
},
{
"epoch": 1.1670346548910435,
"grad_norm": 0.003501130092621392,
"learning_rate": 1.5330261136712752e-05,
"loss": 0.0003,
"step": 3040
},
{
"epoch": 1.1708745320149756,
"grad_norm": 0.007702536520407232,
"learning_rate": 1.5314900153609833e-05,
"loss": 0.0003,
"step": 3050
},
{
"epoch": 1.1747144091389075,
"grad_norm": 0.00397165276711294,
"learning_rate": 1.5299539170506914e-05,
"loss": 0.0003,
"step": 3060
},
{
"epoch": 1.1785542862628395,
"grad_norm": 0.005806224235699376,
"learning_rate": 1.5284178187403995e-05,
"loss": 0.0003,
"step": 3070
},
{
"epoch": 1.1823941633867716,
"grad_norm": 0.014031967789109468,
"learning_rate": 1.5268817204301076e-05,
"loss": 0.0003,
"step": 3080
},
{
"epoch": 1.1862340405107037,
"grad_norm": 0.011597083317184202,
"learning_rate": 1.5253456221198157e-05,
"loss": 0.0003,
"step": 3090
},
{
"epoch": 1.1900739176346358,
"grad_norm": 0.003137805117200966,
"learning_rate": 1.523809523809524e-05,
"loss": 0.0003,
"step": 3100
},
{
"epoch": 1.1939137947585676,
"grad_norm": 0.008571751181818212,
"learning_rate": 1.5222734254992322e-05,
"loss": 0.0003,
"step": 3110
},
{
"epoch": 1.1977536718824997,
"grad_norm": 0.010354799239461948,
"learning_rate": 1.5207373271889403e-05,
"loss": 0.0003,
"step": 3120
},
{
"epoch": 1.2015935490064318,
"grad_norm": 0.006888806938098952,
"learning_rate": 1.5192012288786484e-05,
"loss": 0.0003,
"step": 3130
},
{
"epoch": 1.2054334261303639,
"grad_norm": 0.00560528529757068,
"learning_rate": 1.5176651305683565e-05,
"loss": 0.0003,
"step": 3140
},
{
"epoch": 1.209273303254296,
"grad_norm": 0.004803989045845827,
"learning_rate": 1.5161290322580646e-05,
"loss": 0.0003,
"step": 3150
},
{
"epoch": 1.2131131803782278,
"grad_norm": 0.010138889246585629,
"learning_rate": 1.5145929339477728e-05,
"loss": 0.0003,
"step": 3160
},
{
"epoch": 1.2169530575021599,
"grad_norm": 0.010337939960763999,
"learning_rate": 1.5130568356374809e-05,
"loss": 0.0003,
"step": 3170
},
{
"epoch": 1.220792934626092,
"grad_norm": 0.0034014308570009524,
"learning_rate": 1.511520737327189e-05,
"loss": 0.0003,
"step": 3180
},
{
"epoch": 1.224632811750024,
"grad_norm": 0.0029402516168536455,
"learning_rate": 1.5099846390168973e-05,
"loss": 0.0003,
"step": 3190
},
{
"epoch": 1.228472688873956,
"grad_norm": 0.0077860538241216895,
"learning_rate": 1.5084485407066054e-05,
"loss": 0.0003,
"step": 3200
},
{
"epoch": 1.232312565997888,
"grad_norm": 0.005558974463946693,
"learning_rate": 1.5069124423963135e-05,
"loss": 0.0003,
"step": 3210
},
{
"epoch": 1.23615244312182,
"grad_norm": 0.010803718895790473,
"learning_rate": 1.5053763440860215e-05,
"loss": 0.0003,
"step": 3220
},
{
"epoch": 1.2399923202457521,
"grad_norm": 0.007530607391625148,
"learning_rate": 1.5038402457757298e-05,
"loss": 0.0003,
"step": 3230
},
{
"epoch": 1.2438321973696842,
"grad_norm": 0.0024515971935964494,
"learning_rate": 1.5023041474654379e-05,
"loss": 0.0003,
"step": 3240
},
{
"epoch": 1.2476720744936163,
"grad_norm": 0.004793434169621187,
"learning_rate": 1.500768049155146e-05,
"loss": 0.0003,
"step": 3250
},
{
"epoch": 1.2515119516175481,
"grad_norm": 0.005198731469624748,
"learning_rate": 1.4992319508448543e-05,
"loss": 0.0003,
"step": 3260
},
{
"epoch": 1.2553518287414802,
"grad_norm": 0.002181117809025224,
"learning_rate": 1.4976958525345623e-05,
"loss": 0.0003,
"step": 3270
},
{
"epoch": 1.2591917058654123,
"grad_norm": 0.0037625493242586724,
"learning_rate": 1.4961597542242704e-05,
"loss": 0.0003,
"step": 3280
},
{
"epoch": 1.2630315829893444,
"grad_norm": 0.005316485072585287,
"learning_rate": 1.4946236559139787e-05,
"loss": 0.0003,
"step": 3290
},
{
"epoch": 1.2668714601132764,
"grad_norm": 0.004784634700102955,
"learning_rate": 1.4930875576036868e-05,
"loss": 0.0003,
"step": 3300
},
{
"epoch": 1.2707113372372083,
"grad_norm": 0.012696100545567314,
"learning_rate": 1.4915514592933949e-05,
"loss": 0.0003,
"step": 3310
},
{
"epoch": 1.2745512143611404,
"grad_norm": 0.005311522663075818,
"learning_rate": 1.490015360983103e-05,
"loss": 0.0003,
"step": 3320
},
{
"epoch": 1.2783910914850725,
"grad_norm": 0.0028746469015276917,
"learning_rate": 1.4884792626728112e-05,
"loss": 0.0003,
"step": 3330
},
{
"epoch": 1.2822309686090045,
"grad_norm": 0.010467141532211954,
"learning_rate": 1.4869431643625193e-05,
"loss": 0.0003,
"step": 3340
},
{
"epoch": 1.2860708457329366,
"grad_norm": 0.004927112231184059,
"learning_rate": 1.4854070660522274e-05,
"loss": 0.0003,
"step": 3350
},
{
"epoch": 1.2899107228568685,
"grad_norm": 0.004240757320008939,
"learning_rate": 1.4838709677419357e-05,
"loss": 0.0003,
"step": 3360
},
{
"epoch": 1.2937505999808006,
"grad_norm": 0.004593149961249671,
"learning_rate": 1.4823348694316438e-05,
"loss": 0.0003,
"step": 3370
},
{
"epoch": 1.2975904771047326,
"grad_norm": 0.012379522683153942,
"learning_rate": 1.4807987711213519e-05,
"loss": 0.0003,
"step": 3380
},
{
"epoch": 1.3014303542286647,
"grad_norm": 0.0030617662082256646,
"learning_rate": 1.47926267281106e-05,
"loss": 0.0003,
"step": 3390
},
{
"epoch": 1.3052702313525968,
"grad_norm": 0.012477621815569138,
"learning_rate": 1.477726574500768e-05,
"loss": 0.0003,
"step": 3400
},
{
"epoch": 1.3091101084765286,
"grad_norm": 0.008973089780140532,
"learning_rate": 1.4761904761904763e-05,
"loss": 0.0003,
"step": 3410
},
{
"epoch": 1.3129499856004607,
"grad_norm": 0.006012620992408176,
"learning_rate": 1.4746543778801846e-05,
"loss": 0.0003,
"step": 3420
},
{
"epoch": 1.3167898627243928,
"grad_norm": 0.002978034783135485,
"learning_rate": 1.4731182795698927e-05,
"loss": 0.0003,
"step": 3430
},
{
"epoch": 1.3206297398483249,
"grad_norm": 0.0076361528432857696,
"learning_rate": 1.4715821812596008e-05,
"loss": 0.0003,
"step": 3440
},
{
"epoch": 1.324469616972257,
"grad_norm": 0.01162698278703534,
"learning_rate": 1.4700460829493089e-05,
"loss": 0.0003,
"step": 3450
},
{
"epoch": 1.3283094940961888,
"grad_norm": 0.0023240753476107792,
"learning_rate": 1.468509984639017e-05,
"loss": 0.0003,
"step": 3460
},
{
"epoch": 1.332149371220121,
"grad_norm": 0.005329365238908933,
"learning_rate": 1.466973886328725e-05,
"loss": 0.0003,
"step": 3470
},
{
"epoch": 1.335989248344053,
"grad_norm": 0.007711907461069916,
"learning_rate": 1.4654377880184335e-05,
"loss": 0.0003,
"step": 3480
},
{
"epoch": 1.339829125467985,
"grad_norm": 0.0051703315905598365,
"learning_rate": 1.4639016897081416e-05,
"loss": 0.0003,
"step": 3490
},
{
"epoch": 1.3436690025919171,
"grad_norm": 0.00656849551678766,
"learning_rate": 1.4623655913978497e-05,
"loss": 0.0003,
"step": 3500
},
{
"epoch": 1.347508879715849,
"grad_norm": 0.004684960157051687,
"learning_rate": 1.4608294930875578e-05,
"loss": 0.0003,
"step": 3510
},
{
"epoch": 1.351348756839781,
"grad_norm": 0.003653453570474896,
"learning_rate": 1.4592933947772658e-05,
"loss": 0.0003,
"step": 3520
},
{
"epoch": 1.3551886339637131,
"grad_norm": 0.0036918985360549997,
"learning_rate": 1.457757296466974e-05,
"loss": 0.0003,
"step": 3530
},
{
"epoch": 1.3590285110876452,
"grad_norm": 0.005265072866815289,
"learning_rate": 1.456221198156682e-05,
"loss": 0.0003,
"step": 3540
},
{
"epoch": 1.3628683882115773,
"grad_norm": 0.002918853603638478,
"learning_rate": 1.4546850998463905e-05,
"loss": 0.0003,
"step": 3550
},
{
"epoch": 1.3667082653355092,
"grad_norm": 0.00935186960220143,
"learning_rate": 1.4531490015360986e-05,
"loss": 0.0003,
"step": 3560
},
{
"epoch": 1.3705481424594412,
"grad_norm": 0.0047564377664160475,
"learning_rate": 1.4516129032258066e-05,
"loss": 0.0003,
"step": 3570
},
{
"epoch": 1.3743880195833733,
"grad_norm": 0.0027827569792821744,
"learning_rate": 1.4500768049155147e-05,
"loss": 0.0003,
"step": 3580
},
{
"epoch": 1.3782278967073054,
"grad_norm": 0.007874618569613728,
"learning_rate": 1.4485407066052228e-05,
"loss": 0.0003,
"step": 3590
},
{
"epoch": 1.3820677738312375,
"grad_norm": 0.0021217629192740463,
"learning_rate": 1.447004608294931e-05,
"loss": 0.0003,
"step": 3600
},
{
"epoch": 1.3859076509551693,
"grad_norm": 0.0015950623575258483,
"learning_rate": 1.445468509984639e-05,
"loss": 0.0003,
"step": 3610
},
{
"epoch": 1.3897475280791014,
"grad_norm": 0.014526354269413554,
"learning_rate": 1.4439324116743471e-05,
"loss": 0.0003,
"step": 3620
},
{
"epoch": 1.3935874052030335,
"grad_norm": 0.004710032072376596,
"learning_rate": 1.4423963133640555e-05,
"loss": 0.0003,
"step": 3630
},
{
"epoch": 1.3974272823269656,
"grad_norm": 0.008945201526158983,
"learning_rate": 1.4408602150537636e-05,
"loss": 0.0003,
"step": 3640
},
{
"epoch": 1.4012671594508976,
"grad_norm": 0.004277811812491989,
"learning_rate": 1.4393241167434717e-05,
"loss": 0.0003,
"step": 3650
},
{
"epoch": 1.4051070365748295,
"grad_norm": 0.0039422467987323675,
"learning_rate": 1.4377880184331798e-05,
"loss": 0.0003,
"step": 3660
},
{
"epoch": 1.4089469136987616,
"grad_norm": 0.00676608035228549,
"learning_rate": 1.4362519201228879e-05,
"loss": 0.0003,
"step": 3670
},
{
"epoch": 1.4127867908226936,
"grad_norm": 0.010133852268400016,
"learning_rate": 1.434715821812596e-05,
"loss": 0.0003,
"step": 3680
},
{
"epoch": 1.4166266679466257,
"grad_norm": 0.009268588463915765,
"learning_rate": 1.4331797235023041e-05,
"loss": 0.0003,
"step": 3690
},
{
"epoch": 1.4204665450705578,
"grad_norm": 0.01313301243339411,
"learning_rate": 1.4316436251920125e-05,
"loss": 0.0003,
"step": 3700
},
{
"epoch": 1.4243064221944897,
"grad_norm": 0.0037448179676893684,
"learning_rate": 1.4301075268817206e-05,
"loss": 0.0003,
"step": 3710
},
{
"epoch": 1.4281462993184217,
"grad_norm": 0.007552592646915242,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.0003,
"step": 3720
},
{
"epoch": 1.4319861764423538,
"grad_norm": 0.0020607608386333867,
"learning_rate": 1.4270353302611368e-05,
"loss": 0.0003,
"step": 3730
},
{
"epoch": 1.435826053566286,
"grad_norm": 0.0017099532229802538,
"learning_rate": 1.4254992319508449e-05,
"loss": 0.0003,
"step": 3740
},
{
"epoch": 1.439665930690218,
"grad_norm": 0.009438446281080615,
"learning_rate": 1.423963133640553e-05,
"loss": 0.0003,
"step": 3750
},
{
"epoch": 1.4435058078141498,
"grad_norm": 0.0031712205983693118,
"learning_rate": 1.422427035330261e-05,
"loss": 0.0003,
"step": 3760
},
{
"epoch": 1.447345684938082,
"grad_norm": 0.004189847354145225,
"learning_rate": 1.4208909370199695e-05,
"loss": 0.0003,
"step": 3770
},
{
"epoch": 1.451185562062014,
"grad_norm": 0.01235381752195261,
"learning_rate": 1.4193548387096776e-05,
"loss": 0.0003,
"step": 3780
},
{
"epoch": 1.455025439185946,
"grad_norm": 0.008228828470936082,
"learning_rate": 1.4178187403993857e-05,
"loss": 0.0003,
"step": 3790
},
{
"epoch": 1.4588653163098781,
"grad_norm": 0.00803929836492854,
"learning_rate": 1.4162826420890938e-05,
"loss": 0.0003,
"step": 3800
},
{
"epoch": 1.46270519343381,
"grad_norm": 0.007786110195975117,
"learning_rate": 1.4147465437788019e-05,
"loss": 0.0003,
"step": 3810
},
{
"epoch": 1.466545070557742,
"grad_norm": 0.001228617041920703,
"learning_rate": 1.41321044546851e-05,
"loss": 0.0003,
"step": 3820
},
{
"epoch": 1.4703849476816742,
"grad_norm": 0.0038876544517334236,
"learning_rate": 1.4116743471582182e-05,
"loss": 0.0003,
"step": 3830
},
{
"epoch": 1.4742248248056062,
"grad_norm": 0.00866982390635113,
"learning_rate": 1.4101382488479263e-05,
"loss": 0.0003,
"step": 3840
},
{
"epoch": 1.4780647019295383,
"grad_norm": 0.011102877135429783,
"learning_rate": 1.4086021505376346e-05,
"loss": 0.0003,
"step": 3850
},
{
"epoch": 1.4819045790534702,
"grad_norm": 0.01047300225237876,
"learning_rate": 1.4070660522273427e-05,
"loss": 0.0003,
"step": 3860
},
{
"epoch": 1.4857444561774023,
"grad_norm": 0.006851930231571451,
"learning_rate": 1.4055299539170508e-05,
"loss": 0.0003,
"step": 3870
},
{
"epoch": 1.4895843333013343,
"grad_norm": 0.004987839668683684,
"learning_rate": 1.4039938556067589e-05,
"loss": 0.0003,
"step": 3880
},
{
"epoch": 1.4934242104252664,
"grad_norm": 0.0034966043588402418,
"learning_rate": 1.4024577572964671e-05,
"loss": 0.0003,
"step": 3890
},
{
"epoch": 1.4972640875491985,
"grad_norm": 0.002801267441148025,
"learning_rate": 1.4009216589861752e-05,
"loss": 0.0003,
"step": 3900
},
{
"epoch": 1.5011039646731303,
"grad_norm": 0.0018703310178060316,
"learning_rate": 1.3993855606758833e-05,
"loss": 0.0003,
"step": 3910
},
{
"epoch": 1.5049438417970626,
"grad_norm": 0.0015330340455295792,
"learning_rate": 1.3978494623655916e-05,
"loss": 0.0003,
"step": 3920
},
{
"epoch": 1.5087837189209945,
"grad_norm": 0.006896242096430408,
"learning_rate": 1.3963133640552997e-05,
"loss": 0.0003,
"step": 3930
},
{
"epoch": 1.5126235960449266,
"grad_norm": 0.0030363392744381756,
"learning_rate": 1.3947772657450078e-05,
"loss": 0.0003,
"step": 3940
},
{
"epoch": 1.5164634731688587,
"grad_norm": 0.0036790867879865252,
"learning_rate": 1.3932411674347159e-05,
"loss": 0.0003,
"step": 3950
},
{
"epoch": 1.5203033502927905,
"grad_norm": 0.003933748182425131,
"learning_rate": 1.3917050691244241e-05,
"loss": 0.0003,
"step": 3960
},
{
"epoch": 1.5241432274167228,
"grad_norm": 0.004219499765943358,
"learning_rate": 1.3901689708141322e-05,
"loss": 0.0003,
"step": 3970
},
{
"epoch": 1.5279831045406547,
"grad_norm": 0.007300405744499423,
"learning_rate": 1.3886328725038403e-05,
"loss": 0.0003,
"step": 3980
},
{
"epoch": 1.5318229816645867,
"grad_norm": 0.004617157024817587,
"learning_rate": 1.3870967741935486e-05,
"loss": 0.0003,
"step": 3990
},
{
"epoch": 1.5356628587885188,
"grad_norm": 0.010017965659017577,
"learning_rate": 1.3855606758832567e-05,
"loss": 0.0003,
"step": 4000
},
{
"epoch": 1.5395027359124507,
"grad_norm": 0.006735678653952309,
"learning_rate": 1.3840245775729648e-05,
"loss": 0.0003,
"step": 4010
},
{
"epoch": 1.543342613036383,
"grad_norm": 0.0050467679764191345,
"learning_rate": 1.382488479262673e-05,
"loss": 0.0003,
"step": 4020
},
{
"epoch": 1.5471824901603148,
"grad_norm": 0.0068324972480196195,
"learning_rate": 1.3809523809523811e-05,
"loss": 0.0003,
"step": 4030
},
{
"epoch": 1.551022367284247,
"grad_norm": 0.0114684792397768,
"learning_rate": 1.3794162826420892e-05,
"loss": 0.0003,
"step": 4040
},
{
"epoch": 1.554862244408179,
"grad_norm": 0.005883994452757438,
"learning_rate": 1.3778801843317973e-05,
"loss": 0.0003,
"step": 4050
},
{
"epoch": 1.5587021215321109,
"grad_norm": 0.01105835794681893,
"learning_rate": 1.3763440860215056e-05,
"loss": 0.0003,
"step": 4060
},
{
"epoch": 1.5625419986560432,
"grad_norm": 0.005406277635716297,
"learning_rate": 1.3748079877112136e-05,
"loss": 0.0003,
"step": 4070
},
{
"epoch": 1.566381875779975,
"grad_norm": 0.003846164185768484,
"learning_rate": 1.3732718894009217e-05,
"loss": 0.0003,
"step": 4080
},
{
"epoch": 1.570221752903907,
"grad_norm": 0.017545096795138392,
"learning_rate": 1.37173579109063e-05,
"loss": 0.0003,
"step": 4090
},
{
"epoch": 1.5740616300278392,
"grad_norm": 0.001157703963834084,
"learning_rate": 1.3701996927803381e-05,
"loss": 0.0003,
"step": 4100
},
{
"epoch": 1.577901507151771,
"grad_norm": 0.0047739921811557765,
"learning_rate": 1.3686635944700462e-05,
"loss": 0.0003,
"step": 4110
},
{
"epoch": 1.5817413842757033,
"grad_norm": 0.005347890941721552,
"learning_rate": 1.3671274961597543e-05,
"loss": 0.0003,
"step": 4120
},
{
"epoch": 1.5855812613996352,
"grad_norm": 0.0057929878311273305,
"learning_rate": 1.3655913978494624e-05,
"loss": 0.0003,
"step": 4130
},
{
"epoch": 1.5894211385235673,
"grad_norm": 0.0025138054493328834,
"learning_rate": 1.3640552995391706e-05,
"loss": 0.0003,
"step": 4140
},
{
"epoch": 1.5932610156474993,
"grad_norm": 0.005021480665595978,
"learning_rate": 1.3625192012288789e-05,
"loss": 0.0003,
"step": 4150
},
{
"epoch": 1.5971008927714312,
"grad_norm": 0.00485782939696147,
"learning_rate": 1.360983102918587e-05,
"loss": 0.0003,
"step": 4160
},
{
"epoch": 1.6009407698953635,
"grad_norm": 0.005107522993390195,
"learning_rate": 1.359447004608295e-05,
"loss": 0.0003,
"step": 4170
},
{
"epoch": 1.6047806470192953,
"grad_norm": 0.004581168496164048,
"learning_rate": 1.3579109062980032e-05,
"loss": 0.0003,
"step": 4180
},
{
"epoch": 1.6086205241432274,
"grad_norm": 0.0034042153583185666,
"learning_rate": 1.3563748079877113e-05,
"loss": 0.0003,
"step": 4190
},
{
"epoch": 1.6124604012671595,
"grad_norm": 0.004002545971611289,
"learning_rate": 1.3548387096774194e-05,
"loss": 0.0003,
"step": 4200
},
{
"epoch": 1.6163002783910914,
"grad_norm": 0.01005471207244953,
"learning_rate": 1.3533026113671278e-05,
"loss": 0.0003,
"step": 4210
},
{
"epoch": 1.6201401555150237,
"grad_norm": 0.007276858107586832,
"learning_rate": 1.3517665130568359e-05,
"loss": 0.0003,
"step": 4220
},
{
"epoch": 1.6239800326389555,
"grad_norm": 0.015261156983378876,
"learning_rate": 1.350230414746544e-05,
"loss": 0.0003,
"step": 4230
},
{
"epoch": 1.6278199097628876,
"grad_norm": 0.001529811428658314,
"learning_rate": 1.348694316436252e-05,
"loss": 0.0003,
"step": 4240
},
{
"epoch": 1.6316597868868197,
"grad_norm": 0.01176486699064342,
"learning_rate": 1.3471582181259602e-05,
"loss": 0.0003,
"step": 4250
},
{
"epoch": 1.6354996640107515,
"grad_norm": 0.005155818155633164,
"learning_rate": 1.3456221198156683e-05,
"loss": 0.0003,
"step": 4260
},
{
"epoch": 1.6393395411346838,
"grad_norm": 0.003646302493009192,
"learning_rate": 1.3440860215053763e-05,
"loss": 0.0003,
"step": 4270
},
{
"epoch": 1.6431794182586157,
"grad_norm": 0.013145592318521696,
"learning_rate": 1.3425499231950848e-05,
"loss": 0.0003,
"step": 4280
},
{
"epoch": 1.6470192953825478,
"grad_norm": 0.00808033295372671,
"learning_rate": 1.3410138248847929e-05,
"loss": 0.0003,
"step": 4290
},
{
"epoch": 1.6508591725064798,
"grad_norm": 0.004814145910232119,
"learning_rate": 1.339477726574501e-05,
"loss": 0.0003,
"step": 4300
},
{
"epoch": 1.6546990496304117,
"grad_norm": 0.0036057449146750355,
"learning_rate": 1.337941628264209e-05,
"loss": 0.0003,
"step": 4310
},
{
"epoch": 1.658538926754344,
"grad_norm": 0.0047424187952164075,
"learning_rate": 1.3364055299539171e-05,
"loss": 0.0003,
"step": 4320
},
{
"epoch": 1.6623788038782759,
"grad_norm": 0.006050475433915184,
"learning_rate": 1.3348694316436252e-05,
"loss": 0.0003,
"step": 4330
},
{
"epoch": 1.666218681002208,
"grad_norm": 0.0036218140340270734,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.0003,
"step": 4340
},
{
"epoch": 1.67005855812614,
"grad_norm": 0.006466212744722768,
"learning_rate": 1.3317972350230414e-05,
"loss": 0.0003,
"step": 4350
},
{
"epoch": 1.6738984352500719,
"grad_norm": 0.00888961973510433,
"learning_rate": 1.3302611367127499e-05,
"loss": 0.0003,
"step": 4360
},
{
"epoch": 1.6777383123740042,
"grad_norm": 0.003812962996861351,
"learning_rate": 1.328725038402458e-05,
"loss": 0.0003,
"step": 4370
},
{
"epoch": 1.681578189497936,
"grad_norm": 0.0022713408875346015,
"learning_rate": 1.327188940092166e-05,
"loss": 0.0003,
"step": 4380
},
{
"epoch": 1.685418066621868,
"grad_norm": 0.006949027274056569,
"learning_rate": 1.3256528417818741e-05,
"loss": 0.0003,
"step": 4390
},
{
"epoch": 1.6892579437458002,
"grad_norm": 0.002962901611754243,
"learning_rate": 1.3241167434715822e-05,
"loss": 0.0003,
"step": 4400
},
{
"epoch": 1.693097820869732,
"grad_norm": 0.003682820073862977,
"learning_rate": 1.3225806451612903e-05,
"loss": 0.0003,
"step": 4410
},
{
"epoch": 1.6969376979936643,
"grad_norm": 0.006163319026317314,
"learning_rate": 1.3210445468509984e-05,
"loss": 0.0003,
"step": 4420
},
{
"epoch": 1.7007775751175962,
"grad_norm": 0.012379479151877055,
"learning_rate": 1.3195084485407068e-05,
"loss": 0.0003,
"step": 4430
},
{
"epoch": 1.7046174522415283,
"grad_norm": 0.010094598031404283,
"learning_rate": 1.317972350230415e-05,
"loss": 0.0003,
"step": 4440
},
{
"epoch": 1.7084573293654604,
"grad_norm": 0.006009523374733768,
"learning_rate": 1.316436251920123e-05,
"loss": 0.0003,
"step": 4450
},
{
"epoch": 1.7122972064893922,
"grad_norm": 0.006067418145337392,
"learning_rate": 1.3149001536098311e-05,
"loss": 0.0003,
"step": 4460
},
{
"epoch": 1.7161370836133245,
"grad_norm": 0.0027024918930963412,
"learning_rate": 1.3133640552995392e-05,
"loss": 0.0003,
"step": 4470
},
{
"epoch": 1.7199769607372564,
"grad_norm": 0.0021877878803625056,
"learning_rate": 1.3118279569892473e-05,
"loss": 0.0003,
"step": 4480
},
{
"epoch": 1.7238168378611884,
"grad_norm": 0.007268918222066073,
"learning_rate": 1.3102918586789554e-05,
"loss": 0.0003,
"step": 4490
},
{
"epoch": 1.7276567149851205,
"grad_norm": 0.006001244259273792,
"learning_rate": 1.3087557603686638e-05,
"loss": 0.0003,
"step": 4500
},
{
"epoch": 1.7314965921090524,
"grad_norm": 0.00978511362310727,
"learning_rate": 1.307219662058372e-05,
"loss": 0.0003,
"step": 4510
},
{
"epoch": 1.7353364692329847,
"grad_norm": 0.0061450285375501765,
"learning_rate": 1.30568356374808e-05,
"loss": 0.0003,
"step": 4520
},
{
"epoch": 1.7391763463569165,
"grad_norm": 0.009230442874108066,
"learning_rate": 1.3041474654377881e-05,
"loss": 0.0003,
"step": 4530
},
{
"epoch": 1.7430162234808486,
"grad_norm": 0.00791075409422416,
"learning_rate": 1.3026113671274962e-05,
"loss": 0.0003,
"step": 4540
},
{
"epoch": 1.7468561006047807,
"grad_norm": 0.011115169586698243,
"learning_rate": 1.3010752688172043e-05,
"loss": 0.0003,
"step": 4550
},
{
"epoch": 1.7506959777287125,
"grad_norm": 0.009573241931257201,
"learning_rate": 1.2995391705069126e-05,
"loss": 0.0003,
"step": 4560
},
{
"epoch": 1.7545358548526448,
"grad_norm": 0.004473006067490304,
"learning_rate": 1.2980030721966206e-05,
"loss": 0.0003,
"step": 4570
},
{
"epoch": 1.7583757319765767,
"grad_norm": 0.012766183527931843,
"learning_rate": 1.2964669738863289e-05,
"loss": 0.0003,
"step": 4580
},
{
"epoch": 1.7622156091005088,
"grad_norm": 0.0009086621286683109,
"learning_rate": 1.294930875576037e-05,
"loss": 0.0003,
"step": 4590
},
{
"epoch": 1.7660554862244409,
"grad_norm": 0.002772310397620897,
"learning_rate": 1.2933947772657451e-05,
"loss": 0.0003,
"step": 4600
},
{
"epoch": 1.7698953633483727,
"grad_norm": 0.00251248063627462,
"learning_rate": 1.2918586789554532e-05,
"loss": 0.0003,
"step": 4610
},
{
"epoch": 1.773735240472305,
"grad_norm": 0.005139480616844398,
"learning_rate": 1.2903225806451613e-05,
"loss": 0.0003,
"step": 4620
},
{
"epoch": 1.7775751175962369,
"grad_norm": 0.005373139082891636,
"learning_rate": 1.2887864823348695e-05,
"loss": 0.0003,
"step": 4630
},
{
"epoch": 1.781414994720169,
"grad_norm": 0.0061935858766664015,
"learning_rate": 1.2872503840245776e-05,
"loss": 0.0003,
"step": 4640
},
{
"epoch": 1.785254871844101,
"grad_norm": 0.0021300277356281886,
"learning_rate": 1.2857142857142859e-05,
"loss": 0.0003,
"step": 4650
},
{
"epoch": 1.7890947489680329,
"grad_norm": 0.002853522173265363,
"learning_rate": 1.284178187403994e-05,
"loss": 0.0003,
"step": 4660
},
{
"epoch": 1.7929346260919652,
"grad_norm": 0.005011976697348055,
"learning_rate": 1.282642089093702e-05,
"loss": 0.0003,
"step": 4670
},
{
"epoch": 1.796774503215897,
"grad_norm": 0.009681647398622931,
"learning_rate": 1.2811059907834102e-05,
"loss": 0.0003,
"step": 4680
},
{
"epoch": 1.8006143803398291,
"grad_norm": 0.004303781195289312,
"learning_rate": 1.2795698924731184e-05,
"loss": 0.0003,
"step": 4690
},
{
"epoch": 1.8044542574637612,
"grad_norm": 0.007494764921984889,
"learning_rate": 1.2780337941628265e-05,
"loss": 0.0003,
"step": 4700
},
{
"epoch": 1.808294134587693,
"grad_norm": 0.007339386856967861,
"learning_rate": 1.2764976958525346e-05,
"loss": 0.0003,
"step": 4710
},
{
"epoch": 1.8121340117116254,
"grad_norm": 0.00291836055735903,
"learning_rate": 1.2749615975422429e-05,
"loss": 0.0003,
"step": 4720
},
{
"epoch": 1.8159738888355572,
"grad_norm": 0.0029509058375776632,
"learning_rate": 1.273425499231951e-05,
"loss": 0.0003,
"step": 4730
},
{
"epoch": 1.8198137659594893,
"grad_norm": 0.0032136174047263694,
"learning_rate": 1.271889400921659e-05,
"loss": 0.0003,
"step": 4740
},
{
"epoch": 1.8236536430834214,
"grad_norm": 0.006459170016331068,
"learning_rate": 1.2703533026113673e-05,
"loss": 0.0003,
"step": 4750
},
{
"epoch": 1.8274935202073532,
"grad_norm": 0.004128532101702144,
"learning_rate": 1.2688172043010754e-05,
"loss": 0.0003,
"step": 4760
},
{
"epoch": 1.8313333973312855,
"grad_norm": 0.007831239303432262,
"learning_rate": 1.2672811059907835e-05,
"loss": 0.0003,
"step": 4770
},
{
"epoch": 1.8351732744552174,
"grad_norm": 0.006446091317683293,
"learning_rate": 1.2657450076804916e-05,
"loss": 0.0003,
"step": 4780
},
{
"epoch": 1.8390131515791495,
"grad_norm": 0.005149611980043962,
"learning_rate": 1.2642089093701997e-05,
"loss": 0.0003,
"step": 4790
},
{
"epoch": 1.8428530287030815,
"grad_norm": 0.00870763552581347,
"learning_rate": 1.262672811059908e-05,
"loss": 0.0003,
"step": 4800
},
{
"epoch": 1.8466929058270134,
"grad_norm": 0.00174497439177369,
"learning_rate": 1.261136712749616e-05,
"loss": 0.0003,
"step": 4810
},
{
"epoch": 1.8505327829509457,
"grad_norm": 0.004544066116015543,
"learning_rate": 1.2596006144393243e-05,
"loss": 0.0003,
"step": 4820
},
{
"epoch": 1.8543726600748776,
"grad_norm": 0.009197541337159318,
"learning_rate": 1.2580645161290324e-05,
"loss": 0.0003,
"step": 4830
},
{
"epoch": 1.8582125371988096,
"grad_norm": 0.005155440888093791,
"learning_rate": 1.2565284178187405e-05,
"loss": 0.0003,
"step": 4840
},
{
"epoch": 1.8620524143227417,
"grad_norm": 0.0033030466348720976,
"learning_rate": 1.2549923195084486e-05,
"loss": 0.0003,
"step": 4850
},
{
"epoch": 1.8658922914466736,
"grad_norm": 0.002903647020793528,
"learning_rate": 1.2534562211981567e-05,
"loss": 0.0003,
"step": 4860
},
{
"epoch": 1.8697321685706059,
"grad_norm": 0.0037452166656142243,
"learning_rate": 1.251920122887865e-05,
"loss": 0.0003,
"step": 4870
},
{
"epoch": 1.8735720456945377,
"grad_norm": 0.005423455715151995,
"learning_rate": 1.2503840245775732e-05,
"loss": 0.0003,
"step": 4880
},
{
"epoch": 1.8774119228184698,
"grad_norm": 0.004529789228413102,
"learning_rate": 1.2488479262672813e-05,
"loss": 0.0003,
"step": 4890
},
{
"epoch": 1.8812517999424019,
"grad_norm": 0.005454742890452505,
"learning_rate": 1.2473118279569894e-05,
"loss": 0.0003,
"step": 4900
},
{
"epoch": 1.8850916770663337,
"grad_norm": 0.003634537048057344,
"learning_rate": 1.2457757296466975e-05,
"loss": 0.0003,
"step": 4910
},
{
"epoch": 1.888931554190266,
"grad_norm": 0.005577515598529365,
"learning_rate": 1.2442396313364056e-05,
"loss": 0.0002,
"step": 4920
},
{
"epoch": 1.892771431314198,
"grad_norm": 0.003272095460140622,
"learning_rate": 1.2427035330261137e-05,
"loss": 0.0003,
"step": 4930
},
{
"epoch": 1.89661130843813,
"grad_norm": 0.002842547869737635,
"learning_rate": 1.2411674347158221e-05,
"loss": 0.0003,
"step": 4940
},
{
"epoch": 1.900451185562062,
"grad_norm": 0.0030756210619306856,
"learning_rate": 1.2396313364055302e-05,
"loss": 0.0003,
"step": 4950
},
{
"epoch": 1.904291062685994,
"grad_norm": 0.003876374581736294,
"learning_rate": 1.2380952380952383e-05,
"loss": 0.0003,
"step": 4960
},
{
"epoch": 1.9081309398099262,
"grad_norm": 0.0036495611616124695,
"learning_rate": 1.2365591397849464e-05,
"loss": 0.0003,
"step": 4970
},
{
"epoch": 1.911970816933858,
"grad_norm": 0.0022680105975665986,
"learning_rate": 1.2350230414746545e-05,
"loss": 0.0003,
"step": 4980
},
{
"epoch": 1.9158106940577901,
"grad_norm": 0.007439434020570674,
"learning_rate": 1.2334869431643626e-05,
"loss": 0.0003,
"step": 4990
},
{
"epoch": 1.9196505711817222,
"grad_norm": 0.004474265624120062,
"learning_rate": 1.2319508448540707e-05,
"loss": 0.0003,
"step": 5000
},
{
"epoch": 1.923490448305654,
"grad_norm": 0.009088460064078376,
"learning_rate": 1.2304147465437787e-05,
"loss": 0.0003,
"step": 5010
},
{
"epoch": 1.9273303254295864,
"grad_norm": 0.003261287645656727,
"learning_rate": 1.2288786482334872e-05,
"loss": 0.0003,
"step": 5020
},
{
"epoch": 1.9311702025535182,
"grad_norm": 0.003381354646964584,
"learning_rate": 1.2273425499231953e-05,
"loss": 0.0003,
"step": 5030
},
{
"epoch": 1.9350100796774503,
"grad_norm": 0.011785456037052021,
"learning_rate": 1.2258064516129034e-05,
"loss": 0.0003,
"step": 5040
},
{
"epoch": 1.9388499568013824,
"grad_norm": 0.003962236601696711,
"learning_rate": 1.2242703533026115e-05,
"loss": 0.0003,
"step": 5050
},
{
"epoch": 1.9426898339253142,
"grad_norm": 0.009479934166440634,
"learning_rate": 1.2227342549923195e-05,
"loss": 0.0003,
"step": 5060
},
{
"epoch": 1.9465297110492465,
"grad_norm": 0.0017963941477855995,
"learning_rate": 1.2211981566820276e-05,
"loss": 0.0003,
"step": 5070
},
{
"epoch": 1.9503695881731784,
"grad_norm": 0.003122530310710232,
"learning_rate": 1.2196620583717357e-05,
"loss": 0.0003,
"step": 5080
},
{
"epoch": 1.9542094652971105,
"grad_norm": 0.007807269576409189,
"learning_rate": 1.2181259600614442e-05,
"loss": 0.0003,
"step": 5090
},
{
"epoch": 1.9580493424210426,
"grad_norm": 0.010712992154630164,
"learning_rate": 1.2165898617511523e-05,
"loss": 0.0003,
"step": 5100
},
{
"epoch": 1.9618892195449744,
"grad_norm": 0.007059579575096798,
"learning_rate": 1.2150537634408604e-05,
"loss": 0.0003,
"step": 5110
},
{
"epoch": 1.9657290966689067,
"grad_norm": 0.006965891986586176,
"learning_rate": 1.2135176651305684e-05,
"loss": 0.0003,
"step": 5120
},
{
"epoch": 1.9695689737928386,
"grad_norm": 0.0032147842412904005,
"learning_rate": 1.2119815668202765e-05,
"loss": 0.0003,
"step": 5130
},
{
"epoch": 1.9734088509167707,
"grad_norm": 0.002816251401671389,
"learning_rate": 1.2104454685099846e-05,
"loss": 0.0003,
"step": 5140
},
{
"epoch": 1.9772487280407027,
"grad_norm": 0.00406117662486731,
"learning_rate": 1.2089093701996927e-05,
"loss": 0.0003,
"step": 5150
},
{
"epoch": 1.9810886051646346,
"grad_norm": 0.005187669887429079,
"learning_rate": 1.2073732718894012e-05,
"loss": 0.0003,
"step": 5160
},
{
"epoch": 1.9849284822885669,
"grad_norm": 0.006087920999209694,
"learning_rate": 1.2058371735791092e-05,
"loss": 0.0003,
"step": 5170
},
{
"epoch": 1.9887683594124987,
"grad_norm": 0.004575814464986424,
"learning_rate": 1.2043010752688173e-05,
"loss": 0.0003,
"step": 5180
},
{
"epoch": 1.9926082365364308,
"grad_norm": 0.014370083560726126,
"learning_rate": 1.2027649769585254e-05,
"loss": 0.0003,
"step": 5190
},
{
"epoch": 1.996448113660363,
"grad_norm": 0.005014106197288017,
"learning_rate": 1.2012288786482335e-05,
"loss": 0.0003,
"step": 5200
},
{
"epoch": 2.0,
"grad_norm": 0.007128867284239335,
"learning_rate": 1.1996927803379416e-05,
"loss": 0.0002,
"step": 5210
},
{
"epoch": 2.003839877123932,
"grad_norm": 0.0035767749443771112,
"learning_rate": 1.1981566820276497e-05,
"loss": 0.0003,
"step": 5220
},
{
"epoch": 2.007679754247864,
"grad_norm": 0.010409746160305624,
"learning_rate": 1.1966205837173581e-05,
"loss": 0.0003,
"step": 5230
},
{
"epoch": 2.011519631371796,
"grad_norm": 0.006663355286188172,
"learning_rate": 1.1950844854070662e-05,
"loss": 0.0003,
"step": 5240
},
{
"epoch": 2.0153595084957283,
"grad_norm": 0.007056337672257391,
"learning_rate": 1.1935483870967743e-05,
"loss": 0.0003,
"step": 5250
},
{
"epoch": 2.01919938561966,
"grad_norm": 0.007042233672731786,
"learning_rate": 1.1920122887864824e-05,
"loss": 0.0002,
"step": 5260
},
{
"epoch": 2.023039262743592,
"grad_norm": 0.0034724836039492263,
"learning_rate": 1.1904761904761905e-05,
"loss": 0.0002,
"step": 5270
},
{
"epoch": 2.0268791398675243,
"grad_norm": 0.00582311493886213,
"learning_rate": 1.1889400921658986e-05,
"loss": 0.0003,
"step": 5280
},
{
"epoch": 2.030719016991456,
"grad_norm": 0.008442699615605335,
"learning_rate": 1.1874039938556069e-05,
"loss": 0.0003,
"step": 5290
},
{
"epoch": 2.0345588941153885,
"grad_norm": 0.007067754727575771,
"learning_rate": 1.185867895545315e-05,
"loss": 0.0003,
"step": 5300
},
{
"epoch": 2.0383987712393203,
"grad_norm": 0.011473488398813741,
"learning_rate": 1.1843317972350232e-05,
"loss": 0.0003,
"step": 5310
},
{
"epoch": 2.042238648363252,
"grad_norm": 0.0059529034647220706,
"learning_rate": 1.1827956989247313e-05,
"loss": 0.0003,
"step": 5320
},
{
"epoch": 2.0460785254871845,
"grad_norm": 0.005899117884748007,
"learning_rate": 1.1812596006144394e-05,
"loss": 0.0003,
"step": 5330
},
{
"epoch": 2.0499184026111164,
"grad_norm": 0.011298726784506993,
"learning_rate": 1.1797235023041475e-05,
"loss": 0.0003,
"step": 5340
},
{
"epoch": 2.0537582797350487,
"grad_norm": 0.005795088286120419,
"learning_rate": 1.1781874039938556e-05,
"loss": 0.0003,
"step": 5350
},
{
"epoch": 2.0575981568589805,
"grad_norm": 0.0036837813367058586,
"learning_rate": 1.1766513056835639e-05,
"loss": 0.0003,
"step": 5360
},
{
"epoch": 2.0614380339829124,
"grad_norm": 0.006214527860410358,
"learning_rate": 1.175115207373272e-05,
"loss": 0.0003,
"step": 5370
},
{
"epoch": 2.0652779111068447,
"grad_norm": 0.005963634039163199,
"learning_rate": 1.1735791090629802e-05,
"loss": 0.0003,
"step": 5380
},
{
"epoch": 2.0691177882307765,
"grad_norm": 0.009711371016533323,
"learning_rate": 1.1720430107526883e-05,
"loss": 0.0003,
"step": 5390
},
{
"epoch": 2.072957665354709,
"grad_norm": 0.004479690428922436,
"learning_rate": 1.1705069124423964e-05,
"loss": 0.0003,
"step": 5400
},
{
"epoch": 2.0767975424786407,
"grad_norm": 0.004339498596198677,
"learning_rate": 1.1689708141321045e-05,
"loss": 0.0003,
"step": 5410
},
{
"epoch": 2.0806374196025725,
"grad_norm": 0.005524750165344554,
"learning_rate": 1.1674347158218127e-05,
"loss": 0.0003,
"step": 5420
},
{
"epoch": 2.084477296726505,
"grad_norm": 0.0068414029393528645,
"learning_rate": 1.1658986175115208e-05,
"loss": 0.0003,
"step": 5430
},
{
"epoch": 2.0883171738504367,
"grad_norm": 0.0029993939116700393,
"learning_rate": 1.164362519201229e-05,
"loss": 0.0003,
"step": 5440
},
{
"epoch": 2.092157050974369,
"grad_norm": 0.0065445333490569925,
"learning_rate": 1.1628264208909372e-05,
"loss": 0.0003,
"step": 5450
},
{
"epoch": 2.095996928098301,
"grad_norm": 0.004157809191374774,
"learning_rate": 1.1612903225806453e-05,
"loss": 0.0003,
"step": 5460
},
{
"epoch": 2.0998368052222327,
"grad_norm": 0.01036397645054627,
"learning_rate": 1.1597542242703534e-05,
"loss": 0.0003,
"step": 5470
},
{
"epoch": 2.103676682346165,
"grad_norm": 0.004619789710041621,
"learning_rate": 1.1582181259600616e-05,
"loss": 0.0003,
"step": 5480
},
{
"epoch": 2.107516559470097,
"grad_norm": 0.0031701670908054584,
"learning_rate": 1.1566820276497697e-05,
"loss": 0.0003,
"step": 5490
},
{
"epoch": 2.111356436594029,
"grad_norm": 0.0049090365155047685,
"learning_rate": 1.1551459293394778e-05,
"loss": 0.0003,
"step": 5500
},
{
"epoch": 2.115196313717961,
"grad_norm": 0.006228538445781431,
"learning_rate": 1.153609831029186e-05,
"loss": 0.0003,
"step": 5510
},
{
"epoch": 2.119036190841893,
"grad_norm": 0.003439458661613514,
"learning_rate": 1.152073732718894e-05,
"loss": 0.0003,
"step": 5520
},
{
"epoch": 2.122876067965825,
"grad_norm": 0.0019012366528865447,
"learning_rate": 1.1505376344086023e-05,
"loss": 0.0003,
"step": 5530
},
{
"epoch": 2.126715945089757,
"grad_norm": 0.003757506993975558,
"learning_rate": 1.1490015360983104e-05,
"loss": 0.0003,
"step": 5540
},
{
"epoch": 2.1305558222136893,
"grad_norm": 0.007477619429838125,
"learning_rate": 1.1474654377880186e-05,
"loss": 0.0003,
"step": 5550
},
{
"epoch": 2.134395699337621,
"grad_norm": 0.0031273099272674763,
"learning_rate": 1.1459293394777267e-05,
"loss": 0.0003,
"step": 5560
},
{
"epoch": 2.138235576461553,
"grad_norm": 0.00735341646000325,
"learning_rate": 1.1443932411674348e-05,
"loss": 0.0003,
"step": 5570
},
{
"epoch": 2.1420754535854853,
"grad_norm": 0.00804142143071962,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.0003,
"step": 5580
},
{
"epoch": 2.145915330709417,
"grad_norm": 0.004355210176544316,
"learning_rate": 1.141321044546851e-05,
"loss": 0.0003,
"step": 5590
},
{
"epoch": 2.1497552078333495,
"grad_norm": 0.004213055601660093,
"learning_rate": 1.1397849462365593e-05,
"loss": 0.0003,
"step": 5600
},
{
"epoch": 2.1535950849572814,
"grad_norm": 0.0052307406743254785,
"learning_rate": 1.1382488479262675e-05,
"loss": 0.0003,
"step": 5610
},
{
"epoch": 2.157434962081213,
"grad_norm": 0.004583788106873781,
"learning_rate": 1.1367127496159756e-05,
"loss": 0.0003,
"step": 5620
},
{
"epoch": 2.1612748392051455,
"grad_norm": 0.0019315254969018546,
"learning_rate": 1.1351766513056837e-05,
"loss": 0.0003,
"step": 5630
},
{
"epoch": 2.1651147163290774,
"grad_norm": 0.0037899992818163615,
"learning_rate": 1.1336405529953918e-05,
"loss": 0.0003,
"step": 5640
},
{
"epoch": 2.1689545934530097,
"grad_norm": 0.006764631940574645,
"learning_rate": 1.1321044546850999e-05,
"loss": 0.0003,
"step": 5650
},
{
"epoch": 2.1727944705769415,
"grad_norm": 0.009761685741017033,
"learning_rate": 1.130568356374808e-05,
"loss": 0.0003,
"step": 5660
},
{
"epoch": 2.1766343477008734,
"grad_norm": 0.0020006686679916847,
"learning_rate": 1.1290322580645164e-05,
"loss": 0.0003,
"step": 5670
},
{
"epoch": 2.1804742248248057,
"grad_norm": 0.0037488861983966605,
"learning_rate": 1.1274961597542245e-05,
"loss": 0.0003,
"step": 5680
},
{
"epoch": 2.1843141019487375,
"grad_norm": 0.00537849863699846,
"learning_rate": 1.1259600614439326e-05,
"loss": 0.0003,
"step": 5690
},
{
"epoch": 2.18815397907267,
"grad_norm": 0.010248964745953612,
"learning_rate": 1.1244239631336407e-05,
"loss": 0.0003,
"step": 5700
},
{
"epoch": 2.1919938561966017,
"grad_norm": 0.005824548989521622,
"learning_rate": 1.1228878648233488e-05,
"loss": 0.0003,
"step": 5710
},
{
"epoch": 2.1958337333205336,
"grad_norm": 0.004325296871628092,
"learning_rate": 1.1213517665130569e-05,
"loss": 0.0003,
"step": 5720
},
{
"epoch": 2.199673610444466,
"grad_norm": 0.0035972982963026214,
"learning_rate": 1.119815668202765e-05,
"loss": 0.0003,
"step": 5730
},
{
"epoch": 2.2035134875683977,
"grad_norm": 0.004846244232237887,
"learning_rate": 1.118279569892473e-05,
"loss": 0.0003,
"step": 5740
},
{
"epoch": 2.20735336469233,
"grad_norm": 0.008745922011732979,
"learning_rate": 1.1167434715821815e-05,
"loss": 0.0003,
"step": 5750
},
{
"epoch": 2.211193241816262,
"grad_norm": 0.00778907419188414,
"learning_rate": 1.1152073732718896e-05,
"loss": 0.0003,
"step": 5760
},
{
"epoch": 2.2150331189401937,
"grad_norm": 0.0043654759196176075,
"learning_rate": 1.1136712749615977e-05,
"loss": 0.0003,
"step": 5770
},
{
"epoch": 2.218872996064126,
"grad_norm": 0.005005097526262015,
"learning_rate": 1.1121351766513058e-05,
"loss": 0.0003,
"step": 5780
},
{
"epoch": 2.222712873188058,
"grad_norm": 0.005100545621721535,
"learning_rate": 1.1105990783410139e-05,
"loss": 0.0003,
"step": 5790
},
{
"epoch": 2.22655275031199,
"grad_norm": 0.005711204873227365,
"learning_rate": 1.109062980030722e-05,
"loss": 0.0003,
"step": 5800
},
{
"epoch": 2.230392627435922,
"grad_norm": 0.005985518729144408,
"learning_rate": 1.10752688172043e-05,
"loss": 0.0003,
"step": 5810
},
{
"epoch": 2.234232504559854,
"grad_norm": 0.009325255194251696,
"learning_rate": 1.1059907834101385e-05,
"loss": 0.0003,
"step": 5820
},
{
"epoch": 2.238072381683786,
"grad_norm": 0.0009671272882854296,
"learning_rate": 1.1044546850998466e-05,
"loss": 0.0003,
"step": 5830
},
{
"epoch": 2.241912258807718,
"grad_norm": 0.007433837280776717,
"learning_rate": 1.1029185867895547e-05,
"loss": 0.0003,
"step": 5840
},
{
"epoch": 2.2457521359316504,
"grad_norm": 0.0069928744941826155,
"learning_rate": 1.1013824884792628e-05,
"loss": 0.0003,
"step": 5850
},
{
"epoch": 2.249592013055582,
"grad_norm": 0.004247699109310477,
"learning_rate": 1.0998463901689708e-05,
"loss": 0.0003,
"step": 5860
},
{
"epoch": 2.253431890179514,
"grad_norm": 0.004112472440297516,
"learning_rate": 1.098310291858679e-05,
"loss": 0.0003,
"step": 5870
},
{
"epoch": 2.2572717673034464,
"grad_norm": 0.003591482285585461,
"learning_rate": 1.096774193548387e-05,
"loss": 0.0003,
"step": 5880
},
{
"epoch": 2.261111644427378,
"grad_norm": 0.0021551424115954687,
"learning_rate": 1.0952380952380955e-05,
"loss": 0.0003,
"step": 5890
},
{
"epoch": 2.2649515215513105,
"grad_norm": 0.0037350002650392232,
"learning_rate": 1.0937019969278036e-05,
"loss": 0.0003,
"step": 5900
},
{
"epoch": 2.2687913986752424,
"grad_norm": 0.004979865757746785,
"learning_rate": 1.0921658986175116e-05,
"loss": 0.0003,
"step": 5910
},
{
"epoch": 2.2726312757991742,
"grad_norm": 0.005203006741838927,
"learning_rate": 1.0906298003072197e-05,
"loss": 0.0003,
"step": 5920
},
{
"epoch": 2.2764711529231065,
"grad_norm": 0.006728064653510132,
"learning_rate": 1.0890937019969278e-05,
"loss": 0.0003,
"step": 5930
},
{
"epoch": 2.2803110300470384,
"grad_norm": 0.009359939665881964,
"learning_rate": 1.087557603686636e-05,
"loss": 0.0003,
"step": 5940
},
{
"epoch": 2.2841509071709707,
"grad_norm": 0.0035717146182484686,
"learning_rate": 1.086021505376344e-05,
"loss": 0.0003,
"step": 5950
},
{
"epoch": 2.2879907842949025,
"grad_norm": 0.00761174029980443,
"learning_rate": 1.0844854070660523e-05,
"loss": 0.0003,
"step": 5960
},
{
"epoch": 2.2918306614188344,
"grad_norm": 0.005512789873618017,
"learning_rate": 1.0829493087557605e-05,
"loss": 0.0003,
"step": 5970
},
{
"epoch": 2.2956705385427667,
"grad_norm": 0.003572666545585101,
"learning_rate": 1.0814132104454686e-05,
"loss": 0.0003,
"step": 5980
},
{
"epoch": 2.2995104156666986,
"grad_norm": 0.0047703107744972405,
"learning_rate": 1.0798771121351767e-05,
"loss": 0.0003,
"step": 5990
},
{
"epoch": 2.303350292790631,
"grad_norm": 0.0034190136655308364,
"learning_rate": 1.0783410138248848e-05,
"loss": 0.0003,
"step": 6000
},
{
"epoch": 2.3071901699145627,
"grad_norm": 0.0036676135087216543,
"learning_rate": 1.0768049155145929e-05,
"loss": 0.0003,
"step": 6010
},
{
"epoch": 2.3110300470384946,
"grad_norm": 0.005769656112064071,
"learning_rate": 1.0752688172043012e-05,
"loss": 0.0003,
"step": 6020
},
{
"epoch": 2.314869924162427,
"grad_norm": 0.004110269843427993,
"learning_rate": 1.0737327188940093e-05,
"loss": 0.0003,
"step": 6030
},
{
"epoch": 2.3187098012863587,
"grad_norm": 0.0023331666885418414,
"learning_rate": 1.0721966205837175e-05,
"loss": 0.0003,
"step": 6040
},
{
"epoch": 2.322549678410291,
"grad_norm": 0.0027550349293288595,
"learning_rate": 1.0706605222734256e-05,
"loss": 0.0003,
"step": 6050
},
{
"epoch": 2.326389555534223,
"grad_norm": 0.00739949517331183,
"learning_rate": 1.0691244239631337e-05,
"loss": 0.0003,
"step": 6060
},
{
"epoch": 2.3302294326581547,
"grad_norm": 0.007948349024988999,
"learning_rate": 1.0675883256528418e-05,
"loss": 0.0003,
"step": 6070
},
{
"epoch": 2.334069309782087,
"grad_norm": 0.0068282243290130416,
"learning_rate": 1.0660522273425499e-05,
"loss": 0.0003,
"step": 6080
},
{
"epoch": 2.337909186906019,
"grad_norm": 0.0027681416973556576,
"learning_rate": 1.0645161290322582e-05,
"loss": 0.0003,
"step": 6090
},
{
"epoch": 2.341749064029951,
"grad_norm": 0.0048135041265458395,
"learning_rate": 1.0629800307219663e-05,
"loss": 0.0003,
"step": 6100
},
{
"epoch": 2.345588941153883,
"grad_norm": 0.003988841136950386,
"learning_rate": 1.0614439324116745e-05,
"loss": 0.0003,
"step": 6110
},
{
"epoch": 2.349428818277815,
"grad_norm": 0.001427852368200693,
"learning_rate": 1.0599078341013826e-05,
"loss": 0.0003,
"step": 6120
},
{
"epoch": 2.353268695401747,
"grad_norm": 0.0028685965319891656,
"learning_rate": 1.0583717357910907e-05,
"loss": 0.0003,
"step": 6130
},
{
"epoch": 2.357108572525679,
"grad_norm": 0.004228704639871503,
"learning_rate": 1.0568356374807988e-05,
"loss": 0.0003,
"step": 6140
},
{
"epoch": 2.3609484496496114,
"grad_norm": 0.004544577992861476,
"learning_rate": 1.055299539170507e-05,
"loss": 0.0003,
"step": 6150
},
{
"epoch": 2.3647883267735432,
"grad_norm": 0.0014514795044217516,
"learning_rate": 1.0537634408602151e-05,
"loss": 0.0003,
"step": 6160
},
{
"epoch": 2.368628203897475,
"grad_norm": 0.005123018146892371,
"learning_rate": 1.0522273425499232e-05,
"loss": 0.0003,
"step": 6170
},
{
"epoch": 2.3724680810214074,
"grad_norm": 0.007113409507414172,
"learning_rate": 1.0506912442396313e-05,
"loss": 0.0003,
"step": 6180
},
{
"epoch": 2.3763079581453392,
"grad_norm": 0.0051352903847912985,
"learning_rate": 1.0491551459293396e-05,
"loss": 0.0003,
"step": 6190
},
{
"epoch": 2.3801478352692715,
"grad_norm": 0.002104107790233039,
"learning_rate": 1.0476190476190477e-05,
"loss": 0.0003,
"step": 6200
},
{
"epoch": 2.3839877123932034,
"grad_norm": 0.0025435299650544215,
"learning_rate": 1.046082949308756e-05,
"loss": 0.0003,
"step": 6210
},
{
"epoch": 2.3878275895171353,
"grad_norm": 0.0012255752995362287,
"learning_rate": 1.044546850998464e-05,
"loss": 0.0003,
"step": 6220
},
{
"epoch": 2.3916674666410676,
"grad_norm": 0.011675318963073902,
"learning_rate": 1.0430107526881721e-05,
"loss": 0.0003,
"step": 6230
},
{
"epoch": 2.3955073437649994,
"grad_norm": 0.002461069919600081,
"learning_rate": 1.0414746543778802e-05,
"loss": 0.0003,
"step": 6240
},
{
"epoch": 2.3993472208889317,
"grad_norm": 0.0034531583429282644,
"learning_rate": 1.0399385560675883e-05,
"loss": 0.0003,
"step": 6250
},
{
"epoch": 2.4031870980128636,
"grad_norm": 0.004785216112107441,
"learning_rate": 1.0384024577572966e-05,
"loss": 0.0003,
"step": 6260
},
{
"epoch": 2.4070269751367954,
"grad_norm": 0.005178536980072039,
"learning_rate": 1.0368663594470047e-05,
"loss": 0.0003,
"step": 6270
},
{
"epoch": 2.4108668522607277,
"grad_norm": 0.007894702946313798,
"learning_rate": 1.035330261136713e-05,
"loss": 0.0003,
"step": 6280
},
{
"epoch": 2.4147067293846596,
"grad_norm": 0.006680693675035738,
"learning_rate": 1.033794162826421e-05,
"loss": 0.0003,
"step": 6290
},
{
"epoch": 2.418546606508592,
"grad_norm": 0.0012495646198747838,
"learning_rate": 1.0322580645161291e-05,
"loss": 0.0003,
"step": 6300
},
{
"epoch": 2.4223864836325237,
"grad_norm": 0.0019525091355373828,
"learning_rate": 1.0307219662058372e-05,
"loss": 0.0003,
"step": 6310
},
{
"epoch": 2.4262263607564556,
"grad_norm": 0.010231795338312226,
"learning_rate": 1.0291858678955453e-05,
"loss": 0.0003,
"step": 6320
},
{
"epoch": 2.430066237880388,
"grad_norm": 0.0030330611729114856,
"learning_rate": 1.0276497695852536e-05,
"loss": 0.0002,
"step": 6330
},
{
"epoch": 2.4339061150043197,
"grad_norm": 0.004059549924408756,
"learning_rate": 1.0261136712749618e-05,
"loss": 0.0003,
"step": 6340
},
{
"epoch": 2.437745992128252,
"grad_norm": 0.0019612251556256,
"learning_rate": 1.02457757296467e-05,
"loss": 0.0003,
"step": 6350
},
{
"epoch": 2.441585869252184,
"grad_norm": 0.015437204765968675,
"learning_rate": 1.023041474654378e-05,
"loss": 0.0003,
"step": 6360
},
{
"epoch": 2.4454257463761158,
"grad_norm": 0.012071394006097472,
"learning_rate": 1.0215053763440861e-05,
"loss": 0.0003,
"step": 6370
},
{
"epoch": 2.449265623500048,
"grad_norm": 0.0036098717816734674,
"learning_rate": 1.0199692780337942e-05,
"loss": 0.0003,
"step": 6380
},
{
"epoch": 2.45310550062398,
"grad_norm": 0.0019974234019019756,
"learning_rate": 1.0184331797235023e-05,
"loss": 0.0003,
"step": 6390
},
{
"epoch": 2.456945377747912,
"grad_norm": 0.0020862007756023856,
"learning_rate": 1.0168970814132104e-05,
"loss": 0.0003,
"step": 6400
},
{
"epoch": 2.460785254871844,
"grad_norm": 0.003547053858291849,
"learning_rate": 1.0153609831029188e-05,
"loss": 0.0003,
"step": 6410
},
{
"epoch": 2.464625131995776,
"grad_norm": 0.007416597783167198,
"learning_rate": 1.0138248847926269e-05,
"loss": 0.0003,
"step": 6420
},
{
"epoch": 2.4684650091197082,
"grad_norm": 0.013003680388147372,
"learning_rate": 1.012288786482335e-05,
"loss": 0.0003,
"step": 6430
},
{
"epoch": 2.47230488624364,
"grad_norm": 0.008074660660103197,
"learning_rate": 1.0107526881720431e-05,
"loss": 0.0003,
"step": 6440
},
{
"epoch": 2.4761447633675724,
"grad_norm": 0.009777230368946704,
"learning_rate": 1.0092165898617512e-05,
"loss": 0.0003,
"step": 6450
},
{
"epoch": 2.4799846404915042,
"grad_norm": 0.004447494802395034,
"learning_rate": 1.0076804915514593e-05,
"loss": 0.0003,
"step": 6460
},
{
"epoch": 2.483824517615436,
"grad_norm": 0.007205042771433959,
"learning_rate": 1.0061443932411674e-05,
"loss": 0.0003,
"step": 6470
},
{
"epoch": 2.4876643947393684,
"grad_norm": 0.004129432165666555,
"learning_rate": 1.0046082949308758e-05,
"loss": 0.0003,
"step": 6480
},
{
"epoch": 2.4915042718633003,
"grad_norm": 0.003293907536270406,
"learning_rate": 1.0030721966205839e-05,
"loss": 0.0003,
"step": 6490
},
{
"epoch": 2.4953441489872326,
"grad_norm": 0.003480440018564576,
"learning_rate": 1.001536098310292e-05,
"loss": 0.0003,
"step": 6500
},
{
"epoch": 2.4991840261111644,
"grad_norm": 0.00941404682853377,
"learning_rate": 1e-05,
"loss": 0.0003,
"step": 6510
},
{
"epoch": 2.5030239032350963,
"grad_norm": 0.0027230276406464524,
"learning_rate": 9.984639016897082e-06,
"loss": 0.0003,
"step": 6520
},
{
"epoch": 2.5068637803590286,
"grad_norm": 0.005538952293239004,
"learning_rate": 9.969278033794164e-06,
"loss": 0.0003,
"step": 6530
},
{
"epoch": 2.5107036574829604,
"grad_norm": 0.0050333328452282,
"learning_rate": 9.953917050691245e-06,
"loss": 0.0003,
"step": 6540
},
{
"epoch": 2.5145435346068927,
"grad_norm": 0.00522155205199295,
"learning_rate": 9.938556067588326e-06,
"loss": 0.0002,
"step": 6550
},
{
"epoch": 2.5183834117308246,
"grad_norm": 0.007188536096954456,
"learning_rate": 9.923195084485407e-06,
"loss": 0.0003,
"step": 6560
},
{
"epoch": 2.5222232888547564,
"grad_norm": 0.003184906537752492,
"learning_rate": 9.90783410138249e-06,
"loss": 0.0002,
"step": 6570
},
{
"epoch": 2.5260631659786887,
"grad_norm": 0.007133938771763716,
"learning_rate": 9.89247311827957e-06,
"loss": 0.0002,
"step": 6580
},
{
"epoch": 2.5299030431026206,
"grad_norm": 0.002331216788213924,
"learning_rate": 9.877112135176652e-06,
"loss": 0.0003,
"step": 6590
},
{
"epoch": 2.533742920226553,
"grad_norm": 0.00479250433136117,
"learning_rate": 9.861751152073733e-06,
"loss": 0.0003,
"step": 6600
},
{
"epoch": 2.5375827973504848,
"grad_norm": 0.005137631489738605,
"learning_rate": 9.846390168970815e-06,
"loss": 0.0002,
"step": 6610
},
{
"epoch": 2.5414226744744166,
"grad_norm": 0.0018644753603249751,
"learning_rate": 9.831029185867896e-06,
"loss": 0.0002,
"step": 6620
},
{
"epoch": 2.545262551598349,
"grad_norm": 0.00606148048013519,
"learning_rate": 9.815668202764977e-06,
"loss": 0.0003,
"step": 6630
},
{
"epoch": 2.5491024287222808,
"grad_norm": 0.0046942947495266455,
"learning_rate": 9.80030721966206e-06,
"loss": 0.0003,
"step": 6640
},
{
"epoch": 2.552942305846213,
"grad_norm": 0.0033581298827081688,
"learning_rate": 9.78494623655914e-06,
"loss": 0.0003,
"step": 6650
},
{
"epoch": 2.556782182970145,
"grad_norm": 0.0050898256061523055,
"learning_rate": 9.769585253456221e-06,
"loss": 0.0003,
"step": 6660
},
{
"epoch": 2.560622060094077,
"grad_norm": 0.007276030994283694,
"learning_rate": 9.754224270353302e-06,
"loss": 0.0003,
"step": 6670
},
{
"epoch": 2.564461937218009,
"grad_norm": 0.008102589186392525,
"learning_rate": 9.738863287250385e-06,
"loss": 0.0003,
"step": 6680
},
{
"epoch": 2.568301814341941,
"grad_norm": 0.007025327596243143,
"learning_rate": 9.723502304147466e-06,
"loss": 0.0003,
"step": 6690
},
{
"epoch": 2.5721416914658732,
"grad_norm": 0.005845460452608034,
"learning_rate": 9.708141321044547e-06,
"loss": 0.0003,
"step": 6700
},
{
"epoch": 2.575981568589805,
"grad_norm": 0.012489139495375251,
"learning_rate": 9.692780337941628e-06,
"loss": 0.0003,
"step": 6710
},
{
"epoch": 2.579821445713737,
"grad_norm": 0.012227770449223498,
"learning_rate": 9.67741935483871e-06,
"loss": 0.0003,
"step": 6720
},
{
"epoch": 2.5836613228376692,
"grad_norm": 0.005069955164515195,
"learning_rate": 9.662058371735791e-06,
"loss": 0.0003,
"step": 6730
},
{
"epoch": 2.587501199961601,
"grad_norm": 0.0061167037017676555,
"learning_rate": 9.646697388632872e-06,
"loss": 0.0002,
"step": 6740
},
{
"epoch": 2.5913410770855334,
"grad_norm": 0.08374847767752854,
"learning_rate": 9.631336405529955e-06,
"loss": 0.0004,
"step": 6750
},
{
"epoch": 2.5951809542094653,
"grad_norm": 0.8655212802945428,
"learning_rate": 9.615975422427036e-06,
"loss": 0.014,
"step": 6760
},
{
"epoch": 2.599020831333397,
"grad_norm": 0.5910483392483017,
"learning_rate": 9.600614439324117e-06,
"loss": 0.0096,
"step": 6770
},
{
"epoch": 2.6028607084573294,
"grad_norm": 0.013833647799367984,
"learning_rate": 9.5852534562212e-06,
"loss": 0.0017,
"step": 6780
},
{
"epoch": 2.6067005855812613,
"grad_norm": 0.006258343437812146,
"learning_rate": 9.56989247311828e-06,
"loss": 0.0003,
"step": 6790
},
{
"epoch": 2.6105404627051936,
"grad_norm": 0.007017002859118159,
"learning_rate": 9.554531490015361e-06,
"loss": 0.0003,
"step": 6800
},
{
"epoch": 2.6143803398291254,
"grad_norm": 0.003369751095391674,
"learning_rate": 9.539170506912442e-06,
"loss": 0.0003,
"step": 6810
},
{
"epoch": 2.6182202169530573,
"grad_norm": 0.003526571263237409,
"learning_rate": 9.523809523809525e-06,
"loss": 0.0003,
"step": 6820
},
{
"epoch": 2.6220600940769896,
"grad_norm": 0.014811925020528588,
"learning_rate": 9.508448540706606e-06,
"loss": 0.0003,
"step": 6830
},
{
"epoch": 2.6258999712009214,
"grad_norm": 0.0031378309452129815,
"learning_rate": 9.493087557603687e-06,
"loss": 0.0003,
"step": 6840
},
{
"epoch": 2.6297398483248537,
"grad_norm": 0.003306012394640935,
"learning_rate": 9.47772657450077e-06,
"loss": 0.0003,
"step": 6850
},
{
"epoch": 2.6335797254487856,
"grad_norm": 0.00579074066003059,
"learning_rate": 9.46236559139785e-06,
"loss": 0.0003,
"step": 6860
},
{
"epoch": 2.6374196025727175,
"grad_norm": 0.006190406938703898,
"learning_rate": 9.447004608294931e-06,
"loss": 0.0003,
"step": 6870
},
{
"epoch": 2.6412594796966498,
"grad_norm": 0.0019373298398581817,
"learning_rate": 9.431643625192014e-06,
"loss": 0.0003,
"step": 6880
},
{
"epoch": 2.6450993568205816,
"grad_norm": 0.003767593626643142,
"learning_rate": 9.416282642089095e-06,
"loss": 0.0003,
"step": 6890
},
{
"epoch": 2.648939233944514,
"grad_norm": 0.0021313401606799036,
"learning_rate": 9.400921658986176e-06,
"loss": 0.0003,
"step": 6900
},
{
"epoch": 2.6527791110684458,
"grad_norm": 0.0073770172122847415,
"learning_rate": 9.385560675883258e-06,
"loss": 0.0003,
"step": 6910
},
{
"epoch": 2.6566189881923776,
"grad_norm": 0.0067510318233110125,
"learning_rate": 9.370199692780339e-06,
"loss": 0.0003,
"step": 6920
},
{
"epoch": 2.66045886531631,
"grad_norm": 0.005588267928353358,
"learning_rate": 9.35483870967742e-06,
"loss": 0.0003,
"step": 6930
},
{
"epoch": 2.664298742440242,
"grad_norm": 0.006170795151990667,
"learning_rate": 9.339477726574503e-06,
"loss": 0.0003,
"step": 6940
},
{
"epoch": 2.668138619564174,
"grad_norm": 0.0012556712204226065,
"learning_rate": 9.324116743471584e-06,
"loss": 0.0003,
"step": 6950
},
{
"epoch": 2.671978496688106,
"grad_norm": 0.006374796263066127,
"learning_rate": 9.308755760368664e-06,
"loss": 0.0003,
"step": 6960
},
{
"epoch": 2.675818373812038,
"grad_norm": 0.003408128999224669,
"learning_rate": 9.293394777265745e-06,
"loss": 0.0003,
"step": 6970
},
{
"epoch": 2.67965825093597,
"grad_norm": 0.004088069338470728,
"learning_rate": 9.278033794162828e-06,
"loss": 0.0003,
"step": 6980
},
{
"epoch": 2.683498128059902,
"grad_norm": 0.0039123405460172464,
"learning_rate": 9.262672811059909e-06,
"loss": 0.0003,
"step": 6990
},
{
"epoch": 2.6873380051838343,
"grad_norm": 0.0039015005051876795,
"learning_rate": 9.24731182795699e-06,
"loss": 0.0003,
"step": 7000
},
{
"epoch": 2.691177882307766,
"grad_norm": 0.00822944757312854,
"learning_rate": 9.231950844854072e-06,
"loss": 0.0003,
"step": 7010
},
{
"epoch": 2.695017759431698,
"grad_norm": 0.009264902761011475,
"learning_rate": 9.216589861751153e-06,
"loss": 0.0003,
"step": 7020
},
{
"epoch": 2.6988576365556303,
"grad_norm": 0.0027568421905536863,
"learning_rate": 9.201228878648234e-06,
"loss": 0.0003,
"step": 7030
},
{
"epoch": 2.702697513679562,
"grad_norm": 0.007150407401258832,
"learning_rate": 9.185867895545315e-06,
"loss": 0.0003,
"step": 7040
},
{
"epoch": 2.7065373908034944,
"grad_norm": 0.0037931552321473815,
"learning_rate": 9.170506912442398e-06,
"loss": 0.0003,
"step": 7050
},
{
"epoch": 2.7103772679274263,
"grad_norm": 0.0035618752848147433,
"learning_rate": 9.155145929339479e-06,
"loss": 0.0003,
"step": 7060
},
{
"epoch": 2.714217145051358,
"grad_norm": 0.005406421463186957,
"learning_rate": 9.13978494623656e-06,
"loss": 0.0003,
"step": 7070
},
{
"epoch": 2.7180570221752904,
"grad_norm": 0.006311758799823545,
"learning_rate": 9.124423963133642e-06,
"loss": 0.0002,
"step": 7080
},
{
"epoch": 2.7218968992992223,
"grad_norm": 0.00455306940271519,
"learning_rate": 9.109062980030723e-06,
"loss": 0.0003,
"step": 7090
},
{
"epoch": 2.7257367764231546,
"grad_norm": 0.009213759005302373,
"learning_rate": 9.093701996927804e-06,
"loss": 0.0003,
"step": 7100
},
{
"epoch": 2.7295766535470865,
"grad_norm": 0.001955875427936466,
"learning_rate": 9.078341013824885e-06,
"loss": 0.0003,
"step": 7110
},
{
"epoch": 2.7334165306710183,
"grad_norm": 0.002645510266753951,
"learning_rate": 9.062980030721968e-06,
"loss": 0.0003,
"step": 7120
},
{
"epoch": 2.7372564077949506,
"grad_norm": 0.0015492690289872618,
"learning_rate": 9.047619047619049e-06,
"loss": 0.0003,
"step": 7130
},
{
"epoch": 2.7410962849188825,
"grad_norm": 0.004407452957932437,
"learning_rate": 9.03225806451613e-06,
"loss": 0.0003,
"step": 7140
},
{
"epoch": 2.7449361620428148,
"grad_norm": 0.003584874788768233,
"learning_rate": 9.01689708141321e-06,
"loss": 0.0003,
"step": 7150
},
{
"epoch": 2.7487760391667466,
"grad_norm": 0.003426299476431039,
"learning_rate": 9.001536098310293e-06,
"loss": 0.0002,
"step": 7160
},
{
"epoch": 2.7526159162906785,
"grad_norm": 0.003706527902281191,
"learning_rate": 8.986175115207374e-06,
"loss": 0.0003,
"step": 7170
},
{
"epoch": 2.7564557934146108,
"grad_norm": 0.005686639850889155,
"learning_rate": 8.970814132104455e-06,
"loss": 0.0003,
"step": 7180
},
{
"epoch": 2.7602956705385426,
"grad_norm": 0.0052320038014579216,
"learning_rate": 8.955453149001538e-06,
"loss": 0.0002,
"step": 7190
},
{
"epoch": 2.764135547662475,
"grad_norm": 0.0035293581965504047,
"learning_rate": 8.940092165898619e-06,
"loss": 0.0003,
"step": 7200
},
{
"epoch": 2.767975424786407,
"grad_norm": 0.0030584138772079433,
"learning_rate": 8.9247311827957e-06,
"loss": 0.0003,
"step": 7210
},
{
"epoch": 2.7718153019103386,
"grad_norm": 0.001802338947140362,
"learning_rate": 8.90937019969278e-06,
"loss": 0.0003,
"step": 7220
},
{
"epoch": 2.775655179034271,
"grad_norm": 0.003941656074171531,
"learning_rate": 8.894009216589863e-06,
"loss": 0.0003,
"step": 7230
},
{
"epoch": 2.779495056158203,
"grad_norm": 0.007971016203195725,
"learning_rate": 8.878648233486944e-06,
"loss": 0.0003,
"step": 7240
},
{
"epoch": 2.783334933282135,
"grad_norm": 0.0030065159813745896,
"learning_rate": 8.863287250384025e-06,
"loss": 0.0003,
"step": 7250
},
{
"epoch": 2.787174810406067,
"grad_norm": 0.012438848822631438,
"learning_rate": 8.847926267281107e-06,
"loss": 0.0003,
"step": 7260
},
{
"epoch": 2.791014687529999,
"grad_norm": 0.004555632702125586,
"learning_rate": 8.832565284178188e-06,
"loss": 0.0003,
"step": 7270
},
{
"epoch": 2.794854564653931,
"grad_norm": 0.009624526970252073,
"learning_rate": 8.81720430107527e-06,
"loss": 0.0003,
"step": 7280
},
{
"epoch": 2.798694441777863,
"grad_norm": 0.006646400675058001,
"learning_rate": 8.80184331797235e-06,
"loss": 0.0003,
"step": 7290
},
{
"epoch": 2.8025343189017953,
"grad_norm": 0.00547555841349304,
"learning_rate": 8.786482334869433e-06,
"loss": 0.0003,
"step": 7300
},
{
"epoch": 2.806374196025727,
"grad_norm": 0.006365409097436495,
"learning_rate": 8.771121351766514e-06,
"loss": 0.0003,
"step": 7310
},
{
"epoch": 2.810214073149659,
"grad_norm": 0.004289823731985147,
"learning_rate": 8.755760368663595e-06,
"loss": 0.0003,
"step": 7320
},
{
"epoch": 2.8140539502735913,
"grad_norm": 0.0009114031462774198,
"learning_rate": 8.740399385560676e-06,
"loss": 0.0003,
"step": 7330
},
{
"epoch": 2.817893827397523,
"grad_norm": 0.0040925566094191235,
"learning_rate": 8.725038402457758e-06,
"loss": 0.0003,
"step": 7340
},
{
"epoch": 2.8217337045214554,
"grad_norm": 0.004769056547562217,
"learning_rate": 8.70967741935484e-06,
"loss": 0.0003,
"step": 7350
},
{
"epoch": 2.8255735816453873,
"grad_norm": 0.00403037096763482,
"learning_rate": 8.69431643625192e-06,
"loss": 0.0003,
"step": 7360
},
{
"epoch": 2.829413458769319,
"grad_norm": 0.0018637193862899913,
"learning_rate": 8.678955453149003e-06,
"loss": 0.0003,
"step": 7370
},
{
"epoch": 2.8332533358932515,
"grad_norm": 0.003596057132391645,
"learning_rate": 8.663594470046084e-06,
"loss": 0.0002,
"step": 7380
},
{
"epoch": 2.8370932130171833,
"grad_norm": 0.003687354810004547,
"learning_rate": 8.648233486943165e-06,
"loss": 0.0003,
"step": 7390
},
{
"epoch": 2.8409330901411156,
"grad_norm": 0.005906169853780166,
"learning_rate": 8.632872503840246e-06,
"loss": 0.0003,
"step": 7400
},
{
"epoch": 2.8447729672650475,
"grad_norm": 0.008785446912324073,
"learning_rate": 8.617511520737328e-06,
"loss": 0.0003,
"step": 7410
},
{
"epoch": 2.8486128443889793,
"grad_norm": 0.006765639595362392,
"learning_rate": 8.602150537634409e-06,
"loss": 0.0003,
"step": 7420
},
{
"epoch": 2.8524527215129116,
"grad_norm": 0.0034460169241499507,
"learning_rate": 8.58678955453149e-06,
"loss": 0.0003,
"step": 7430
},
{
"epoch": 2.8562925986368435,
"grad_norm": 0.0029012348201393513,
"learning_rate": 8.571428571428571e-06,
"loss": 0.0003,
"step": 7440
},
{
"epoch": 2.860132475760776,
"grad_norm": 0.009176699580615625,
"learning_rate": 8.556067588325654e-06,
"loss": 0.0003,
"step": 7450
},
{
"epoch": 2.8639723528847076,
"grad_norm": 0.007162151782757863,
"learning_rate": 8.540706605222734e-06,
"loss": 0.0002,
"step": 7460
},
{
"epoch": 2.8678122300086395,
"grad_norm": 0.011405641448979225,
"learning_rate": 8.525345622119815e-06,
"loss": 0.0002,
"step": 7470
},
{
"epoch": 2.871652107132572,
"grad_norm": 0.002892641787357458,
"learning_rate": 8.509984639016898e-06,
"loss": 0.0003,
"step": 7480
},
{
"epoch": 2.8754919842565037,
"grad_norm": 0.0045100170495708195,
"learning_rate": 8.494623655913979e-06,
"loss": 0.0003,
"step": 7490
},
{
"epoch": 2.879331861380436,
"grad_norm": 0.0042421348190372936,
"learning_rate": 8.47926267281106e-06,
"loss": 0.0002,
"step": 7500
},
{
"epoch": 2.883171738504368,
"grad_norm": 0.007795804545941071,
"learning_rate": 8.463901689708142e-06,
"loss": 0.0002,
"step": 7510
},
{
"epoch": 2.8870116156282997,
"grad_norm": 0.008298948469422989,
"learning_rate": 8.448540706605223e-06,
"loss": 0.0003,
"step": 7520
},
{
"epoch": 2.890851492752232,
"grad_norm": 0.0031254716702214095,
"learning_rate": 8.433179723502304e-06,
"loss": 0.0003,
"step": 7530
},
{
"epoch": 2.894691369876164,
"grad_norm": 0.005192654900334731,
"learning_rate": 8.417818740399385e-06,
"loss": 0.0003,
"step": 7540
},
{
"epoch": 2.898531247000096,
"grad_norm": 0.01235662584667344,
"learning_rate": 8.402457757296468e-06,
"loss": 0.0003,
"step": 7550
},
{
"epoch": 2.902371124124028,
"grad_norm": 0.00496363081609215,
"learning_rate": 8.387096774193549e-06,
"loss": 0.0003,
"step": 7560
},
{
"epoch": 2.90621100124796,
"grad_norm": 0.0036361713152800973,
"learning_rate": 8.37173579109063e-06,
"loss": 0.0003,
"step": 7570
},
{
"epoch": 2.910050878371892,
"grad_norm": 0.001736163807789765,
"learning_rate": 8.356374807987712e-06,
"loss": 0.0003,
"step": 7580
},
{
"epoch": 2.913890755495824,
"grad_norm": 0.0037050159683279602,
"learning_rate": 8.341013824884793e-06,
"loss": 0.0003,
"step": 7590
},
{
"epoch": 2.9177306326197563,
"grad_norm": 0.0039102919819924975,
"learning_rate": 8.325652841781874e-06,
"loss": 0.0002,
"step": 7600
},
{
"epoch": 2.921570509743688,
"grad_norm": 0.004742691161045984,
"learning_rate": 8.310291858678957e-06,
"loss": 0.0003,
"step": 7610
},
{
"epoch": 2.92541038686762,
"grad_norm": 0.005839965213671288,
"learning_rate": 8.294930875576038e-06,
"loss": 0.0003,
"step": 7620
},
{
"epoch": 2.9292502639915523,
"grad_norm": 0.005078116114432955,
"learning_rate": 8.279569892473119e-06,
"loss": 0.0003,
"step": 7630
},
{
"epoch": 2.933090141115484,
"grad_norm": 0.0047744269458432275,
"learning_rate": 8.264208909370201e-06,
"loss": 0.0003,
"step": 7640
},
{
"epoch": 2.9369300182394165,
"grad_norm": 0.003503766564796104,
"learning_rate": 8.248847926267282e-06,
"loss": 0.0003,
"step": 7650
},
{
"epoch": 2.9407698953633483,
"grad_norm": 0.002919305506009367,
"learning_rate": 8.233486943164363e-06,
"loss": 0.0003,
"step": 7660
},
{
"epoch": 2.94460977248728,
"grad_norm": 0.005234438137723877,
"learning_rate": 8.218125960061446e-06,
"loss": 0.0002,
"step": 7670
},
{
"epoch": 2.9484496496112125,
"grad_norm": 0.0058797401671725905,
"learning_rate": 8.202764976958527e-06,
"loss": 0.0003,
"step": 7680
},
{
"epoch": 2.9522895267351443,
"grad_norm": 0.003378937083777895,
"learning_rate": 8.187403993855608e-06,
"loss": 0.0002,
"step": 7690
},
{
"epoch": 2.9561294038590766,
"grad_norm": 0.0027598025310823916,
"learning_rate": 8.172043010752689e-06,
"loss": 0.0003,
"step": 7700
},
{
"epoch": 2.9599692809830085,
"grad_norm": 0.006302224478544003,
"learning_rate": 8.156682027649771e-06,
"loss": 0.0003,
"step": 7710
},
{
"epoch": 2.9638091581069403,
"grad_norm": 0.005721010444274242,
"learning_rate": 8.141321044546852e-06,
"loss": 0.0003,
"step": 7720
},
{
"epoch": 2.9676490352308726,
"grad_norm": 0.00895258523648075,
"learning_rate": 8.125960061443933e-06,
"loss": 0.0003,
"step": 7730
},
{
"epoch": 2.9714889123548045,
"grad_norm": 0.006250897732901797,
"learning_rate": 8.110599078341016e-06,
"loss": 0.0003,
"step": 7740
},
{
"epoch": 2.975328789478737,
"grad_norm": 0.004710767186934721,
"learning_rate": 8.095238095238097e-06,
"loss": 0.0003,
"step": 7750
},
{
"epoch": 2.9791686666026687,
"grad_norm": 0.004749110615282267,
"learning_rate": 8.079877112135177e-06,
"loss": 0.0003,
"step": 7760
},
{
"epoch": 2.9830085437266005,
"grad_norm": 0.0024146312389459854,
"learning_rate": 8.064516129032258e-06,
"loss": 0.0003,
"step": 7770
},
{
"epoch": 2.986848420850533,
"grad_norm": 0.0017491812102154464,
"learning_rate": 8.049155145929341e-06,
"loss": 0.0002,
"step": 7780
},
{
"epoch": 2.9906882979744647,
"grad_norm": 0.008039358375107466,
"learning_rate": 8.033794162826422e-06,
"loss": 0.0003,
"step": 7790
},
{
"epoch": 2.994528175098397,
"grad_norm": 0.003445401481616659,
"learning_rate": 8.018433179723503e-06,
"loss": 0.0003,
"step": 7800
},
{
"epoch": 2.998368052222329,
"grad_norm": 0.004124288268774834,
"learning_rate": 8.003072196620585e-06,
"loss": 0.0003,
"step": 7810
},
{
"epoch": 3.001919938561966,
"grad_norm": 0.002674977868985105,
"learning_rate": 7.987711213517666e-06,
"loss": 0.0002,
"step": 7820
},
{
"epoch": 3.0057598156858982,
"grad_norm": 0.0050648363913657094,
"learning_rate": 7.972350230414747e-06,
"loss": 0.0003,
"step": 7830
},
{
"epoch": 3.00959969280983,
"grad_norm": 0.0007924830296134951,
"learning_rate": 7.956989247311828e-06,
"loss": 0.0003,
"step": 7840
},
{
"epoch": 3.013439569933762,
"grad_norm": 0.001913560970676822,
"learning_rate": 7.941628264208911e-06,
"loss": 0.0003,
"step": 7850
},
{
"epoch": 3.0172794470576942,
"grad_norm": 0.0031764216786887415,
"learning_rate": 7.926267281105992e-06,
"loss": 0.0002,
"step": 7860
},
{
"epoch": 3.021119324181626,
"grad_norm": 0.00509301739025399,
"learning_rate": 7.910906298003073e-06,
"loss": 0.0003,
"step": 7870
},
{
"epoch": 3.0249592013055584,
"grad_norm": 0.004796585886393999,
"learning_rate": 7.895545314900154e-06,
"loss": 0.0003,
"step": 7880
},
{
"epoch": 3.0287990784294903,
"grad_norm": 0.006878984874609061,
"learning_rate": 7.880184331797236e-06,
"loss": 0.0003,
"step": 7890
},
{
"epoch": 3.032638955553422,
"grad_norm": 0.0028825313800368113,
"learning_rate": 7.864823348694317e-06,
"loss": 0.0003,
"step": 7900
},
{
"epoch": 3.0364788326773544,
"grad_norm": 0.008888757116371175,
"learning_rate": 7.849462365591398e-06,
"loss": 0.0002,
"step": 7910
},
{
"epoch": 3.0403187098012863,
"grad_norm": 0.011189685195352432,
"learning_rate": 7.83410138248848e-06,
"loss": 0.0003,
"step": 7920
},
{
"epoch": 3.0441585869252186,
"grad_norm": 0.004445925224066104,
"learning_rate": 7.818740399385562e-06,
"loss": 0.0003,
"step": 7930
},
{
"epoch": 3.0479984640491504,
"grad_norm": 0.005046704426390252,
"learning_rate": 7.803379416282643e-06,
"loss": 0.0002,
"step": 7940
},
{
"epoch": 3.0518383411730823,
"grad_norm": 0.00325039934600695,
"learning_rate": 7.788018433179724e-06,
"loss": 0.0002,
"step": 7950
},
{
"epoch": 3.0556782182970146,
"grad_norm": 0.009122195080615519,
"learning_rate": 7.772657450076806e-06,
"loss": 0.0002,
"step": 7960
},
{
"epoch": 3.0595180954209464,
"grad_norm": 0.0035357373556863343,
"learning_rate": 7.757296466973887e-06,
"loss": 0.0002,
"step": 7970
},
{
"epoch": 3.0633579725448787,
"grad_norm": 0.0030570656872309174,
"learning_rate": 7.741935483870968e-06,
"loss": 0.0003,
"step": 7980
},
{
"epoch": 3.0671978496688106,
"grad_norm": 0.0021996646452019718,
"learning_rate": 7.726574500768049e-06,
"loss": 0.0003,
"step": 7990
},
{
"epoch": 3.0710377267927425,
"grad_norm": 0.0018882278920401504,
"learning_rate": 7.711213517665132e-06,
"loss": 0.0003,
"step": 8000
}
],
"logging_steps": 10,
"max_steps": 13020,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.7534171666710528e+16,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}